query_guard 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +89 -1
- data/DESIGN.md +420 -0
- data/INDEX.md +309 -0
- data/README.md +579 -30
- data/exe/queryguard +23 -0
- data/lib/query_guard/action_controller_subscriber.rb +27 -0
- data/lib/query_guard/analysis/query_risk_classifier.rb +124 -0
- data/lib/query_guard/analysis/risk_detectors.rb +258 -0
- data/lib/query_guard/analysis/risk_level.rb +35 -0
- data/lib/query_guard/analyzers/base.rb +30 -0
- data/lib/query_guard/analyzers/query_count_analyzer.rb +31 -0
- data/lib/query_guard/analyzers/query_risk_analyzer.rb +146 -0
- data/lib/query_guard/analyzers/registry.rb +57 -0
- data/lib/query_guard/analyzers/select_star_analyzer.rb +42 -0
- data/lib/query_guard/analyzers/slow_query_analyzer.rb +39 -0
- data/lib/query_guard/budget.rb +148 -0
- data/lib/query_guard/cli/batch_report_formatter.rb +129 -0
- data/lib/query_guard/cli/command.rb +93 -0
- data/lib/query_guard/cli/commands/analyze.rb +52 -0
- data/lib/query_guard/cli/commands/check.rb +58 -0
- data/lib/query_guard/cli/formatter.rb +278 -0
- data/lib/query_guard/cli/json_reporter.rb +247 -0
- data/lib/query_guard/cli/paged_report_formatter.rb +137 -0
- data/lib/query_guard/cli/source_metadata_collector.rb +297 -0
- data/lib/query_guard/cli.rb +197 -0
- data/lib/query_guard/client.rb +4 -6
- data/lib/query_guard/config.rb +145 -6
- data/lib/query_guard/core/context.rb +80 -0
- data/lib/query_guard/core/finding.rb +162 -0
- data/lib/query_guard/core/finding_builders.rb +152 -0
- data/lib/query_guard/core/query.rb +40 -0
- data/lib/query_guard/explain/adapter_interface.rb +89 -0
- data/lib/query_guard/explain/explain_enricher.rb +367 -0
- data/lib/query_guard/explain/plan_signals.rb +385 -0
- data/lib/query_guard/explain/postgresql_adapter.rb +208 -0
- data/lib/query_guard/exporter.rb +124 -0
- data/lib/query_guard/fingerprint.rb +96 -0
- data/lib/query_guard/middleware.rb +101 -15
- data/lib/query_guard/migrations/database_adapter.rb +88 -0
- data/lib/query_guard/migrations/migration_analyzer.rb +100 -0
- data/lib/query_guard/migrations/migration_risk_detectors.rb +287 -0
- data/lib/query_guard/migrations/postgresql_adapter.rb +157 -0
- data/lib/query_guard/migrations/table_risk_analyzer.rb +154 -0
- data/lib/query_guard/migrations/table_size_resolver.rb +152 -0
- data/lib/query_guard/publish.rb +38 -0
- data/lib/query_guard/rspec.rb +119 -0
- data/lib/query_guard/security.rb +99 -0
- data/lib/query_guard/store.rb +38 -0
- data/lib/query_guard/subscriber.rb +46 -15
- data/lib/query_guard/suggest/index_suggester.rb +176 -0
- data/lib/query_guard/suggest/pattern_extractors.rb +137 -0
- data/lib/query_guard/trace.rb +106 -0
- data/lib/query_guard/uploader/http_uploader.rb +166 -0
- data/lib/query_guard/uploader/interface.rb +79 -0
- data/lib/query_guard/uploader/no_op_uploader.rb +46 -0
- data/lib/query_guard/uploader/registry.rb +37 -0
- data/lib/query_guard/uploader/upload_service.rb +80 -0
- data/lib/query_guard/version.rb +1 -1
- data/lib/query_guard.rb +54 -7
- metadata +78 -10
- data/.rspec +0 -3
- data/Rakefile +0 -21
- data/config/initializers/query_guard.rb +0 -9
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module QueryGuard
|
|
4
|
+
module Explain
|
|
5
|
+
# Data structures for query plan signals extracted from EXPLAIN output.
|
|
6
|
+
# These immutable objects represent actionable insights from actual query plans.
|
|
7
|
+
|
|
8
|
+
# Represents a single node in the query plan tree
|
|
9
|
+
# Represents a single node in the query plan tree produced by
# PostgreSQL's EXPLAIN (FORMAT JSON). Wraps one raw plan hash and exposes
# typed readers plus recursive tree-traversal helpers.
class PlanNode
  attr_reader :node_type, :relation_name, :depth, :estimated_rows,
              :estimated_cost, :actual_rows, :actual_duration_ms,
              :children, :filter, :index_name, :sort_key

  # @param data [Hash] one plan-node hash from EXPLAIN (FORMAT JSON)
  # @param depth [Integer] nesting depth within the plan tree (root = 0)
  def initialize(data, depth = 0)
    @raw_data = data
    @node_type = data["Node Type"] || "Unknown" # Seq Scan, Index Scan, etc.
    @relation_name = data["Relation Name"] # users, orders, etc.
    @index_name = data["Index Name"] # idx_users_email, etc.
    @depth = depth

    # Estimated metrics (from planner). PostgreSQL emits the planner's row
    # estimate under "Plan Rows" — the old "Estimated Rows" key does not
    # exist in EXPLAIN (FORMAT JSON) output and always yielded 0. It is
    # kept as a fallback for pre-normalized input.
    @estimated_rows = data["Plan Rows"] || data["Estimated Rows"] || 0
    @estimated_cost = data["Total Cost"] || 0.0

    # Actual metrics (present only when EXPLAIN ran with ANALYZE)
    @actual_rows = data["Actual Rows"]
    @actual_duration_ms = data["Actual Total Time"]

    # Plan details
    @filter = data["Filter"] # WHERE condition
    @sort_key = data["Sort Key"] # ORDER BY clause

    # Recursively wrap child plans
    child_plans = data["Plans"] || []
    @children = child_plans.map { |child| PlanNode.new(child, depth + 1) }
  end

  # Check if this node performs a sequential scan
  #
  # @return [Boolean]
  def sequential_scan?
    node_type == "Seq Scan"
  end

  # Check if this node uses an index
  #
  # @return [Boolean]
  def index_scan?
    node_type.include?("Index")
  end

  # Find all sequential scans in this subtree (including self)
  #
  # @return [Array<PlanNode>]
  def sequential_scans
    scans = sequential_scan? ? [self] : []
    children.each { |child| scans.concat(child.sequential_scans) }
    scans
  end

  # Find all nodes of the given type in this subtree (including self)
  #
  # @param type [String] Node type to search for
  # @return [Array<PlanNode>]
  def nodes_of_type(type)
    nodes = node_type == type ? [self] : []
    children.each { |child| nodes.concat(child.nodes_of_type(type)) }
    nodes
  end

  # Estimate quality: ratio of actual vs estimated rows.
  # Returns nil when there is no actual execution data (no ANALYZE)
  # or the estimate was zero.
  #
  # @return [Float, nil] actual_rows / estimated_rows (or nil)
  def estimate_accuracy_ratio
    return nil if actual_rows.nil? || estimated_rows.zero?

    (actual_rows.to_f / estimated_rows).round(2)
  end

  # Check if estimates were wildly inaccurate — off by more than
  # `threshold` times in either direction.
  #
  # @param threshold [Numeric] How many times off is "bad"? (default 10x)
  # @return [Boolean]
  def estimate_inaccurate?(threshold = 10)
    ratio = estimate_accuracy_ratio
    return false if ratio.nil?

    ratio > threshold || ratio < (1.0 / threshold)
  end

  # Human-readable node description
  #
  # @return [String]
  def to_s
    parts = [node_type]
    parts << "on #{relation_name}" if relation_name
    parts << "using #{index_name}" if index_name
    parts.join(" ")
  end

  # Convert to hash for serialization.
  # NOTE(review): :estimate_friendly looks like a typo for
  # :estimate_accuracy, but the key is kept for wire compatibility.
  #
  # @return [Hash]
  def to_h
    {
      node_type: node_type,
      relation_name: relation_name,
      index_name: index_name,
      depth: depth,
      estimated_rows: estimated_rows,
      estimated_cost: estimated_cost,
      actual_rows: actual_rows,
      actual_duration_ms: actual_duration_ms,
      is_sequential_scan: sequential_scan?,
      is_index_scan: index_scan?,
      estimate_friendly: estimate_accuracy_ratio,
      children: children.map(&:to_h)
    }
  end
end
|
|
122
|
+
|
|
123
|
+
# Complete query plan and extracted signals
|
|
124
|
+
# Complete query plan: top-level timing metrics plus the wrapped node tree.
class QueryPlan
  attr_reader :root_node, :planning_time_ms, :execution_time_ms, :triggers

  # @param explain_output [Hash, Array<Hash>, nil] parsed EXPLAIN
  #   (FORMAT JSON) output. PostgreSQL wraps the plan object in a
  #   one-element array; both the bare hash and the array form are
  #   accepted (the old code called String-keyed lookups on the Array,
  #   which raised TypeError).
  def initialize(explain_output)
    # Unwrap [{ "Plan" => ... }] and tolerate nil/empty input.
    explain_output = explain_output.first if explain_output.is_a?(Array)
    explain_output ||= {}
    @raw_output = explain_output

    # Top-level metrics
    @planning_time_ms = explain_output["Planning Time"] || 0
    @execution_time_ms = explain_output["Execution Time"] || 0
    @triggers = explain_output["Triggers"] || []

    # Root plan node
    @root_node = PlanNode.new(explain_output["Plan"] || {})
  end

  # Extract all sequential scans from the entire plan tree
  #
  # @return [Array<PlanNode>]
  def sequential_scans
    root_node.sequential_scans
  end

  # Find sequential scans over a specific relation
  #
  # @param table_name [String] Table to find scans for
  # @return [Array<PlanNode>]
  def scans_for_table(table_name)
    root_node.nodes_of_type("Seq Scan")
             .select { |node| node.relation_name == table_name }
  end

  # Check if the plan uses any index-based access method
  #
  # @return [Boolean]
  def uses_indexes?
    root_node.nodes_of_type("Index Scan").any? ||
      root_node.nodes_of_type("Bitmap Index Scan").any? ||
      root_node.nodes_of_type("Index Only Scan").any?
  end

  # Total estimated cost of the query (root node's total cost)
  #
  # @return [Float]
  def total_estimated_cost
    root_node.estimated_cost
  end

  # Convert to hash for serialization
  #
  # @return [Hash]
  def to_h
    {
      planning_time_ms: planning_time_ms,
      execution_time_ms: execution_time_ms,
      total_estimated_cost: total_estimated_cost,
      has_sequential_scans: sequential_scans.any?,
      sequential_scan_count: sequential_scans.length,
      uses_indexes: uses_indexes?,
      root_node: root_node.to_h
    }
  end
end
|
|
186
|
+
|
|
187
|
+
# Actionable signals extracted from query plan
|
|
188
|
+
# Actionable signals extracted from a query plan. Each signal is a Hash
# with :type, :severity, :message, :recommendation and type-specific keys.
class PlanSignals
  attr_reader :plan, :signals, :extracted_at

  # @param plan [QueryPlan] plan to extract signals from
  def initialize(plan)
    @plan = plan
    @extracted_at = Time.now
    @signals = extract_signals
  end

  # All extracted signals as an array
  #
  # @return [Array<Hash>]
  def to_a
    signals
  end

  # Find signals of a specific type
  #
  # @param type [Symbol] Signal type
  # @return [Array<Hash>]
  def signals_of_type(type)
    signals.select { |s| s[:type] == type }
  end

  # Get signals of the given severity
  #
  # @param severity [Symbol] :high, :medium, :low
  # @return [Array<Hash>]
  def critical_signals(severity = :high)
    signals.select { |s| s[:severity] == severity }
  end

  # Convert to hash for serialization
  #
  # @return [Hash]
  def to_h
    {
      plan: plan.to_h,
      signals: signals,
      extracted_at: extracted_at
    }
  end

  private

  # Build the signal list. The plan tree is flattened once up front
  # (the old code re-walked it for every heuristic); signal emission
  # order is unchanged.
  def extract_signals
    signals = []
    all_nodes = walk_plan(plan.root_node)

    # Signal: Sequential scans on large tables
    sequential_scans.each do |scan|
      signals << {
        type: :sequential_scan,
        severity: :high,
        table: scan.relation_name,
        estimated_rows: scan.estimated_rows,
        message: "Sequential scan on #{scan.relation_name} (#{scan.estimated_rows} estimated rows)",
        recommendation: "Consider adding an index to support query predicates"
      }
    end

    # Signal: Missing indexes (likely). A Seq Scan carrying a Filter means
    # the predicate is evaluated row-by-row without index support.
    # (The old `scans_for_table(...).any? { index_scan? }` guard was dead:
    # scans_for_table only ever returns Seq Scan nodes.)
    plan.root_node.nodes_of_type("Seq Scan").each do |scan|
      next unless scan.filter

      signals << {
        type: :likely_missing_index,
        severity: :high,
        table: scan.relation_name,
        filter: scan.filter,
        message: "Sequential scan with filter on #{scan.relation_name}",
        recommendation: "Analyze columns in filter condition for index opportunities"
      }
    end

    # Signal: Inaccurate estimate (requires ANALYZE data on the node)
    all_nodes.each do |node|
      next unless node.estimate_inaccurate?

      ratio = node.estimate_accuracy_ratio
      signals << {
        type: :estimate_inaccuracy,
        severity: :medium,
        node: node.to_s,
        ratio: ratio,
        # ratio is actual/estimated, so report it as a multiplier; the old
        # "off by #{ratio * 100}%" phrasing was mathematically wrong
        # (a perfectly accurate ratio of 1.0 read as "off by 100%").
        message: "Estimate off by #{ratio}x (estimated: #{node.estimated_rows}, actual: #{node.actual_rows})",
        recommendation: "Consider running ANALYZE on table statistics"
      }
    end

    # Signal: High cost query
    if plan.total_estimated_cost > 10000
      signals << {
        type: :high_estimated_cost,
        severity: :medium,
        cost: plan.total_estimated_cost,
        message: "Query has high estimated cost: #{plan.total_estimated_cost.round(2)}",
        recommendation: "Review query structure and indexes"
      }
    end

    # Signal: Nested loop joins (row-by-row inner lookups; DB-side N+1)
    all_nodes.each do |node|
      next unless nested_loop_join?(node)

      inner_relation = find_inner_scan(node)
      signals << {
        type: :nested_loop_join,
        severity: :medium,
        message: "Nested loop join detected#{inner_relation ? " with #{inner_relation}" : ""}",
        recommendation: "Consider adding indexes on inner table join columns or using hash join with increased work_mem",
        inner_table: inner_relation
      }
    end

    # Signal: High planning time (complex joins/CTEs or stale statistics)
    if plan.planning_time_ms > 100
      signals << {
        type: :high_planning_time,
        severity: :low,
        planning_time_ms: plan.planning_time_ms,
        message: "High planning time: #{plan.planning_time_ms.round(2)}ms",
        recommendation: "Query planner took significant time; check for complex joins, CTEs, or stale statistics"
      }
    end

    # Signal: Sort operations on large result sets
    all_nodes.each do |node|
      next unless node.sort_key && node.estimated_rows > 1000

      signals << {
        type: :expensive_sort,
        severity: :medium,
        estimated_rows: node.estimated_rows,
        message: "Sorting large result set (#{node.estimated_rows} estimated rows) on #{node.sort_key}",
        recommendation: "Verify sort is necessary; consider index-backed ordering or pagination"
      }
    end

    # Signal: Bitmap scans (may indicate a missing composite index)
    all_nodes.each do |node|
      next unless bitmap_scan?(node)

      signals << {
        type: :bitmap_scan,
        severity: :low,
        table: node.relation_name,
        message: "Bitmap index scan on #{node.relation_name}",
        recommendation: "Confirm index choice is appropriate; may indicate need for composite index"
      }
    end

    signals
  end

  # Delegate to the plan's own tree search
  def sequential_scans
    plan.sequential_scans
  end

  # Flatten the plan tree into a pre-order array of nodes
  def walk_plan(node)
    nodes = [node]
    node.children.each { |child| nodes.concat(walk_plan(child)) }
    nodes
  end

  # Check if node is a nested loop join
  #
  # @param node [PlanNode] Node to check
  # @return [Boolean]
  def nested_loop_join?(node)
    node.node_type == "Nested Loop"
  end

  # Check if node is a bitmap scan
  #
  # @param node [PlanNode] Node to check
  # @return [Boolean]
  def bitmap_scan?(node)
    node.node_type.include?("Bitmap")
  end

  # Find the inner scan of a nested loop (conventionally the last child)
  #
  # @param node [PlanNode] Nested loop node
  # @return [String, nil] Table name of inner scan
  def find_inner_scan(node)
    return nil if node.children.empty?

    # The last child is typically the inner side of the loop
    inner_child = node.children.last
    scan_node = walk_plan(inner_child).find do |n|
      n.sequential_scan? || n.index_scan?
    end
    scan_node&.relation_name
  end
end
|
|
384
|
+
end
|
|
385
|
+
end
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
require "timeout"
|
|
4
|
+
|
|
5
|
+
module QueryGuard
|
|
6
|
+
module Explain
|
|
7
|
+
# PostgreSQL-specific EXPLAIN plan adapter.
|
|
8
|
+
# Executes EXPLAIN (FORMAT JSON, ANALYZE) for PostgreSQL queries.
|
|
9
|
+
#
|
|
10
|
+
# Features:
|
|
11
|
+
# - Safely executes EXPLAIN with ANALYZE for actual metrics
|
|
12
|
+
# - Returns well-structured JSON plan data
|
|
13
|
+
# - Fails gracefully if connection unavailable
|
|
14
|
+
# - Filters out unsafe query patterns
|
|
15
|
+
# - Configurable timeout and ANALYZE flag
|
|
16
|
+
#
|
|
17
|
+
# Example:
|
|
18
|
+
# adapter = PostgreSQLAdapter.new(ActiveRecord::Base.connection)
|
|
19
|
+
# plan = adapter.get_plan("SELECT * FROM users WHERE status = 'active'")
|
|
20
|
+
#
|
|
21
|
+
# Note: Requires postgres adapter (pg gem) with access to query execution
|
|
22
|
+
class PostgreSQLAdapter < AdapterInterface
  DEFAULT_EXPLAIN_TIMEOUT = 5.0 # seconds

  # Initialize PostgreSQL adapter
  #
  # @param connection [PG::Connection, ActiveRecord adapter] PostgreSQL connection
  # @param options [Hash] Configuration options
  # @option options [Float] :timeout Explain query timeout (default: 5.0s)
  # @option options [Boolean] :use_analyze Whether to use ANALYZE (default: false for safety)
  # @option options [Logger] :logger Optional logger instance for debugging
  # @option options [Boolean] :validate_connection Check connection is valid on init (default: true)
  def initialize(connection, options = {})
    super(connection)
    @timeout = options[:timeout] || DEFAULT_EXPLAIN_TIMEOUT
    @use_analyze = options.fetch(:use_analyze, false)
    @logger = options[:logger]
    @validate_connection = options.fetch(:validate_connection, true)

    validate_connection! if @validate_connection
  end

  # Execute EXPLAIN and return the parsed plan object.
  #
  # @param sql [String] SQL query to analyze
  # @param options [Hash] Override default options (:use_analyze)
  # @return [Hash] Parsed EXPLAIN plan JSON (the single plan object)
  # @raise [UnsupportedQueryError] If the statement type can't be explained
  # @raise [PlanParseError] If EXPLAIN output isn't valid JSON
  # @raise [TimeoutError] If EXPLAIN exceeded the configured timeout
  # @raise [AdapterError] For any other execution failure
  def get_plan(sql, options = {})
    unless can_explain?(sql)
      error_msg = "Cannot EXPLAIN this query type: #{sql.strip[0..50]}..."
      log_warn(error_msg)
      raise UnsupportedQueryError, error_msg
    end

    use_analyze = options.fetch(:use_analyze, @use_analyze)
    explain_sql = build_explain_query(sql, use_analyze)

    log_debug("Executing EXPLAIN query", use_analyze: use_analyze)
    plan_json = execute_explain(explain_sql)
    parsed = JSON.parse(plan_json)
    # EXPLAIN (FORMAT JSON) wraps the plan in a one-element array;
    # unwrap it so the documented Hash return type actually holds.
    parsed.is_a?(Array) ? parsed.first : parsed
  rescue JSON::ParserError => e
    error_msg = "Failed to parse EXPLAIN output: #{e.message}"
    log_warn(error_msg)
    raise PlanParseError, error_msg
  rescue Timeout::Error
    error_msg = "EXPLAIN query timed out after #{@timeout}s"
    log_warn(error_msg)
    raise TimeoutError, error_msg
  rescue UnsupportedQueryError, PlanParseError, TimeoutError, ConnectionError, AdapterError
    # Let our own documented error classes propagate untouched; the old
    # blanket `rescue StandardError` re-wrapped them (including the
    # UnsupportedQueryError raised above) as generic AdapterError.
    raise
  rescue StandardError => e
    log_warn("EXPLAIN execution failed: #{e.message}")
    raise AdapterError, "EXPLAIN execution failed: #{e.message}"
  end

  # Check if query can be safely explained
  #
  # @param sql [String] SQL query
  # @return [Boolean] True if query is safe to EXPLAIN
  def can_explain?(sql)
    normalized = sql.strip.upcase
    # Only EXPLAIN SELECT, WITH (CTE), and simple UPDATE/DELETE
    return false if normalized.start_with?("PRAGMA", "BEGIN", "COMMIT")
    return false if normalized.start_with?("DROP", "ALTER", "CREATE", "TRUNCATE")
    return false if normalized.include?("RETURNING") && !normalized.start_with?("SELECT")

    true
  end

  # Engine identifier
  #
  # @return [Symbol]
  def engine_name
    :postgresql
  end

  # Get version of PostgreSQL server
  #
  # NOTE(review): for ActiveRecord connections `execute` returns a
  # PG::Result whose rows are Hashes, so `.first.first` yields a
  # ["version", "..."] pair rather than the bare string — confirm the
  # expected return shape against callers.
  #
  # @return [String] PostgreSQL version
  def server_version
    execute_query("SELECT version()").first.first
  rescue StandardError => e
    raise ConnectionError, "Cannot connect to PostgreSQL: #{e.message}"
  end

  private

  # Assemble the EXPLAIN statement. ANALYZE (plus BUFFERS) actually runs
  # the query for real metrics, so it is opt-in; plain EXPLAIN is the
  # safe default.
  def build_explain_query(sql, use_analyze)
    options = "FORMAT JSON"
    options += ", ANALYZE" if use_analyze
    options += ", BUFFERS" if use_analyze # useful with ANALYZE

    "EXPLAIN (#{options}) #{sql}"
  end

  # Run the EXPLAIN statement and return the raw JSON text.
  def execute_explain(explain_sql)
    result = execute_query_with_timeout(explain_sql)
    # PostgreSQL returns a single row whose first column holds the
    # full plan as JSON text.
    plan_or_nil = result.first&.first
    plan_or_nil || raise(AdapterError, "EXPLAIN returned empty result")
  rescue Timeout::Error
    raise TimeoutError, "EXPLAIN query timed out after #{@timeout}s"
  rescue AdapterError
    raise
  rescue StandardError => e
    # `raise Class, msg, extra` treats the third argument as a backtrace;
    # the old `original_error: e` hash raised TypeError and masked the
    # real failure. Ruby chains the original via Exception#cause anyway.
    raise AdapterError, "Failed to execute EXPLAIN: #{e.message}"
  end

  # Dispatch on connection type.
  # NOTE(review): despite the name, @timeout is not enforced here —
  # consider `SET LOCAL statement_timeout` or Timeout.timeout.
  def execute_query_with_timeout(sql)
    if connection.respond_to?(:execute)
      # ActiveRecord adapter (most common)
      execute_activerecord(sql)
    elsif connection.respond_to?(:query)
      # pg gem raw connection
      execute_pg_gem(sql)
    else
      raise ConnectionError, "Cannot determine connection type for EXPLAIN"
    end
  end

  # Execute via ActiveRecord (handles connection pooling automatically).
  def execute_activerecord(sql)
    result = connection.execute(sql)

    # ActiveRecord result sets have different formats per adapter
    if result.respond_to?(:rows)
      # Most adapters
      result.rows
    elsif result.respond_to?(:first)
      # Compatibility with some adapters
      [result]
    else
      raise AdapterError, "Unable to read EXPLAIN result"
    end
  end

  # Execute via a raw pg gem connection (rare in Rails, kept for
  # completeness).
  def execute_pg_gem(sql)
    result = connection.query(sql)
    result.map(&:values)
  rescue PG::Error => e
    raise AdapterError, "PostgreSQL query failed: #{e.message}"
  end

  # Low-level query dispatch used by server_version.
  def execute_query(sql)
    if connection.respond_to?(:execute)
      connection.execute(sql)
    elsif connection.respond_to?(:query)
      connection.query(sql)
    else
      raise ConnectionError, "Cannot execute query with this connection"
    end
  end

  # Probe the connection by fetching the server version; wraps any
  # failure in ConnectionError.
  def validate_connection!
    server_version
    log_debug("PostgreSQL connection validated")
  rescue StandardError => e
    connection_error = "Cannot validate PostgreSQL connection: #{e.message}"
    log_warn(connection_error)
    raise ConnectionError, connection_error
  end

  def log_debug(message, **context)
    return unless @logger

    context_str = context.empty? ? "" : " (#{context.to_s})"
    @logger.debug("[QueryGuard::PostgreSQLAdapter] #{message}#{context_str}")
  end

  def log_warn(message)
    return unless @logger

    @logger.warn("[QueryGuard::PostgreSQLAdapter] #{message}")
  end
end
|
|
207
|
+
end
|
|
208
|
+
end
|