query_guard 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +89 -1
  3. data/DESIGN.md +420 -0
  4. data/INDEX.md +309 -0
  5. data/README.md +579 -30
  6. data/exe/queryguard +23 -0
  7. data/lib/query_guard/action_controller_subscriber.rb +27 -0
  8. data/lib/query_guard/analysis/query_risk_classifier.rb +124 -0
  9. data/lib/query_guard/analysis/risk_detectors.rb +258 -0
  10. data/lib/query_guard/analysis/risk_level.rb +35 -0
  11. data/lib/query_guard/analyzers/base.rb +30 -0
  12. data/lib/query_guard/analyzers/query_count_analyzer.rb +31 -0
  13. data/lib/query_guard/analyzers/query_risk_analyzer.rb +146 -0
  14. data/lib/query_guard/analyzers/registry.rb +57 -0
  15. data/lib/query_guard/analyzers/select_star_analyzer.rb +42 -0
  16. data/lib/query_guard/analyzers/slow_query_analyzer.rb +39 -0
  17. data/lib/query_guard/budget.rb +148 -0
  18. data/lib/query_guard/cli/batch_report_formatter.rb +129 -0
  19. data/lib/query_guard/cli/command.rb +93 -0
  20. data/lib/query_guard/cli/commands/analyze.rb +52 -0
  21. data/lib/query_guard/cli/commands/check.rb +58 -0
  22. data/lib/query_guard/cli/formatter.rb +278 -0
  23. data/lib/query_guard/cli/json_reporter.rb +247 -0
  24. data/lib/query_guard/cli/paged_report_formatter.rb +137 -0
  25. data/lib/query_guard/cli/source_metadata_collector.rb +297 -0
  26. data/lib/query_guard/cli.rb +197 -0
  27. data/lib/query_guard/client.rb +4 -6
  28. data/lib/query_guard/config.rb +145 -6
  29. data/lib/query_guard/core/context.rb +80 -0
  30. data/lib/query_guard/core/finding.rb +162 -0
  31. data/lib/query_guard/core/finding_builders.rb +152 -0
  32. data/lib/query_guard/core/query.rb +40 -0
  33. data/lib/query_guard/explain/adapter_interface.rb +89 -0
  34. data/lib/query_guard/explain/explain_enricher.rb +367 -0
  35. data/lib/query_guard/explain/plan_signals.rb +385 -0
  36. data/lib/query_guard/explain/postgresql_adapter.rb +208 -0
  37. data/lib/query_guard/exporter.rb +124 -0
  38. data/lib/query_guard/fingerprint.rb +96 -0
  39. data/lib/query_guard/middleware.rb +101 -15
  40. data/lib/query_guard/migrations/database_adapter.rb +88 -0
  41. data/lib/query_guard/migrations/migration_analyzer.rb +100 -0
  42. data/lib/query_guard/migrations/migration_risk_detectors.rb +390 -0
  43. data/lib/query_guard/migrations/postgresql_adapter.rb +157 -0
  44. data/lib/query_guard/migrations/table_risk_analyzer.rb +154 -0
  45. data/lib/query_guard/migrations/table_size_resolver.rb +152 -0
  46. data/lib/query_guard/publish.rb +38 -0
  47. data/lib/query_guard/rspec.rb +119 -0
  48. data/lib/query_guard/security.rb +99 -0
  49. data/lib/query_guard/store.rb +38 -0
  50. data/lib/query_guard/subscriber.rb +46 -15
  51. data/lib/query_guard/suggest/index_suggester.rb +176 -0
  52. data/lib/query_guard/suggest/pattern_extractors.rb +137 -0
  53. data/lib/query_guard/trace.rb +106 -0
  54. data/lib/query_guard/uploader/http_uploader.rb +166 -0
  55. data/lib/query_guard/uploader/interface.rb +79 -0
  56. data/lib/query_guard/uploader/no_op_uploader.rb +46 -0
  57. data/lib/query_guard/uploader/registry.rb +37 -0
  58. data/lib/query_guard/uploader/upload_service.rb +80 -0
  59. data/lib/query_guard/version.rb +1 -1
  60. data/lib/query_guard.rb +54 -7
  61. metadata +78 -10
  62. data/.rspec +0 -3
  63. data/Rakefile +0 -21
  64. data/config/initializers/query_guard.rb +0 -9
@@ -0,0 +1,367 @@
# frozen_string_literal: true

module QueryGuard
  module Explain
    # Converts EXPLAIN signals into QueryGuard Finding objects.
    # Enriches existing risk findings with actual plan data and index suggestions.
    #
    # Example:
    #   adapter = PostgreSQLAdapter.new(connection)
    #   enricher = ExplainEnricher.new(adapter)
    #   findings = enricher.enrich(existing_findings, query)
    class ExplainEnricher
      # Values stored under +metadata[:source]+ of the findings built here.
      EXPLAIN_SOURCE = "PostgreSQL EXPLAIN"
      EXPLAIN_ANALYZE_SOURCE = "PostgreSQL EXPLAIN ANALYZE"

      # Maps a signal's +:type+ to the private method that turns it into a
      # finding. Signal types missing from this table produce no finding.
      SIGNAL_HANDLERS = {
        sequential_scan: :create_sequential_scan_finding,
        likely_missing_index: :create_missing_index_finding,
        estimate_inaccuracy: :create_estimate_inaccuracy_finding,
        high_estimated_cost: :create_high_cost_finding,
        nested_loop_join: :create_nested_loop_finding,
        high_planning_time: :create_planning_time_finding,
        expensive_sort: :create_expensive_sort_finding,
        bitmap_scan: :create_bitmap_scan_finding
      }.freeze

      private_constant :EXPLAIN_SOURCE, :EXPLAIN_ANALYZE_SOURCE, :SIGNAL_HANDLERS

      # @param adapter [#can_explain?, #get_plan] Object used to decide whether a
      #   SQL string can be explained and to fetch its plan
      # @param config [Hash, nil] Options; only +:logger+ is read by this class.
      #   +nil+ is treated as an empty hash.
      def initialize(adapter, config = {})
        @adapter = adapter
        @config = config || {}
        @logger = @config[:logger]
        @index_suggester = Suggest::IndexSuggester.new
      end

      # Enrich findings with EXPLAIN analysis
      #
      # Errors raised while fetching or interpreting the plan are logged and
      # swallowed so the caller still gets the original findings back. Errors
      # raised by the adapter's +can_explain?+ check are not rescued here.
      #
      # @param findings [Array<Core::Finding>] Existing findings from risk analyzers
      # @param query [Core::Query] The query to analyze
      # @param context [Core::Context, nil] Request context
      # @return [Array<Core::Finding>] Original + new findings from EXPLAIN;
      #   +findings+ is returned untouched when it is nil, when +query+ is nil,
      #   or when the query cannot be explained
      def enrich(findings, query, context = nil)
        return findings if findings.nil? || query.nil?
        return findings unless can_enrich?(query)

        new_findings =
          begin
            analyze_with_explain(query, context)
          rescue StandardError => e
            # Graceful degradation: return original findings on error
            log_error("EXPLAIN enrichment failed: #{e.message}", query)
            []
          end

        findings + new_findings
      end

      # Check if this query can be enriched with EXPLAIN
      #
      # @param query [Core::Query] The query
      # @return [Boolean] false when the query or its SQL is nil, otherwise
      #   whatever the adapter's +can_explain?+ returns for the SQL
      def can_enrich?(query)
        return false if query.nil? || query.sql.nil?

        @adapter.can_explain?(query.sql)
      end

      private

      # Fetches the plan for +query+, extracts its signals and converts each
      # recognised signal into a finding.
      #
      # @param query [Core::Query] Query whose SQL is passed to the adapter
      # @param context [Core::Context, nil] Accepted for symmetry with #enrich;
      #   not used by this method
      # @return [Array] Findings built from the plan signals; empty on any error
      def analyze_with_explain(query, context)
        plan = PlanSignals::QueryPlan.new(@adapter.get_plan(query.sql))
        signals = PlanSignals::PlanSignals.new(plan)

        signals.to_a.each_with_object([]) do |signal, collected|
          finding = signal_to_finding(signal, query)
          collected << finding if finding
        end
      rescue AdapterError => e
        log_error("Could not get EXPLAIN plan: #{e.message}", query)
        log_debug("Adapter error details", error: e.inspect)
        []
      rescue StandardError => e
        log_error("Unexpected error during EXPLAIN analysis: #{e.message}", query)
        []
      end

      # Dispatches a signal to its finding builder based on +signal[:type]+.
      #
      # @return [Object, nil] The built finding, or nil for unknown signal types
      def signal_to_finding(signal, query)
        handler = SIGNAL_HANDLERS[signal[:type]]
        handler && send(handler, signal, query)
      end

      # Finding for a sequential scan, with an index suggestion when the
      # suggester can produce one for the scanned table.
      def create_sequential_scan_finding(signal, query)
        metadata = {
          table: signal[:table],
          estimated_rows: signal[:estimated_rows],
          source: EXPLAIN_SOURCE,
          recommendation: signal[:recommendation]
        }

        index_suggestion = @index_suggester.suggest_for_sequential_scan(
          query.sql,
          table_name: signal[:table]
        )

        build_finding(
          signal, query,
          rule_name: :sequential_scan_via_explain,
          severity: :error,
          title: "Sequential Table Scan Detected",
          description: "Query uses sequential scan; table is scanned row-by-row without index",
          advice: [
            "Add index on frequently filtered columns",
            "Run ANALYZE to update table statistics if stale"
          ],
          metadata: metadata,
          index_suggestion: index_suggestion
        )
      end

      # Finding for a sequential scan that carries a filter, which the signal
      # reports as a likely missing index.
      def create_missing_index_finding(signal, query)
        metadata = {
          table: signal[:table],
          filter: signal[:filter],
          source: EXPLAIN_SOURCE
        }

        index_suggestion = @index_suggester.suggest_for_sequential_scan(
          query.sql,
          table_name: signal[:table],
          filter_condition: signal[:filter]
        )

        build_finding(
          signal, query,
          rule_name: :missing_index_via_explain,
          severity: :error,
          title: "Missing Index Detected",
          description: "Sequential scan with WHERE filter suggests missing index",
          advice: ["Review index design based on query predicates"],
          metadata: metadata,
          index_suggestion: index_suggestion
        )
      end

      # Finding for a plan node whose estimate diverges from actual execution.
      def create_estimate_inaccuracy_finding(signal, query)
        build_finding(
          signal, query,
          rule_name: :estimate_inaccuracy,
          severity: :warn,
          title: "Query Plan Estimate Inaccuracy",
          description: "Planner estimate differs significantly from actual execution",
          advice: [
            "Check if table statistics are up-to-date: ANALYZE table_name",
            "Review query structure for optimization opportunities"
          ],
          metadata: {
            node: signal[:node],
            accuracy_ratio: signal[:ratio],
            source: EXPLAIN_ANALYZE_SOURCE
          }
        )
      end

      # Finding for a plan with a high estimated cost.
      def create_high_cost_finding(signal, query)
        build_finding(
          signal, query,
          rule_name: :high_query_cost,
          severity: :warn,
          title: "High Query Cost Estimated",
          description: "Query planner estimates significant resource consumption",
          advice: [
            "Consider query refactoring (JOINs, WHERE clauses, sorting)",
            "Verify indexes exist on join keys"
          ],
          metadata: {
            estimated_cost: signal[:cost],
            source: EXPLAIN_SOURCE
          }
        )
      end

      # Finding for a nested loop join.
      def create_nested_loop_finding(signal, query)
        build_finding(
          signal, query,
          rule_name: :nested_loop_join,
          severity: :warn,
          title: "Nested Loop Join Detected",
          description: "Query uses nested loop join which may be inefficient for large datasets",
          advice: [
            "Consider using hash join if appropriate (increase work_mem setting)",
            "Verify join condition uses indexed columns on the inner table"
          ],
          metadata: {
            inner_table: signal[:inner_table],
            source: EXPLAIN_SOURCE
          }
        )
      end

      # Finding for a query whose planning time is high.
      def create_planning_time_finding(signal, query)
        build_finding(
          signal, query,
          rule_name: :high_planning_time,
          severity: :info,
          title: "High Query Planning Time",
          description: "PostgreSQL planner took significant time to generate query plan",
          advice: [
            "Review for complex JOINs or CTEs",
            "Ensure table statistics are current: ANALYZE"
          ],
          metadata: {
            planning_time_ms: signal[:planning_time_ms],
            source: EXPLAIN_SOURCE
          }
        )
      end

      # Finding for an expensive sort, with an ORDER BY index suggestion when a
      # table name can be extracted from the SQL.
      def create_expensive_sort_finding(signal, query)
        metadata = {
          estimated_rows: signal[:estimated_rows],
          source: EXPLAIN_SOURCE
        }

        # The table name is derived from the SQL text, not from the signal.
        table_name = extract_table_name(query.sql)
        index_suggestion = table_name && @index_suggester.suggest_for_expensive_sort(
          query.sql,
          table_name: table_name
        )

        build_finding(
          signal, query,
          rule_name: :expensive_sort,
          severity: :warn,
          title: "Expensive Sort Operation",
          description: "Query performs sort on large result set which requires memory/disk I/O",
          advice: [
            "Consider index-backed ordering if possible",
            "If using pagination, only fetch needed rows"
          ],
          metadata: metadata,
          index_suggestion: index_suggestion
        )
      end

      # Informational finding for a bitmap index scan.
      def create_bitmap_scan_finding(signal, query)
        build_finding(
          signal, query,
          rule_name: :bitmap_scan,
          severity: :info,
          title: "Bitmap Index Scan",
          description: "Query uses bitmap index scan for range queries; confirm index design",
          advice: [
            "Bitmap scans are appropriate for range predicates with multiple OR conditions",
            "Consider composite indexes if bitmap scan isn't optimal"
          ],
          metadata: {
            table: signal[:table],
            source: EXPLAIN_SOURCE
          }
        )
      end

      # Shared tail of every create_*_finding method.
      #
      # Recommendations are assembled as: the signal's own recommendation, the
      # static +advice+, then the index-suggestion text (when a suggestion is
      # given). nil entries are dropped so a signal without a +:recommendation+
      # does not leak nil into the finding.
      #
      # @param signal [Hash] Plan signal; +:message+ and +:recommendation+ are read
      # @param query [Core::Query] Query whose SQL is attached to the finding
      # @param advice [Array<String>] Static recommendations for this rule
      # @param metadata [Hash] Rule-specific metadata (not mutated)
      # @param index_suggestion [Hash, nil] Suggestion from the index suggester
      # @return [Object] Whatever Core::FindingBuilders.build returns
      def build_finding(signal, query, rule_name:, severity:, title:, description:,
                        advice:, metadata:, index_suggestion: nil)
        recommendations = [signal[:recommendation], *advice]

        if index_suggestion
          metadata = metadata.merge(index_suggestion_metadata(index_suggestion))
          recommendations << @index_suggester.build_recommendation_text(index_suggestion)
        end

        Core::FindingBuilders.build(
          analyzer_name: :query_risk,
          rule_name: rule_name,
          severity: severity,
          title: title,
          description: description,
          message: signal[:message],
          sql: query.sql,
          recommendations: recommendations.compact,
          metadata: metadata
        )
      end

      # Translates an index suggestion into the metadata keys findings expose.
      def index_suggestion_metadata(index_suggestion)
        {
          suggested_index_sql: index_suggestion[:suggested_index_sql],
          suggested_index_columns: index_suggestion[:columns],
          suggested_index_confidence: index_suggestion[:confidence],
          suggested_index_name: index_suggestion[:index_name]
        }
      end

      # Logs a warning with the first 81 characters of the stripped SQL.
      # No-op without a logger; tolerates a nil query or nil SQL.
      def log_error(message, query)
        return unless @logger

        sql_excerpt = query&.sql.to_s.strip[0..80]
        @logger.warn("[QueryGuard::Explain] #{message} - Query: #{sql_excerpt}")
      end

      # Logs a debug message followed by the stringified keyword context, if any.
      # No-op without a logger.
      def log_debug(message, **context)
        return unless @logger

        context_str = context.empty? ? "" : " - #{context}"
        @logger.debug("[QueryGuard::Explain] #{message}#{context_str}")
      end

      # Simple extraction of main table name from SELECT/FROM clause.
      # Used for index suggestions; delegates to Suggest::PatternExtractors.
      def extract_table_name(sql)
        Suggest::PatternExtractors.new.extract_table_name(sql)
      end
    end
  end
end