query_guard 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +89 -1
  3. data/DESIGN.md +420 -0
  4. data/INDEX.md +309 -0
  5. data/README.md +579 -30
  6. data/exe/queryguard +23 -0
  7. data/lib/query_guard/action_controller_subscriber.rb +27 -0
  8. data/lib/query_guard/analysis/query_risk_classifier.rb +124 -0
  9. data/lib/query_guard/analysis/risk_detectors.rb +258 -0
  10. data/lib/query_guard/analysis/risk_level.rb +35 -0
  11. data/lib/query_guard/analyzers/base.rb +30 -0
  12. data/lib/query_guard/analyzers/query_count_analyzer.rb +31 -0
  13. data/lib/query_guard/analyzers/query_risk_analyzer.rb +146 -0
  14. data/lib/query_guard/analyzers/registry.rb +57 -0
  15. data/lib/query_guard/analyzers/select_star_analyzer.rb +42 -0
  16. data/lib/query_guard/analyzers/slow_query_analyzer.rb +39 -0
  17. data/lib/query_guard/budget.rb +148 -0
  18. data/lib/query_guard/cli/batch_report_formatter.rb +129 -0
  19. data/lib/query_guard/cli/command.rb +93 -0
  20. data/lib/query_guard/cli/commands/analyze.rb +52 -0
  21. data/lib/query_guard/cli/commands/check.rb +58 -0
  22. data/lib/query_guard/cli/formatter.rb +278 -0
  23. data/lib/query_guard/cli/json_reporter.rb +247 -0
  24. data/lib/query_guard/cli/paged_report_formatter.rb +137 -0
  25. data/lib/query_guard/cli/source_metadata_collector.rb +297 -0
  26. data/lib/query_guard/cli.rb +197 -0
  27. data/lib/query_guard/client.rb +4 -6
  28. data/lib/query_guard/config.rb +145 -6
  29. data/lib/query_guard/core/context.rb +80 -0
  30. data/lib/query_guard/core/finding.rb +162 -0
  31. data/lib/query_guard/core/finding_builders.rb +152 -0
  32. data/lib/query_guard/core/query.rb +40 -0
  33. data/lib/query_guard/explain/adapter_interface.rb +89 -0
  34. data/lib/query_guard/explain/explain_enricher.rb +367 -0
  35. data/lib/query_guard/explain/plan_signals.rb +385 -0
  36. data/lib/query_guard/explain/postgresql_adapter.rb +208 -0
  37. data/lib/query_guard/exporter.rb +124 -0
  38. data/lib/query_guard/fingerprint.rb +96 -0
  39. data/lib/query_guard/middleware.rb +101 -15
  40. data/lib/query_guard/migrations/database_adapter.rb +88 -0
  41. data/lib/query_guard/migrations/migration_analyzer.rb +100 -0
  42. data/lib/query_guard/migrations/migration_risk_detectors.rb +287 -0
  43. data/lib/query_guard/migrations/postgresql_adapter.rb +157 -0
  44. data/lib/query_guard/migrations/table_risk_analyzer.rb +154 -0
  45. data/lib/query_guard/migrations/table_size_resolver.rb +152 -0
  46. data/lib/query_guard/publish.rb +38 -0
  47. data/lib/query_guard/rspec.rb +119 -0
  48. data/lib/query_guard/security.rb +99 -0
  49. data/lib/query_guard/store.rb +38 -0
  50. data/lib/query_guard/subscriber.rb +46 -15
  51. data/lib/query_guard/suggest/index_suggester.rb +176 -0
  52. data/lib/query_guard/suggest/pattern_extractors.rb +137 -0
  53. data/lib/query_guard/trace.rb +106 -0
  54. data/lib/query_guard/uploader/http_uploader.rb +166 -0
  55. data/lib/query_guard/uploader/interface.rb +79 -0
  56. data/lib/query_guard/uploader/no_op_uploader.rb +46 -0
  57. data/lib/query_guard/uploader/registry.rb +37 -0
  58. data/lib/query_guard/uploader/upload_service.rb +80 -0
  59. data/lib/query_guard/version.rb +1 -1
  60. data/lib/query_guard.rb +54 -7
  61. metadata +78 -10
  62. data/.rspec +0 -3
  63. data/Rakefile +0 -21
  64. data/config/initializers/query_guard.rb +0 -9
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ module QueryGuard
4
+ module Migrations
5
# Extracts table names from Rails migration code.
#
# This is a line-by-line, regex-based heuristic: it does not parse Ruby.
# It recognises three kinds of references:
#
# * schema statements whose first argument is the table
#   (add_column, remove_column, change_column, create_table, drop_table, ...),
#   written with or without parentheses;
# * raw SQL `UPDATE`, `DELETE FROM` and `INSERT INTO` statements
#   (an optional `public.` schema prefix is ignored);
# * `Model.update_all` / `Model.delete_all`, where the model name is
#   converted to a guessed table name.
class TableSizeResolver
  # Schema statements that take the affected table as their first argument.
  SCHEMA_METHODS = %w[
    add_column remove_column remove_columns change_column rename_column
    change_column_null change_column_default
    add_index remove_index
    add_reference remove_reference
    add_foreign_key remove_foreign_key
    add_timestamps remove_timestamps
    create_table drop_table rename_table change_table
  ].freeze

  # A schema statement, then either "(" or whitespace, then the table written
  # as :symbol, "string", 'string' or a bare identifier. Requiring a separator
  # keeps `change_column` from matching the prefix of `change_column_null`.
  SCHEMA_CALL_PATTERN = /(?:#{SCHEMA_METHODS.join("|")})(?:\s*\(\s*|\s+)[:"`']?(\w+)/

  # Raw SQL write statements (case-insensitive).
  RAW_SQL_PATTERN = /(?:UPDATE|DELETE\s+FROM|INSERT\s+INTO)\s+(?:public\.)?(\w+)/i

  # `User.update_all(...)` / `User.delete_all` called directly on a constant.
  MODEL_BULK_PATTERN = /(\b[A-Z]\w*)\s*\.\s*(?:update_all|delete_all)/

  # Tables that filter_schema_tables drops from its result.
  INTERNAL_TABLES = %w[
    schema_migrations ar_internal_metadata
    delayed_jobs sidekiq_jobs
  ].freeze

  # Extract all table names referenced in a migration.
  #
  # Blank lines and lines starting with "#" are ignored. Every match on a
  # line is collected, so several statements on one line are all reported.
  #
  # @param migration_content [String, nil] Migration file content
  # @return [Array<String>] Table names (deduplicated, sorted)
  def self.extract_table_names(migration_content)
    tables = []

    migration_content.to_s.each_line do |line|
      code = line.strip
      next if code.empty? || code.start_with?("#")

      tables.concat(line.scan(SCHEMA_CALL_PATTERN).flatten)
      tables.concat(line.scan(RAW_SQL_PATTERN).flatten)
      line.scan(MODEL_BULK_PATTERN).flatten.each do |model_name|
        tables << table_name_for(model_name)
      end
    end

    # A plain Array plus uniq avoids depending on `Set` being loaded.
    tables.uniq.sort
  end

  # Filter tables to only those likely affected by schema changes.
  #
  # Removes every name listed in INTERNAL_TABLES.
  #
  # @param table_names [Array<String>] List of table names
  # @return [Array<String>] Filtered list
  def self.filter_schema_tables(table_names)
    table_names.reject { |name| INTERNAL_TABLES.include?(name) }
  end

  # Extract table names that schema changes would directly affect.
  #
  # @param migration_content [String, nil] Migration file content
  # @return [Array<String>] Directly affected tables
  def self.affected_tables(migration_content)
    filter_schema_tables(extract_table_names(migration_content))
  end

  # Guess the table name for a model constant.
  #
  # CamelCase is converted to snake_case while keeping acronym runs together
  # (User -> user, UserProfile -> user_profile, APIKey -> api_key).
  # Pluralisation is a deliberately simple heuristic: append "s" unless the
  # name already ends in s, x or z. Irregular plurals are not handled.
  #
  # @param model_name [String] Constant name as written in the migration
  # @return [String] Guessed table name
  def self.table_name_for(model_name)
    snake = model_name
            .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
            .gsub(/([a-z\d])([A-Z])/, '\1_\2')
            .downcase
    snake.end_with?("s", "x", "z") ? snake : "#{snake}s"
  end
  private_class_method :table_name_for
end
151
+ end
152
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+ require "time"
3
+ require_relative "client"
4
+
5
module QueryGuard
  class << self
    # Memoized API client. Each setting comes from config first, then from
    # the matching environment variable (where one exists), then a default.
    def client
      return @client if @client

      @client = Client.new(
        base_url: config.api_base_url || ENV["QUERYGUARD_API_URL"] || "http://localhost:4000",
        api_key: config.api_key || ENV["QUERYGUARD_API_KEY"],
        project: config.project || ENV["QUERYGUARD_PROJECT"] || "dev",
        env: config.env || (defined?(Rails) ? Rails.env : "development")
      )
    end

    # Event queue stored on the current thread; created on first use.
    def buffer
      Thread.current[:qg_buf] ||= []
    end

    # Replaces the current thread's queue with a fresh empty one.
    def clear!
      Thread.current[:qg_buf] = []
    end

    # Queues one event (default fields merged with attrs) when enabled.
    def track!(attrs)
      buffer << default_fields.merge(attrs) if enabled?
    end

    # Posts a copy of the queued events, if any. The queue is emptied
    # afterwards in every case, including when the post raises.
    def flush_now!
      pending = buffer
      client.post("/api/v1/events", { events: pending.dup }) unless pending.empty?
    ensure
      clear!
    end

    private

    # Fields stamped on every tracked event.
    def default_fields
      { occurred_at: Time.now.utc.iso8601 }
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rspec/expectations"
4
+ require "query_guard/trace"
5
+
6
+ # RSpec matcher for asserting query budgets.
7
+ #
8
+ # Usage:
9
+ # expect {
10
+ # User.where(active: true).to_a
11
+ # }.to_not exceed_query_budget(count: 5)
12
+ #
13
+ # expect {
14
+ # User.all.to_a
15
+ # }.to_not exceed_query_budget(count: 10, duration_ms: 500)
16
+ #
17
+ # With a labeled budget:
18
+ # QueryGuard.config.budget.for("users#index", count: 5)
19
+ #
20
+ # expect {
21
+ # # code
22
+ # }.to_not exceed_query_budget("users#index")
23
# Matcher that is satisfied when the traced block goes OVER a query budget.
#
# The budget is either a named one (String key looked up through
# QueryGuard.config.budget.budget_for) or inline limits given as keywords
# and/or a positional Hash. Recognised limits are :count and :duration_ms.
#
# Because the positive form means "exceeded", the usual usage is the negated
# one: expect { ... }.to_not exceed_query_budget(count: 5)
RSpec::Matchers.define :exceed_query_budget do |budget_key_or_limits = nil, **limits|
  match do |block|
    # Determine if we're checking a named budget or inline limits
    if budget_key_or_limits.is_a?(String)
      budget_key = budget_key_or_limits
      budget_limits = QueryGuard.config&.budget&.budget_for(budget_key)

      if budget_limits.nil?
        raise ArgumentError, "No budget defined for '#{budget_key}'. Define it with QueryGuard.config.budget.for('#{budget_key}', ...)"
      end

      limits = budget_limits
    elsif budget_key_or_limits.is_a?(Hash)
      # Keyword limits win over the positional Hash on conflicting keys
      limits = budget_key_or_limits.merge(limits)
    end

    # Execute the block within a trace
    _, report = QueryGuard::Trace.trace("rspec_matcher") do
      block.call
    end

    @actual_count = report.query_count
    @actual_duration = report.total_duration_ms
    @limit_count = limits[:count]
    @limit_duration = limits[:duration_ms]

    # Collect every limit that was exceeded; a missing limit is not checked
    @exceeded = false
    @violations = []

    if @limit_count && @actual_count > @limit_count
      @exceeded = true
      @violations << "count: #{@actual_count} > #{@limit_count}"
    end

    if @limit_duration && @actual_duration > @limit_duration
      @exceeded = true
      @violations << "duration: #{@actual_duration.round(2)}ms > #{@limit_duration}ms"
    end

    @exceeded
  end

  # Used when `expect { }.to exceed_query_budget(...)` fails, i.e. the block
  # stayed within every limit.
  failure_message do
    parts = []
    parts << "count: #{@actual_count} <= #{@limit_count}" if @limit_count
    parts << "duration: #{@actual_duration.round(2)}ms <= #{@limit_duration}ms" if @limit_duration
    "expected query budget to be exceeded, but it was within limits:\n #{parts.join("\n ")}"
  end

  # Used when `expect { }.to_not exceed_query_budget(...)` fails, i.e. at
  # least one limit was exceeded.
  failure_message_when_negated do
    "expected query budget not to be exceeded, but it was:\n #{@violations.join("\n ")}"
  end

  # Stated in the positive form; RSpec supplies the negation for to_not.
  # The limits are only known after the matcher has run.
  description do
    parts = []
    parts << "count > #{@limit_count}" if @limit_count
    parts << "duration > #{@limit_duration}ms" if @limit_duration
    "exceed query budget (#{parts.join(", ")})"
  end

  supports_block_expectations
end
86
+
87
module QueryGuard
  module RSpec
    # Runs the block inside a trace and raises if it goes over the given
    # limits (:count and/or :duration_ms). Limits that are not given are
    # not checked.
    #
    # Returns the trace report when every limit holds; otherwise raises
    # QueryGuard::Budget::Violation listing each exceeded limit.
    def within_query_budget(**limits, &block)
      _result, report = QueryGuard::Trace.trace("within_query_budget") { block.call }

      max_count = limits[:count]
      max_duration = limits[:duration_ms]
      problems = []

      if max_count && report.query_count > max_count
        problems << "Query count exceeded: #{report.query_count} > #{max_count}"
      end

      if max_duration && report.total_duration_ms > max_duration
        problems << "Duration exceeded: #{report.total_duration_ms.round(2)}ms > #{max_duration}ms"
      end

      raise QueryGuard::Budget::Violation, problems.join("; ") unless problems.empty?

      report
    end
  end
end
113
+
114
# Auto-include helpers when RSpec's configuration API is available.
# Checking respond_to?(:configure) as well as defined?(::RSpec) avoids a
# NoMethodError when ::RSpec exists but provides no `configure` method.
if defined?(::RSpec) && ::RSpec.respond_to?(:configure)
  ::RSpec.configure do |config|
    config.include QueryGuard::RSpec
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+ require "digest"
3
+
4
module QueryGuard
  # Heuristic security checks: SQL fingerprinting, pattern-based injection
  # detection, and per-request rate / response-size checks that append
  # violation hashes to stats[:violations].
  module Security
    module_function

    # Normalize SQL into a stable fingerprint:
    # - collapse whitespace
    # - replace single-quoted strings and standalone integers with ?
    # - strip and downcase
    #
    # @param sql [#to_s] SQL text
    # @return [String] SHA1 hex digest of the normalized SQL
    def fingerprint(sql)
      s = sql.to_s.dup
      s.gsub!(/\s+/, " ")
      s.gsub!(/'(?:''|[^'])*'/, "?") # strings
      s.gsub!(/\b\d+\b/, "?")        # integers
      Digest::SHA1.hexdigest(s.strip.downcase)
    end

    # @param sql [#to_s] SQL text
    # @param patterns [Enumerable<Regexp>] patterns to test against the SQL
    # @return [Boolean] true when any pattern matches
    def suspicious_sql_injection?(sql, patterns)
      s = sql.to_s
      patterns.any? { |re| re.match?(s) }
    end

    # Heuristic: a statement starting with SELECT that contains neither the
    # word WHERE nor the word LIMIT.
    #
    # @param sql [#to_s] SQL text
    # @return [Boolean]
    def possible_exfiltration_query?(sql)
      s = sql.to_s.strip
      return false unless s.match?(/\ASELECT\b/i)

      !s.match?(/\bwhere\b/i) && !s.match?(/\blimit\b/i)
    end

    # Store used when config.store is not set. It is memoized so that
    # counters written during one call are still there on the next one;
    # building a new store per call would start every counter from zero.
    #
    # @return [QueryGuard::Store]
    def default_store
      @default_store ||= QueryGuard::Store.new
    end

    # Runs the enabled post-request checks and appends a Hash to
    # stats[:violations] for each limit that was crossed. Does nothing
    # unless config.enable_security is truthy.
    #
    # @param env [Hash] request env; "PATH_INFO" is read, and env is passed
    #   to config.actor_resolver
    # @param stats [Hash] reads :count, :fingerprints, :request_id and
    #   :response_bytes; appends to :violations
    # @param config [Object] QueryGuard configuration
    # @return [void]
    def post_request_checks!(env, stats, config)
      return unless config.enable_security

      actor = resolve_actor(env, config)
      store = config.store || default_store

      # --- Unusual query pattern (rate/variety) ---
      if config.detect_unusual_query_pattern
        bucket = Time.now.utc.strftime("%Y%m%d%H%M") # minute bucket
        base = "qg:actor:#{actor}:#{bucket}"

        total = store.incr("#{base}:queries", ttl: 120, by: stats[:count].to_i)
        uniq_count = stats[:fingerprints]&.keys&.size.to_i
        store.add_to_set("#{base}:uniqfp", stats[:request_id], ttl: 120) # keep request marker

        # NOTE: this adds up each request's distinct-fingerprint count, so a
        # fingerprint seen in several requests is counted once per request.
        uniq_fp_total = store.incr("#{base}:uniqfp_count", ttl: 120, by: uniq_count)

        if total > config.max_queries_per_minute_per_actor
          stats[:violations] << {
            type: :unusual_query_rate,
            actor: actor,
            per_minute: total,
            limit: config.max_queries_per_minute_per_actor
          }
        end

        if uniq_fp_total > config.max_unique_query_fingerprints_per_minute_per_actor
          stats[:violations] << {
            type: :unusual_query_variety,
            actor: actor,
            unique_fingerprints_per_minute: uniq_fp_total,
            limit: config.max_unique_query_fingerprints_per_minute_per_actor
          }
        end
      end

      # --- Data exfiltration (response size + endpoint hint) ---
      if config.detect_data_exfiltration
        bytes = stats[:response_bytes].to_i
        path = env["PATH_INFO"].to_s

        if bytes > config.max_response_bytes_per_request
          stats[:violations] << {
            type: :data_exfiltration_large_response,
            bytes: bytes,
            limit: config.max_response_bytes_per_request,
            path: path
          }
        end

        # Paths matching the export regex are flagged at half the byte limit
        if bytes > (config.max_response_bytes_per_request / 2) && path.match?(config.exfiltration_path_regex)
          stats[:violations] << {
            type: :data_exfiltration_suspected_export,
            bytes: bytes,
            path: path
          }
        end
      end
    end

    # Identifies the actor for rate counters via config.actor_resolver.
    # Best effort: a missing resolver, a falsy result, or any StandardError
    # raised by the resolver all yield "unknown".
    #
    # @return [Object] resolver result, or "unknown"
    def resolve_actor(env, config)
      (config.actor_resolver && config.actor_resolver.call(env)) || "unknown"
    rescue
      "unknown"
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+ require "active_support"
3
+ require "active_support/cache"
4
+
5
module QueryGuard
  # Thin wrapper over a cache object, used for rate counters.
  # When no cache is injected it creates an in-process
  # ActiveSupport::Cache::MemoryStore; any object with compatible
  # read/write methods can be passed in instead.
  class Store
    def initialize(cache: nil)
      @cache =
        if cache
          cache
        else
          ActiveSupport::Cache::MemoryStore.new(size: 8.megabytes)
        end
    end

    # Adds `by` to the integer stored at key (a missing key counts as 0),
    # writes it back with a ttl-second expiry and returns the new value.
    def incr(key, ttl: 60, by: 1)
      current = @cache.read(key) || 0
      updated = current.to_i + by
      @cache.write(key, updated, expires_in: ttl)
      updated
    end

    # Raw read of whatever is stored at key.
    def read(key)
      @cache.read(key)
    end

    # "Set" emulation: members are the keys of a Hash stored at key.
    # A missing or non-Hash value is replaced by a fresh Hash.
    # Returns the number of members after the insert.
    def add_to_set(key, member, ttl: 60)
      stored = @cache.read(key)
      members = stored.is_a?(Hash) ? stored : {}
      members[member] = true
      @cache.write(key, members, expires_in: ttl)
      members.size
    end

    # Number of members in the emulated set; 0 when the key is missing or
    # holds something other than a Hash.
    def set_size(key)
      stored = @cache.read(key)
      return 0 unless stored.is_a?(Hash)

      stored.size
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
  require "active_support/notifications"
3
+ require "query_guard/security"
3
4
 
4
5
  module QueryGuard
5
6
  module Subscriber
@@ -7,12 +8,14 @@ module QueryGuard
7
8
 
8
9
  def self.install!(config)
9
10
  return if @installed
10
- @config = config
11
+
11
12
  @subscriber = ActiveSupport::Notifications.subscribe(SQL_EVENT) do |_, started, finished, _, payload|
13
+ context = Thread.current[:query_guard_context]
14
+ next unless context # only track inside our middleware window
15
+
12
16
  stats = Thread.current[:query_guard_stats]
13
- next unless stats # only track inside our middleware window
17
+ next unless stats
14
18
 
15
- # Skip schema and ignored
16
19
  name = payload[:name].to_s
17
20
  next if name == "SCHEMA"
18
21
 
@@ -20,29 +23,57 @@ module QueryGuard
20
23
  next if config.ignored_sql.any? { |r| r === sql }
21
24
 
22
25
  duration_ms = (finished - started) * 1000.0
26
+
27
+ # Collect query into context (new behavior)
28
+ context.add_query(
29
+ sql: sql,
30
+ duration_ms: duration_ms,
31
+ name: name,
32
+ started_at: Time.at(started),
33
+ finished_at: Time.at(finished)
34
+ )
35
+
36
+ # Legacy: Also update Thread.current stats for backward compatibility
37
+ stats = Thread.current[:query_guard_stats] ||= { count: 0, total_duration_ms: 0.0, violations: [] }
23
38
  stats[:count] += 1
24
39
  stats[:total_duration_ms] += duration_ms
25
40
 
41
+ fp = QueryGuard::Security.fingerprint(sql)
42
+ stats[:fingerprints][fp] += 1
43
+
26
44
  if config.max_duration_ms_per_query && duration_ms > config.max_duration_ms_per_query
27
- stats[:violations] << {
28
- type: :slow_query,
29
- duration_ms: duration_ms.round(2),
30
- sql: sql
31
- }
45
+ stats[:violations] << { type: :slow_query, duration_ms: duration_ms.round(2), sql: sql }
32
46
  end
33
47
 
34
48
  if config.block_select_star && sql =~ /\bSELECT\s+\*/i
35
49
  stats[:violations] << { type: :select_star, sql: sql }
36
50
  end
51
+
52
+ # --- SQL Injection detection ---
53
+ if config.enable_security && config.detect_sql_injection
54
+ if QueryGuard::Security.suspicious_sql_injection?(sql, config.sql_injection_patterns)
55
+ stats[:violations] << { type: :sql_injection_suspected, sql: sql }
56
+ end
57
+ end
58
+
59
+ # --- Data exfiltration query-shape heuristic ---
60
+ if config.enable_security && config.detect_data_exfiltration
61
+ if QueryGuard::Security.possible_exfiltration_query?(sql)
62
+ stats[:violations] << { type: :possible_data_exfiltration_query, sql: sql }
63
+ end
64
+ end
65
+
66
+ max = config.max_query_events_per_req || 200
67
+ if stats[:queries].length < max
68
+ stats[:queries] << {
69
+ sql: sql,
70
+ duration_ms: duration_ms.round(2),
71
+ occurred_at: Time.now.utc.iso8601
72
+ }
73
+ end
37
74
  end
38
- @installed = true
39
- end
40
75
 
41
- def self.uninstall!
42
- return unless @installed && @subscriber
43
- ActiveSupport::Notifications.unsubscribe(@subscriber)
44
- @installed = false
45
- @subscriber = nil
76
+ @installed = true
46
77
  end
47
78
  end
48
79
  end