woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +169 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +15 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +3 -4
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +737 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +1 -1
  102. data/lib/woods/unblocked/document_builder.rb +35 -10
  103. data/lib/woods/unblocked/exporter.rb +1 -1
  104. data/lib/woods/util/host_guard.rb +61 -0
  105. data/lib/woods/version.rb +1 -1
  106. data/lib/woods.rb +126 -6
  107. metadata +69 -4
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ # @see Woods
4
module Woods
  module Console
    # Strips SQL comments and string literals from a SQL string so that
    # downstream checks (keyword scanning, table scanning) are not confused
    # by content embedded inside comments or literals.
    #
    # All methods are module-level and stateless — pass a SQL string in,
    # receive a stripped string out.
    #
    # Both passes are single left-to-right scans in which whichever construct
    # opens first wins. That matters for safety: a comment marker inside a
    # string literal (`'--'`) is not a comment, and a `$$` inside a
    # single-quoted string is not a dollar quote. Treating them as noise would
    # erase live SQL and hide it from the checks that run on the result.
    #
    # @example Strip comments only
    #   SqlNoiseStripper.strip_comments("SELECT 1 -- pick one\nFROM t")
    #   # => "SELECT 1 \nFROM t"
    #
    # @example Strip literals (PostgreSQL dialect)
    #   SqlNoiseStripper.strip_literals("SELECT 'it''s ok' FROM t")
    #   # => "SELECT '' FROM t"
    #
    # @example Strip literals (MySQL dialect — backslash escapes)
    #   SqlNoiseStripper.strip_literals("SELECT 'it\\'s ok' FROM t", dialect: :mysql)
    #   # => "SELECT '' FROM t"
    #
    module SqlNoiseStripper
      # A `-- ...` line comment, up to but not including the newline.
      LINE_COMMENT = /--[^\n]*/

      # A `/* ... */` block comment, matched up to the FIRST `*/`.
      BLOCK_COMMENT = %r{/\*.*?\*/}m

      # A dollar-quoted literal (`$$...$$`, `$tag$...$tag$`). The opening `$`
      # must not directly follow an identifier character: in `a$$` the dollar
      # signs belong to the identifier and do not open a string.
      DOLLAR_QUOTED = /(?<![\w$])\$(?<tag>\w*)\$.*?\$\k<tag>\$/m

      # Single-quoted literal where only `''` escapes an apostrophe.
      SINGLE_QUOTED_POSTGRES = /'(?:''|[^'])*'/m

      # Single-quoted literal where both `\'` and `''` escape an apostrophe.
      SINGLE_QUOTED_MYSQL = /'(?:\\.|''|[^'])*'/m

      SUPPORTED_DIALECTS = %i[postgres mysql].freeze
      private_constant :SUPPORTED_DIALECTS

      # dialect => one alternation covering every literal form, so a single
      # gsub pass lets the earliest-opening literal win.
      LITERAL_PATTERNS = {
        postgres: /#{DOLLAR_QUOTED}|#{SINGLE_QUOTED_POSTGRES}/,
        mysql: /#{DOLLAR_QUOTED}|#{SINGLE_QUOTED_MYSQL}/
      }.freeze
      private_constant :LITERAL_PATTERNS

      # dialect => alternation of literals (kept), MySQL executable comments
      # (body kept), and ordinary comments (dropped).
      NOISE_PATTERNS = LITERAL_PATTERNS.transform_values do |literal|
        %r{(?<literal>#{literal})|(?<executable>/\*!\d*(?<body>.*?)\*/)|#{LINE_COMMENT}|#{BLOCK_COMMENT}}m
      end.freeze
      private_constant :NOISE_PATTERNS

      # Strips SQL line comments (`-- ...`) and block comments (`/* ... */`).
      #
      # Line comments are stripped to (but not including) the newline so that
      # newline-separated statement structure is preserved for callers that
      # check for multiple statements.
      #
      # String literals are skipped over untouched, so comment markers inside
      # a literal never cause real SQL to be removed.
      #
      # Block comments are matched non-nested (up to the first `*/`).
      # PostgreSQL nests block comments while MySQL does not; the non-nested
      # reading never removes more than either engine treats as a comment,
      # so text may be left visible but is never hidden.
      #
      # MySQL executable comments (`/*! ... */`, `/*!50000 ... */`) are run by
      # MySQL as SQL. Their body is kept (space-padded, inner comments
      # stripped) for every dialect so it stays visible to later checks.
      #
      # NOTE(review): MySQL-only comment rules (`#` comments, `--` requiring
      # trailing whitespace) are not modelled — confirm whether MySQL callers
      # need them.
      #
      # @param sql [String] the SQL string to process
      # @param dialect [Symbol] `:postgres` (default) or `:mysql`; selects how
      #   string literals are recognised while scanning
      # @return [String] a new string with SQL comments removed
      # @raise [ArgumentError] if an unsupported dialect is provided
      def self.strip_comments(sql, dialect: :postgres)
        ensure_supported!(dialect)

        sql.gsub(NOISE_PATTERNS.fetch(dialect)) do
          token = Regexp.last_match
          if token[:literal]
            token[0]
          elsif token[:executable]
            " #{strip_comments(token[:body], dialect: dialect)} "
          else
            ''
          end
        end
      end

      # Replaces every single-quoted and dollar-quoted string literal with an
      # empty `''` placeholder so the surrounding SQL structure is preserved
      # for subsequent checks.
      #
      # Both literal forms are recognised in one pass: an apostrophe inside a
      # dollar-quoted body does not open a string, and a `$$` inside a
      # single-quoted string does not open a dollar quote.
      #
      # @param sql [String] the SQL string to process
      # @param dialect [Symbol] `:postgres` (default) or `:mysql`.
      #   - `:postgres` — single-quoted strings support `''` as an apostrophe
      #     escape. Backslash is treated literally and does not escape quotes.
      #   - `:mysql` — single-quoted strings support both `\'` and `''` as
      #     apostrophe escapes.
      #   Dollar-quoted strings are stripped for both dialects.
      #   NOTE(review): MySQL has no dollar quoting and allows `$` in
      #   identifiers, so stripping them under `:mysql` may hide text —
      #   confirm this is intended.
      # @return [String] a new string with all string literals replaced by `''`
      # @raise [ArgumentError] if an unsupported dialect is provided
      def self.strip_literals(sql, dialect: :postgres)
        ensure_supported!(dialect)

        sql.gsub(LITERAL_PATTERNS.fetch(dialect), "''")
      end

      # @api private
      # Raises unless +dialect+ is one of the supported dialect symbols.
      def self.ensure_supported!(dialect)
        return if SUPPORTED_DIALECTS.include?(dialect)

        raise ArgumentError, "Unknown dialect #{dialect.inspect}. Supported: #{SUPPORTED_DIALECTS.inspect}"
      end
      private_class_method :ensure_supported!
    end
  end
end
@@ -0,0 +1,213 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'woods/console/sql_noise_stripper'
4
+
5
+ # @see Woods
6
module Woods
  module Console
    # Extracts table and schema-qualified identifiers from a SQL string.
    #
    # Handles both JOIN-style and ANSI-89 comma-join syntax across MySQL and
    # PostgreSQL quoting styles (`backtick`, `"double"`, bare). Schema-qualified
    # identifiers (`schema.table`, `"schema"."table"`, `` `db`.`table` ``) are
    # returned as `schema.table` strings so callers can compare against either
    # the bare or qualified form.
    #
    # Noise (comments, string literals, dollar-quoted bodies) is stripped via
    # {SqlNoiseStripper} before scanning so that identifiers embedded in literal
    # content are never surfaced.
    #
    # All methods are module-level and stateless — pass a SQL string in, receive
    # an array of identifier strings out.
    #
    # NOTE(review): whitespace around the qualifying dot (`public . t`) and a
    # quoted last segment of a three-part name (`db.schema."t"`) are not
    # recognised by the identifier patterns — confirm whether the supported
    # engines accept those spellings.
    #
    # @example JOIN targets are listed before FROM-list tables
    #   SqlTableScanner.identifiers_in('SELECT * FROM users JOIN orders ON ...')
    #   # => ["orders", "users"]
    #
    # @example
    #   SqlTableScanner.identifiers_in('SELECT * FROM "audit"."events"')
    #   # => ["audit.events"]
    #
    module SqlTableScanner # rubocop:disable Metrics/ModuleLength
      # Matches a JOIN token (or the `TABLE name` command, which reads a whole
      # table like `SELECT * FROM name`) followed by its target identifier.
      # The identifier may be schema-qualified in any quoting style, and the
      # optional schema prefix is captured separately. An optional `ONLY`
      # keyword (PostgreSQL inheritance opt-out) is consumed before the
      # identifier so it does not hide the table name. Whitespace after the
      # keyword is optional because a quoted identifier may follow it
      # directly (``JOIN`t` ``). Comma joins are handled by the FROM-list
      # walker.
      JOIN_REFERENCE = /
        \b(?:(?:STRAIGHT_)?JOIN|TABLE)\b\s*
        (?:ONLY\s+)?
        (?:
          (?:
            `(?<jschema_bt>[^`]+)` |
            "(?<jschema_dq>[^"]+)" |
            (?<jschema_bare>\w+)
          )
          \.
        )?
        (?:
          `(?<backtick>[^`]+)` |
          "(?<double>[^"]+)" |
          (?<bare>\w+(?:\.\w+)?)
        )
      /xi

      # Regex form of a FROM clause. No longer used by the scanner, which
      # walks the FROM list with paren tracking instead; the constant is kept
      # unchanged for any external reference.
      FROM_CLAUSE = /
        \bFROM\s+
        (?<clause>.+?)
        (?=
          \b(?:WHERE|GROUP|HAVING|ORDER|LIMIT|OFFSET|UNION|INTERSECT|EXCEPT|
               STRAIGHT_JOIN|JOIN|INNER|OUTER|LEFT|RIGHT|FULL|CROSS|FROM)\b
          | [;)]
          | \z
        )
      /xim

      # Matches a leading table identifier at the start of a FROM-list chunk.
      # The identifier may carry an optional schema prefix in any quoting
      # style, captured separately so callers can match against bare or
      # qualified configured forms.
      LEAD_IDENT = /
        \A
        (?:
          (?:
            `(?<schema_bt>[^`]+)` |
            "(?<schema_dq>[^"]+)" |
            (?<schema_bare>\w+)
          )
          \.
        )?
        (?:
          `(?<backtick>[^`]+)` |
          "(?<double>[^"]+)" |
          (?<bare>\w+(?:\.\w+)?)
        )
      /xi

      # PostgreSQL ONLY keyword that appears between FROM and the table
      # identifier. Anchored with `\A` because the chunk is stripped of
      # leading whitespace before this is applied.
      ONLY_PREFIX = /\AONLY\s+/i

      # Start of a FROM list. Whitespace is optional so that `FROM(` and
      # ``FROM`t` `` are recognised.
      FROM_KEYWORD = /\bFROM\b\s*/i
      private_constant :FROM_KEYWORD

      # Tokens that matter while walking a FROM list: quoted identifiers
      # (skipped whole, so a quoted keyword or paren cannot end the list),
      # parens, commas, `;`, and the keywords that end a FROM list at depth 0.
      FROM_LIST_TOKEN = /
        `[^`]*` | "[^"]*"
        | [(),;]
        | \b(?:WHERE|GROUP|HAVING|ORDER|LIMIT|OFFSET|UNION|INTERSECT|EXCEPT|FROM)\b
      /xi
      private_constant :FROM_LIST_TOKEN

      # Returns every table/schema-qualified identifier referenced in the SQL
      # string. Noise (comments, string literals, dollar-quoted bodies) is
      # stripped before scanning.
      #
      # @param sql [String, nil] the SQL string to scan
      # @return [Array<String>] JOIN/TABLE targets first, then FROM-list
      #   tables, each group in order of appearance; may contain duplicates
      #   and non-table words (e.g. a function name in a FROM list)
      def self.identifiers_in(sql)
        return [] if sql.nil? || sql.empty?

        stripped = strip_noise(sql)
        results = []
        collect_join_identifiers(stripped, results)
        collect_from_identifiers(stripped, results)
        results
      end

      # @api private
      # Removes comments, then string literals (MySQL escape rules).
      def self.strip_noise(sql)
        out = SqlNoiseStripper.strip_comments(sql)
        SqlNoiseStripper.strip_literals(out, dialect: :mysql)
      end
      private_class_method :strip_noise

      # @api private
      # Appends the target of every JOIN / TABLE reference to +results+.
      def self.collect_join_identifiers(sql, results)
        sql.scan(JOIN_REFERENCE) do
          results << qualified_identifier(Regexp.last_match)
        end
      end
      private_class_method :collect_join_identifiers

      # @api private
      # Appends the leading identifier of every FROM-list entry to +results+.
      # Every `FROM` occurrence is scanned independently — the cursor only
      # advances past the keyword, not past the list — so FROMs nested in
      # subqueries, CTEs, and set operations are all covered.
      def self.collect_from_identifiers(sql, results)
        cursor = 0
        while (from = FROM_KEYWORD.match(sql, cursor))
          cursor = from.end(0)
          from_list_chunks(sql, cursor).each do |chunk|
            ident = lead_identifier(chunk)
            results << ident if ident
          end
        end
      end
      private_class_method :collect_from_identifiers

      # @api private
      # Splits the FROM list starting at +start+ into its comma-separated
      # entries. Parenthesis depth is tracked so that commas, keywords, and
      # closing parens inside a subquery or function call neither split nor
      # end the list. The list ends at a `)` that closes an enclosing paren,
      # at a `;`, at a depth-0 terminator keyword, or at end of input.
      # JOIN keywords do not end the list, so a comma-joined table written
      # after an explicit join is still returned as its own entry.
      def self.from_list_chunks(sql, start) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
        chunks = []
        depth = 0
        chunk_start = start
        cursor = start
        stop = sql.length

        while (token = FROM_LIST_TOKEN.match(sql, cursor))
          text = token[0]
          cursor = token.end(0)
          case text
          when '('
            depth += 1
          when ')'
            if depth.zero?
              stop = token.begin(0)
              break
            end
            depth -= 1
          when ','
            next unless depth.zero?

            chunks << sql[chunk_start...token.begin(0)]
            chunk_start = cursor
          when ';'
            stop = token.begin(0)
            break
          else
            # Quoted identifiers are opaque; keywords only count at depth 0.
            next if text.start_with?('`', '"') || depth.positive?

            stop = token.begin(0)
            break
          end
        end

        chunks << sql[chunk_start...stop]
      end
      private_class_method :from_list_chunks

      # @api private
      # Extract the table identifier at the start of a FROM-list chunk,
      # joining a schema prefix to the table when both are present. The
      # PostgreSQL `ONLY` inheritance keyword is stripped first so it does
      # not hide the table. Returns nil when the chunk does not start with
      # an identifier (e.g. a parenthesised subquery).
      def self.lead_identifier(chunk)
        stripped = chunk.to_s.strip.sub(ONLY_PREFIX, '')
        return nil if stripped.empty?

        match = LEAD_IDENT.match(stripped)
        return nil unless match

        qualified_identifier(match)
      end
      private_class_method :lead_identifier

      # @api private
      # Combine a schema prefix with the table identifier captured by
      # JOIN_REFERENCE / LEAD_IDENT into a single `schema.table` string.
      def self.qualified_identifier(match)
        table = match[:backtick] || match[:double] || match[:bare]
        schema = match.named_captures.values_at(
          'schema_bt', 'schema_dq', 'schema_bare',
          'jschema_bt', 'jschema_dq', 'jschema_bare'
        ).compact.first
        schema ? "#{schema}.#{table}" : table
      end
      private_class_method :qualified_identifier
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'woods/console/sql_noise_stripper'
4
+
3
5
  # @see Woods
4
6
  module Woods
5
7
  class Error < StandardError; end unless defined?(Woods::Error)
@@ -17,18 +19,44 @@ module Woods
17
19
  #
18
20
  # @example
19
21
  # validator = SqlValidator.new
20
- # validator.validate!('SELECT * FROM users') # => true
21
- # validator.validate!('DELETE FROM users') # => raises SqlValidationError
22
+ # validator.validate!('SELECT * FROM users') # passes
23
+ # validator.validate!('DELETE FROM users') # raises SqlValidationError
22
24
  # validator.valid?('SELECT 1') # => true
23
25
  #
24
26
  class SqlValidator
25
27
  # Forbidden statement prefixes (case-insensitive).
28
+ #
29
+ # Expanded beyond DML/DDL to cover:
30
+ # - PG procedural (`DO`, `CALL`) which can run arbitrary plpgsql.
31
+ # - Session-state mutation (`SET`, `RESET`) — `SET ROLE`, `SET search_path`
32
+ # can swap out the effective permission set for the rest of the session
33
+ # even under rollback.
34
+ # - Admin/cluster ops (`VACUUM`, `ANALYZE`, `CLUSTER`, `REINDEX`,
35
+ # `REFRESH`, `LOCK`) which are reads in the English-language sense
36
+ # but carry side effects or heavy locks.
37
+ # - Async signalling (`LISTEN`, `NOTIFY`).
38
+ # - Prepared-statement lifecycle (`PREPARE`, `EXECUTE`, `DEALLOCATE`).
39
+ # - Transaction control (`BEGIN`, `COMMIT`, `ROLLBACK`, `SAVEPOINT`,
40
+ # `RELEASE`, `START`) — SafeContext already owns the surrounding
41
+ # transaction; inner tx control would corrupt it.
42
+ # - File I/O vectors (`LOAD`, `HANDLER`, `COPY`).
26
43
  FORBIDDEN_KEYWORDS = %w[
27
44
  INSERT UPDATE DELETE DROP ALTER TRUNCATE CREATE GRANT REVOKE
45
+ DO CALL SET RESET LISTEN NOTIFY
46
+ VACUUM ANALYZE CLUSTER REINDEX REFRESH LOCK
47
+ PREPARE EXECUTE DEALLOCATE
48
+ BEGIN COMMIT ROLLBACK SAVEPOINT RELEASE START
49
+ LOAD HANDLER COPY
28
50
  ].freeze
29
51
 
30
52
  # Keywords that are forbidden anywhere in the SQL (not just at start).
31
- BODY_FORBIDDEN_KEYWORDS = %w[UNION INTO COPY].freeze
53
+ #
54
+ # UNION / INTERSECT / EXCEPT are SQL set operators — any of them can graft
55
+ # a second SELECT onto a validated one, which defeats the "single SELECT"
56
+ # posture even though TableGate still catches references to blocked tables.
57
+ # INTO / COPY are PostgreSQL write vectors that must not appear in read
58
+ # contexts.
59
+ BODY_FORBIDDEN_KEYWORDS = %w[UNION INTERSECT EXCEPT INTO COPY].freeze
32
60
 
33
61
  # Dangerous functions that can be used for DoS or file access.
34
62
  DANGEROUS_FUNCTIONS = %w[
@@ -37,11 +65,44 @@ module Woods
37
65
  ].freeze
38
66
 
39
67
  # Allowed statement prefixes (case-insensitive).
40
- ALLOWED_PREFIXES = /\A\s*(SELECT|WITH|EXPLAIN)\b/i
68
#
# `EXPLAIN ANALYZE` actually executes the planned query on PostgreSQL
# (and the MySQL 8.0+ `EXPLAIN ANALYZE` does the same), so the `ANALYZE`
# variant is rejected. PostgreSQL also accepts the `ANALYSE` spelling,
# which is rejected the same way. Two negative lookaheads follow `EXPLAIN`:
# - `(?!\s+ANALY[SZ]E)` rejects the plain form `EXPLAIN ANALYZE SELECT …`.
# - `(?!\s*\([^)]*\bANALY[SZ]E\b)` rejects the option-list form
#   `EXPLAIN (ANALYZE, FORMAT JSON) SELECT …`, wherever `ANALYZE` sits
#   inside the parentheses.
# `EXPLAIN (…)` without `ANALYZE` is still permitted
# (e.g. `EXPLAIN (FORMAT JSON) SELECT 1`).
#
# NOTE(review): a comment between `EXPLAIN` and `ANALYSE` defeats these
# lookaheads unless comments are stripped before this check — confirm.
ALLOWED_PREFIXES = /\A\s*(SELECT|WITH|EXPLAIN(?!\s+ANALY[SZ]E)(?!\s*\([^)]*\bANALY[SZ]E\b))\b/i
79
+
80
+ # Frozen map of forbidden keyword => regex matching the keyword at statement start.
81
+ # Used by {#check_forbidden_keywords!} and {#check_forbidden_keywords_in_body!}.
82
+ FORBIDDEN_PREFIX_REGEXES = FORBIDDEN_KEYWORDS.to_h do |kw|
83
+ [kw, /\A\s*#{kw}\b/i]
84
+ end.freeze
85
+
86
+ # Frozen map of forbidden body keyword => regex matching the keyword anywhere.
87
+ # Used by {#check_body_forbidden_keywords!}.
88
+ BODY_FORBIDDEN_REGEXES = BODY_FORBIDDEN_KEYWORDS.to_h do |kw|
89
+ [kw, /\b#{kw}\b/i]
90
+ end.freeze
91
+
92
+ # Frozen map of forbidden keyword => regex matching the keyword anywhere in the body.
93
+ # Used by {#check_forbidden_keywords_in_body!} for the whole-body scan.
94
+ FORBIDDEN_BODY_REGEXES = FORBIDDEN_KEYWORDS.to_h do |kw|
95
+ [kw, /\b#{kw}\b/i]
96
+ end.freeze
97
+
98
+ # Frozen map of dangerous function name => regex matching a call to that function.
99
+ # Used by {#check_dangerous_functions!}.
100
+ DANGEROUS_FUNCTION_REGEXES = DANGEROUS_FUNCTIONS.to_h do |func|
101
+ [func, /\b#{func}\s*\(/i]
102
+ end.freeze
41
103
 
42
- # @return [true]
43
104
  # @raise [SqlValidationError] if the SQL is not a safe read-only statement
44
- def validate!(sql) # rubocop:disable Naming/PredicateMethod
105
+ def validate!(sql)
45
106
  raise SqlValidationError, 'SQL is empty' if sql.nil? || sql.strip.empty?
46
107
 
47
108
  normalized = sql.strip
@@ -67,11 +128,9 @@ module Woods
67
128
  check_forbidden_keywords_in_body!(normalized)
68
129
 
69
130
  # Must start with an allowed prefix
70
- unless normalized.match?(ALLOWED_PREFIXES)
71
- raise SqlValidationError, 'Rejected: SQL must start with SELECT, WITH, or EXPLAIN'
72
- end
131
+ return if normalized.match?(ALLOWED_PREFIXES)
73
132
 
74
- true
133
+ raise SqlValidationError, 'Rejected: SQL must start with SELECT, WITH, or EXPLAIN'
75
134
  end
76
135
 
77
136
  # Check if SQL is valid without raising.
@@ -93,11 +152,8 @@ module Woods
93
152
  # @param sql [String]
94
153
  # @return [Boolean]
95
154
  def contains_multiple_statements?(sql)
96
- # Strip SQL comments before checking
97
- stripped = sql.gsub(/--[^\n]*/, '') # line comments
98
- stripped = stripped.gsub(%r{/\*.*?\*/}m, '') # block comments
99
- # Strip single-quoted strings to avoid false positives
100
- stripped = stripped.gsub(/'[^']*'/, '')
155
+ stripped = SqlNoiseStripper.strip_comments(sql)
156
+ stripped = SqlNoiseStripper.strip_literals(stripped)
101
157
  stripped.include?(';')
102
158
  end
103
159
 
@@ -106,10 +162,8 @@ module Woods
106
162
  # @param sql [String]
107
163
  # @raise [SqlValidationError] if a forbidden keyword is found
108
164
  def check_forbidden_keywords!(sql)
109
- FORBIDDEN_KEYWORDS.each do |keyword|
110
- if sql.match?(/\A\s*#{keyword}\b/i)
111
- raise SqlValidationError, "Rejected: #{keyword} statements are not allowed"
112
- end
165
+ FORBIDDEN_PREFIX_REGEXES.each do |keyword, pattern|
166
+ raise SqlValidationError, "Rejected: #{keyword} statements are not allowed" if sql.match?(pattern)
113
167
  end
114
168
  end
115
169
 
@@ -118,8 +172,8 @@ module Woods
118
172
  # @param sql [String]
119
173
  # @raise [SqlValidationError] if a forbidden keyword is found
120
174
  def check_body_forbidden_keywords!(sql)
121
- BODY_FORBIDDEN_KEYWORDS.each do |keyword|
122
- raise SqlValidationError, "Rejected: #{keyword} is not allowed" if sql.match?(/\b#{keyword}\b/i)
175
+ BODY_FORBIDDEN_REGEXES.each do |keyword, pattern|
176
+ raise SqlValidationError, "Rejected: #{keyword} is not allowed" if sql.match?(pattern)
123
177
  end
124
178
  end
125
179
 
@@ -138,10 +192,8 @@ module Woods
138
192
  # @param sql [String]
139
193
  # @raise [SqlValidationError] if a dangerous function is found
140
194
  def check_dangerous_functions!(sql)
141
- DANGEROUS_FUNCTIONS.each do |func|
142
- if sql.match?(/\b#{func}\s*\(/i)
143
- raise SqlValidationError, "Rejected: dangerous function #{func} is not allowed"
144
- end
195
+ DANGEROUS_FUNCTION_REGEXES.each do |func, pattern|
196
+ raise SqlValidationError, "Rejected: dangerous function #{func} is not allowed" if sql.match?(pattern)
145
197
  end
146
198
  end
147
199
 
@@ -151,17 +203,15 @@ module Woods
151
203
  # @param sql [String]
152
204
  # @raise [SqlValidationError] if a forbidden keyword is found
153
205
  def check_forbidden_keywords_in_body!(sql)
154
- # Strip comments to reveal hidden statements
155
- stripped = sql.gsub(/--[^\n]*/, '') # line comments
156
- stripped = stripped.gsub(%r{/\*.*?\*/}m, '') # block comments
206
+ stripped = SqlNoiseStripper.strip_comments(sql)
157
207
 
158
208
  # Check if any forbidden keyword appears anywhere (not just at start)
159
- FORBIDDEN_KEYWORDS.each do |keyword|
209
+ FORBIDDEN_BODY_REGEXES.each do |keyword, body_pattern|
160
210
  # Look for keyword as a whole word anywhere in the stripped SQL
161
- next unless stripped.match?(/\b#{keyword}\b/i)
211
+ next unless stripped.match?(body_pattern)
162
212
 
163
213
  # Make sure it's not at the very start (already checked)
164
- unless stripped.match?(/\A\s*#{keyword}\b/i)
214
+ unless stripped.match?(FORBIDDEN_PREFIX_REGEXES[keyword])
165
215
  raise SqlValidationError,
166
216
  "Rejected: #{keyword} statements are not allowed (found in SQL body)"
167
217
  end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require 'woods/console/sql_table_scanner'
5
+
6
+ # @see Woods
7
module Woods
  class Error < StandardError; end unless defined?(Woods::Error)

  module Console
    # Raised when a request references a table listed in the blocked set.
    class TableGateError < Woods::Error; end

    # Layer 1 of the Console defense-in-depth stack: rejects requests touching
    # blocked tables. SQL parsing is delegated to {SqlTableScanner}; this class
    # handles permission enforcement only. Raises {TableGateError} on violations.
    #
    # Every `check_*` method is a no-op when no tables are blocked, returns
    # nil when the reference is allowed, and raises when it is blocked.
    class TableGate
      # @param blocked_tables [Array<String>] case-insensitive; surrounding
      #   whitespace is ignored and blank entries are skipped. Bare names
      #   match every schema; dotted names match that qualified table.
      # @param model_tables [Hash{String=>String}] model => table.
      # @param model_reflections [Hash{String=>Hash{String=>String}}] model => assoc => table.
      def initialize(blocked_tables:, model_tables:, model_reflections: {})
        @blocked_bare = Set.new
        @blocked_qualified = Set.new
        Array(blocked_tables).each do |entry|
          name = entry.to_s.strip.downcase
          next if name.empty?

          (name.include?('.') ? @blocked_qualified : @blocked_bare) << name
        end
        @model_tables = model_tables || {}
        @model_reflections = model_reflections || {}
      end

      # @return [Boolean] true when at least one table is blocked
      def active? = !(@blocked_bare.empty? && @blocked_qualified.empty?)

      # Rejects SQL that references a blocked table.
      #
      # @param sql [String, nil] raw SQL; nil or empty is ignored
      # @raise [TableGateError] on the first blocked identifier found
      def check_sql!(sql)
        return unless active? && sql&.length&.positive?

        SqlTableScanner.identifiers_in(sql).each do |raw|
          raise TableGateError, reject_message(raw) if blocked?(raw)
        end
      end

      # Rejects a model whose table is blocked. Models missing from the
      # model => table map are allowed.
      #
      # @param model_name [#to_s]
      # @raise [TableGateError] if the model's table is blocked
      def check_model!(model_name)
        return unless active?

        table = @model_tables[model_name.to_s]
        check_table!(table) unless table.nil?
      end

      # Rejects a blocked table name (bare or schema-qualified).
      #
      # @param table_name [#to_s, nil] nil or empty is ignored
      # @raise [TableGateError] if the table is blocked
      def check_table!(table_name)
        return unless active?
        return if table_name.nil? || table_name.to_s.empty?
        raise TableGateError, reject_message(table_name) if blocked?(table_name)
      end

      # Rejects a join list that reaches a blocked table through one of the
      # model's associations. Models or associations missing from the
      # reflection map are allowed.
      #
      # NOTE(review): each entry is looked up by its `to_s`, so nested join
      # specs (e.g. a Hash) will not resolve to a table — confirm callers
      # only pass flat association names.
      #
      # @param model_name [#to_s]
      # @param joins [Array<#to_s>, #to_s, nil] association name(s)
      # @raise [TableGateError] if any joined association's table is blocked
      def check_joins!(model_name, joins) # rubocop:disable Metrics/CyclomaticComplexity
        return unless active? && joins && Array(joins).any?

        reflections = @model_reflections[model_name.to_s]
        return unless reflections

        Array(joins).each do |join|
          table = reflections[join.to_s]
          raise TableGateError, reject_message(table) if table && blocked?(table)
        end
      end

      # Rejects a single association whose target table is blocked. Models
      # or associations missing from the reflection map are allowed.
      #
      # @param model_name [#to_s]
      # @param association [#to_s, nil]
      # @raise [TableGateError] if the association's table is blocked
      def check_association!(model_name, association)
        return unless active? && association

        reflections = @model_reflections[model_name.to_s]
        return unless reflections

        table = reflections[association.to_s]
        raise TableGateError, reject_message(table) if table && blocked?(table)
      end

      private

      # A reference is blocked when its last segment is a blocked bare name,
      # or when the whole reference — or any trailing `schema.table` run of a
      # longer dotted reference such as `db.schema.table` — is a blocked
      # qualified name.
      def blocked?(raw)
        name = raw.to_s.downcase
        return true if @blocked_qualified.include?(name)

        parts = name.split('.')
        return true if @blocked_bare.include?(parts.last.to_s)

        (2...parts.length).any? { |count| @blocked_qualified.include?(parts.last(count).join('.')) }
      end

      def reject_message(name)
        "Rejected: table '#{name}' is on console_blocked_tables. " \
          'This tool is gated in Console MCP configuration.'
      end
    end
  end
end