woods 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +169 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +15 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +3 -4
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +737 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +1 -1
- data/lib/woods/unblocked/document_builder.rb +35 -10
- data/lib/woods/unblocked/exporter.rb +1 -1
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +126 -6
- metadata +69 -4
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# @see Woods
|
|
4
|
+
module Woods
|
|
5
|
+
module Console
|
|
6
|
+
# Strips SQL comments and string literals from a SQL string so that
# downstream checks (keyword scanning, table scanning) are not confused
# by content embedded inside comments or literals.
#
# This is a shared utility used by {SqlValidator} and {TableGate} to
# avoid duplicating comment- and literal-stripping logic. All methods
# are module-level and stateless — pass a SQL string in, receive a
# stripped string out.
#
# Both operations run the same single-pass lexer, so the leftmost token
# always wins. A comment marker inside a quoted region is not a comment,
# an apostrophe inside a comment or a quoted identifier does not open a
# literal, and a `$$` inside a single-quoted string does not open a
# dollar-quoted string.
#
# Known gaps (not modelled): MySQL `#` comments, MySQL's rule that `--`
# must be followed by whitespace, and MySQL executable comments
# (`/*! ... */`). Callers that cannot know the backend should not rely on
# a single dialect's interpretation.
#
# @example Strip comments only
#   SqlNoiseStripper.strip_comments("SELECT 1 -- pick one\nFROM t")
#   # => "SELECT 1 \nFROM t"
#
# @example Comment markers inside a literal are preserved
#   SqlNoiseStripper.strip_comments("SELECT '--' FROM t")
#   # => "SELECT '--' FROM t"
#
# @example Strip literals (PostgreSQL dialect)
#   SqlNoiseStripper.strip_literals("SELECT 'it''s ok' FROM t")
#   # => "SELECT '' FROM t"
#
# @example Strip literals (MySQL dialect — backslash escapes)
#   SqlNoiseStripper.strip_literals("SELECT 'it\\'s ok' FROM t", dialect: :mysql)
#   # => "SELECT '' FROM t"
#
module SqlNoiseStripper
  # `-- ...` up to (but not including) the newline.
  LINE_COMMENT = /--[^\n]*/

  # `/* ... */`, ending at the FIRST `*/`. PostgreSQL nests block comments
  # and MySQL does not; stopping at the first terminator leaves the tail of
  # a nested PostgreSQL comment in the output rather than removing it.
  BLOCK_COMMENT = %r{/\*.*?\*/}m

  # `$$...$$` / `$tag$...$tag$`. The lookbehind keeps a `$` that is part of
  # an identifier (`a$$`) from being read as an opening delimiter.
  DOLLAR_QUOTED = /(?<![\w$])\$(?<tag>\w*)\$.*?\$\k<tag>\$/m

  # PostgreSQL strings: `''` is the only escape, except in `E'...'` escape
  # strings where backslash escapes are active.
  SINGLE_QUOTED_POSTGRES = /(?<![\w$])[Ee]'(?:\\.|''|[^'])*'|'(?:''|[^'])*'/m

  # MySQL strings: both `\'` and `''` escape an apostrophe.
  SINGLE_QUOTED_MYSQL = /'(?:\\.|''|[^'])*'/m

  # Double-quoted and backtick-quoted regions are never replaced (they may
  # be identifiers the table scanner needs); they are recognised only so
  # that their content cannot open a comment or a literal.
  DOUBLE_QUOTED_POSTGRES = /"(?:""|[^"])*"/m
  DOUBLE_QUOTED_MYSQL = /"(?:\\.|""|[^"])*"/m
  BACKTICK_QUOTED = /`(?:``|[^`])*`/m
  private_constant :DOUBLE_QUOTED_POSTGRES, :DOUBLE_QUOTED_MYSQL, :BACKTICK_QUOTED

  SUPPORTED_DIALECTS = %i[postgres mysql].freeze
  private_constant :SUPPORTED_DIALECTS

  # One tokenizer per dialect. Named group `literal` marks string literals,
  # `comment` marks comments; quoted identifiers match without a name.
  LEXERS = {
    postgres: /
      (?<literal> #{DOLLAR_QUOTED} | #{SINGLE_QUOTED_POSTGRES} )
      | (?<comment> #{LINE_COMMENT} | #{BLOCK_COMMENT} )
      | #{DOUBLE_QUOTED_POSTGRES}
      | #{BACKTICK_QUOTED}
    /x,
    mysql: /
      (?<literal> #{DOLLAR_QUOTED} | #{SINGLE_QUOTED_MYSQL} )
      | (?<comment> #{LINE_COMMENT} | #{BLOCK_COMMENT} )
      | #{DOUBLE_QUOTED_MYSQL}
      | #{BACKTICK_QUOTED}
    /x
  }.freeze
  private_constant :LEXERS

  # Strips SQL line comments (`-- ...`) and block comments (`/* ... */`)
  # that appear outside quoted regions. Line comments are stripped to (but
  # not including) the newline so that newline-separated statement
  # structure is preserved for callers that check for multiple statements.
  # String literals and quoted identifiers are copied through unchanged.
  #
  # @param sql [String] the SQL string to process
  # @param dialect [Symbol] `:postgres` (default) or `:mysql`; selects the
  #   quote-escape rules used to find where quoted regions end
  # @return [String] a new string with all SQL comments removed
  # @raise [ArgumentError] if an unsupported dialect is provided
  def self.strip_comments(sql, dialect: :postgres)
    rewrite(sql, dialect) { |token| token[:comment] ? '' : token[0] }
  end

  # Replaces single-quoted and dollar-quoted string literals with an empty
  # `''` placeholder so that the structure of the SQL is maintained for
  # subsequent checks. Comments and quoted identifiers are left in place,
  # but apostrophes inside them no longer open a literal.
  #
  # @param sql [String] the SQL string to process
  # @param dialect [Symbol] `:postgres` (default) or `:mysql`.
  #   - `:postgres` — `''` escapes an apostrophe; backslash is literal
  #     except inside `E'...'` escape strings.
  #   - `:mysql` — both `\'` and `''` escape an apostrophe, in single- and
  #     double-quoted strings.
  #   Dollar-quoted strings are replaced under both dialects (MySQL has no
  #   such syntax; this is kept for backward compatibility).
  # @return [String] a new string with all string literals replaced by `''`
  # @raise [ArgumentError] if an unsupported dialect is provided
  def self.strip_literals(sql, dialect: :postgres)
    rewrite(sql, dialect) { |token| token[:literal] ? "''" : token[0] }
  end

  # @api private
  # Runs the dialect's lexer over +sql+ and substitutes each token with the
  # block's return value; text between tokens is copied unchanged.
  def self.rewrite(sql, dialect)
    lexer = LEXERS.fetch(dialect) do
      raise ArgumentError, "Unknown dialect #{dialect.inspect}. Supported: #{SUPPORTED_DIALECTS.inspect}"
    end
    sql.gsub(lexer) { yield Regexp.last_match }
  end
  private_class_method :rewrite
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'woods/console/sql_noise_stripper'
|
|
4
|
+
|
|
5
|
+
# @see Woods
|
|
6
|
+
module Woods
|
|
7
|
+
module Console
|
|
8
|
+
# Extracts table and schema-qualified identifiers from a SQL string.
#
# Handles both JOIN-style and ANSI-89 comma-join syntax across MySQL and
# PostgreSQL quoting styles (`backtick`, `"double"`, bare), plus the
# `TABLE name` shorthand. Schema-qualified identifiers (`schema.table`,
# `"schema"."table"`, `` `db`.`table` ``) are returned as `schema.table`
# strings so callers can compare against either the bare or qualified form.
#
# Noise (comments, string literals, dollar-quoted bodies) is stripped via
# {SqlNoiseStripper} before scanning so that identifiers embedded in literal
# content are never surfaced. Because the scanner does not know which
# backend will run the SQL, literals are stripped under every supported
# dialect and the identifiers found under each interpretation are combined.
#
# All methods are module-level and stateless — pass a SQL string in, receive
# an array of identifier strings out.
#
# @example JOIN targets are collected before FROM targets
#   SqlTableScanner.identifiers_in('SELECT * FROM users JOIN orders ON ...')
#   # => ["orders", "users"]
#
#   SqlTableScanner.identifiers_in('SELECT * FROM "audit"."events"')
#   # => ["audit.events"]
#
module SqlTableScanner # rubocop:disable Metrics/ModuleLength
  # Matches a JOIN token followed by its target identifier. The identifier
  # may be schema-qualified in any quoting style — `"schema"."table"`,
  # `` `db`.`table` ``, bare `schema.table`, or the mixed
  # `schema."table"` / `` schema.`table` `` forms — and the optional
  # schema prefix is captured separately so callers can compare against
  # either the bare or qualified configured form. An optional `ONLY`
  # keyword (PostgreSQL inheritance opt-out) is consumed before the
  # identifier so it does not hide the table name. ANSI-89 comma joins
  # are handled separately — see FROM_CLAUSE.
  JOIN_REFERENCE = /
    \b(?:STRAIGHT_)?JOIN\s+
    (?:ONLY\s+)?
    (?:
      (?:
        `(?<jschema_bt>[^`]+)` |
        "(?<jschema_dq>[^"]+)" |
        (?<jschema_bare>\w+)
      )
      \.
    )?
    (?:
      `(?<backtick>[^`]+)` |
      "(?<double>[^"]+)" |
      (?<bare>\w+(?:\.\w+)?)
    )
  /xi

  # Matches a FROM clause and captures its body up to the next clause
  # terminator. The body may be a single table or a comma-joined list.
  #
  # An inner `FROM` is also a terminator. Without it, a FROM-clause
  # subquery like `FROM (SELECT * FROM blocked) AS a` would be swallowed by
  # the outer clause's `.+?` match, and the inner `FROM blocked` would never
  # be re-scanned because `.scan` advances past consumed input. Treating
  # every `FROM` as its own independent scan match is what keeps CTEs,
  # UNIONs, and nested subqueries in coverage.
  FROM_CLAUSE = /
    \bFROM\s+
    (?<clause>.+?)
    (?=
      \b(?:WHERE|GROUP|HAVING|ORDER|LIMIT|OFFSET|UNION|INTERSECT|EXCEPT|
      STRAIGHT_JOIN|JOIN|INNER|OUTER|LEFT|RIGHT|FULL|CROSS|FROM)\b
      | [;)]
      | \z
    )
  /xim

  # Matches a leading table identifier at the start of a FROM-list chunk.
  # The identifier may carry an optional schema prefix in any quoting
  # style — `"schema"."table"`, `` `db`.`table` ``, or the mixed
  # `schema."table"` / `` schema.`table` `` form — captured separately so
  # callers can match against bare or qualified configured forms.
  LEAD_IDENT = /
    \A
    (?:
      (?:
        `(?<schema_bt>[^`]+)` |
        "(?<schema_dq>[^"]+)" |
        (?<schema_bare>\w+)
      )
      \.
    )?
    (?:
      `(?<backtick>[^`]+)` |
      "(?<double>[^"]+)" |
      (?<bare>\w+(?:\.\w+)?)
    )
  /xi

  # PostgreSQL ONLY keyword that appears between FROM and the table
  # identifier. Strip it so the lead-identifier regex sees the table
  # directly. Anchored with `\A` because callers strip leading whitespace
  # first via #strip.
  ONLY_PREFIX = /\AONLY\s+/i

  # `TABLE name` is shorthand for `SELECT * FROM name` and may appear inside
  # CTEs and parenthesised sub-selects, where no FROM or JOIN keyword
  # precedes the table. The identifier is parsed from the text that follows.
  TABLE_COMMAND = /\bTABLE\s+/i

  # Dialects under which literals are stripped. `:mysql` comes first so the
  # common case (both interpretations agree) yields the same single pass as
  # a mysql-only scan.
  LITERAL_DIALECTS = %i[mysql postgres].freeze
  private_constant :LITERAL_DIALECTS

  # Returns every table/schema-qualified identifier referenced in the SQL
  # string. Noise (comments, string literals, dollar-quoted bodies) is
  # stripped before scanning. JOIN-style, ANSI-89 comma-join and
  # `TABLE name` references are all handled.
  #
  # The two dialects disagree on whether a backslash escapes a quote, so a
  # string such as `'\'` ends at different places under each. Scanning only
  # one interpretation lets the other backend's live SQL hide inside what
  # looks like a literal; the result is therefore the concatenation of the
  # identifiers found under each distinct interpretation.
  #
  # @param sql [String, nil] the SQL string to scan
  # @return [Array<String>] identifiers grouped per interpretation as JOIN
  #   targets, then FROM targets, then TABLE targets; may contain
  #   duplicates if the same table is referenced multiple times or is
  #   visible under more than one interpretation
  def self.identifiers_in(sql)
    return [] if sql.nil? || sql.empty?

    noise_free_variants(sql).each_with_object([]) do |stripped, results|
      collect_join_identifiers(stripped, results)
      collect_from_identifiers(stripped, results)
      collect_table_command_identifiers(stripped, results)
    end
  end

  # @api private
  # The distinct noise-stripped forms of +sql+, one per literal dialect.
  def self.noise_free_variants(sql)
    LITERAL_DIALECTS.map { |dialect| strip_noise(sql, dialect: dialect) }.uniq
  end
  private_class_method :noise_free_variants

  # @api private
  def self.strip_noise(sql, dialect: :mysql)
    out = SqlNoiseStripper.strip_comments(sql)
    SqlNoiseStripper.strip_literals(out, dialect: dialect)
  end
  private_class_method :strip_noise

  # @api private
  def self.collect_join_identifiers(sql, results)
    sql.scan(JOIN_REFERENCE) do
      match = Regexp.last_match
      results << qualified_identifier(match)
    end
  end
  private_class_method :collect_join_identifiers

  # @api private
  def self.collect_from_identifiers(sql, results)
    sql.scan(FROM_CLAUSE) do
      clause = Regexp.last_match[:clause]
      split_top_level_commas(clause).each do |chunk|
        ident = lead_identifier(chunk)
        results << ident if ident
      end
    end
  end
  private_class_method :collect_from_identifiers

  # @api private
  # Collect the identifier that follows each `TABLE` keyword.
  def self.collect_table_command_identifiers(sql, results)
    sql.scan(TABLE_COMMAND) do
      ident = lead_identifier(Regexp.last_match.post_match)
      results << ident if ident
    end
  end
  private_class_method :collect_table_command_identifiers

  # @api private
  # Split a comma-separated list at depth 0, skipping commas inside parens.
  def self.split_top_level_commas(clause) # rubocop:disable Metrics/MethodLength
    depth = 0
    buf = +''
    parts = []
    clause.each_char do |ch|
      case ch
      when '('
        depth += 1
        buf << ch
      when ')'
        depth -= 1
        buf << ch
      when ','
        if depth.zero?
          parts << buf
          buf = +''
        else
          buf << ch
        end
      else
        buf << ch
      end
    end
    parts << buf unless buf.strip.empty?
    parts
  end
  private_class_method :split_top_level_commas

  # @api private
  # Extract the table identifier at the start of a FROM-list chunk,
  # joining a schema prefix to the table when both are present. The
  # PostgreSQL `ONLY` inheritance keyword is stripped first so it does
  # not hide the table.
  def self.lead_identifier(chunk)
    stripped = chunk.to_s.strip.sub(ONLY_PREFIX, '')
    return nil if stripped.empty?

    match = LEAD_IDENT.match(stripped)
    return nil unless match

    qualified_identifier(match)
  end
  private_class_method :lead_identifier

  # @api private
  # Combine a schema prefix with the table identifier captured by
  # JOIN_REFERENCE / LEAD_IDENT into a single `schema.table` string.
  def self.qualified_identifier(match)
    table = match[:backtick] || match[:double] || match[:bare]
    # Each regex defines only its own schema groups; values_at yields nil
    # for names the match does not have, so one lookup serves both.
    schema = match.named_captures.values_at(
      'schema_bt', 'schema_dq', 'schema_bare',
      'jschema_bt', 'jschema_dq', 'jschema_bare'
    ).compact.first
    schema ? "#{schema}.#{table}" : table
  end
  private_class_method :qualified_identifier
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'woods/console/sql_noise_stripper'
|
|
4
|
+
|
|
3
5
|
# @see Woods
|
|
4
6
|
module Woods
|
|
5
7
|
class Error < StandardError; end unless defined?(Woods::Error)
|
|
@@ -17,18 +19,44 @@ module Woods
|
|
|
17
19
|
#
|
|
18
20
|
# @example
|
|
19
21
|
# validator = SqlValidator.new
|
|
20
|
-
# validator.validate!('SELECT * FROM users') #
|
|
21
|
-
# validator.validate!('DELETE FROM users') #
|
|
22
|
+
# validator.validate!('SELECT * FROM users') # passes
|
|
23
|
+
# validator.validate!('DELETE FROM users') # raises SqlValidationError
|
|
22
24
|
# validator.valid?('SELECT 1') # => true
|
|
23
25
|
#
|
|
24
26
|
class SqlValidator
|
|
25
27
|
# Forbidden statement prefixes (case-insensitive).
|
|
28
|
+
#
|
|
29
|
+
# Expanded beyond DML/DDL to cover:
|
|
30
|
+
# - PG procedural (`DO`, `CALL`) which can run arbitrary plpgsql.
|
|
31
|
+
# - Session-state mutation (`SET`, `RESET`) — `SET ROLE`, `SET search_path`
|
|
32
|
+
# can swap out the effective permission set for the rest of the session
|
|
33
|
+
# even under rollback.
|
|
34
|
+
# - Admin/cluster ops (`VACUUM`, `ANALYZE`, `CLUSTER`, `REINDEX`,
|
|
35
|
+
# `REFRESH`, `LOCK`) which are reads in the English-language sense
|
|
36
|
+
# but carry side effects or heavy locks.
|
|
37
|
+
# - Async signalling (`LISTEN`, `NOTIFY`).
|
|
38
|
+
# - Prepared-statement lifecycle (`PREPARE`, `EXECUTE`, `DEALLOCATE`).
|
|
39
|
+
# - Transaction control (`BEGIN`, `COMMIT`, `ROLLBACK`, `SAVEPOINT`,
|
|
40
|
+
# `RELEASE`, `START`) — SafeContext already owns the surrounding
|
|
41
|
+
# transaction; inner tx control would corrupt it.
|
|
42
|
+
# - File I/O vectors (`LOAD`, `HANDLER`, `COPY`).
|
|
26
43
|
FORBIDDEN_KEYWORDS = %w[
|
|
27
44
|
INSERT UPDATE DELETE DROP ALTER TRUNCATE CREATE GRANT REVOKE
|
|
45
|
+
DO CALL SET RESET LISTEN NOTIFY
|
|
46
|
+
VACUUM ANALYZE CLUSTER REINDEX REFRESH LOCK
|
|
47
|
+
PREPARE EXECUTE DEALLOCATE
|
|
48
|
+
BEGIN COMMIT ROLLBACK SAVEPOINT RELEASE START
|
|
49
|
+
LOAD HANDLER COPY
|
|
28
50
|
].freeze
|
|
29
51
|
|
|
30
52
|
# Keywords that are forbidden anywhere in the SQL (not just at start).
|
|
31
|
-
|
|
53
|
+
#
|
|
54
|
+
# UNION / INTERSECT / EXCEPT are SQL set operators — any of them can graft
|
|
55
|
+
# a second SELECT onto a validated one, which defeats the "single SELECT"
|
|
56
|
+
# posture even though TableGate still catches references to blocked tables.
|
|
57
|
+
# INTO / COPY are PostgreSQL write vectors that must not appear in read
|
|
58
|
+
# contexts.
|
|
59
|
+
BODY_FORBIDDEN_KEYWORDS = %w[UNION INTERSECT EXCEPT INTO COPY].freeze
|
|
32
60
|
|
|
33
61
|
# Dangerous functions that can be used for DoS or file access.
|
|
34
62
|
DANGEROUS_FUNCTIONS = %w[
|
|
@@ -37,11 +65,44 @@ module Woods
|
|
|
37
65
|
].freeze
|
|
38
66
|
|
|
39
67
|
# Allowed statement prefixes (case-insensitive).
|
|
40
|
-
|
|
68
|
+
#
|
|
69
|
+
# `EXPLAIN ANALYZE` actually executes the planned query on PostgreSQL
|
|
70
|
+
# (and the MySQL 8.0+ `EXPLAIN ANALYZE` does the same) — explicitly
|
|
71
|
+
# reject the `ANALYZE` variant. PostgreSQL also accepts an option-list
|
|
72
|
+
# form `EXPLAIN (ANALYZE, FORMAT JSON) SELECT …` where `ANALYZE` follows
|
|
73
|
+
# `(` rather than whitespace; the `(?!\s+ANALYZE)(?!\s*\([^)]*\bANALYZE\b)` lookahead pair
|
|
74
|
+
# rejects both spellings so SafeContext doesn't silently trust
|
|
75
|
+
# "we're just planning, not running" for what is a side-effectful
|
|
76
|
+
# execution. `EXPLAIN (…)` without `ANALYZE` is still permitted
|
|
77
|
+
# (e.g. `EXPLAIN (FORMAT JSON) SELECT 1`).
|
|
78
|
+
ALLOWED_PREFIXES = /\A\s*(SELECT|WITH|EXPLAIN(?!\s+ANALYZE)(?!\s*\([^)]*\bANALYZE\b))\b/i
|
|
79
|
+
|
|
80
|
+
# Frozen map of forbidden keyword => regex matching the keyword at statement start.
|
|
81
|
+
# Used by {#check_forbidden_keywords!} and {#check_forbidden_keywords_in_body!}.
|
|
82
|
+
FORBIDDEN_PREFIX_REGEXES = FORBIDDEN_KEYWORDS.to_h do |kw|
|
|
83
|
+
[kw, /\A\s*#{kw}\b/i]
|
|
84
|
+
end.freeze
|
|
85
|
+
|
|
86
|
+
# Frozen map of forbidden body keyword => regex matching the keyword anywhere.
|
|
87
|
+
# Used by {#check_body_forbidden_keywords!}.
|
|
88
|
+
BODY_FORBIDDEN_REGEXES = BODY_FORBIDDEN_KEYWORDS.to_h do |kw|
|
|
89
|
+
[kw, /\b#{kw}\b/i]
|
|
90
|
+
end.freeze
|
|
91
|
+
|
|
92
|
+
# Frozen map of forbidden keyword => regex matching the keyword anywhere in the body.
|
|
93
|
+
# Used by {#check_forbidden_keywords_in_body!} for the whole-body scan.
|
|
94
|
+
FORBIDDEN_BODY_REGEXES = FORBIDDEN_KEYWORDS.to_h do |kw|
|
|
95
|
+
[kw, /\b#{kw}\b/i]
|
|
96
|
+
end.freeze
|
|
97
|
+
|
|
98
|
+
# Frozen map of dangerous function name => regex matching a call to that function.
|
|
99
|
+
# Used by {#check_dangerous_functions!}.
|
|
100
|
+
DANGEROUS_FUNCTION_REGEXES = DANGEROUS_FUNCTIONS.to_h do |func|
|
|
101
|
+
[func, /\b#{func}\s*\(/i]
|
|
102
|
+
end.freeze
|
|
41
103
|
|
|
42
|
-
# @return [true]
|
|
43
104
|
# @raise [SqlValidationError] if the SQL is not a safe read-only statement
|
|
44
|
-
def validate!(sql)
|
|
105
|
+
def validate!(sql)
|
|
45
106
|
raise SqlValidationError, 'SQL is empty' if sql.nil? || sql.strip.empty?
|
|
46
107
|
|
|
47
108
|
normalized = sql.strip
|
|
@@ -67,11 +128,9 @@ module Woods
|
|
|
67
128
|
check_forbidden_keywords_in_body!(normalized)
|
|
68
129
|
|
|
69
130
|
# Must start with an allowed prefix
|
|
70
|
-
|
|
71
|
-
raise SqlValidationError, 'Rejected: SQL must start with SELECT, WITH, or EXPLAIN'
|
|
72
|
-
end
|
|
131
|
+
return if normalized.match?(ALLOWED_PREFIXES)
|
|
73
132
|
|
|
74
|
-
|
|
133
|
+
raise SqlValidationError, 'Rejected: SQL must start with SELECT, WITH, or EXPLAIN'
|
|
75
134
|
end
|
|
76
135
|
|
|
77
136
|
# Check if SQL is valid without raising.
|
|
@@ -93,11 +152,8 @@ module Woods
|
|
|
93
152
|
# @param sql [String]
|
|
94
153
|
# @return [Boolean]
|
|
95
154
|
def contains_multiple_statements?(sql)
|
|
96
|
-
|
|
97
|
-
stripped =
|
|
98
|
-
stripped = stripped.gsub(%r{/\*.*?\*/}m, '') # block comments
|
|
99
|
-
# Strip single-quoted strings to avoid false positives
|
|
100
|
-
stripped = stripped.gsub(/'[^']*'/, '')
|
|
155
|
+
stripped = SqlNoiseStripper.strip_comments(sql)
|
|
156
|
+
stripped = SqlNoiseStripper.strip_literals(stripped)
|
|
101
157
|
stripped.include?(';')
|
|
102
158
|
end
|
|
103
159
|
|
|
@@ -106,10 +162,8 @@ module Woods
|
|
|
106
162
|
# @param sql [String]
|
|
107
163
|
# @raise [SqlValidationError] if a forbidden keyword is found
|
|
108
164
|
def check_forbidden_keywords!(sql)
|
|
109
|
-
|
|
110
|
-
if sql.match?(
|
|
111
|
-
raise SqlValidationError, "Rejected: #{keyword} statements are not allowed"
|
|
112
|
-
end
|
|
165
|
+
FORBIDDEN_PREFIX_REGEXES.each do |keyword, pattern|
|
|
166
|
+
raise SqlValidationError, "Rejected: #{keyword} statements are not allowed" if sql.match?(pattern)
|
|
113
167
|
end
|
|
114
168
|
end
|
|
115
169
|
|
|
@@ -118,8 +172,8 @@ module Woods
|
|
|
118
172
|
# @param sql [String]
|
|
119
173
|
# @raise [SqlValidationError] if a forbidden keyword is found
|
|
120
174
|
def check_body_forbidden_keywords!(sql)
|
|
121
|
-
|
|
122
|
-
raise SqlValidationError, "Rejected: #{keyword} is not allowed" if sql.match?(
|
|
175
|
+
BODY_FORBIDDEN_REGEXES.each do |keyword, pattern|
|
|
176
|
+
raise SqlValidationError, "Rejected: #{keyword} is not allowed" if sql.match?(pattern)
|
|
123
177
|
end
|
|
124
178
|
end
|
|
125
179
|
|
|
@@ -138,10 +192,8 @@ module Woods
|
|
|
138
192
|
# @param sql [String]
|
|
139
193
|
# @raise [SqlValidationError] if a dangerous function is found
|
|
140
194
|
def check_dangerous_functions!(sql)
|
|
141
|
-
|
|
142
|
-
if sql.match?(
|
|
143
|
-
raise SqlValidationError, "Rejected: dangerous function #{func} is not allowed"
|
|
144
|
-
end
|
|
195
|
+
DANGEROUS_FUNCTION_REGEXES.each do |func, pattern|
|
|
196
|
+
raise SqlValidationError, "Rejected: dangerous function #{func} is not allowed" if sql.match?(pattern)
|
|
145
197
|
end
|
|
146
198
|
end
|
|
147
199
|
|
|
@@ -151,17 +203,15 @@ module Woods
|
|
|
151
203
|
# @param sql [String]
|
|
152
204
|
# @raise [SqlValidationError] if a forbidden keyword is found
|
|
153
205
|
def check_forbidden_keywords_in_body!(sql)
|
|
154
|
-
|
|
155
|
-
stripped = sql.gsub(/--[^\n]*/, '') # line comments
|
|
156
|
-
stripped = stripped.gsub(%r{/\*.*?\*/}m, '') # block comments
|
|
206
|
+
stripped = SqlNoiseStripper.strip_comments(sql)
|
|
157
207
|
|
|
158
208
|
# Check if any forbidden keyword appears anywhere (not just at start)
|
|
159
|
-
|
|
209
|
+
FORBIDDEN_BODY_REGEXES.each do |keyword, body_pattern|
|
|
160
210
|
# Look for keyword as a whole word anywhere in the stripped SQL
|
|
161
|
-
next unless stripped.match?(
|
|
211
|
+
next unless stripped.match?(body_pattern)
|
|
162
212
|
|
|
163
213
|
# Make sure it's not at the very start (already checked)
|
|
164
|
-
unless stripped.match?(
|
|
214
|
+
unless stripped.match?(FORBIDDEN_PREFIX_REGEXES[keyword])
|
|
165
215
|
raise SqlValidationError,
|
|
166
216
|
"Rejected: #{keyword} statements are not allowed (found in SQL body)"
|
|
167
217
|
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
require 'woods/console/sql_table_scanner'
|
|
5
|
+
|
|
6
|
+
# @see Woods
|
|
7
|
+
module Woods
|
|
8
|
+
class Error < StandardError; end unless defined?(Woods::Error)
|
|
9
|
+
|
|
10
|
+
module Console
|
|
11
|
+
class TableGateError < Woods::Error; end
|
|
12
|
+
|
|
13
|
+
# Layer 1 of the Console defense-in-depth stack: rejects requests touching
# blocked tables. SQL parsing is delegated to {SqlTableScanner}; this class
# handles permission enforcement only. Raises {TableGateError} on violations.
class TableGate
  # Splits the configured names into bare (`users`) and schema-qualified
  # (`audit.events`) sets. Names are trimmed and lower-cased; blank entries
  # are ignored.
  #
  # @param blocked_tables [Array<String>] case-insensitive; bare names match every schema.
  # @param model_tables [Hash{String=>String}] model => table.
  # @param model_reflections [Hash{String=>Hash{String=>String}}] model => assoc => table.
  def initialize(blocked_tables:, model_tables:, model_reflections: {})
    @blocked_bare = Set.new
    @blocked_qualified = Set.new
    Array(blocked_tables).each do |entry|
      name = normalize(entry)
      next if name.empty?

      (name.include?('.') ? @blocked_qualified : @blocked_bare) << name
    end
    @model_tables = model_tables || {}
    @model_reflections = model_reflections || {}
  end

  # @return [Boolean] true when at least one table is blocked
  def active? = !(@blocked_bare.empty? && @blocked_qualified.empty?)

  # Rejects SQL that references a blocked table.
  #
  # @param sql [String, nil]
  # @raise [TableGateError] on the first blocked identifier found
  def check_sql!(sql)
    return unless active? && sql&.length&.positive?

    SqlTableScanner.identifiers_in(sql).each do |raw|
      raise TableGateError, reject_message(raw) if blocked?(raw)
    end
  end

  # Rejects a model whose table is blocked. Models missing from the
  # model => table map pass.
  #
  # @param model_name [#to_s]
  # @raise [TableGateError]
  def check_model!(model_name)
    return unless active?

    table = @model_tables[model_name.to_s]
    check_table!(table) unless table.nil?
  end

  # Rejects a blocked table name (bare or schema-qualified).
  #
  # @param table_name [#to_s, nil]
  # @raise [TableGateError]
  def check_table!(table_name)
    return unless active?
    return if table_name.nil? || table_name.to_s.empty?
    raise TableGateError, reject_message(table_name) if blocked?(table_name)
  end

  # Rejects a join specification that reaches a blocked table.
  #
  # Accepts a single association name, an array of names, or nested
  # Hash/Array specs such as `{ orders: :payments }`. `Array(hash)` turns a
  # Hash into key/value pairs, so a flat `join.to_s` lookup never resolves
  # nested specs; they are walked explicitly here.
  # NOTE(review): whether callers actually pass nested specs is not visible
  # from this file — confirm against the tool that calls this.
  #
  # Names that cannot be resolved through the reflection map pass, as do
  # models with no reflection entry.
  #
  # @param model_name [#to_s] the model the joins start from
  # @param joins [Symbol, String, Array, Hash, nil]
  # @raise [TableGateError] on the first blocked table, in spec order
  def check_joins!(model_name, joins)
    return unless active? && joins

    join_tables(model_name, joins).each do |table|
      raise TableGateError, reject_message(table) if blocked?(table)
    end
  end

  # Rejects an association whose target table is blocked.
  #
  # @param model_name [#to_s]
  # @param association [#to_s, nil]
  # @raise [TableGateError]
  def check_association!(model_name, association)
    return unless active? && association

    reflections = @model_reflections[model_name.to_s]
    return unless reflections

    table = reflections[association.to_s]
    raise TableGateError, reject_message(table) if table && blocked?(table)
  end

  private

  # Tables reached by +spec+ starting from +model+, in spec order. For a
  # Hash entry the key is resolved on +model+ and the nested value on the
  # model that owns the key's table (first match in the model => table map).
  def join_tables(model, spec)
    reflections = @model_reflections[model.to_s]
    return [] unless reflections

    tables =
      case spec
      when Hash
        spec.flat_map do |association, nested|
          table = reflections[association.to_s]
          [table, *join_tables(@model_tables.key(table), nested)]
        end
      when Array then spec.flat_map { |entry| join_tables(model, entry) }
      when nil then []
      else [reflections[spec.to_s]]
      end
    tables.compact
  end

  # A qualified name must match exactly; a bare blocked name matches the
  # table in any schema.
  def blocked?(raw)
    name = normalize(raw)
    @blocked_qualified.include?(name) || @blocked_bare.include?(strip_schema(name))
  end

  def normalize(raw) = raw.to_s.strip.downcase

  def strip_schema(raw) = raw.to_s.split('.').last.to_s

  def reject_message(name)
    "Rejected: table '#{name}' is on console_blocked_tables. " \
      'This tool is gated in Console MCP configuration.'
  end
end
|
|
92
|
+
end
|
|
93
|
+
end
|