woods 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +186 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +69 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +210 -0
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +771 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +163 -0
- data/lib/woods/unblocked/document_builder.rb +326 -0
- data/lib/woods/unblocked/exporter.rb +201 -0
- data/lib/woods/unblocked/rate_limiter.rb +94 -0
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +130 -6
- metadata +73 -4
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../ast/parser'
|
|
4
|
+
|
|
5
|
+
# @see Woods
|
|
6
|
+
module Woods
|
|
7
|
+
class Error < StandardError; end unless defined?(Woods::Error)
|
|
8
|
+
|
|
9
|
+
module Console
|
|
10
|
+
# Raised when EvalGuard rejects a `console_eval` payload.
|
|
11
|
+
class ForbiddenExpressionError < Woods::Error; end
|
|
12
|
+
|
|
13
|
+
# Parse-time refusal layer for `console_eval`.
|
|
14
|
+
#
|
|
15
|
+
# ## Reachability (v0.2)
|
|
16
|
+
#
|
|
17
|
+
# EvalGuard is the first of five controls on the embedded `console_eval`
|
|
18
|
+
# opt-in path. `EmbeddedExecutor#handle_eval` calls `check!` before
|
|
19
|
+
# anything else — ahead of the Confirmation prompt, the SafeContext
|
|
20
|
+
# rollback, the timeout, and the audit log. When the opt-in is off
|
|
21
|
+
# (the default), `refusal_for('eval')` still short-circuits with the
|
|
22
|
+
# `eval_disabled` payload and this guard is not reached. See
|
|
23
|
+
# docs/CONSOLE_MCP_SETUP.md "console_eval opt-in" and backlog B-053.
|
|
24
|
+
#
|
|
25
|
+
# Bridge-process mode (in development) will call the same guard before
|
|
26
|
+
# shipping the payload to the remote Rails worker.
|
|
27
|
+
#
|
|
28
|
+
# ## Behaviour
|
|
29
|
+
#
|
|
30
|
+
# Walks the normalized {Woods::Ast::Parser} tree of the proposed Ruby
|
|
31
|
+
# snippet and refuses any expression that reaches a known credential or
|
|
32
|
+
# reflection escape — so an LLM-generated `Rails.application.credentials
|
|
33
|
+
# .stripe.secret_key` or a reflection escape is rejected before the bridge
|
|
34
|
+
# ever sees it.
|
|
35
|
+
#
|
|
36
|
+
# This is defense in depth, not the only line: the bridge process must
|
|
37
|
+
# re-enforce the same rules at execution time. The gem-side check exists
|
|
38
|
+
# so the LLM sees a fast, visible refusal instead of relying on the host
|
|
39
|
+
# app's bridge configuration.
|
|
40
|
+
#
|
|
41
|
+
# @example
|
|
42
|
+
# EvalGuard.check!('User.count') # => nil (no refusal raised)
|
|
43
|
+
# EvalGuard.check!('Rails.application.credentials.stripe.key') # raises
|
|
44
|
+
#
|
|
45
|
+
class EvalGuard # rubocop:disable Metrics/ClassLength
|
|
46
|
+
# Receivers/calls whose presence in the AST is always a refusal.
|
|
47
|
+
# Each entry is matched against the dotted source text of every send
|
|
48
|
+
# node's receiver (and qualified call name) — so a denial of
|
|
49
|
+
# `Rails.application.credentials` catches every chained access through it
|
|
50
|
+
# (e.g. `Rails.application.credentials.dig(:stripe)`).
|
|
51
|
+
DENIED_CALL_CHAINS = %w[
|
|
52
|
+
Rails.application.credentials
|
|
53
|
+
Rails.application.secrets
|
|
54
|
+
Rails::Secrets
|
|
55
|
+
Devise.secret_key
|
|
56
|
+
].freeze
|
|
57
|
+
|
|
58
|
+
# Constants whose bare reference (or use as a receiver) is denied.
|
|
59
|
+
#
|
|
60
|
+
# - `ENV` — reads host secrets as a string-keyed hash.
|
|
61
|
+
# - Threading: `Thread`, `Fiber`, `Ractor`, `Process` — concurrent
|
|
62
|
+
# execution escapes the rolled-back transaction (the spawned block
|
|
63
|
+
# leases its own connection outside SafeContext's tx).
|
|
64
|
+
# - Deserialization: `Marshal`, `YAML`, `Psych` — unsafe load paths
|
|
65
|
+
# can execute arbitrary code during object instantiation.
|
|
66
|
+
# - Network: `Net`, `Socket`, `TCPSocket`, `UDPSocket`, `URI`,
|
|
67
|
+
# `OpenURI`, `Resolv`, `Faraday`, `HTTP` — every HTTP/network egress
|
|
68
|
+
# point available in a standard Rails install.
|
|
69
|
+
# - File I/O constants (`File`, `FileUtils`, `IO`, `Dir`, `Pathname`,
|
|
70
|
+
# `Tempfile`, `StringIO`) are deliberately absent from this list.
|
|
71
|
+
# - Runtime internals: `ObjectSpace`, `GC`,
|
|
72
|
+
# `RubyVM`, `TracePoint`, `Gem`, `Bundler`.
|
|
73
|
+
# File/IO/Pathname are intentionally NOT in this list — legitimate
|
|
74
|
+
# non-credential file reads are a core use case. Credential-path
|
|
75
|
+
# access is handled by CREDENTIAL_FILE_READERS below, and shell-exec
|
|
76
|
+
# attempts (`Kernel.open("|cmd")`, backticks, `%x{}`) are caught by
|
|
77
|
+
# the backtick textual check in #check! and the DENIED_REFLECTION
|
|
78
|
+
# entries for `system`/`exec`/`popen`/etc.
|
|
79
|
+
DENIED_CONSTANTS = %w[
|
|
80
|
+
ENV
|
|
81
|
+
Thread Fiber Ractor Process Mutex ConditionVariable Queue SizedQueue
|
|
82
|
+
Marshal YAML Psych
|
|
83
|
+
Net Socket TCPSocket UDPSocket UNIXSocket URI OpenURI Resolv Faraday HTTP
|
|
84
|
+
ObjectSpace GC RubyVM TracePoint
|
|
85
|
+
Gem Bundler
|
|
86
|
+
].freeze
|
|
87
|
+
|
|
88
|
+
# Method names that escape the AST sandbox regardless of receiver.
|
|
89
|
+
#
|
|
90
|
+
# Covers, in order:
|
|
91
|
+
# - Eval family: the classic `eval`/`instance_eval`/`class_eval`/
|
|
92
|
+
# `module_eval` plus `binding` (which enables reconstructing an eval
|
|
93
|
+
# in the caller's scope).
|
|
94
|
+
# - Dynamic dispatch: `send` / `public_send` / `__send__` / `method` /
|
|
95
|
+
# `public_method` (returns a callable, indirect dispatch) and the
|
|
96
|
+
# `const_get` / `const_set` / `remove_const` / `define_method` /
|
|
97
|
+
# `define_singleton_method` / `alias_method` / `undef_method` /
|
|
98
|
+
# `remove_method` / `method_defined?` / `prepend` / `include_module`
|
|
99
|
+
# reflection family.
|
|
100
|
+
# - State mutation: `instance_variable_set` / `instance_variable_get`,
|
|
101
|
+
# `class_variable_set` / `class_variable_get` / `taint` / `untaint`.
|
|
102
|
+
# - Object-space escapes: `_id2ref`, `each_object`, `const_source_location`.
|
|
103
|
+
# - System / process: `system`, `exec`, `spawn`, `fork`, `popen`, `%x{}`
|
|
104
|
+
# (AST method name `backtick` / xstr) so they can't be invoked
|
|
105
|
+
# implicitly.
|
|
106
|
+
# - File / IO: `open` (bare Kernel#open — the File-specific reader is
|
|
107
|
+
# handled separately via CREDENTIAL_FILE_READERS, but the bare
|
|
108
|
+
# `Kernel.open("|shell-command")` form is how most shellshock-style
|
|
109
|
+
# escapes slip through).
|
|
110
|
+
# - Network: `URI.open` (when called as `open` on URI, the AST method
|
|
111
|
+
# name is `open` so the string match above catches it). HTTP / Socket
|
|
112
|
+
# constants are denied separately via DENIED_CONSTANTS.
|
|
113
|
+
# - Loader: `load`, `require`, `require_relative`, `autoload`.
|
|
114
|
+
# - Unsafe deserialization: `unsafe_load` / `_load` (Marshal.load and
|
|
115
|
+
# YAML.load are denied via DENIED_CONSTANTS + method gate below).
|
|
116
|
+
# - Threading escapes from SafeContext's rollback: `new` on Thread /
|
|
117
|
+
# Fiber / Process is denied via DENIED_CONSTANTS so the
|
|
118
|
+
# {Kernel.fork, Thread.new} pair can't slip past.
|
|
119
|
+
DENIED_REFLECTION = %w[
|
|
120
|
+
eval instance_eval class_eval module_eval binding
|
|
121
|
+
instance_exec class_exec module_exec
|
|
122
|
+
send public_send __send__ method public_method
|
|
123
|
+
const_get const_set remove_const define_method define_singleton_method
|
|
124
|
+
alias_method undef_method remove_method method_defined? singleton_method
|
|
125
|
+
instance_variable_get instance_variable_set
|
|
126
|
+
class_variable_get class_variable_set
|
|
127
|
+
_id2ref each_object const_source_location instance_variables
|
|
128
|
+
prepend include_module
|
|
129
|
+
system exec spawn fork popen popen2 popen2e popen3 backtick
|
|
130
|
+
require require_relative autoload
|
|
131
|
+
unsafe_load _load
|
|
132
|
+
taint untaint
|
|
133
|
+
].freeze
|
|
134
|
+
|
|
135
|
+
# Receivers + method-name pairs that read credential files from disk.
|
|
136
|
+
# Triggers when the receiver matches AND any literal argument source
|
|
137
|
+
# contains a known credential path fragment. `Pathname.new(...)` is
|
|
138
|
+
# included so `Pathname.new(...).read` chains are caught at construction.
|
|
139
|
+
#
|
|
140
|
+
# `open` is included for File and IO to catch chained patterns like
|
|
141
|
+
# `File.open("config/master.key").read` — the inner `File.open(path)`
|
|
142
|
+
# node is visited by `scan_send_nodes` and refused here before the
|
|
143
|
+
# outer `.read` call is even examined (PR #34 review medium #3).
|
|
144
|
+
CREDENTIAL_FILE_READERS = {
|
|
145
|
+
'File' => %w[read binread readlines open],
|
|
146
|
+
'IO' => %w[read binread readlines open],
|
|
147
|
+
'Pathname' => %w[read binread new open]
|
|
148
|
+
}.freeze
|
|
149
|
+
CREDENTIAL_PATH_HINTS = %w[
|
|
150
|
+
master.key credentials.yml.enc credentials/
|
|
151
|
+
secrets.yml secrets.yml.enc
|
|
152
|
+
].freeze
|
|
153
|
+
|
|
154
|
+
class << self
|
|
155
|
+
# @param code [String] Ruby source proposed for `console_eval`.
|
|
156
|
+
# @raise [ForbiddenExpressionError] on any denial or parse failure.
|
|
157
|
+
def check!(code)
|
|
158
|
+
new.check!(code)
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def initialize(parser: Woods::Ast::Parser.new)
|
|
163
|
+
@parser = parser
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Textual token for class-variable (`@@foo`) and global-variable
|
|
167
|
+
# (`$foo`) writes. {Woods::Ast::Parser} doesn't normalize cvasgn /
|
|
168
|
+
# gvasgn to a dedicated node type, so we catch them at the source
|
|
169
|
+
# level the same way shell-execution literals are caught. Instance-
|
|
170
|
+
# variable writes (`@foo`) ARE normalized to `:ivasgn` and are
|
|
171
|
+
# refused via the AST walk — see {#scan_assignment_nodes}.
|
|
172
|
+
#
|
|
173
|
+
# Covers plain assignment (`=`) AND op-assign forms (`+=`, `-=`,
|
|
174
|
+
# `*=`, `/=`, `%=`, `**=`, `<<=`, `>>=`, `|=`, `&=`, `^=`, `||=`,
|
|
175
|
+
# `&&=`) — all of which are writes. Excludes the non-assignment
|
|
176
|
+
# `==`, `=~`, `=>` forms via the trailing negative lookahead.
|
|
177
|
+
OP_ASSIGN_SUFFIX = %r{(?:\|\|?|&&?|<<|>>|\*\*?|[-+/%^])?=(?![=~>])}
|
|
178
|
+
private_constant :OP_ASSIGN_SUFFIX
|
|
179
|
+
|
|
180
|
+
CLASS_OR_GLOBAL_VAR_ASSIGNMENT = /
|
|
181
|
+
(?:^|[^\w]) # not mid-identifier
|
|
182
|
+
(@@\w+|\$\w+) # @@cvar or $gvar
|
|
183
|
+
\s*
|
|
184
|
+
#{OP_ASSIGN_SUFFIX.source}
|
|
185
|
+
/x
|
|
186
|
+
private_constant :CLASS_OR_GLOBAL_VAR_ASSIGNMENT
|
|
187
|
+
|
|
188
|
+
# Run every refusal layer against the payload: the source-level textual
# checks first, then the AST scans over the parsed tree.
#
# @param code [String] Ruby source proposed for `console_eval`.
# @return [nil] when no layer refuses the payload
# @raise [ForbiddenExpressionError] on any denial, empty payload, or
#   parse failure
def check!(code)
  raise ForbiddenExpressionError, 'payload is empty' if code.nil? || code.strip.empty?

  # Fail-safe textual check for shell-execution literals (backticks and
  # `%x`). Ruby accepts ANY non-alphanumeric character as the `%x`
  # delimiter (`%x/ls/`, `%x-ls-`, `%x"ls"`), not only brackets, so the
  # pattern refuses `%x` followed by any such character. This
  # deliberately over-refuses (e.g. a `'%x'` format string): a false
  # refusal is cheap, a missed shell literal is not.
  if code.include?('`') || code.match?(/%x[^a-zA-Z0-9]/)
    raise ForbiddenExpressionError, 'payload contains a shell-execution literal (backtick or %x)'
  end

  refuse_class_or_global_var_assignment!(code)

  tree = parse_or_refuse(code)
  scan_send_nodes(tree)
  scan_const_nodes(tree)
  scan_assignment_nodes(tree)
end
|
|
209
|
+
|
|
210
|
+
private
|
|
211
|
+
|
|
212
|
+
def parse_or_refuse(code)
|
|
213
|
+
@parser.parse(code)
|
|
214
|
+
rescue Woods::ExtractionError => e
|
|
215
|
+
raise ForbiddenExpressionError, "payload could not be parsed safely: #{e.message}"
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def scan_send_nodes(tree)
|
|
219
|
+
tree.find_all(:send).each do |node|
|
|
220
|
+
refuse_reflection!(node)
|
|
221
|
+
refuse_denied_constant_receiver!(node)
|
|
222
|
+
refuse_denied_constant_in_args!(node)
|
|
223
|
+
refuse_denied_call_chain!(node)
|
|
224
|
+
refuse_credential_file_read!(node)
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
def scan_const_nodes(tree)
|
|
229
|
+
tree.find_all(:const).each do |node|
|
|
230
|
+
if DENIED_CONSTANTS.include?(node.method_name.to_s)
|
|
231
|
+
raise ForbiddenExpressionError,
|
|
232
|
+
"payload references denied constant #{node.method_name}"
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
# Refuse `@ivar = …` writes. The embedded `console_eval` path runs
|
|
238
|
+
# inside a throwaway receiver, so a payload setting `@audit_logger
|
|
239
|
+
# = nil` would only affect the throwaway — but we deny the syntactic
|
|
240
|
+
# form anyway as defense-in-depth for any future caller that might
|
|
241
|
+
# hand EvalGuard a payload evaluated in a non-isolated binding.
|
|
242
|
+
def scan_assignment_nodes(tree)
|
|
243
|
+
node = tree.find_all(:ivasgn).first
|
|
244
|
+
return unless node
|
|
245
|
+
|
|
246
|
+
raise ForbiddenExpressionError,
|
|
247
|
+
"payload writes to instance variable #{node.method_name}"
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Textual refusal for `@@cvar = …` / `$gvar = …`. The parser
|
|
251
|
+
# normalization doesn't distinguish cvasgn / gvasgn today; the
|
|
252
|
+
# source-level scan is the same shape as the backtick check above.
|
|
253
|
+
def refuse_class_or_global_var_assignment!(code)
|
|
254
|
+
return unless (match = CLASS_OR_GLOBAL_VAR_ASSIGNMENT.match(code))
|
|
255
|
+
|
|
256
|
+
raise ForbiddenExpressionError,
|
|
257
|
+
"payload writes to #{match[1]} (class/global variable assignment)"
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
# Refuse a send node whose method name is a sandbox escape.
#
# Names in DENIED_REFLECTION are refused regardless of receiver. `open`
# and `load` are refused only in their Kernel form (see
# #bare_kernel_escape?).
#
# @param node [#method_name, #receiver] a send node from the parsed tree
# @raise [ForbiddenExpressionError] when the call is denied
def refuse_reflection!(node)
  name = node.method_name.to_s
  return unless DENIED_REFLECTION.include?(name) || bare_kernel_escape?(node, name)

  raise ForbiddenExpressionError,
        "payload calls reflection method `#{name}`"
end

# True for `open(...)` / `load(...)` called with no receiver, on `self`,
# or on `Kernel`. `open("|cmd")` spawns a shell and `load` executes an
# arbitrary file, but calls with another explicit receiver (`File.open`,
# `relation.load`) are left to the other checks.
def bare_kernel_escape?(node, name)
  return false unless %w[open load].include?(name)

  ['', 'self', 'Kernel'].include?(node.receiver.to_s.delete_prefix('::'))
end
|
|
266
|
+
|
|
267
|
+
# Refuse a send node whose receiver is rooted at a denied constant.
#
# The receiver is compared by its leading constant rather than by exact
# text, so `::ENV`, `Net::HTTP` and `Thread.current` are refused along
# with the bare `ENV` / `Net` / `Thread` forms. The whole identifier
# must match, so `HTTParty` is not mistaken for `HTTP`.
#
# @param node [#receiver] a send node from the parsed tree
# @raise [ForbiddenExpressionError] when the receiver is denied
def refuse_denied_constant_receiver!(node)
  return unless node.receiver

  root = node.receiver.to_s.delete_prefix('::')[/\A[A-Z]\w*/]
  return unless root && DENIED_CONSTANTS.include?(root)

  raise ForbiddenExpressionError,
        "payload references denied constant #{root}"
end
|
|
273
|
+
|
|
274
|
+
# Catches `puts ENV` — Prism flattens method-call argument nodes into
|
|
275
|
+
# source-text strings, so a bare ENV passed as an argument never appears
|
|
276
|
+
# as its own :const node. Match it as a whole-word token in arg text.
|
|
277
|
+
def refuse_denied_constant_in_args!(node)
|
|
278
|
+
DENIED_CONSTANTS.each do |const|
|
|
279
|
+
pattern = /\b#{Regexp.escape(const)}\b/
|
|
280
|
+
next unless Array(node.arguments).any? { |arg| arg.to_s.match?(pattern) }
|
|
281
|
+
|
|
282
|
+
raise ForbiddenExpressionError,
|
|
283
|
+
"payload references denied constant #{const}"
|
|
284
|
+
end
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
def refuse_denied_call_chain!(node)
|
|
288
|
+
qualified = qualified_call(node)
|
|
289
|
+
DENIED_CALL_CHAINS.each do |chain|
|
|
290
|
+
next unless qualified.include?(chain)
|
|
291
|
+
|
|
292
|
+
raise ForbiddenExpressionError,
|
|
293
|
+
"payload references denied call chain `#{chain}`"
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
# Refuse file-reading calls (per CREDENTIAL_FILE_READERS) whose arguments
# mention a known credential path.
#
# A leading `::` on the receiver is stripped first so the top-level form
# (`::File.read("config/master.key")`) is treated like `File.read(...)`.
#
# @param node [#receiver, #method_name, #arguments] a send node
# @raise [ForbiddenExpressionError] when a credential file read is found
def refuse_credential_file_read!(node)
  receiver = node.receiver.to_s.delete_prefix('::')
  readers = CREDENTIAL_FILE_READERS[receiver]
  return unless readers&.include?(node.method_name.to_s)
  return unless Array(node.arguments).any? { |arg| credential_path?(arg) }

  raise ForbiddenExpressionError,
        "payload reads credential file via `#{receiver}.#{node.method_name}`"
end
|
|
306
|
+
|
|
307
|
+
# Build the dotted "receiver.method" text used for call-chain matching.
#
# Whitespace is stripped and safe-navigation (`&.`) is folded into `.`
# in case the parser hands back raw receiver source text. Otherwise a
# chain split across lines (`Rails.application\n  .credentials`) or
# written with `&.` would not contain the denied substring.
#
# @param node [#receiver, #method_name] a send node
# @return [String] the method name alone when there is no receiver
def qualified_call(node)
  return node.method_name.to_s unless node.receiver

  "#{node.receiver}.#{node.method_name}".gsub(/\s+/, '').gsub('&.', '.')
end
|
|
312
|
+
|
|
313
|
+
def credential_path?(arg_text)
|
|
314
|
+
text = arg_text.to_s
|
|
315
|
+
CREDENTIAL_PATH_HINTS.any? { |hint| text.include?(hint) }
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
end
|
|
319
|
+
end
|
|
@@ -54,11 +54,9 @@ module Woods
|
|
|
54
54
|
#
|
|
55
55
|
# @param model_name [String]
|
|
56
56
|
# @param column_names [Array<String>]
|
|
57
|
-
# @return [true]
|
|
58
57
|
# @raise [ValidationError] if any column is unknown
|
|
59
|
-
def validate_columns!(model_name, column_names)
|
|
58
|
+
def validate_columns!(model_name, column_names)
|
|
60
59
|
column_names.each { |col| validate_column!(model_name, col) }
|
|
61
|
-
true
|
|
62
60
|
end
|
|
63
61
|
|
|
64
62
|
# List all known model names.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'json'
|
|
4
|
+
require 'woods/observability/structured_logger'
|
|
4
5
|
|
|
5
6
|
module Woods
|
|
6
7
|
module Console
|
|
@@ -10,78 +11,233 @@ module Woods
|
|
|
10
11
|
# and all models are loaded. Uses ActiveRecord connection pool for thread
|
|
11
12
|
# safety under Puma.
|
|
12
13
|
#
|
|
13
|
-
#
|
|
14
|
+
# == Basic setup (Tier 1 tools only)
|
|
15
|
+
#
|
|
16
|
+
# Add to config/application.rb or an initializer:
|
|
17
|
+
#
|
|
14
18
|
# config.middleware.use Woods::Console::RackMiddleware, path: '/mcp/console'
|
|
15
19
|
#
|
|
20
|
+
# This mounts 31 console tools at /mcp/console. By default, console_sql and
|
|
21
|
+
# console_query are blocked in embedded mode and return an "unsupported" error
|
|
22
|
+
# pointing users to enable the flag.
|
|
23
|
+
#
|
|
24
|
+
# == Enabling the feature
|
|
25
|
+
#
|
|
26
|
+
# The Console MCP is disabled by default. Enable it in your Woods initializer:
|
|
27
|
+
#
|
|
28
|
+
# Woods.configure do |config|
|
|
29
|
+
# config.console_mcp_enabled = true
|
|
30
|
+
# config.console_blocked_tables = %w[authorizations credentials]
|
|
31
|
+
# config.console_redacted_columns = %w[api_token password_digest]
|
|
32
|
+
# end
|
|
33
|
+
#
|
|
34
|
+
# With the flag off, requests to the mounted path return 410 Gone so
|
|
35
|
+
# operators can see the endpoint exists but is gated. See
|
|
36
|
+
# docs/CONSOLE_MCP_SETUP.md for the full security posture (blocked tables,
|
|
37
|
+
# credential scanner, column/EAV redaction, SafeContext rollback).
|
|
38
|
+
#
|
|
39
|
+
# == Enabling read tools (console_sql + console_query)
|
|
40
|
+
#
|
|
41
|
+
# Set embedded_read_tools: true to unlock the sql and query tools:
|
|
42
|
+
#
|
|
43
|
+
# # config/initializers/woods_console.rb
|
|
44
|
+
# Rails.application.config.middleware.use \
|
|
45
|
+
# Woods::Console::RackMiddleware,
|
|
46
|
+
# path: '/mcp/console',
|
|
47
|
+
# embedded_read_tools: true
|
|
48
|
+
#
|
|
49
|
+
# Security posture with embedded_read_tools: true:
|
|
50
|
+
#
|
|
51
|
+
# 1. SqlValidator denylist — console_sql rejects INSERT/UPDATE/DELETE/DROP/TRUNCATE/
|
|
52
|
+
# ALTER/CREATE/REPLACE and similar DML/DDL at the string level before any database
|
|
53
|
+
# interaction. Only SELECT and WITH...SELECT are allowed.
|
|
54
|
+
#
|
|
55
|
+
# 2. SafeContext rollback — every request (including console_query) runs inside
|
|
56
|
+
# a database transaction that is always rolled back on completion. Even if a
|
|
57
|
+
# query somehow mutated state (e.g. a function with side effects), the rollback
|
|
58
|
+
# ensures nothing persists.
|
|
59
|
+
#
|
|
60
|
+
# 3. Per-request connection pooling — each HTTP request draws a connection from
|
|
61
|
+
# ActiveRecord::Base's pool and returns it after the response. No shared
|
|
62
|
+
# mutable state leaks between requests.
|
|
63
|
+
#
|
|
64
|
+
# These three layers make embedded_read_tools: true safe for read-only workloads.
|
|
65
|
+
# If your threat model requires stricter isolation, use the bridge mode instead
|
|
66
|
+
# (docs/CONSOLE_MCP_SETUP.md) which runs the executor in a separate process.
|
|
67
|
+
#
|
|
16
68
|
class RackMiddleware
|
|
17
69
|
# @param app [#call] The next Rack app in the middleware stack
|
|
18
70
|
# @param path [String] URL path to mount the MCP endpoint (default: '/mcp/console')
|
|
19
71
|
# @param embedded_read_tools [Boolean] Enable sql/query tools in embedded mode (default: false)
|
|
20
|
-
|
|
72
|
+
# @param unsafe_eval_confirmation [Confirmation, nil] Approval callback for the
|
|
73
|
+
# `console_eval` opt-in. Required when `WOODS_CONSOLE_UNSAFE_EVAL=true` (or
|
|
74
|
+
# `config.console_unsafe_eval_enabled = true`); the server refuses to boot
|
|
75
|
+
# without it. Takes precedence over `config.console_unsafe_eval_confirmation`.
|
|
76
|
+
# @param unsafe_eval_audit_log_path [String, Pathname, nil] JSONL audit log
|
|
77
|
+
# path for every `console_eval` run. Required on the opt-in path. Takes
|
|
78
|
+
# precedence over `config.console_unsafe_eval_audit_log_path`.
|
|
79
|
+
def initialize(app, path: '/mcp/console', embedded_read_tools: false,
|
|
80
|
+
unsafe_eval_confirmation: nil, unsafe_eval_audit_log_path: nil)
|
|
21
81
|
@app = app
|
|
22
82
|
@path = path
|
|
23
83
|
@embedded_read_tools = embedded_read_tools
|
|
84
|
+
@unsafe_eval_confirmation = unsafe_eval_confirmation
|
|
85
|
+
@unsafe_eval_audit_log_path = unsafe_eval_audit_log_path
|
|
24
86
|
@mutex = Mutex.new
|
|
25
87
|
@transport = nil
|
|
26
88
|
end
|
|
27
89
|
|
|
90
|
+
DISABLED_BODY = JSON.generate(
|
|
91
|
+
error: 'woods_console_disabled',
|
|
92
|
+
message: 'Woods Console MCP is disabled. Set ' \
|
|
93
|
+
'Woods.configuration.console_mcp_enabled = true to enable. ' \
|
|
94
|
+
'See docs/CONSOLE_MCP_SETUP.md for the full security posture.'
|
|
95
|
+
).freeze
|
|
96
|
+
|
|
28
97
|
# Rack interface — intercepts requests at the configured path.
#
# A request is intercepted only when PATH_INFO equals the mount path or
# sits beneath it on a segment boundary (`/mcp/console`,
# `/mcp/console/...`). A bare prefix match would also capture unrelated
# sibling routes such as `/mcp/console-admin`. Every other request
# passes through to the wrapped app unchanged.
#
# Returns 410 Gone when Woods.configuration.console_mcp_enabled is false
# (the default). This keeps the middleware inert on hosts that have
# mounted it but not yet opted into the feature.
#
# @param env [Hash] Rack environment
# @return [Array] Rack response triple
def call(env)
  request_path = env['PATH_INFO'].to_s # nil-safe: a missing PATH_INFO passes through
  mount = @path.chomp('/')
  mounted = request_path == mount || request_path.start_with?("#{mount}/")

  return @app.call(env) unless mounted
  return [410, { 'content-type' => 'application/json' }, [DISABLED_BODY]] unless enabled?

  ensure_transport.handle_request(Rack::Request.new(env))
end
|
|
39
112
|
|
|
40
113
|
private
|
|
41
114
|
|
|
115
|
+
def enabled?
|
|
116
|
+
Woods.configuration.console_mcp_enabled
|
|
117
|
+
end
|
|
118
|
+
|
|
42
119
|
# Thread-safe lazy initialization of the MCP server and transport.
|
|
43
120
|
#
|
|
44
|
-
# @return [MCP::Server::Transports::StreamableHTTPTransport]
|
|
45
|
-
def ensure_transport
|
|
121
|
+
# @return [::MCP::Server::Transports::StreamableHTTPTransport]
|
|
122
|
+
def ensure_transport
|
|
46
123
|
return @transport if @transport
|
|
47
124
|
|
|
48
125
|
@mutex.synchronize do
|
|
49
126
|
return @transport if @transport
|
|
50
127
|
|
|
51
|
-
|
|
128
|
+
check_blocked_tables_config!
|
|
52
129
|
|
|
130
|
+
require 'woods/console/server'
|
|
53
131
|
Rails.application.eager_load!
|
|
54
132
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
133
|
+
server = build_embedded_server
|
|
134
|
+
@transport = ::MCP::Server::Transports::StreamableHTTPTransport.new(server)
|
|
135
|
+
server.transport = @transport
|
|
136
|
+
@transport
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Emit a prominent warning (or raise in production) when the Console MCP
|
|
141
|
+
# is enabled but no tables are blocked. An empty block list means Layer 1
|
|
142
|
+
# of the defense stack is fully inactive — every table in the database is
|
|
143
|
+
# reachable via console_sql, console_query, and the model tools.
|
|
144
|
+
#
|
|
145
|
+
# Remediation:
|
|
146
|
+
# Woods.configure { |c| c.console_blocked_tables =
|
|
147
|
+
# Woods::DEFAULT_CONSOLE_BLOCKED_TABLES + %w[your_sensitive_table] }
|
|
148
|
+
#
|
|
149
|
+
# @raise [Woods::ConfigurationError] in production environments
|
|
150
|
+
def check_blocked_tables_config!
|
|
151
|
+
return unless Woods.configuration.console_blocked_tables.empty?
|
|
58
152
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
153
|
+
message =
|
|
154
|
+
'[Woods Console] console_blocked_tables is empty — Layer 1 (table gate) is INACTIVE. ' \
|
|
155
|
+
'All tables are reachable via the Console MCP. ' \
|
|
156
|
+
'Set console_blocked_tables in your Woods initializer to restrict access. ' \
|
|
157
|
+
'Example: Woods.configure { |c| c.console_blocked_tables = ' \
|
|
158
|
+
'Woods::DEFAULT_CONSOLE_BLOCKED_TABLES + %w[your_table] }'
|
|
63
159
|
|
|
64
|
-
|
|
160
|
+
raise Woods::ConfigurationError, message if defined?(Rails) && Rails.env.production?
|
|
65
161
|
|
|
66
|
-
|
|
67
|
-
|
|
162
|
+
warn message
|
|
163
|
+
end
|
|
68
164
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
165
|
+
# Build the embedded MCP server. SafeContext is given the writing
|
|
166
|
+
# connection pool (not a connection) so each request leases a fresh
|
|
167
|
+
# connection via `pool.with_connection { ... }` and returns it to the
|
|
168
|
+
# pool when the rolled-back transaction completes. Capturing a
|
|
169
|
+
# connection at build time would leak it out of its lease and pin it
|
|
170
|
+
# for the lifetime of the process.
|
|
171
|
+
#
|
|
172
|
+
# Multi-DB / sharded hosts are still served from the writing pool
|
|
173
|
+
# only — extending SafeContext to route per role/shard is tracked as
|
|
174
|
+
# `WOODS-CONSOLE-PERREQ-CONN`.
|
|
175
|
+
def build_embedded_server
|
|
176
|
+
config = Woods.configuration
|
|
177
|
+
introspection = build_model_introspection
|
|
178
|
+
Server.build_embedded(
|
|
179
|
+
model_validator: ModelValidator.new(registry: introspection[:registry]),
|
|
180
|
+
safe_context: SafeContext.new(pool: ActiveRecord::Base.connection_pool),
|
|
181
|
+
redacted_columns: Array(config&.console_redacted_columns),
|
|
182
|
+
redacted_key_values: Array(config&.console_redacted_key_values),
|
|
183
|
+
read_tools_enabled: @embedded_read_tools,
|
|
184
|
+
model_tables: introspection[:tables],
|
|
185
|
+
model_reflections: introspection[:reflections],
|
|
186
|
+
unsafe_eval_confirmation: @unsafe_eval_confirmation,
|
|
187
|
+
unsafe_eval_audit_log_path: @unsafe_eval_audit_log_path
|
|
188
|
+
)
|
|
189
|
+
end
|
|
72
190
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
191
|
+
# Walk ActiveRecord::Base.descendants once and collect the registry,
# table map, and reflection map in a single pass. Abstract classes and
# models without a table are ignored. A model that raises during
# introspection is skipped entirely and the failure is logged at debug
# level.
#
# The reflection map (model name → association-name → target-table) is
# built by #reflections_for.
#
# @return [Hash] frozen hash with keys :registry, :tables, :reflections
def build_model_introspection
  registry = {}
  tables = {}
  reflections = {}

  ActiveRecord::Base.descendants.each do |model|
    next if model.abstract_class?
    next unless model.table_exists?

    # Resolve everything before registering anything, so a model that
    # raises part-way through never ends up in one map but not the others.
    name = model.name
    columns = model.column_names
    table = model.table_name
    associations = reflections_for(model)

    registry[name] = columns
    tables[name] = table
    reflections[name] = associations
  rescue StandardError => e
    structured_logger.debug(
      'console.model_introspection.skipped',
      model: model.name,
      error_class: e.class.name,
      error_message: e.message
    )
  end

  { registry: registry, tables: tables, reflections: reflections }.freeze
end
|
|
226
|
+
|
|
227
|
+
def reflections_for(model)
|
|
228
|
+
model.reflect_on_all_associations.each_with_object({}) do |reflection, assoc_map|
|
|
229
|
+
next if reflection.polymorphic?
|
|
230
|
+
|
|
231
|
+
klass = reflection.klass
|
|
232
|
+
assoc_map[reflection.name.to_s] = klass.table_name if klass.respond_to?(:table_name)
|
|
233
|
+
rescue StandardError
|
|
234
|
+
next
|
|
83
235
|
end
|
|
84
236
|
end
|
|
237
|
+
|
|
238
|
+
def structured_logger
|
|
239
|
+
@structured_logger ||= Woods::Observability::StructuredLogger.new
|
|
240
|
+
end
|
|
85
241
|
end
|
|
86
242
|
end
|
|
87
243
|
end
|