woods 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +186 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +69 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +210 -0
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +771 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +163 -0
  102. data/lib/woods/unblocked/document_builder.rb +326 -0
  103. data/lib/woods/unblocked/exporter.rb +201 -0
  104. data/lib/woods/unblocked/rate_limiter.rb +94 -0
  105. data/lib/woods/util/host_guard.rb +61 -0
  106. data/lib/woods/version.rb +1 -1
  107. data/lib/woods.rb +130 -6
  108. metadata +73 -4
@@ -0,0 +1,319 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast/parser'
4
+
5
+ # @see Woods
6
+ module Woods
7
+ class Error < StandardError; end unless defined?(Woods::Error)
8
+
9
+ module Console
10
+ # Raised when EvalGuard rejects a `console_eval` payload.
11
+ class ForbiddenExpressionError < Woods::Error; end
12
+
13
+ # Parse-time refusal layer for `console_eval`.
14
+ #
15
+ # ## Reachability (v0.2)
16
+ #
17
+ # EvalGuard is the first of five controls on the embedded `console_eval`
18
+ # opt-in path. `EmbeddedExecutor#handle_eval` calls `check!` before
19
+ # anything else — ahead of the Confirmation prompt, the SafeContext
20
+ # rollback, the timeout, and the audit log. When the opt-in is off
21
+ # (the default), `refusal_for('eval')` still short-circuits with the
22
+ # `eval_disabled` payload and this guard is not reached. See
23
+ # docs/CONSOLE_MCP_SETUP.md "console_eval opt-in" and backlog B-053.
24
+ #
25
+ # Bridge-process mode (in development) will call the same guard before
26
+ # shipping the payload to the remote Rails worker.
27
+ #
28
+ # ## Behaviour
29
+ #
30
+ # Walks the normalized {Woods::Ast::Parser} tree of the proposed Ruby
31
+ # snippet and refuses any expression that reaches a known credential or
32
+ # reflection escape — so an LLM-generated `Rails.application.credentials
33
+ # .stripe.secret_key` or a reflection escape is rejected before the bridge
34
+ # ever sees it.
35
+ #
36
+ # This is defense in depth, not the only line: the bridge process must
37
+ # re-enforce the same rules at execution time. The gem-side check exists
38
+ # so the LLM sees a fast, visible refusal instead of relying on the host
39
+ # app's bridge configuration.
40
+ #
41
+ # @example
42
+ # EvalGuard.check!('User.count') # => true
43
+ # EvalGuard.check!('Rails.application.credentials.stripe.key') # raises
44
+ #
45
+ class EvalGuard # rubocop:disable Metrics/ClassLength
46
+ # Receivers/calls whose presence in the AST is always a refusal.
47
+ # Each entry is matched against the dotted source text of every send
48
+ # node's receiver (and qualified call name) — so a denial of
49
+ # `Rails.application.credentials` catches every chained access through it
50
+ # (e.g. `Rails.application.credentials.dig(:stripe)`).
51
+ DENIED_CALL_CHAINS = %w[
52
+ Rails.application.credentials
53
+ Rails.application.secrets
54
+ Rails::Secrets
55
+ Devise.secret_key
56
+ ].freeze
57
+
58
+ # Constants whose bare reference (or use as a receiver) is denied.
59
+ #
60
+ # - `ENV` — reads host secrets as a string-keyed hash.
61
+ # - Threading: `Thread`, `Fiber`, `Ractor`, `Process` — concurrent
62
+ # execution escapes the rolled-back transaction (the spawned block
63
+ # leases its own connection outside SafeContext's tx).
64
+ # - Deserialization: `Marshal`, `YAML`, `Psych` — unsafe load paths
65
+ # can execute arbitrary code during object instantiation.
66
+ # - Network: `Net`, `Socket`, `TCPSocket`, `UDPSocket`, `URI`,
67
+ # `OpenURI`, `Resolv`, `Faraday`, `HTTP` — every HTTP/network egress
68
+ # point available in a standard Rails install.
69
+ # - File I/O: nothing is denied here — `File`, `FileUtils`, `IO`, `Dir`,
70
+ # `Pathname`, `Tempfile`, `StringIO` are not in DENIED_CONSTANTS (see the note below).
71
+ # - Runtime internals: `ObjectSpace`, `GC`, `RubyVM`, `TracePoint`, `Gem`,
72
+ # `Bundler`. `Kernel`, `Object` and `BasicObject` are not in the list.
73
+ # File/IO/Pathname are intentionally NOT in this list — legitimate
74
+ # non-credential file reads are a core use case. Credential-path
75
+ # access is handled by CREDENTIAL_FILE_READERS below, and shell-exec
76
+ # attempts (`Kernel.open("|cmd")`, backticks, `%x{}`) are caught by
77
+ # the backtick textual check in #check! and the DENIED_REFLECTION
78
+ # entries for `system`/`exec`/`popen`/etc.
79
+ DENIED_CONSTANTS = %w[
80
+ ENV
81
+ Thread Fiber Ractor Process Mutex ConditionVariable Queue SizedQueue
82
+ Marshal YAML Psych
83
+ Net Socket TCPSocket UDPSocket UNIXSocket URI OpenURI Resolv Faraday HTTP
84
+ ObjectSpace GC RubyVM TracePoint
85
+ Gem Bundler
86
+ ].freeze
87
+
88
+ # Method names that escape the AST sandbox regardless of receiver.
89
+ #
90
+ # Covers, in order:
91
+ # - Eval family: the classic `eval`/`instance_eval`/`class_eval`/
92
+ # `module_eval` plus `binding` (which enables reconstructing an eval
93
+ # in the caller's scope).
94
+ # - Dynamic dispatch: `send` / `public_send` / `__send__` / `method` /
95
+ # `public_method` (returns a callable, indirect dispatch) and the
96
+ # `const_get` / `const_set` / `remove_const` / `define_method` /
97
+ # `define_singleton_method` / `alias_method` / `undef_method` /
98
+ # `remove_method` / `method_defined?` / `prepend` / `include_module`
99
+ # reflection family.
100
+ # - State mutation: `instance_variable_set` / `instance_variable_get`,
101
+ # `class_variable_set` / `class_variable_get` / `taint` / `untaint`.
102
+ # - Object-space escapes: `_id2ref`, `each_object`, `const_source_location`.
103
+ # - System / process: `system`, `exec`, `spawn`, `fork`, `popen`, `%x{}`
104
+ # (AST method name `backtick` / xstr) so they can't be invoked
105
+ # implicitly.
106
+ # - File / IO: NOTE(review) — `open` is described here but is not in the
107
+ # list below; File/IO/Pathname `open` on credential paths is handled
108
+ # separately via CREDENTIAL_FILE_READERS, while the bare
109
+ # `Kernel.open("|shell-command")` form does not appear to be refused — confirm.
110
+ # - Network: `URI.open` is refused because `URI` is a denied constant
111
+ # (the method name `open` itself is not in the list below). HTTP / Socket
112
+ # constants are likewise denied via DENIED_CONSTANTS.
113
+ # - Loader: `require`, `require_relative`, `autoload` (bare `load` is not listed).
114
+ # - Unsafe deserialization: `unsafe_load` / `_load` (Marshal.load and
115
+ # YAML.load are denied via DENIED_CONSTANTS + method gate below).
116
+ # - Threading escapes from SafeContext's rollback: `new` on Thread /
117
+ # Fiber / Process is denied via DENIED_CONSTANTS so the
118
+ # {Kernel.fork, Thread.new} pair can't slip past.
119
+ DENIED_REFLECTION = %w[
120
+ eval instance_eval class_eval module_eval binding
121
+ instance_exec class_exec module_exec
122
+ send public_send __send__ method public_method
123
+ const_get const_set remove_const define_method define_singleton_method
124
+ alias_method undef_method remove_method method_defined? singleton_method
125
+ instance_variable_get instance_variable_set
126
+ class_variable_get class_variable_set
127
+ _id2ref each_object const_source_location instance_variables
128
+ prepend include_module
129
+ system exec spawn fork popen popen2 popen2e popen3 backtick
130
+ require require_relative autoload
131
+ unsafe_load _load
132
+ taint untaint
133
+ ].freeze
134
+
135
+ # Receivers + method-name pairs that read credential files from disk.
136
+ # Triggers when the receiver matches AND any literal argument source
137
+ # contains a known credential path fragment. `Pathname.new(...)` is
138
+ # included so `Pathname.new(...).read` chains are caught at construction.
139
+ #
140
+ # `open` is included for File and IO to catch chained patterns like
141
+ # `File.open("config/master.key").read` — the inner `File.open(path)`
142
+ # node is visited by `scan_send_nodes` and refused here before the
143
+ # outer `.read` call is even examined (PR #34 review medium #3).
144
+ CREDENTIAL_FILE_READERS = {
145
+ 'File' => %w[read binread readlines open],
146
+ 'IO' => %w[read binread readlines open],
147
+ 'Pathname' => %w[read binread new open]
148
+ }.freeze
149
+ CREDENTIAL_PATH_HINTS = %w[
150
+ master.key credentials.yml.enc credentials/
151
+ secrets.yml secrets.yml.enc
152
+ ].freeze
153
+
154
+ class << self
155
+ # @param code [String] Ruby source proposed for `console_eval`.
156
+ # @raise [ForbiddenExpressionError] on any denial or parse failure.
157
+ def check!(code)
158
+ new.check!(code)
159
+ end
160
+ end
161
+
162
+ def initialize(parser: Woods::Ast::Parser.new)
163
+ @parser = parser
164
+ end
165
+
166
+ # Textual token for class-variable (`@@foo`) and global-variable
167
+ # (`$foo`) writes. {Woods::Ast::Parser} doesn't normalize cvasgn /
168
+ # gvasgn to a dedicated node type, so we catch them at the source
169
+ # level the same way shell-execution literals are caught. Instance-
170
+ # variable writes (`@foo`) ARE normalized to `:ivasgn` and are
171
+ # refused via the AST walk — see {#scan_assignment_nodes}.
172
+ #
173
+ # Covers plain assignment (`=`) AND op-assign forms (`+=`, `-=`,
174
+ # `*=`, `/=`, `%=`, `**=`, `<<=`, `>>=`, `|=`, `&=`, `^=`, `||=`,
175
+ # `&&=`) — all of which are writes. Excludes the non-assignment
176
+ # `==`, `=~`, `=>` forms via the trailing negative lookahead.
177
+ OP_ASSIGN_SUFFIX = %r{(?:\|\|?|&&?|<<|>>|\*\*?|[-+/%^])?=(?![=~>])}
178
+ private_constant :OP_ASSIGN_SUFFIX
179
+
180
+ CLASS_OR_GLOBAL_VAR_ASSIGNMENT = /
181
+ (?:^|[^\w]) # not mid-identifier
182
+ (@@\w+|\$\w+) # @@cvar or $gvar
183
+ \s*
184
+ #{OP_ASSIGN_SUFFIX.source}
185
+ /x
186
+ private_constant :CLASS_OR_GLOBAL_VAR_ASSIGNMENT
187
+
188
+ # @param code [String]
189
+ # @raise [ForbiddenExpressionError]
190
+ def check!(code)
191
+ raise ForbiddenExpressionError, 'payload is empty' if code.nil? || code.strip.empty?
192
+
193
+ # Fail-safe textual check for backtick literals (` `cmd` ` and
194
+ # `%x{cmd}`) — the AST flavor of these is `:xstr`/`:xstr_heredoc`,
195
+ # which {Woods::Ast::Parser} may normalize differently across
196
+ # Prism/parser-gem backends. A source-level refusal is both cheap
197
+ # and impossible to evade via AST normalization.
198
+ if code.include?('`') || code =~ /%x[{<|!@#(\[]/
199
+ raise ForbiddenExpressionError, 'payload contains a shell-execution literal (backtick or %x)'
200
+ end
201
+
202
+ refuse_class_or_global_var_assignment!(code)
203
+
204
+ tree = parse_or_refuse(code)
205
+ scan_send_nodes(tree)
206
+ scan_const_nodes(tree)
207
+ scan_assignment_nodes(tree)
208
+ end
209
+
210
+ private
211
+
212
+ def parse_or_refuse(code)
213
+ @parser.parse(code)
214
+ rescue Woods::ExtractionError => e
215
+ raise ForbiddenExpressionError, "payload could not be parsed safely: #{e.message}"
216
+ end
217
+
218
+ def scan_send_nodes(tree)
219
+ tree.find_all(:send).each do |node|
220
+ refuse_reflection!(node)
221
+ refuse_denied_constant_receiver!(node)
222
+ refuse_denied_constant_in_args!(node)
223
+ refuse_denied_call_chain!(node)
224
+ refuse_credential_file_read!(node)
225
+ end
226
+ end
227
+
228
+ def scan_const_nodes(tree)
229
+ tree.find_all(:const).each do |node|
230
+ if DENIED_CONSTANTS.include?(node.method_name.to_s)
231
+ raise ForbiddenExpressionError,
232
+ "payload references denied constant #{node.method_name}"
233
+ end
234
+ end
235
+ end
236
+
237
+ # Refuse `@ivar = …` writes. The embedded `console_eval` path runs
238
+ # inside a throwaway receiver, so a payload setting `@audit_logger
239
+ # = nil` would only affect the throwaway — but we deny the syntactic
240
+ # form anyway as defense-in-depth for any future caller that might
241
+ # hand EvalGuard a payload evaluated in a non-isolated binding.
242
+ def scan_assignment_nodes(tree)
243
+ node = tree.find_all(:ivasgn).first
244
+ return unless node
245
+
246
+ raise ForbiddenExpressionError,
247
+ "payload writes to instance variable #{node.method_name}"
248
+ end
249
+
250
+ # Textual refusal for `@@cvar = …` / `$gvar = …`. The parser
251
+ # normalization doesn't distinguish cvasgn / gvasgn today; the
252
+ # source-level scan is the same shape as the backtick check above.
253
+ def refuse_class_or_global_var_assignment!(code)
254
+ return unless (match = CLASS_OR_GLOBAL_VAR_ASSIGNMENT.match(code))
255
+
256
+ raise ForbiddenExpressionError,
257
+ "payload writes to #{match[1]} (class/global variable assignment)"
258
+ end
259
+
260
+ def refuse_reflection!(node)
261
+ return unless DENIED_REFLECTION.include?(node.method_name.to_s)
262
+
263
+ raise ForbiddenExpressionError,
264
+ "payload calls reflection method `#{node.method_name}`"
265
+ end
266
+
267
+ def refuse_denied_constant_receiver!(node)
268
+ return unless node.receiver && DENIED_CONSTANTS.include?(node.receiver.to_s)
269
+
270
+ raise ForbiddenExpressionError,
271
+ "payload references denied constant #{node.receiver}"
272
+ end
273
+
274
+ # Catches `puts ENV` — Prism flattens method-call argument nodes into
275
+ # source-text strings, so a bare ENV passed as an argument never appears
276
+ # as its own :const node. Match it as a whole-word token in arg text.
277
+ def refuse_denied_constant_in_args!(node)
278
+ DENIED_CONSTANTS.each do |const|
279
+ pattern = /\b#{Regexp.escape(const)}\b/
280
+ next unless Array(node.arguments).any? { |arg| arg.to_s.match?(pattern) }
281
+
282
+ raise ForbiddenExpressionError,
283
+ "payload references denied constant #{const}"
284
+ end
285
+ end
286
+
287
+ def refuse_denied_call_chain!(node)
288
+ qualified = qualified_call(node)
289
+ DENIED_CALL_CHAINS.each do |chain|
290
+ next unless qualified.include?(chain)
291
+
292
+ raise ForbiddenExpressionError,
293
+ "payload references denied call chain `#{chain}`"
294
+ end
295
+ end
296
+
297
+ def refuse_credential_file_read!(node)
298
+ receiver = node.receiver.to_s
299
+ return unless CREDENTIAL_FILE_READERS.key?(receiver)
300
+ return unless CREDENTIAL_FILE_READERS.fetch(receiver).include?(node.method_name.to_s)
301
+ return unless Array(node.arguments).any? { |arg| credential_path?(arg) }
302
+
303
+ raise ForbiddenExpressionError,
304
+ "payload reads credential file via `#{receiver}.#{node.method_name}`"
305
+ end
306
+
307
+ def qualified_call(node)
308
+ return node.method_name.to_s unless node.receiver
309
+
310
+ "#{node.receiver}.#{node.method_name}"
311
+ end
312
+
313
+ def credential_path?(arg_text)
314
+ text = arg_text.to_s
315
+ CREDENTIAL_PATH_HINTS.any? { |hint| text.include?(hint) }
316
+ end
317
+ end
318
+ end
319
+ end
@@ -54,11 +54,9 @@ module Woods
54
54
  #
55
55
  # @param model_name [String]
56
56
  # @param column_names [Array<String>]
57
- # @return [true]
58
57
  # @raise [ValidationError] if any column is unknown
59
- def validate_columns!(model_name, column_names) # rubocop:disable Naming/PredicateMethod
58
+ def validate_columns!(model_name, column_names)
60
59
  column_names.each { |col| validate_column!(model_name, col) }
61
- true
62
60
  end
63
61
 
64
62
  # List all known model names.
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'json'
4
+ require 'woods/observability/structured_logger'
4
5
 
5
6
  module Woods
6
7
  module Console
@@ -10,78 +11,233 @@ module Woods
10
11
  # and all models are loaded. Uses ActiveRecord connection pool for thread
11
12
  # safety under Puma.
12
13
  #
13
- # @example In config/application.rb or an initializer:
14
+ # == Basic setup (Tier 1 tools only)
15
+ #
16
+ # Add to config/application.rb or an initializer:
17
+ #
14
18
  # config.middleware.use Woods::Console::RackMiddleware, path: '/mcp/console'
15
19
  #
20
+ # This mounts 31 console tools at /mcp/console. By default, console_sql and
21
+ # console_query are blocked in embedded mode and return an "unsupported" error
22
+ # pointing users to enable the flag.
23
+ #
24
+ # == Enabling the feature
25
+ #
26
+ # The Console MCP is disabled by default. Enable it in your Woods initializer:
27
+ #
28
+ # Woods.configure do |config|
29
+ # config.console_mcp_enabled = true
30
+ # config.console_blocked_tables = %w[authorizations credentials]
31
+ # config.console_redacted_columns = %w[api_token password_digest]
32
+ # end
33
+ #
34
+ # With the flag off, requests to the mounted path return 410 Gone so
35
+ # operators can see the endpoint exists but is gated. See
36
+ # docs/CONSOLE_MCP_SETUP.md for the full security posture (blocked tables,
37
+ # credential scanner, column/EAV redaction, SafeContext rollback).
38
+ #
39
+ # == Enabling read tools (console_sql + console_query)
40
+ #
41
+ # Set embedded_read_tools: true to unlock the sql and query tools:
42
+ #
43
+ # # config/initializers/woods_console.rb
44
+ # Rails.application.config.middleware.use \
45
+ # Woods::Console::RackMiddleware,
46
+ # path: '/mcp/console',
47
+ # embedded_read_tools: true
48
+ #
49
+ # Security posture with embedded_read_tools: true:
50
+ #
51
+ # 1. SqlValidator denylist — console_sql rejects INSERT/UPDATE/DELETE/DROP/TRUNCATE/
52
+ # ALTER/CREATE/REPLACE and similar DML/DDL at the string level before any database
53
+ # interaction. Only SELECT and WITH...SELECT are allowed.
54
+ #
55
+ # 2. SafeContext rollback — every request (including console_query) runs inside
56
+ # a database transaction that is always rolled back on completion. Even if a
57
+ # query somehow mutated state (e.g. a function with side effects), the rollback
58
+ # ensures nothing persists.
59
+ #
60
+ # 3. Per-request connection pooling — each HTTP request draws a connection from
61
+ # ActiveRecord::Base's pool and returns it after the response. No shared
62
+ # mutable state leaks between requests.
63
+ #
64
+ # These three layers make embedded_read_tools: true safe for read-only workloads.
65
+ # If your threat model requires stricter isolation, use the bridge mode instead
66
+ # (docs/CONSOLE_MCP_SETUP.md) which runs the executor in a separate process.
67
+ #
16
68
  class RackMiddleware
17
69
  # @param app [#call] The next Rack app in the middleware stack
18
70
  # @param path [String] URL path to mount the MCP endpoint (default: '/mcp/console')
19
71
  # @param embedded_read_tools [Boolean] Enable sql/query tools in embedded mode (default: false)
20
- def initialize(app, path: '/mcp/console', embedded_read_tools: false)
72
+ # @param unsafe_eval_confirmation [Confirmation, nil] Approval callback for the
73
+ # `console_eval` opt-in. Required when `WOODS_CONSOLE_UNSAFE_EVAL=true` (or
74
+ # `config.console_unsafe_eval_enabled = true`); the server refuses to boot
75
+ # without it. Takes precedence over `config.console_unsafe_eval_confirmation`.
76
+ # @param unsafe_eval_audit_log_path [String, Pathname, nil] JSONL audit log
77
+ # path for every `console_eval` run. Required on the opt-in path. Takes
78
+ # precedence over `config.console_unsafe_eval_audit_log_path`.
79
+ def initialize(app, path: '/mcp/console', embedded_read_tools: false,
80
+ unsafe_eval_confirmation: nil, unsafe_eval_audit_log_path: nil)
21
81
  @app = app
22
82
  @path = path
23
83
  @embedded_read_tools = embedded_read_tools
84
+ @unsafe_eval_confirmation = unsafe_eval_confirmation
85
+ @unsafe_eval_audit_log_path = unsafe_eval_audit_log_path
24
86
  @mutex = Mutex.new
25
87
  @transport = nil
26
88
  end
27
89
 
90
+ DISABLED_BODY = JSON.generate(
91
+ error: 'woods_console_disabled',
92
+ message: 'Woods Console MCP is disabled. Set ' \
93
+ 'Woods.configuration.console_mcp_enabled = true to enable. ' \
94
+ 'See docs/CONSOLE_MCP_SETUP.md for the full security posture.'
95
+ ).freeze
96
+
28
97
  # Rack interface — intercepts requests at the configured path.
29
98
  #
99
+ # Returns 410 Gone when Woods.configuration.console_mcp_enabled is false
100
+ # (the default). This keeps the middleware inert on hosts that have
101
+ # mounted it but not yet opted into the feature. Requests at
102
+ # non-matching paths always pass through to the wrapped app unchanged.
103
+ #
30
104
  # @param env [Hash] Rack environment
31
105
  # @return [Array] Rack response triple
32
106
  def call(env)
33
107
  return @app.call(env) unless env['PATH_INFO'].start_with?(@path)
108
+ return [410, { 'content-type' => 'application/json' }, [DISABLED_BODY]] unless enabled?
34
109
 
35
- transport = ensure_transport
36
- request = Rack::Request.new(env)
37
- transport.handle_request(request)
110
+ ensure_transport.handle_request(Rack::Request.new(env))
38
111
  end
39
112
 
40
113
  private
41
114
 
115
+ def enabled?
116
+ Woods.configuration.console_mcp_enabled
117
+ end
118
+
42
119
  # Thread-safe lazy initialization of the MCP server and transport.
43
120
  #
44
- # @return [MCP::Server::Transports::StreamableHTTPTransport]
45
- def ensure_transport # rubocop:disable Metrics/MethodLength
121
+ # @return [::MCP::Server::Transports::StreamableHTTPTransport]
122
+ def ensure_transport
46
123
  return @transport if @transport
47
124
 
48
125
  @mutex.synchronize do
49
126
  return @transport if @transport
50
127
 
51
- require 'woods/console/server'
128
+ check_blocked_tables_config!
52
129
 
130
+ require 'woods/console/server'
53
131
  Rails.application.eager_load!
54
132
 
55
- registry = ActiveRecord::Base.descendants.each_with_object({}) do |model, hash|
56
- next if model.abstract_class?
57
- next unless model.table_exists?
133
+ server = build_embedded_server
134
+ @transport = ::MCP::Server::Transports::StreamableHTTPTransport.new(server)
135
+ server.transport = @transport
136
+ @transport
137
+ end
138
+ end
139
+
140
+ # Emit a prominent warning (or raise in production) when the Console MCP
141
+ # is enabled but no tables are blocked. An empty block list means Layer 1
142
+ # of the defense stack is fully inactive — every table in the database is
143
+ # reachable via console_sql, console_query, and the model tools.
144
+ #
145
+ # Remediation:
146
+ # Woods.configure { |c| c.console_blocked_tables =
147
+ # Woods::DEFAULT_CONSOLE_BLOCKED_TABLES + %w[your_sensitive_table] }
148
+ #
149
+ # @raise [Woods::ConfigurationError] in production environments
150
+ def check_blocked_tables_config!
151
+ return unless Woods.configuration.console_blocked_tables.empty?
58
152
 
59
- hash[model.name] = model.column_names
60
- rescue StandardError
61
- next
62
- end
153
+ message =
154
+ '[Woods Console] console_blocked_tables is empty — Layer 1 (table gate) is INACTIVE. ' \
155
+ 'All tables are reachable via the Console MCP. ' \
156
+ 'Set console_blocked_tables in your Woods initializer to restrict access. ' \
157
+ 'Example: Woods.configure { |c| c.console_blocked_tables = ' \
158
+ 'Woods::DEFAULT_CONSOLE_BLOCKED_TABLES + %w[your_table] }'
63
159
 
64
- validator = ModelValidator.new(registry: registry)
160
+ raise Woods::ConfigurationError, message if defined?(Rails) && Rails.env.production?
65
161
 
66
- config = Woods.configuration
67
- redacted = Array(config.console_redacted_columns)
162
+ warn message
163
+ end
68
164
 
69
- # Each HTTP request gets its own connection from the pool.
70
- # SafeContext wraps that connection in a rolled-back transaction.
71
- safe_context = SafeContext.new(connection: ActiveRecord::Base.connection)
165
+ # Build the embedded MCP server. SafeContext is given the writing
166
+ # connection pool (not a connection) so each request leases a fresh
167
+ # connection via `pool.with_connection { ... }` and returns it to the
168
+ # pool when the rolled-back transaction completes. Capturing a
169
+ # connection at build time would leak it out of its lease and pin it
170
+ # for the lifetime of the process.
171
+ #
172
+ # Multi-DB / sharded hosts are still served from the writing pool
173
+ # only — extending SafeContext to route per role/shard is tracked as
174
+ # `WOODS-CONSOLE-PERREQ-CONN`.
175
+ def build_embedded_server
176
+ config = Woods.configuration
177
+ introspection = build_model_introspection
178
+ Server.build_embedded(
179
+ model_validator: ModelValidator.new(registry: introspection[:registry]),
180
+ safe_context: SafeContext.new(pool: ActiveRecord::Base.connection_pool),
181
+ redacted_columns: Array(config&.console_redacted_columns),
182
+ redacted_key_values: Array(config&.console_redacted_key_values),
183
+ read_tools_enabled: @embedded_read_tools,
184
+ model_tables: introspection[:tables],
185
+ model_reflections: introspection[:reflections],
186
+ unsafe_eval_confirmation: @unsafe_eval_confirmation,
187
+ unsafe_eval_audit_log_path: @unsafe_eval_audit_log_path
188
+ )
189
+ end
72
190
 
73
- server = Server.build_embedded(
74
- model_validator: validator,
75
- safe_context: safe_context,
76
- redacted_columns: redacted,
77
- read_tools_enabled: @embedded_read_tools
191
+ # Walk ActiveRecord::Base.descendants once and collect the registry,
192
+ # table map, and reflection map in a single pass. Models that raise
193
+ # during introspection are silently skipped — same semantics as the
194
+ # three separate methods this replaces.
195
+ #
196
+ # Map every model to its association-name → target-table registry so
197
+ # TableGate can resolve `joins:` / `association:` arguments before the
198
+ # executor loads data. Polymorphic associations and anything that
199
+ # raises during reflection are skipped gracefully.
200
+ #
201
+ # @return [Hash] frozen hash with keys :registry, :tables, :reflections
202
+ def build_model_introspection
203
+ registry = {}
204
+ tables = {}
205
+ reflections = {}
206
+
207
+ ActiveRecord::Base.descendants.each do |model|
208
+ next if model.abstract_class?
209
+ next unless model.table_exists?
210
+
211
+ registry[model.name] = model.column_names
212
+ tables[model.name] = model.table_name
213
+ reflections[model.name] = reflections_for(model)
214
+ rescue StandardError => e
215
+ structured_logger.debug(
216
+ 'console.model_introspection.skipped',
217
+ model: model.name,
218
+ error_class: e.class.name,
219
+ error_message: e.message
78
220
  )
221
+ next
222
+ end
79
223
 
80
- @transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
81
- server.transport = @transport
82
- @transport
224
+ { registry: registry, tables: tables, reflections: reflections }.freeze
225
+ end
226
+
227
+ def reflections_for(model)
228
+ model.reflect_on_all_associations.each_with_object({}) do |reflection, assoc_map|
229
+ next if reflection.polymorphic?
230
+
231
+ klass = reflection.klass
232
+ assoc_map[reflection.name.to_s] = klass.table_name if klass.respond_to?(:table_name)
233
+ rescue StandardError
234
+ next
83
235
  end
84
236
  end
237
+
238
+ def structured_logger
239
+ @structured_logger ||= Woods::Observability::StructuredLogger.new
240
+ end
85
241
  end
86
242
  end
87
243
  end