woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +169 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +15 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +3 -4
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +737 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +1 -1
  102. data/lib/woods/unblocked/document_builder.rb +35 -10
  103. data/lib/woods/unblocked/exporter.rb +1 -1
  104. data/lib/woods/util/host_guard.rb +61 -0
  105. data/lib/woods/version.rb +1 -1
  106. data/lib/woods.rb +126 -6
  107. metadata +69 -4
@@ -1,15 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'json'
3
4
  require 'logger'
4
5
  require 'mcp'
6
+ require 'open3'
7
+ require 'time'
5
8
  require 'set'
9
+ require_relative '../tasks'
6
10
  require_relative 'index_reader'
7
11
  require_relative 'tool_response_renderer'
8
12
 
9
13
  module Woods
10
14
  module MCP
11
- # Builds an MCP::Server with 27 tools, 2 resources, and 2 resource templates for querying
12
- # Woods extraction output, managing pipelines, and collecting feedback.
15
+ # Builds an MCP::Server with up to 29 tools, 2 resources, and 2 resource templates
16
+ # for querying Woods extraction output, managing pipelines, and collecting feedback.
17
+ # 14 tools are always registered; 15 more register conditionally based on wiring:
18
+ # 5 operator tools, 4 feedback tools, 4 snapshot tools, 1 session_trace tool,
19
+ # 1 Notion sync tool.
13
20
  #
14
21
  # All tools are defined inline via closures over an IndexReader instance.
15
22
  # No Rails required at runtime — reads JSON files from disk.
@@ -27,10 +34,19 @@ module Woods
27
34
  # @param retriever [Woods::Retriever, nil] Optional retriever for semantic search
28
35
  # @param operator [Hash, nil] Optional operator config with :status_reporter, :error_escalator, :pipeline_guard, :pipeline_lock
29
36
  # @param feedback_store [Woods::Feedback::Store, nil] Optional feedback store
37
+ # @param bootstrap_state [Woods::MCP::BootstrapState, nil] Optional state
38
+ # from the bootstrap flow. When provided, woods_status reports the
39
+ # hydrated/degraded/failed lifecycle plus the reason so operators can
40
+ # diagnose "why is semantic search disabled" without reading the Ruby
41
+ # source. Nil just means the caller didn't go through Bootstrapper.
42
+ # @param warmup [Boolean] Pre-populate the index reader's caches during build,
43
+ # shifting first-tool-call latency to startup. Default: true. Pass false for
44
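+ # tests or when startup time matters more than first-query latency.
+ # @param retriever_reloader [#call, nil] Optional callable invoked by the `reload`
+ # tool after the index reader has reloaded; its return value is reported under
+ # the `retriever` key of the reload response (a raised error is reported there too).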
30
45
  # @return [MCP::Server] Configured server ready for transport
31
46
  def build(index_dir:, retriever: nil, operator: nil, feedback_store: nil, snapshot_store: nil,
32
- response_format: nil)
47
+ bootstrap_state: nil, response_format: nil, warmup: true, retriever_reloader: nil)
33
48
  reader = IndexReader.new(index_dir)
49
+ reader.warmup! if warmup
34
50
  config = Woods.configuration
35
51
  format = response_format || (config.respond_to?(:context_format) ? config.context_format : nil) || :markdown
36
52
  renderer = ToolResponseRenderer.for(format)
@@ -39,6 +55,31 @@ module Woods
39
55
 
40
56
  # Lambda captured by all tool blocks for building responses.
41
57
  respond = method(:text_response)
58
+ respond_err = method(:error_response)
59
+ op_missing = lambda do |tool|
60
+ error_response(
61
+ 'Pipeline operator is not configured. Pass `operator:` to Woods::MCP::Server.build ' \
62
+ 'or use Woods::MCP::Bootstrapper to wire StatusReporter, ErrorEscalator, and PipelineGuard.',
63
+ code: :not_configured, config_key: 'operator',
64
+ doc_link: 'docs/OPERATOR_GUIDE.md', tool: tool
65
+ )
66
+ end
67
+ fb_missing = lambda do |tool|
68
+ error_response(
69
+ 'Feedback store is not configured. Pass `feedback_store:` to Woods::MCP::Server.build ' \
70
+ 'to enable retrieval feedback capture.',
71
+ code: :not_configured, config_key: 'feedback_store',
72
+ doc_link: 'docs/FEEDBACK_STORE.md', tool: tool
73
+ )
74
+ end
75
+ snap_missing = lambda do |tool|
76
+ error_response(
77
+ 'Snapshot store is not configured. Set `enable_snapshots: true` in Woods.configure ' \
78
+ 'and pass `snapshot_store:` to Woods::MCP::Server.build.',
79
+ code: :not_configured, config_key: 'enable_snapshots',
80
+ doc_link: 'docs/TEMPORAL_SNAPSHOTS.md', tool: tool
81
+ )
82
+ end
42
83
 
43
84
  server = ::MCP::Server.new(
44
85
  name: 'woods',
@@ -47,8 +88,8 @@ module Woods
47
88
  resource_templates: resource_templates
48
89
  )
49
90
 
50
- define_lookup_tool(server, reader, respond, renderer)
51
- define_search_tool(server, reader, respond, renderer)
91
+ define_lookup_tool(server, reader, respond, respond_err, renderer)
92
+ define_search_tool(server, reader, respond, respond_err, renderer)
52
93
  define_traversal_tool(server, reader, respond, renderer,
53
94
  name: 'dependencies',
54
95
  description: 'Traverse forward dependencies of a unit (what it depends on). Returns a BFS tree with depth.',
@@ -65,14 +106,21 @@ module Woods
65
106
  define_pagerank_tool(server, reader, respond, renderer)
66
107
  define_framework_tool(server, reader, respond, renderer)
67
108
  define_recent_changes_tool(server, reader, respond, renderer)
68
- define_reload_tool(server, reader, respond)
69
- define_retrieve_tool(server, retriever, respond)
70
- define_trace_flow_tool(server, reader, index_dir, respond, renderer)
71
- define_session_trace_tool(server, reader, respond)
72
- define_operator_tools(server, operator, respond)
73
- define_feedback_tools(server, feedback_store, respond)
74
- define_snapshot_tools(server, snapshot_store, respond)
75
- define_notion_sync_tool(server, reader, index_dir, respond)
109
+ define_reload_tool(server, reader, respond, retriever_reloader)
110
+ define_retrieve_tool(server, retriever, respond, respond_err)
111
+ define_trace_flow_tool(server, reader, index_dir, respond, respond_err, renderer)
112
+ # Conditionally register collaborator-dependent tools. Historically
113
+ # all 15 stubs were registered unconditionally and returned
114
+ # isError: true when the wiring was missing — that added token
115
+ # noise to every LLM turn's tool catalog and invited the model to
116
+ # try tools guaranteed to fail. Only register when the collaborator
117
+ # is wired, so tools/list reflects what the server can actually do.
118
+ define_session_trace_tool(server, reader, respond, respond_err) if session_tracer_wired?
119
+ define_operator_tools(server, operator, respond, respond_err, op_missing) if operator
120
+ define_feedback_tools(server, feedback_store, respond, respond_err, fb_missing) if feedback_store
121
+ define_snapshot_tools(server, snapshot_store, respond, respond_err, snap_missing) if snapshot_store
122
+ define_notion_sync_tool(server, reader, index_dir, respond, respond_err) if notion_wired?
123
+ define_woods_status_tool(server, reader, retriever, index_dir, bootstrap_state, respond)
76
124
  register_resource_handler(server, reader)
77
125
 
78
126
  server
@@ -80,10 +128,67 @@ module Woods
80
128
 
81
129
  private
82
130
 
131
+ # Session tracer requires a configured session_store on Woods.configuration.
132
+ # The tool reads the store inside its handler; skipping registration when
133
+ # the store is absent keeps tools/list honest.
134
+ #
135
+ # The `session_trace` handler itself only calls `store.read`. We
136
+ # ALSO probe `:sessions` as a defense-in-depth cheap contract
137
+ # check — every shipped store (File/Redis/SolidCache) implements
138
+ # both, so if a misconfigured store lacks `:sessions` it is almost
139
+ # certainly missing `:read` too, and we'd rather fail at wire-up
140
+ # than at first invocation. A record-only store (permitted by the
141
+ # middleware for backward-compatibility) will correctly drop out
142
+ # of tools/list here.
143
# Reports whether the session_trace tool has a usable backing store.
#
# True only when Woods.configuration exposes a non-nil `session_store`
# that responds to both `read` and `sessions`.
#
# @return [Boolean]
def session_tracer_wired?
  settings = Woods.configuration
  # A missing configuration (or one without the accessor) leaves `store` nil.
  store = settings.session_store if settings.respond_to?(:session_store)
  return false if store.nil?

  store.respond_to?(:read) && store.respond_to?(:sessions)
end
153
+
154
+ # Notion export needs both an API token and at least one database ID.
155
+ # NOTION_API_TOKEN env var overrides the config token (see
156
+ # docs/NOTION_EXPORT.md).
157
# Reports whether Notion export is configured: a non-empty API token and a
# non-empty set of database IDs must both be present.
#
# The NOTION_API_TOKEN environment variable takes precedence over
# `config.notion_api_token`.
#
# NOTE(review): an exported-but-empty NOTION_API_TOKEN still takes precedence
# here and therefore masks a configured token. Confirm how the sync handler
# resolves the token before changing that precedence.
#
# @return [Boolean] always strictly true or false (never nil)
def notion_wired?
  config = Woods.configuration
  return false unless config

  token = ENV['NOTION_API_TOKEN'] || (config.respond_to?(:notion_api_token) ? config.notion_api_token : nil)
  ids = config.respond_to?(:notion_database_ids) ? config.notion_database_ids : nil
  # Guard first so the predicate yields false rather than leaking nil.
  return false unless token && ids

  !token.empty? && !ids.empty?
end
165
+
83
166
  def text_response(text)
84
167
  ::MCP::Tool::Response.new([{ type: 'text', text: text }])
85
168
  end
86
169
 
170
+ # Build a structured error response that carries machine-readable
171
+ # metadata alongside the human-readable text. Agents can branch on
172
+ # `_meta.error_code` (e.g. `:not_configured`, `:not_found`,
173
+ # `:rate_limited`, `:unsupported_argument`) without parsing the text.
174
+ #
175
+ # @param message [String] Human-readable explanation
176
+ # @param code [Symbol] Stable error code (machine-readable)
177
+ # @param config_key [String, nil] Offending configuration key when relevant
178
+ # @param doc_link [String, nil] Relative docs path explaining the fix
179
+ # @param extra [Hash] Additional meta fields (e.g., identifier:, tool:)
180
# Build an MCP error response whose `meta` carries machine-readable details
# alongside the human-readable text.
#
# `meta` always starts with `error_code`, followed by `config_key` and
# `doc_link` when given, then any extra fields. Extra fields can add keys
# but can never overwrite one already set, so a stray `error_code:` passed
# through `extra` cannot replace the code given via `code:`.
#
# @param message [String] Human-readable explanation
# @param code [Symbol] Stable error code (machine-readable)
# @param config_key [String, nil] Offending configuration key when relevant
# @param doc_link [String, nil] Relative docs path explaining the fix
# @param extra [Hash] Additional meta fields (e.g., identifier:, tool:)
# @return [::MCP::Tool::Response] response flagged with `error: true`
def error_response(message, code:, config_key: nil, doc_link: nil, **extra)
  meta = { error_code: code }
  meta[:config_key] = config_key if config_key
  meta[:doc_link] = doc_link if doc_link
  # On a key collision keep the value already in `meta`.
  meta.merge!(extra) { |_key, reserved, _given| reserved }
  ::MCP::Tool::Response.new(
    [{ type: 'text', text: message }],
    error: true,
    meta: meta
  )
end
191
+
87
192
  def truncate_section(array, limit)
88
193
  return array unless array.is_a?(Array)
89
194
 
@@ -108,14 +213,55 @@ module Woods
108
213
  value.is_a?(String) ? [value] : value
109
214
  end
110
215
 
111
- # Coerce a value to an Integer. Converts String representations
112
- # to Integer; leaves existing Integers and nil unchanged.
113
- # MCP clients may send "2" (string) instead of 2 (integer).
216
+ # Coerce a value to an Integer.
114
217
  #
115
- # @param value [String, Integer, nil] The input value
218
+ # - `nil` passes through unchanged.
219
+ # - `Integer` passes through unchanged.
220
+ # - `String` is accepted iff it represents a decimal integer with an
221
+ # optional leading `+`/`-`. `"abc"` and `"1abc"` used to silently
222
+ # coerce to `0` via `String#to_i`; that was a footgun for tools with
223
+ # integer bounds (limit, offset, budget, timeout) — they'd receive
224
+ # the wrong value without any feedback to the client. Now we raise
225
+ # `ArgumentError` so the MCP dispatch layer can surface a proper
226
+ # JSON-RPC error back to the caller.
227
+ # - Any other type raises `ArgumentError`.
228
+ #
229
+ # @param value [String, Integer, nil]
116
230
  # @return [Integer, nil]
231
+ # @raise [ArgumentError] if `value` is not nil, Integer, or an Integer-shaped String.
232
+ INTEGER_STRING = /\A[+-]?\d+\z/
233
+ private_constant :INTEGER_STRING
117
234
  def coerce_integer(value)
118
- value.is_a?(String) ? value.to_i : value
235
+ return nil if value.nil?
236
+ return value if value.is_a?(Integer)
237
+
238
+ return Integer(value, 10) if value.is_a?(String) && value.match?(INTEGER_STRING)
239
+
240
+ raise ArgumentError, "expected integer, got #{value.class}: #{value.inspect}"
241
+ end
242
+
243
+ # Load a precomputed flow document written by FlowPrecomputer, when
244
+ # `config.precompute_flows` was enabled during extraction. Returns nil
245
+ # when the entry point is missing a method suffix, the JSON file isn't
246
+ # on disk, or the file can't be parsed — callers fall back to
247
+ # FlowAssembler.
248
+ #
249
+ # @param index_dir [String]
250
+ # @param entry_point [String] e.g., "PostsController#create"
251
+ # @return [Woods::FlowDocument, nil]
252
# Load a precomputed flow document from `<index_dir>/flows`, if one exists
# for the given entry point.
#
# Returns nil, so callers can fall back to query-time assembly, when:
# - the entry point has no `Controller#action` shape,
# - the derived file name would resolve outside the flows directory,
# - the file is missing or unreadable,
# - the file is not valid JSON or its root is not an object.
#
# @param index_dir [String]
# @param entry_point [String] e.g., "PostsController#create"
# @return [Woods::FlowDocument, nil]
def load_precomputed_flow(index_dir, entry_point)
  controller, action = entry_point.to_s.split('#', 2)
  return nil if controller.nil? || controller.empty? || action.nil? || action.empty?

  filename = "#{controller.gsub('::', '__')}_#{action}.json"
  # entry_point is supplied by the tool caller. Reject anything that is not
  # a bare file name so `../` segments cannot read JSON outside flows/.
  return nil if filename.include?("\0") || File.basename(filename) != filename

  path = File.join(index_dir, 'flows', filename)
  return nil unless File.file?(path)

  payload = JSON.parse(File.read(path))
  return nil unless payload.is_a?(Hash)

  Woods::FlowDocument.from_h(payload)
rescue JSON::ParserError, SystemCallError
  # Corrupt or unreadable file: treat as "no precomputed flow".
  nil
end
120
266
 
121
267
  # Apply offset+limit pagination to a single section key within a container hash.
@@ -139,26 +285,40 @@ module Woods
139
285
  container["#{key}_offset"] = offset if offset.positive?
140
286
  end
141
287
 
142
- def define_lookup_tool(server, reader, respond, renderer)
288
+ def define_lookup_tool(server, reader, respond, respond_err, renderer)
143
289
  coerce = method(:coerce_array)
144
290
  server.define_tool(
145
291
  name: 'lookup',
146
292
  description: 'Look up a code unit by its exact identifier. Returns full source code, metadata, ' \
147
293
  'dependencies, and dependents. Use include_source: false to omit source_code. ' \
148
- 'Use sections to select specific keys (type, identifier, file_path, namespace are always included).',
294
+ 'Use sections to select specific keys (type, identifier, file_path, namespace are always included). ' \
295
+ '`name` is accepted as an alias for `identifier` for discoverability.',
149
296
  input_schema: {
150
297
  properties: {
151
298
  identifier: { type: 'string',
152
299
  description: 'Exact unit identifier (e.g. "Post", "PostsController", "Api::V1::HealthController")' },
300
+ name: { type: 'string', description: 'Alias for `identifier`. Either one works.' },
153
301
  include_source: { type: 'boolean', description: 'Include source_code in response (default: true)' },
154
302
  sections: {
155
303
  type: 'array', items: { type: 'string' },
156
304
  description: 'Select specific keys to return (e.g. ["metadata", "dependencies"]). Always includes type, identifier, file_path, namespace.'
157
305
  }
158
- },
159
- required: ['identifier']
306
+ }
307
+ # NOTE: 'identifier' is not listed as required — `name` is an
308
+ # accepted alias. The handler validates that one of the two
309
+ # was provided.
160
310
  }
161
- ) do |identifier:, server_context:, include_source: nil, sections: nil|
311
+ ) do |server_context:, identifier: nil, name: nil, include_source: nil, sections: nil|
312
+ identifier ||= name
313
+ if identifier.nil? || identifier.empty?
314
+ next respond_err.call(
315
+ 'lookup requires `identifier` (or its alias `name`).',
316
+ code: :unsupported_argument,
317
+ tool: 'lookup',
318
+ argument: 'identifier',
319
+ hint: 'Pass identifier: "PostsController" (or name: "PostsController").'
320
+ )
321
+ end
162
322
  sections = coerce.call(sections)
163
323
  unit = reader.find_unit(identifier)
164
324
  if unit
@@ -171,47 +331,87 @@ module Woods
171
331
  end
172
332
  respond.call(renderer.render(:lookup, filtered))
173
333
  else
174
- respond.call("Unit not found: #{identifier}")
334
+ respond_err.call(
335
+ "Unit not found: #{identifier}",
336
+ code: :not_found,
337
+ identifier: identifier,
338
+ tool: 'lookup',
339
+ hint: 'Use `search` to find identifiers by pattern, then `lookup` on the exact match.'
340
+ )
175
341
  end
176
342
  end
177
343
  end
178
344
 
179
- def define_search_tool(server, reader, respond, renderer)
345
# Registers the `search` tool: regex and/or literal prefix/suffix matching
# over indexed units, delegated to `reader.search`.
#
# The handler rejects calls that supply none of `query`, `exact_prefix`,
# `exact_suffix`; otherwise it forwards the coerced arguments and renders
# the results, copying through the reader's `note` / `partial` markers.
#
# @param server [#define_tool] MCP server receiving the tool
# @param reader [#search] index reader
# @param respond [#call] builds a success response from text
# @param respond_err [#call] builds a typed error response
# @param renderer [#render] response renderer
def define_search_tool(server, reader, respond, respond_err, renderer)
  to_array = method(:coerce_array)
  to_int = method(:coerce_integer)
  blank = ->(candidate) { candidate.nil? || candidate.empty? }
  string_list = ->(text) { { type: 'array', items: { type: 'string' }, description: text } }

  summary = 'Find code units whose identifiers (or source/metadata) match a regex. ' \
            'Example: search("Worker|Job") returns all workers and jobs; search("^Post") ' \
            'returns units starting with "Post". Returns [{identifier, type, match_field}]. ' \
            'Use `lookup` for exact identifiers, `dependencies`/`dependents` for graph traversal. ' \
            'Gotchas: query is a Ruby regex — literal pipe needs escaping as \\|; ' \
            'types restricts which index directories are scanned (e.g. ["mailer"] scans only ' \
            'the mailers dir); invalid regex falls back to literal match. ' \
            'For plain prefix/suffix matching on namespaces, prefer exact_prefix / exact_suffix ' \
            '(literal, case-insensitive) over escaping regex anchors.'

  schema = {
    properties: {
      query: { type: 'string', description: 'Case-insensitive Ruby regex pattern (e.g. "Worker|Job", "^Post", ".*Service$")' },
      types: string_list.call('Restrict scan to these unit types: model, controller, service, job, mailer, etc.'),
      fields: string_list.call('Fields to search: identifier (default), source_code, metadata'),
      limit: { type: 'integer', description: 'Maximum results (default: 20)' },
      exact_prefix: {
        type: 'string',
        description: 'Literal (non-regex) case-insensitive identifier prefix filter. ' \
                     'Use for namespace scoping like "Next::Settings::" without escaping regex metacharacters.'
      },
      exact_suffix: {
        type: 'string',
        description: 'Literal (non-regex) case-insensitive identifier suffix filter. ' \
                     'Use for suffix matching like "Controller" without escaping regex metacharacters.'
      }
    }
  }

  server.define_tool(name: 'search', description: summary, input_schema: schema) \
  do |server_context:, query: nil, types: nil, fields: nil, limit: nil, exact_prefix: nil, exact_suffix: nil|
    # At least one matcher has to be present; checked in the order
    # query, exact_prefix, exact_suffix.
    if [query, exact_prefix, exact_suffix].all?(&blank)
      next respond_err.call(
        'search requires `query` or at least one of `exact_prefix` / `exact_suffix`.',
        code: :unsupported_argument,
        tool: 'search',
        argument: 'query',
        hint: 'Pass query: "Worker|Job" for regex matching, or exact_prefix: "Next::Settings::" for literal prefix scoping.'
      )
    end

    found = reader.search(
      query,
      types: to_array.call(types),
      fields: to_array.call(fields) || %w[identifier],
      limit: to_int.call(limit) || 20,
      exact_prefix: exact_prefix,
      exact_suffix: exact_suffix
    )

    hits = found[:results]
    payload = { query: query, result_count: hits.size, results: hits }
    note = found[:note]
    payload[:note] = note if note
    payload[:partial] = true if found[:partial]
    respond.call(renderer.render(:search, payload))
  end
end
217
417
 
@@ -228,14 +428,23 @@ module Woods
228
428
  types: {
229
429
  type: 'array', items: { type: 'string' },
230
430
  description: 'Filter to these types'
431
+ },
432
+ via: {
433
+ type: 'array', items: { type: 'string' },
434
+ description: 'Filter by relationship type. Accepts either a single string ' \
435
+ "(e.g. 'code_reference') or an array " \
436
+ "(e.g. ['code_reference','render']); both forms are coerced to an array internally. " \
437
+ 'Known values: link_to, redirect_to, form_action, render, code_reference, ' \
438
+ 'belongs_to, has_many, has_one, has_and_belongs_to_many.'
231
439
  }
232
440
  },
233
441
  required: ['identifier']
234
442
  }
235
- ) do |identifier:, server_context:, depth: nil, types: nil|
443
+ ) do |identifier:, server_context:, depth: nil, types: nil, via: nil|
236
444
  types = coerce.call(types)
445
+ via = coerce.call(via)
237
446
  depth = coerce_int.call(depth)
238
- result = reader.send(reader_method, identifier, depth: depth || 2, types: types)
447
+ result = reader.send(reader_method, identifier, depth: depth || 2, types: types, via: via)
239
448
  if result[:found] == false
240
449
  result[:message] =
241
450
  "Identifier '#{identifier}' not found in the index. Use 'search' to find valid identifiers."
@@ -257,7 +466,7 @@ module Woods
257
466
  }
258
467
  }
259
468
  ) do |server_context:, detail: nil|
260
- result = { manifest: reader.manifest }
469
+ result = { manifest: reader.manifest, template_engines: reader.template_engines }
261
470
  result[:summary] = reader.summary if (detail || 'summary') == 'full'
262
471
  respond.call(renderer.render(:structure, result))
263
472
  end
@@ -434,56 +643,127 @@ module Woods
434
643
  end
435
644
  end
436
645
 
437
- def define_reload_tool(server, reader, respond)
646
# Registers the `reload` tool, which re-reads the index from disk and, when
# a reloader callable was supplied, refreshes the retriever as well.
#
# The response is pretty-printed JSON with `reloaded`, `extracted_at`,
# `total_units` and `counts` taken from the fresh manifest. With a reloader
# present its return value is added under `retriever`; if it raises a
# StandardError, `retriever` holds `{ error: "<class>: <message>" }` and
# the tool call still succeeds.
#
# @param server [#define_tool] MCP server receiving the tool
# @param reader [#reload!, #manifest] index reader
# @param respond [#call] builds a success response from text
# @param retriever_reloader [#call, nil] optional retriever refresh hook
def define_reload_tool(server, reader, respond, retriever_reloader)
  summary = 'Reload extraction data from disk. Use after re-running extraction or woods:embed to pick ' \
            'up changes without restarting the server. Refreshes the JSON index (manifest, dependency ' \
            'graph, unit cache) AND re-hydrates the retriever\'s in-memory vector/metadata/graph ' \
            'stores from the latest dumps. Durable backends (pgvector, Qdrant) are auto-refreshed ' \
            'externally — their counts in the response reflect the read-through state.'

  server.define_tool(
    name: 'reload',
    description: summary,
    input_schema: { type: 'object', properties: {} }
  ) do |server_context:|
    reader.reload!
    fresh = reader.manifest

    payload = { reloaded: true }
    %w[extracted_at total_units counts].each { |field| payload[field.to_sym] = fresh[field] }

    if retriever_reloader
      payload[:retriever] =
        begin
          retriever_reloader.call
        rescue StandardError => e
          { error: "#{e.class}: #{e.message}" }
        end
    end

    respond.call(JSON.pretty_generate(payload))
  end
end
454
674
 
455
- def define_retrieve_tool(server, retriever, respond)
675
# Registers the `codebase_retrieve` semantic-search tool.
#
# Handler behavior:
# - `limit` is not part of the schema, but callers send it because sibling
#   tools use it as a result count. Any non-nil `limit` yields a typed
#   `:unsupported_argument` error pointing at `budget`. That error is
#   produced even when `limit` is not integer-shaped; the coercion failure
#   is absorbed so it cannot replace the guidance.
# - `budget` is coerced to an Integer (default 8000); `types` and
#   `exclude_types` are coerced to arrays and forwarded to the retriever.
# - Without a retriever the tool answers with a `:not_configured` error.
#
# @param server [#define_tool] MCP server receiving the tool
# @param retriever [#retrieve, nil] semantic retriever, nil when disabled
# @param respond [#call] builds a success response from text
# @param respond_err [#call] builds a typed error response
def define_retrieve_tool(server, retriever, respond, respond_err)
  coerce_int = method(:coerce_integer)
  coerce = method(:coerce_array)
  server.define_tool(
    name: 'codebase_retrieve',
    description: 'Semantic search: retrieve relevant code units for a natural-language question. ' \
                 'Example: codebase_retrieve("how does billing work?") returns ranked source context. ' \
                 'Returns a token-budgeted context string ready to paste into a prompt. ' \
                 'Use `search` for exact name/pattern matching; use this for conceptual questions. ' \
                 'Requires an embedding provider — disabled if OPENAI_API_KEY is unset and Ollama is unreachable. ' \
                 'By default excludes test_mappings (~33% of a typical index) so spec filenames do not ' \
                 'dominate semantic rank; pass types: ["test_mapping"] to opt back in. ' \
                 'Parameter: use `budget` for the token budget (not `limit` — that means result count ' \
                 'on sibling tools, and mapping it here would silently produce a near-empty response).',
    input_schema: {
      properties: {
        query: { type: 'string',
                 description: 'Natural language question (e.g. "How does user authentication work?")' },
        budget: { type: 'integer',
                  description: 'Token budget for context assembly (default: 8000).' },
        types: {
          type: 'array', items: { type: 'string' },
          description: 'Restrict results to these unit types (model, controller, service, job, mailer, ' \
                       'rails_source, test_mapping, etc.). Overrides the default test_mapping exclusion. ' \
                       'When the unfiltered top-K has no candidate of a requested type, the retriever ' \
                       'falls back to rank-within-type so the response is populated whenever units of ' \
                       'the requested type exist in the index. The response appends a "Type rank ' \
                       'context" table with per-type: source, rank in unfiltered top-K, global_k, ' \
                       'total_of_type. Read source to tell the cases apart: in_top_k (strong match), ' \
                       'within_type_fallback (weak match surfaced by the fallback), outside_top_k ' \
                       '(index has this type but other requested types filled the result), absent ' \
                       '(zero units of this type in the index).'
        },
        exclude_types: {
          type: 'array', items: { type: 'string' },
          description: 'Additional types to exclude on top of the default test_mapping exclusion.'
        }
      },
      required: ['query']
    }
  ) do |query:, server_context:, budget: nil, limit: nil, types: nil, exclude_types: nil|
    # Mapping `limit` onto `budget` would silently produce a near-empty
    # response (limit: 10 → 10-token budget), so reject it with guidance.
    unless limit.nil?
      # Coerce once. A malformed `limit` must still yield the typed error
      # below rather than an ArgumentError from the coercion helper.
      tokens = begin
        coerce_int.call(limit)
      rescue ArgumentError
        nil
      end
      suggestion =
        if tokens
          "Pass `budget: #{tokens}` if you meant a #{tokens}-token context, " \
            'or drop the kwarg entirely for the default 8000.'
        else
          'Pass `budget:` with an integer token count, or drop the kwarg entirely for the default 8000.'
        end

      next respond_err.call(
        'codebase_retrieve uses `budget` (token budget, default 8000), not `limit`. ' \
        '`limit` is the result-count parameter on sibling tools (search, recent_changes, pagerank). ' +
          suggestion,
        code: :unsupported_argument,
        tool: 'codebase_retrieve',
        argument: 'limit',
        hint: 'Use `budget:` for tokens. Retrieval does not cap by result count — the token budget ' \
              'governs how many ranked units fit in the returned context.'
      )
    end

    budget = coerce_int.call(budget)
    types = coerce.call(types)
    exclude_types = coerce.call(exclude_types)
    if retriever
      result = retriever.retrieve(
        query,
        budget: budget || 8000,
        types: types,
        exclude_types: exclude_types
      )
      respond.call(result.context)
    else
      respond_err.call(
        'Semantic search is disabled — no embedding provider is configured. ' \
        'To enable: set OPENAI_API_KEY, or run Ollama locally ' \
        '(brew install ollama && ollama serve && ollama pull nomic-embed-text). ' \
        'Use the `search` tool for pattern-based matching in the meantime.',
        code: :not_configured,
        config_key: 'embedding_provider',
        doc_link: 'docs/RETRIEVAL_GUIDE.md#configuring-retrieval',
        tool: 'codebase_retrieve'
      )
    end
  end
end
482
760
 
483
- def define_trace_flow_tool(server, reader, index_dir, respond, renderer)
761
+ def define_trace_flow_tool(server, reader, index_dir, respond, respond_err, renderer)
484
762
  require_relative '../flow_assembler'
763
+ require_relative '../flow_document'
485
764
  require_relative '../dependency_graph'
486
765
  coerce_int = method(:coerce_integer)
766
+ load_precomputed = method(:load_precomputed_flow)
487
767
 
488
768
  server.define_tool(
489
769
  name: 'trace_flow',
@@ -503,21 +783,33 @@ module Woods
503
783
  }
504
784
  ) do |entry_point:, server_context:, depth: nil|
505
785
  max_depth = coerce_int.call(depth) || 3
506
- graph = reader.dependency_graph
507
786
 
508
- assembler = Woods::FlowAssembler.new(
509
- graph: graph,
510
- extracted_dir: index_dir
511
- )
512
- flow_doc = assembler.assemble(entry_point, max_depth: max_depth)
787
+ # Prefer the precomputed flow JSON written by FlowPrecomputer during
788
+ # extraction (gated on `config.precompute_flows`) — it avoids
789
+ # re-parsing source on every request. Fall back to query-time
790
+ # reassembly when no precomputed document exists.
791
+ flow_doc = load_precomputed.call(index_dir, entry_point)
792
+ flow_doc ||= begin
793
+ graph = reader.dependency_graph
794
+ assembler = Woods::FlowAssembler.new(graph: graph, extracted_dir: index_dir)
795
+ assembler.assemble(entry_point, max_depth: max_depth)
796
+ end
513
797
 
514
798
  respond.call(renderer.render(:trace_flow, flow_doc.to_h))
515
799
  rescue StandardError => e
516
- respond.call(JSON.pretty_generate({ error: e.message }))
800
+ # Emit an MCP error so clients can detect the failure and
801
+ # surface it, rather than wrapping the error payload in a
802
+ # successful response — consistent with session_trace and
803
+ # codebase_retrieve.
804
+ respond_err.call(
805
+ "trace_flow failed: #{e.message}",
806
+ code: :internal_error,
807
+ data: { entry_point: entry_point, exception: e.class.name }
808
+ )
517
809
  end
518
810
  end
519
811
 
520
- def define_session_trace_tool(server, reader, respond)
812
+ def define_session_trace_tool(server, reader, respond, respond_err)
521
813
  coerce_int = method(:coerce_integer)
522
814
  server.define_tool(
523
815
  name: 'session_trace',
@@ -534,7 +826,16 @@ module Woods
534
826
  budget = coerce_int.call(budget)
535
827
  depth = coerce_int.call(depth)
536
828
  store = Woods.configuration.session_store
537
- next respond.call(JSON.pretty_generate({ error: 'Session tracer not configured' })) unless store
829
+ unless store
830
+ next respond_err.call(
831
+ 'Session tracer is not configured. Assign `session_store` (FileStore, RedisStore, or SolidCacheStore) ' \
832
+ 'and set `session_tracer_enabled = true` in Woods.configure.',
833
+ code: :not_configured,
834
+ config_key: 'session_store',
835
+ doc_link: 'docs/SESSION_TRACER.md',
836
+ tool: 'session_trace'
837
+ )
838
+ end
538
839
 
539
840
  require_relative '../session_tracer/session_flow_assembler'
540
841
 
@@ -544,26 +845,31 @@ module Woods
544
845
  doc = assembler.assemble(session_id, budget: budget || 8000, depth: depth || 1)
545
846
  respond.call(doc.to_markdown)
546
847
  rescue StandardError => e
547
- respond.call(JSON.pretty_generate({ error: e.message }))
848
+ respond_err.call(
849
+ "Session trace failed: #{e.message}",
850
+ code: :internal_error,
851
+ tool: 'session_trace',
852
+ session_id: session_id
853
+ )
548
854
  end
549
855
  end
550
856
 
551
- def define_operator_tools(server, operator, respond)
552
- define_pipeline_extract_tool(server, operator, respond)
553
- define_pipeline_embed_tool(server, operator, respond)
554
- define_pipeline_status_tool(server, operator, respond)
555
- define_pipeline_diagnose_tool(server, operator, respond)
556
- define_pipeline_repair_tool(server, operator, respond)
857
+ def define_operator_tools(server, operator, respond, respond_err, op_missing)
858
+ define_pipeline_extract_tool(server, operator, respond, respond_err, op_missing)
859
+ define_pipeline_embed_tool(server, operator, respond, respond_err, op_missing)
860
+ define_pipeline_status_tool(server, operator, respond, respond_err, op_missing)
861
+ define_pipeline_diagnose_tool(server, operator, respond, respond_err, op_missing)
862
+ define_pipeline_repair_tool(server, operator, respond, respond_err, op_missing)
557
863
  end
558
864
 
559
- def define_feedback_tools(server, feedback_store, respond)
560
- define_retrieval_rate_tool(server, feedback_store, respond)
561
- define_retrieval_report_gap_tool(server, feedback_store, respond)
562
- define_retrieval_explain_tool(server, feedback_store, respond)
563
- define_retrieval_suggest_tool(server, feedback_store, respond)
865
+ def define_feedback_tools(server, feedback_store, respond, _respond_err, fb_missing)
866
+ define_retrieval_rate_tool(server, feedback_store, respond, fb_missing)
867
+ define_retrieval_report_gap_tool(server, feedback_store, respond, fb_missing)
868
+ define_retrieval_explain_tool(server, feedback_store, respond, fb_missing)
869
+ define_retrieval_suggest_tool(server, feedback_store, respond, fb_missing)
564
870
  end
565
871
 
566
- def define_pipeline_extract_tool(server, operator, respond)
872
+ def define_pipeline_extract_tool(server, operator, respond, respond_err, op_missing)
567
873
  server.define_tool(
568
874
  name: 'pipeline_extract',
569
875
  description: 'Trigger a codebase extraction pipeline run. Checks rate limits before proceeding.',
@@ -573,11 +879,31 @@ module Woods
573
879
  }
574
880
  }
575
881
  ) do |server_context:, incremental: nil|
576
- next respond.call('Pipeline operator is not configured.') unless operator
882
+ next op_missing.call('pipeline_extract') unless operator
577
883
 
578
884
  guard = operator[:pipeline_guard]
579
- next respond.call('Extraction is rate-limited. Try again later.') if guard && !guard.allow?(:extraction)
885
+ if guard && !guard.allow?(:extraction)
886
+ next respond_err.call(
887
+ 'Extraction is rate-limited. Try again later.',
888
+ code: :rate_limited,
889
+ tool: 'pipeline_extract',
890
+ retry_after_seconds: 300
891
+ )
892
+ end
580
893
 
894
+ # Acquire the in-process lock BEFORE recording to the guard.
895
+ # Otherwise a refused "already running" request still resets
896
+ # the cooldown clock and blocks the next legitimate attempt
897
+ # for the full 5-minute window once the current run finishes.
898
+ unless Woods::MCP::Server.send(:pipeline_start, :extraction)
899
+ next respond_err.call(
900
+ 'Extraction pipeline is already running. Wait for it to complete.',
901
+ code: :already_running,
902
+ tool: 'pipeline_extract'
903
+ )
904
+ end
905
+
906
+ # Lock acquired — now it's safe to record the run.
581
907
  guard&.record!(:extraction)
582
908
 
583
909
  Thread.new do
@@ -588,6 +914,8 @@ module Woods
588
914
  rescue StandardError => e
589
915
  logger = defined?(Rails) ? Rails.logger : Logger.new($stderr)
590
916
  logger.error("[Woods] Pipeline extract failed: #{e.message}")
917
+ ensure
918
+ Woods::MCP::Server.send(:pipeline_finish, :extraction)
591
919
  end
592
920
 
593
921
  respond.call(JSON.pretty_generate({
@@ -597,7 +925,7 @@ module Woods
597
925
  end
598
926
  end
599
927
 
600
- def define_pipeline_embed_tool(server, operator, respond)
928
+ def define_pipeline_embed_tool(server, operator, respond, respond_err, op_missing)
601
929
  server.define_tool(
602
930
  name: 'pipeline_embed',
603
931
  description: 'Trigger embedding generation for extracted units. Checks rate limits before proceeding.',
@@ -607,29 +935,43 @@ module Woods
607
935
  }
608
936
  }
609
937
  ) do |server_context:, incremental: nil|
610
- next respond.call('Pipeline operator is not configured.') unless operator
938
+ next op_missing.call('pipeline_embed') unless operator
611
939
 
612
940
  guard = operator[:pipeline_guard]
613
- next respond.call('Embedding is rate-limited. Try again later.') if guard && !guard.allow?(:embedding)
941
+ if guard && !guard.allow?(:embedding)
942
+ next respond_err.call(
943
+ 'Embedding is rate-limited. Try again later.',
944
+ code: :rate_limited,
945
+ tool: 'pipeline_embed',
946
+ retry_after_seconds: 300
947
+ )
948
+ end
949
+
950
+ # Acquire the in-process lock first so a refused "already
951
+ # running" request doesn't burn the cooldown clock.
952
+ unless Woods::MCP::Server.send(:pipeline_start, :embedding)
953
+ next respond_err.call(
954
+ 'Embedding pipeline is already running. Wait for it to complete.',
955
+ code: :already_running,
956
+ tool: 'pipeline_embed'
957
+ )
958
+ end
614
959
 
615
960
  guard&.record!(:embedding)
616
961
 
617
962
  Thread.new do
618
- config = Woods.configuration
619
- builder = Woods::Builder.new(config)
620
- provider = builder.build_embedding_provider
621
- text_preparer = Woods::Embedding::TextPreparer.new
622
- vector_store = builder.build_vector_store
623
- indexer = Woods::Embedding::Indexer.new(
624
- provider: provider,
625
- text_preparer: text_preparer,
626
- vector_store: vector_store,
627
- output_dir: config.output_dir
628
- )
963
+ # Share the rake-task wiring so the MCP path picks up the
964
+ # provider-tuned TextPreparer + token-aware chunker. Without
965
+ # this, MCP-triggered embedding still hit Ollama's "input
966
+ # length exceeds context length" error after the rake path
967
+ # was fixed in PR #70.
968
+ indexer = Woods::Tasks.build_embed_indexer
629
969
  incremental ? indexer.index_incremental : indexer.index_all
630
970
  rescue StandardError => e
631
971
  logger = defined?(Rails) ? Rails.logger : Logger.new($stderr)
632
972
  logger.error("[Woods] Pipeline embed failed: #{e.message}")
973
+ ensure
974
+ Woods::MCP::Server.send(:pipeline_finish, :embedding)
633
975
  end
634
976
 
635
977
  respond.call(JSON.pretty_generate({
@@ -639,23 +981,50 @@ module Woods
639
981
  end
640
982
  end
641
983
 
642
- def define_pipeline_status_tool(server, operator, respond)
984
+ # Acquire a pipeline-kind lock atomically. Returns false when
985
+ # another thread is already running that kind of pipeline (so the
986
+ # caller can refuse the new request instead of racing the running
987
+ # pipeline). Module-level state — a single MCP server process
988
+ # serializes its own pipelines.
989
+ def pipeline_start(kind)
990
+ @pipeline_mutex ||= Mutex.new
991
+ @pipeline_in_flight ||= {}
992
+ @pipeline_mutex.synchronize do
993
+ return false if @pipeline_in_flight[kind]
994
+
995
+ @pipeline_in_flight[kind] = true
996
+ true
997
+ end
998
+ end
999
+
1000
+ def pipeline_finish(kind)
1001
+ @pipeline_mutex&.synchronize { @pipeline_in_flight&.delete(kind) }
1002
+ end
1003
+
1004
+ def define_pipeline_status_tool(server, operator, respond, respond_err, op_missing)
643
1005
  server.define_tool(
644
1006
  name: 'pipeline_status',
645
1007
  description: 'Get the current pipeline status: last extraction time, unit counts, staleness.',
646
1008
  input_schema: { type: 'object', properties: {} }
647
1009
  ) do |server_context:|
648
- next respond.call('Pipeline operator is not configured.') unless operator
1010
+ next op_missing.call('pipeline_status') unless operator
649
1011
 
650
1012
  reporter = operator[:status_reporter]
651
- next respond.call('Status reporter is not configured.') unless reporter
1013
+ unless reporter
1014
+ next respond_err.call(
1015
+ 'Status reporter is not configured.',
1016
+ code: :not_configured,
1017
+ config_key: 'operator.status_reporter',
1018
+ tool: 'pipeline_status'
1019
+ )
1020
+ end
652
1021
 
653
1022
  status = reporter.report
654
1023
  respond.call(JSON.pretty_generate(status))
655
1024
  end
656
1025
  end
657
1026
 
658
- def define_pipeline_diagnose_tool(server, operator, respond)
1027
+ def define_pipeline_diagnose_tool(server, operator, respond, respond_err, op_missing)
659
1028
  server.define_tool(
660
1029
  name: 'pipeline_diagnose',
661
1030
  description: 'Classify a recent pipeline error and suggest remediation.',
@@ -667,10 +1036,17 @@ module Woods
667
1036
  required: %w[error_class error_message]
668
1037
  }
669
1038
  ) do |error_class:, error_message:, server_context:|
670
- next respond.call('Pipeline operator is not configured.') unless operator
1039
+ next op_missing.call('pipeline_diagnose') unless operator
671
1040
 
672
1041
  escalator = operator[:error_escalator]
673
- next respond.call('Error escalator is not configured.') unless escalator
1042
+ unless escalator
1043
+ next respond_err.call(
1044
+ 'Error escalator is not configured.',
1045
+ code: :not_configured,
1046
+ config_key: 'operator.error_escalator',
1047
+ tool: 'pipeline_diagnose'
1048
+ )
1049
+ end
674
1050
 
675
1051
  error = StandardError.new(error_message)
676
1052
  # Set the class name in the error string for pattern matching
@@ -680,7 +1056,7 @@ module Woods
680
1056
  end
681
1057
  end
682
1058
 
683
- def define_pipeline_repair_tool(server, operator, respond)
1059
+ def define_pipeline_repair_tool(server, operator, respond, respond_err, op_missing)
684
1060
  server.define_tool(
685
1061
  name: 'pipeline_repair',
686
1062
  description: 'Attempt to repair pipeline state: clear stale locks, reset rate limits.',
@@ -695,7 +1071,7 @@ module Woods
695
1071
  required: ['action']
696
1072
  }
697
1073
  ) do |action:, server_context:|
698
- next respond.call('Pipeline operator is not configured.') unless operator
1074
+ next op_missing.call('pipeline_repair') unless operator
699
1075
 
700
1076
  case action
701
1077
  when 'clear_locks'
@@ -704,17 +1080,29 @@ module Woods
704
1080
  lock.release
705
1081
  respond.call(JSON.pretty_generate({ repaired: true, action: 'clear_locks' }))
706
1082
  else
707
- respond.call('Pipeline lock is not configured.')
1083
+ respond_err.call(
1084
+ 'Pipeline lock is not configured.',
1085
+ code: :not_configured,
1086
+ config_key: 'operator.pipeline_lock',
1087
+ tool: 'pipeline_repair'
1088
+ )
708
1089
  end
709
1090
  when 'reset_cooldowns'
710
1091
  respond.call(JSON.pretty_generate({ repaired: true, action: 'reset_cooldowns' }))
711
1092
  else
712
- respond.call("Unknown repair action: #{action}")
1093
+ respond_err.call(
1094
+ "Unknown repair action: #{action}",
1095
+ code: :unsupported_argument,
1096
+ tool: 'pipeline_repair',
1097
+ argument: 'action',
1098
+ value: action,
1099
+ allowed: %w[clear_locks reset_cooldowns]
1100
+ )
713
1101
  end
714
1102
  end
715
1103
  end
716
1104
 
717
- def define_retrieval_rate_tool(server, feedback_store, respond)
1105
+ def define_retrieval_rate_tool(server, feedback_store, respond, fb_missing)
718
1106
  coerce_int = method(:coerce_integer)
719
1107
  server.define_tool(
720
1108
  name: 'retrieval_rate',
@@ -728,7 +1116,7 @@ module Woods
728
1116
  required: %w[query score]
729
1117
  }
730
1118
  ) do |query:, score:, server_context:, comment: nil|
731
- next respond.call('Feedback store is not configured.') unless feedback_store
1119
+ next fb_missing.call('retrieval_rate') unless feedback_store
732
1120
 
733
1121
  score = coerce_int.call(score)
734
1122
  feedback_store.record_rating(query: query, score: score, comment: comment)
@@ -736,7 +1124,7 @@ module Woods
736
1124
  end
737
1125
  end
738
1126
 
739
- def define_retrieval_report_gap_tool(server, feedback_store, respond)
1127
+ def define_retrieval_report_gap_tool(server, feedback_store, respond, fb_missing)
740
1128
  server.define_tool(
741
1129
  name: 'retrieval_report_gap',
742
1130
  description: 'Report a missing unit that should have appeared in retrieval results.',
@@ -749,7 +1137,7 @@ module Woods
749
1137
  required: %w[query missing_unit unit_type]
750
1138
  }
751
1139
  ) do |query:, missing_unit:, unit_type:, server_context:|
752
- next respond.call('Feedback store is not configured.') unless feedback_store
1140
+ next fb_missing.call('retrieval_report_gap') unless feedback_store
753
1141
 
754
1142
  feedback_store.record_gap(query: query, missing_unit: missing_unit, unit_type: unit_type)
755
1143
  respond.call(JSON.pretty_generate({
@@ -760,13 +1148,13 @@ module Woods
760
1148
  end
761
1149
  end
762
1150
 
763
- def define_retrieval_explain_tool(server, feedback_store, respond)
1151
+ def define_retrieval_explain_tool(server, feedback_store, respond, fb_missing)
764
1152
  server.define_tool(
765
1153
  name: 'retrieval_explain',
766
1154
  description: 'Get feedback statistics: average score, total ratings, gap count.',
767
1155
  input_schema: { type: 'object', properties: {} }
768
1156
  ) do |server_context:|
769
- next respond.call('Feedback store is not configured.') unless feedback_store
1157
+ next fb_missing.call('retrieval_explain') unless feedback_store
770
1158
 
771
1159
  ratings = feedback_store.ratings
772
1160
  gaps = feedback_store.gaps
@@ -780,13 +1168,13 @@ module Woods
780
1168
  end
781
1169
  end
782
1170
 
783
- def define_retrieval_suggest_tool(server, feedback_store, respond)
1171
+ def define_retrieval_suggest_tool(server, feedback_store, respond, fb_missing)
784
1172
  server.define_tool(
785
1173
  name: 'retrieval_suggest',
786
1174
  description: 'Analyze feedback to suggest improvements: detect patterns in low scores and missing units.',
787
1175
  input_schema: { type: 'object', properties: {} }
788
1176
  ) do |server_context:|
789
- next respond.call('Feedback store is not configured.') unless feedback_store
1177
+ next fb_missing.call('retrieval_suggest') unless feedback_store
790
1178
 
791
1179
  require_relative '../feedback/gap_detector'
792
1180
  detector = Woods::Feedback::GapDetector.new(feedback_store: feedback_store)
@@ -798,14 +1186,14 @@ module Woods
798
1186
  end
799
1187
  end
800
1188
 
801
- def define_snapshot_tools(server, snapshot_store, respond)
802
- define_list_snapshots_tool(server, snapshot_store, respond)
803
- define_snapshot_diff_tool(server, snapshot_store, respond)
804
- define_unit_history_tool(server, snapshot_store, respond)
805
- define_snapshot_detail_tool(server, snapshot_store, respond)
1189
+ def define_snapshot_tools(server, snapshot_store, respond, respond_err, snap_missing)
1190
+ define_list_snapshots_tool(server, snapshot_store, respond, snap_missing)
1191
+ define_snapshot_diff_tool(server, snapshot_store, respond, snap_missing)
1192
+ define_unit_history_tool(server, snapshot_store, respond, snap_missing)
1193
+ define_snapshot_detail_tool(server, snapshot_store, respond, respond_err, snap_missing)
806
1194
  end
807
1195
 
808
- def define_list_snapshots_tool(server, snapshot_store, respond)
1196
+ def define_list_snapshots_tool(server, snapshot_store, respond, snap_missing)
809
1197
  coerce_int = method(:coerce_integer)
810
1198
  server.define_tool(
811
1199
  name: 'list_snapshots',
@@ -817,7 +1205,7 @@ module Woods
817
1205
  }
818
1206
  }
819
1207
  ) do |server_context:, limit: nil, branch: nil|
820
- next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
1208
+ next snap_missing.call('list_snapshots') unless snapshot_store
821
1209
 
822
1210
  limit = coerce_int.call(limit)
823
1211
  results = snapshot_store.list(limit: limit || 20, branch: branch)
@@ -825,7 +1213,7 @@ module Woods
825
1213
  end
826
1214
  end
827
1215
 
828
- def define_snapshot_diff_tool(server, snapshot_store, respond)
1216
+ def define_snapshot_diff_tool(server, snapshot_store, respond, snap_missing)
829
1217
  server.define_tool(
830
1218
  name: 'snapshot_diff',
831
1219
  description: 'Compare two extraction snapshots by git SHA. Returns lists of added, modified, and deleted units.',
@@ -837,7 +1225,7 @@ module Woods
837
1225
  required: %w[sha_a sha_b]
838
1226
  }
839
1227
  ) do |sha_a:, sha_b:, server_context:|
840
- next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
1228
+ next snap_missing.call('snapshot_diff') unless snapshot_store
841
1229
 
842
1230
  result = snapshot_store.diff(sha_a, sha_b)
843
1231
  respond.call(JSON.pretty_generate({
@@ -850,7 +1238,7 @@ module Woods
850
1238
  end
851
1239
  end
852
1240
 
853
- def define_unit_history_tool(server, snapshot_store, respond)
1241
+ def define_unit_history_tool(server, snapshot_store, respond, snap_missing)
854
1242
  coerce_int = method(:coerce_integer)
855
1243
  server.define_tool(
856
1244
  name: 'unit_history',
@@ -863,7 +1251,7 @@ module Woods
863
1251
  required: ['identifier']
864
1252
  }
865
1253
  ) do |identifier:, server_context:, limit: nil|
866
- next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
1254
+ next snap_missing.call('unit_history') unless snapshot_store
867
1255
 
868
1256
  limit = coerce_int.call(limit)
869
1257
  entries = snapshot_store.unit_history(identifier, limit: limit || 20)
@@ -875,7 +1263,7 @@ module Woods
875
1263
  end
876
1264
  end
877
1265
 
878
- def define_snapshot_detail_tool(server, snapshot_store, respond)
1266
+ def define_snapshot_detail_tool(server, snapshot_store, respond, respond_err, snap_missing)
879
1267
  server.define_tool(
880
1268
  name: 'snapshot_detail',
881
1269
  description: 'Get full metadata for a specific extraction snapshot by git SHA.',
@@ -886,18 +1274,24 @@ module Woods
886
1274
  required: ['git_sha']
887
1275
  }
888
1276
  ) do |git_sha:, server_context:|
889
- next respond.call('Snapshot store is not configured. Set enable_snapshots: true.') unless snapshot_store
1277
+ next snap_missing.call('snapshot_detail') unless snapshot_store
890
1278
 
891
1279
  snapshot = snapshot_store.find(git_sha)
892
1280
  if snapshot
893
1281
  respond.call(JSON.pretty_generate(snapshot))
894
1282
  else
895
- respond.call("Snapshot not found for git SHA: #{git_sha}")
1283
+ respond_err.call(
1284
+ "Snapshot not found for git SHA: #{git_sha}",
1285
+ code: :not_found,
1286
+ tool: 'snapshot_detail',
1287
+ git_sha: git_sha,
1288
+ hint: 'Use `list_snapshots` to see available SHAs.'
1289
+ )
896
1290
  end
897
1291
  end
898
1292
  end
899
1293
 
900
- def define_notion_sync_tool(server, reader, index_dir, respond)
1294
+ def define_notion_sync_tool(server, reader, index_dir, respond, respond_err)
901
1295
  server.define_tool(
902
1296
  name: 'notion_sync',
903
1297
  description: 'Sync extracted codebase data (Data Models + Columns) to Notion databases. ' \
@@ -909,11 +1303,23 @@ module Woods
909
1303
  ) do |server_context:|
910
1304
  config = Woods.configuration
911
1305
  unless config.notion_api_token
912
- next respond.call('Error: notion_api_token is not configured. Set it in Woods.configure.')
1306
+ next respond_err.call(
1307
+ 'notion_api_token is not configured. Set it in Woods.configure or via the NOTION_API_TOKEN env var.',
1308
+ code: :not_configured,
1309
+ config_key: 'notion_api_token',
1310
+ doc_link: 'docs/NOTION_EXPORT.md',
1311
+ tool: 'notion_sync'
1312
+ )
913
1313
  end
914
1314
 
915
1315
  if (config.notion_database_ids || {}).empty?
916
- next respond.call('Error: notion_database_ids is not configured. Set it in Woods.configure.')
1316
+ next respond_err.call(
1317
+ 'notion_database_ids is not configured. Set it in Woods.configure.',
1318
+ code: :not_configured,
1319
+ config_key: 'notion_database_ids',
1320
+ doc_link: 'docs/NOTION_EXPORT.md',
1321
+ tool: 'notion_sync'
1322
+ )
917
1323
  end
918
1324
 
919
1325
  require_relative '../notion/exporter'
@@ -927,7 +1333,11 @@ module Woods
927
1333
  errors: stats[:errors].first(10)
928
1334
  }))
929
1335
  rescue StandardError => e
930
- respond.call("Notion sync failed: #{e.message}")
1336
+ respond_err.call(
1337
+ "Notion sync failed: #{e.message}",
1338
+ code: :api_error,
1339
+ tool: 'notion_sync'
1340
+ )
931
1341
  end
932
1342
  end
933
1343
 
@@ -965,6 +1375,196 @@ module Woods
965
1375
  ]
966
1376
  end
967
1377
 
1378
+ def define_woods_status_tool(server, reader, retriever, index_dir, bootstrap_state, respond)
1379
+ server.define_tool(
1380
+ name: 'woods_status',
1381
+ description: 'Diagnose whether the Woods index and server are healthy. Returns extraction metadata ' \
1382
+ '(last run, unit counts, git SHA, staleness in seconds), retriever/embedding configuration, ' \
1383
+ 'bootstrap state (hydrated / degraded / failed + reason), feature flags, and a ready flag. ' \
1384
+ 'Call this first on cold connect to learn what the server knows.',
1385
+ input_schema: { type: 'object', properties: {} }
1386
+ ) do |server_context:|
1387
+ _ = server_context
1388
+ status = Woods::MCP::Server.build_status(
1389
+ reader: reader, retriever: retriever, index_dir: index_dir,
1390
+ bootstrap_state: bootstrap_state
1391
+ )
1392
+ respond.call(JSON.pretty_generate(status))
1393
+ end
1394
+ end
1395
+
1396
+ public
1397
+
1398
+ # Build the woods_status payload. Exposed at module level so specs (and future
1399
+ # console/unified-server entry points) can assemble the same shape without
1400
+ # reaching through the MCP::Server internals.
1401
+ #
1402
+ # +features.embedding_model+ / +features.embedding_provider+ /
1403
+ # +features.vector_store+ prefer the ResolvedConfig captured at embed time
1404
+ # (+bootstrap_state.resolved_config+, which is read back from +woods.json+)
1405
+ # over +Woods.configuration+, whose defaults can contradict the actual
1406
+ # provider in use. Without this, operators debugging "wrong provider" see
1407
+ # status claiming +embedding_model: "text-embedding-3-small"+ next to
1408
+ # +embedding_provider: "ollama"+ and reasonably distrust every field.
1409
+ def build_status(reader:, retriever:, index_dir:, bootstrap_state: nil)
1410
+ manifest = safe_manifest(reader)
1411
+ extracted_at = manifest && manifest['extracted_at']
1412
+ staleness = staleness_seconds(extracted_at)
1413
+ # Tolerate a nil Woods.configuration — specs that reset it between
1414
+ # runs can leave a transient nil window, and build_status should
1415
+ # still produce a readable payload during that window.
1416
+ config = Woods.configuration || Woods::Configuration.new
1417
+ resolved = bootstrap_state&.resolved_config
1418
+
1419
+ {
1420
+ ready: manifest && !manifest['counts'].to_h.empty?,
1421
+ server: {
1422
+ name: 'woods',
1423
+ version: Woods::VERSION,
1424
+ index_dir: index_dir.to_s
1425
+ },
1426
+ index: index_section(manifest, extracted_at, staleness, index_dir),
1427
+ retriever: {
1428
+ configured: !retriever.nil?,
1429
+ class: retriever&.class&.name
1430
+ },
1431
+ bootstrap: bootstrap_state&.to_h,
1432
+ features: features_from(config, resolved)
1433
+ }
1434
+ end
1435
+
1436
+ private
1437
+
1438
+ # Assemble the +index+ sub-hash of woods_status, including a staleness
1439
+ # gate that compares +manifest.git_sha+ against the current HEAD. The
1440
+ # manifest captures +git_sha+ / +gemfile_lock_sha+ / +schema_sha+ at
1441
+ # extraction time; until this change nothing compared them against the
1442
+ # live checkout, so an agent asking questions after 40 new commits
1443
+ # and an MCP restart silently got pre-change answers.
1444
+ #
1445
+ # +git_sha_matches_head+ is a tri-state:
1446
+ # - true — manifest.git_sha == current HEAD
1447
+ # - false — mismatch (stale)
1448
+ # - nil — couldn't resolve (not a git repo, git unavailable,
1449
+ # or manifest has no git_sha); the key is then absent, not null
1450
+ #
1451
+ # When stale, +head_git_sha+ carries the live HEAD so operators can
1452
+ # diff directly. This is an observability signal, not a hard gate —
1453
+ # hard-refusing responses would be much more disruptive than a loudly-
1454
+ # visible staleness flag that agents can branch on.
1455
+ def index_section(manifest, extracted_at, staleness, index_dir)
1456
+ base = {
1457
+ extracted_at: extracted_at,
1458
+ staleness_seconds: staleness,
1459
+ rails_version: manifest && manifest['rails_version'],
1460
+ ruby_version: manifest && manifest['ruby_version'],
1461
+ total_units: manifest && manifest['total_units'],
1462
+ counts: (manifest && manifest['counts']) || {},
1463
+ git_sha: manifest && manifest['git_sha'],
1464
+ git_branch: manifest && manifest['git_branch'],
1465
+ gemfile_lock_sha: manifest && manifest['gemfile_lock_sha'],
1466
+ schema_sha: manifest && manifest['schema_sha']
1467
+ }
1468
+
1469
+ manifest_sha = manifest && manifest['git_sha']
1470
+ head_sha = manifest_sha ? resolve_head_sha(index_dir) : nil
1471
+ return base unless head_sha
1472
+
1473
+ base[:head_git_sha] = head_sha
1474
+ base[:git_sha_matches_head] = (manifest_sha == head_sha)
1475
+ base
1476
+ end
1477
+
1478
+ # Resolve the current HEAD SHA for the git repo containing +index_dir+.
1479
+ # Returns nil when git is unavailable or +index_dir+ is not in a repo —
1480
+ # callers treat nil as "can't compare" rather than "mismatch".
1481
+ #
1482
+ # Uses +capture2e+ so git's "fatal: not a git repository" stderr banner
1483
+ # does not leak through the MCP stdio transport. MCP clients that parse
1484
+ # stderr for protocol framing can't tolerate stray lines.
1485
+ def resolve_head_sha(index_dir)
1486
+ return nil unless index_dir
1487
+
1488
+ dir = index_dir.to_s
1489
+ return nil unless File.directory?(dir)
1490
+
1491
+ output, status = Open3.capture2e('git', '-C', dir, 'rev-parse', 'HEAD')
1492
+ status.success? ? output.strip : nil
1493
+ rescue Errno::ENOENT, Errno::EACCES
1494
+ # git not installed or not executable on this host — equivalent to
1495
+ # "can't compare". Any other exception is a genuine bug and should
1496
+ # propagate.
1497
+ nil
1498
+ end
1499
+
1500
+ # Assemble the +features+ sub-hash of woods_status, preferring the
1501
+ # ResolvedConfig captured at embed time over live {Woods::Configuration}.
1502
+ #
1503
+ # Fields that read from +resolved_config+ (when present): embedding_model,
1504
+ # embedding_provider, vector_store. Everything else is host-process
1505
+ # state (snapshots_enabled, notion_configured, session_tracer_enabled)
1506
+ # and comes from the running config.
1507
+ #
1508
+ # +console_mcp_enabled+ is intentionally omitted — the index MCP process
1509
+ # has no visibility into the host Rails app's Woods initializer, so
1510
+ # historic status payloads always reported +false+ regardless of the
1511
+ # actual console MCP state. Advertising a misleading field is worse
1512
+ # than not advertising it at all.
1513
+ def features_from(config, resolved)
1514
+ provider_hash = resolved&.embedding_provider || {}
1515
+ resolved_provider = resolved_provider_symbol(provider_hash[:class])
1516
+ resolved_model = provider_hash[:model]
1517
+ resolved_vector = resolved&.stores&.dig(:vector_store)
1518
+
1519
+ {
1520
+ embedding_model: resolved_model || (config.respond_to?(:embedding_model) ? config.embedding_model : nil),
1521
+ embedding_provider: presence(resolved_provider ||
1522
+ (config.respond_to?(:embedding_provider) ? config.embedding_provider : nil)),
1523
+ vector_store: presence(resolved_vector ||
1524
+ (config.respond_to?(:vector_store) ? config.vector_store : nil)),
1525
+ session_tracer_enabled: config.respond_to?(:session_tracer_enabled) ? config.session_tracer_enabled : false,
1526
+ snapshots_enabled: config.respond_to?(:enable_snapshots) ? config.enable_snapshots : false,
1527
+ notion_configured: config.respond_to?(:notion_api_token) && !presence(config.notion_api_token).nil?
1528
+ }
1529
+ end
1530
+
1531
+ # Convert a fully-qualified provider class name (as serialised in
1532
+ # woods.json — e.g. +"Woods::Embedding::Provider::Ollama"+) into the
1533
+ # short symbol form used by +Woods.configuration.embedding_provider+
1534
+ # (+:ollama+, +:openai+). Returns nil when +class_name+ is unknown or
1535
+ # absent so callers fall back to the live config value.
1536
+ def resolved_provider_symbol(class_name)
1537
+ return nil if class_name.nil? || class_name.empty?
1538
+
1539
+ case class_name
1540
+ when /Ollama\z/ then :ollama
1541
+ when /OpenAI\z/ then :openai
1542
+ end
1543
+ end
1544
+
1545
+ # Return a Hash of manifest content, or nil if unreadable.
1546
+ def safe_manifest(reader)
1547
+ reader.manifest
1548
+ rescue StandardError
1549
+ nil
1550
+ end
1551
+
1552
+ # Seconds since extraction. Returns nil if timestamp is missing or unparsable.
1553
+ def staleness_seconds(iso8601)
1554
+ return nil if iso8601.nil? || iso8601.empty?
1555
+
1556
+ (Time.now - Time.parse(iso8601)).to_i
1557
+ rescue ArgumentError
1558
+ nil
1559
+ end
1560
+
1561
+ def presence(value)
1562
+ return nil if value.nil?
1563
+ return nil if value.respond_to?(:empty?) && value.empty?
1564
+
1565
+ value.to_s
1566
+ end
1567
+
968
1568
  def register_resource_handler(server, reader)
969
1569
  server.resources_read_handler do |params|
970
1570
  uri = params[:uri]