woods 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +169 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +15 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +3 -4
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +737 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +1 -1
- data/lib/woods/unblocked/document_builder.rb +35 -10
- data/lib/woods/unblocked/exporter.rb +1 -1
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +126 -6
- metadata +69 -4
data/lib/woods/mcp/server.rb
CHANGED
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'json'
|
|
3
4
|
require 'logger'
|
|
4
5
|
require 'mcp'
|
|
6
|
+
require 'open3'
|
|
7
|
+
require 'time'
|
|
5
8
|
require 'set'
|
|
9
|
+
require_relative '../tasks'
|
|
6
10
|
require_relative 'index_reader'
|
|
7
11
|
require_relative 'tool_response_renderer'
|
|
8
12
|
|
|
9
13
|
module Woods
|
|
10
14
|
module MCP
|
|
11
|
-
# Builds an MCP::Server with
|
|
12
|
-
# Woods extraction output, managing pipelines, and collecting feedback.
|
|
15
|
+
# Builds an MCP::Server with up to 29 tools, 2 resources, and 2 resource templates
|
|
16
|
+
# for querying Woods extraction output, managing pipelines, and collecting feedback.
|
|
17
|
+
# 14 tools are always registered; 15 more register conditionally based on wiring:
|
|
18
|
+
# 5 operator tools, 4 feedback tools, 4 snapshot tools, 1 session_trace tool,
|
|
19
|
+
# 1 Notion sync tool.
|
|
13
20
|
#
|
|
14
21
|
# All tools are defined inline via closures over an IndexReader instance.
|
|
15
22
|
# No Rails required at runtime — reads JSON files from disk.
|
|
@@ -27,10 +34,19 @@ module Woods
|
|
|
27
34
|
# @param retriever [Woods::Retriever, nil] Optional retriever for semantic search
|
|
28
35
|
# @param operator [Hash, nil] Optional operator config with :status_reporter, :error_escalator, :pipeline_guard, :pipeline_lock
|
|
29
36
|
# @param feedback_store [Woods::Feedback::Store, nil] Optional feedback store
|
|
37
|
+
# @param bootstrap_state [Woods::MCP::BootstrapState, nil] Optional state
|
|
38
|
+
# from the bootstrap flow. When provided, woods_status reports the
|
|
39
|
+
# hydrated/degraded/failed lifecycle plus the reason so operators can
|
|
40
|
+
# diagnose "why is semantic search disabled" without reading the Ruby
|
|
41
|
+
# source. Nil just means the caller didn't go through Bootstrapper.
|
|
42
|
+
# @param warmup [Boolean] Pre-populate the index reader's caches during build,
|
|
43
|
+
# shifting first-tool-call latency to startup. Default: true. Pass false for
|
|
44
|
+
# tests or when startup time matters more than first-query latency.
|
|
30
45
|
# @return [MCP::Server] Configured server ready for transport
|
|
31
46
|
def build(index_dir:, retriever: nil, operator: nil, feedback_store: nil, snapshot_store: nil,
|
|
32
|
-
response_format: nil)
|
|
47
|
+
bootstrap_state: nil, response_format: nil, warmup: true, retriever_reloader: nil)
|
|
33
48
|
reader = IndexReader.new(index_dir)
|
|
49
|
+
reader.warmup! if warmup
|
|
34
50
|
config = Woods.configuration
|
|
35
51
|
format = response_format || (config.respond_to?(:context_format) ? config.context_format : nil) || :markdown
|
|
36
52
|
renderer = ToolResponseRenderer.for(format)
|
|
@@ -39,6 +55,31 @@ module Woods
|
|
|
39
55
|
|
|
40
56
|
# Lambda captured by all tool blocks for building responses.
|
|
41
57
|
respond = method(:text_response)
|
|
58
|
+
respond_err = method(:error_response)
|
|
59
|
+
op_missing = lambda do |tool|
|
|
60
|
+
error_response(
|
|
61
|
+
'Pipeline operator is not configured. Pass `operator:` to Woods::MCP::Server.build ' \
|
|
62
|
+
'or use Woods::MCP::Bootstrapper to wire StatusReporter, ErrorEscalator, and PipelineGuard.',
|
|
63
|
+
code: :not_configured, config_key: 'operator',
|
|
64
|
+
doc_link: 'docs/OPERATOR_GUIDE.md', tool: tool
|
|
65
|
+
)
|
|
66
|
+
end
|
|
67
|
+
fb_missing = lambda do |tool|
|
|
68
|
+
error_response(
|
|
69
|
+
'Feedback store is not configured. Pass `feedback_store:` to Woods::MCP::Server.build ' \
|
|
70
|
+
'to enable retrieval feedback capture.',
|
|
71
|
+
code: :not_configured, config_key: 'feedback_store',
|
|
72
|
+
doc_link: 'docs/FEEDBACK_STORE.md', tool: tool
|
|
73
|
+
)
|
|
74
|
+
end
|
|
75
|
+
snap_missing = lambda do |tool|
|
|
76
|
+
error_response(
|
|
77
|
+
'Snapshot store is not configured. Set `enable_snapshots: true` in Woods.configure ' \
|
|
78
|
+
'and pass `snapshot_store:` to Woods::MCP::Server.build.',
|
|
79
|
+
code: :not_configured, config_key: 'enable_snapshots',
|
|
80
|
+
doc_link: 'docs/TEMPORAL_SNAPSHOTS.md', tool: tool
|
|
81
|
+
)
|
|
82
|
+
end
|
|
42
83
|
|
|
43
84
|
server = ::MCP::Server.new(
|
|
44
85
|
name: 'woods',
|
|
@@ -47,8 +88,8 @@ module Woods
|
|
|
47
88
|
resource_templates: resource_templates
|
|
48
89
|
)
|
|
49
90
|
|
|
50
|
-
define_lookup_tool(server, reader, respond, renderer)
|
|
51
|
-
define_search_tool(server, reader, respond, renderer)
|
|
91
|
+
define_lookup_tool(server, reader, respond, respond_err, renderer)
|
|
92
|
+
define_search_tool(server, reader, respond, respond_err, renderer)
|
|
52
93
|
define_traversal_tool(server, reader, respond, renderer,
|
|
53
94
|
name: 'dependencies',
|
|
54
95
|
description: 'Traverse forward dependencies of a unit (what it depends on). Returns a BFS tree with depth.',
|
|
@@ -65,14 +106,21 @@ module Woods
|
|
|
65
106
|
define_pagerank_tool(server, reader, respond, renderer)
|
|
66
107
|
define_framework_tool(server, reader, respond, renderer)
|
|
67
108
|
define_recent_changes_tool(server, reader, respond, renderer)
|
|
68
|
-
define_reload_tool(server, reader, respond)
|
|
69
|
-
define_retrieve_tool(server, retriever, respond)
|
|
70
|
-
define_trace_flow_tool(server, reader, index_dir, respond, renderer)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
109
|
+
define_reload_tool(server, reader, respond, retriever_reloader)
|
|
110
|
+
define_retrieve_tool(server, retriever, respond, respond_err)
|
|
111
|
+
define_trace_flow_tool(server, reader, index_dir, respond, respond_err, renderer)
|
|
112
|
+
# Conditionally register collaborator-dependent tools. Historically
|
|
113
|
+
# all 15 stubs were registered unconditionally and returned
|
|
114
|
+
# isError: true when the wiring was missing — that added token
|
|
115
|
+
# noise to every LLM turn's tool catalog and invited the model to
|
|
116
|
+
# try tools guaranteed to fail. Only register when the collaborator
|
|
117
|
+
# is wired, so tools/list reflects what the server can actually do.
|
|
118
|
+
define_session_trace_tool(server, reader, respond, respond_err) if session_tracer_wired?
|
|
119
|
+
define_operator_tools(server, operator, respond, respond_err, op_missing) if operator
|
|
120
|
+
define_feedback_tools(server, feedback_store, respond, respond_err, fb_missing) if feedback_store
|
|
121
|
+
define_snapshot_tools(server, snapshot_store, respond, respond_err, snap_missing) if snapshot_store
|
|
122
|
+
define_notion_sync_tool(server, reader, index_dir, respond, respond_err) if notion_wired?
|
|
123
|
+
define_woods_status_tool(server, reader, retriever, index_dir, bootstrap_state, respond)
|
|
76
124
|
register_resource_handler(server, reader)
|
|
77
125
|
|
|
78
126
|
server
|
|
@@ -80,10 +128,67 @@ module Woods
|
|
|
80
128
|
|
|
81
129
|
private
|
|
82
130
|
|
|
131
|
+
# Session tracer requires a configured session_store on Woods.configuration.
|
|
132
|
+
# The tool reads the store inside its handler; skipping registration when
|
|
133
|
+
# the store is absent keeps tools/list honest.
|
|
134
|
+
#
|
|
135
|
+
# The `session_trace` handler itself only calls `store.read`. We
|
|
136
|
+
# ALSO probe `:sessions` as a defense-in-depth cheap contract
|
|
137
|
+
# check — every shipped store (File/Redis/SolidCache) implements
|
|
138
|
+
# both, so if a misconfigured store lacks `:sessions` it is almost
|
|
139
|
+
# certainly missing `:read` too, and we'd rather fail at wire-up
|
|
140
|
+
# than at first invocation. A record-only store (permitted by the
|
|
141
|
+
# middleware for backward-compatibility) will correctly drop out
|
|
142
|
+
# of tools/list here.
|
|
143
|
+
def session_tracer_wired?
|
|
144
|
+
config = Woods.configuration
|
|
145
|
+
return false unless config
|
|
146
|
+
return false unless config.respond_to?(:session_store)
|
|
147
|
+
|
|
148
|
+
store = config.session_store
|
|
149
|
+
return false if store.nil?
|
|
150
|
+
|
|
151
|
+
%i[read sessions].all? { |m| store.respond_to?(m) }
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Notion export needs both an API token and at least one database ID.
|
|
155
|
+
# NOTION_API_TOKEN env var overrides the config token (see
|
|
156
|
+
# docs/NOTION_EXPORT.md).
|
|
157
|
+
def notion_wired?
|
|
158
|
+
config = Woods.configuration
|
|
159
|
+
return false unless config
|
|
160
|
+
|
|
161
|
+
token = ENV['NOTION_API_TOKEN'] || (config.respond_to?(:notion_api_token) ? config.notion_api_token : nil)
|
|
162
|
+
ids = config.respond_to?(:notion_database_ids) ? config.notion_database_ids : nil
|
|
163
|
+
token && !token.empty? && ids && !ids.empty?
|
|
164
|
+
end
|
|
165
|
+
|
|
83
166
|
def text_response(text)
|
|
84
167
|
::MCP::Tool::Response.new([{ type: 'text', text: text }])
|
|
85
168
|
end
|
|
86
169
|
|
|
170
|
+
# Build a structured error response that carries machine-readable
|
|
171
|
+
# metadata alongside the human-readable text. Agents can branch on
|
|
172
|
+
# `_meta.error_code` (e.g. `:not_configured`, `:not_found`,
|
|
173
|
+
# `:rate_limited`, `:unsupported_argument`) without parsing the text.
|
|
174
|
+
#
|
|
175
|
+
# @param message [String] Human-readable explanation
|
|
176
|
+
# @param code [Symbol] Stable error code (machine-readable)
|
|
177
|
+
# @param config_key [String, nil] Offending configuration key when relevant
|
|
178
|
+
# @param doc_link [String, nil] Relative docs path explaining the fix
|
|
179
|
+
# @param extra [Hash] Additional meta fields (e.g., identifier:, tool:)
|
|
180
|
+
def error_response(message, code:, config_key: nil, doc_link: nil, **extra)
|
|
181
|
+
meta = { error_code: code }
|
|
182
|
+
meta[:config_key] = config_key if config_key
|
|
183
|
+
meta[:doc_link] = doc_link if doc_link
|
|
184
|
+
meta.merge!(extra) unless extra.empty?
|
|
185
|
+
::MCP::Tool::Response.new(
|
|
186
|
+
[{ type: 'text', text: message }],
|
|
187
|
+
error: true,
|
|
188
|
+
meta: meta
|
|
189
|
+
)
|
|
190
|
+
end
|
|
191
|
+
|
|
87
192
|
def truncate_section(array, limit)
|
|
88
193
|
return array unless array.is_a?(Array)
|
|
89
194
|
|
|
@@ -108,14 +213,55 @@ module Woods
|
|
|
108
213
|
value.is_a?(String) ? [value] : value
|
|
109
214
|
end
|
|
110
215
|
|
|
111
|
-
# Coerce a value to an Integer.
|
|
112
|
-
# to Integer; leaves existing Integers and nil unchanged.
|
|
113
|
-
# MCP clients may send "2" (string) instead of 2 (integer).
|
|
216
|
+
# Coerce a value to an Integer.
|
|
114
217
|
#
|
|
115
|
-
#
|
|
218
|
+
# - `nil` passes through unchanged.
|
|
219
|
+
# - `Integer` passes through unchanged.
|
|
220
|
+
# - `String` is accepted iff it represents a decimal integer with an
|
|
221
|
+
# optional leading `+`/`-`. `"abc"` and `"1abc"` used to silently
|
|
222
|
+
# coerce to `0` via `String#to_i`; that was a footgun for tools with
|
|
223
|
+
# integer bounds (limit, offset, budget, timeout) — they'd receive
|
|
224
|
+
# the wrong value without any feedback to the client. Now we raise
|
|
225
|
+
# `ArgumentError` so the MCP dispatch layer can surface a proper
|
|
226
|
+
# JSON-RPC error back to the caller.
|
|
227
|
+
# - Any other type raises `ArgumentError`.
|
|
228
|
+
#
|
|
229
|
+
# @param value [String, Integer, nil]
|
|
116
230
|
# @return [Integer, nil]
|
|
231
|
+
# @raise [ArgumentError] if `value` is not nil, Integer, or an Integer-shaped String.
|
|
232
|
+
INTEGER_STRING = /\A[+-]?\d+\z/
|
|
233
|
+
private_constant :INTEGER_STRING
|
|
117
234
|
def coerce_integer(value)
|
|
118
|
-
|
|
235
|
+
return nil if value.nil?
|
|
236
|
+
return value if value.is_a?(Integer)
|
|
237
|
+
|
|
238
|
+
return Integer(value, 10) if value.is_a?(String) && value.match?(INTEGER_STRING)
|
|
239
|
+
|
|
240
|
+
raise ArgumentError, "expected integer, got #{value.class}: #{value.inspect}"
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# Load a precomputed flow document written by FlowPrecomputer, when
|
|
244
|
+
# `config.precompute_flows` was enabled during extraction. Returns nil
|
|
245
|
+
# when the entry point is missing a method suffix, the JSON file isn't
|
|
246
|
+
# on disk, or the file can't be parsed — callers fall back to
|
|
247
|
+
# FlowAssembler.
|
|
248
|
+
#
|
|
249
|
+
# @param index_dir [String]
|
|
250
|
+
# @param entry_point [String] e.g., "PostsController#create"
|
|
251
|
+
# @return [Woods::FlowDocument, nil]
|
|
252
|
+
def load_precomputed_flow(index_dir, entry_point)
|
|
253
|
+
return nil unless entry_point.to_s.include?('#')
|
|
254
|
+
|
|
255
|
+
controller, action = entry_point.split('#', 2)
|
|
256
|
+
return nil if controller.empty? || action.empty?
|
|
257
|
+
|
|
258
|
+
filename = "#{controller.gsub('::', '__')}_#{action}.json"
|
|
259
|
+
path = File.join(index_dir, 'flows', filename)
|
|
260
|
+
return nil unless File.exist?(path)
|
|
261
|
+
|
|
262
|
+
Woods::FlowDocument.from_h(JSON.parse(File.read(path)))
|
|
263
|
+
rescue JSON::ParserError, Errno::ENOENT
|
|
264
|
+
nil
|
|
119
265
|
end
|
|
120
266
|
|
|
121
267
|
# Apply offset+limit pagination to a single section key within a container hash.
|
|
@@ -139,26 +285,40 @@ module Woods
|
|
|
139
285
|
container["#{key}_offset"] = offset if offset.positive?
|
|
140
286
|
end
|
|
141
287
|
|
|
142
|
-
def define_lookup_tool(server, reader, respond, renderer)
|
|
288
|
+
def define_lookup_tool(server, reader, respond, respond_err, renderer)
|
|
143
289
|
coerce = method(:coerce_array)
|
|
144
290
|
server.define_tool(
|
|
145
291
|
name: 'lookup',
|
|
146
292
|
description: 'Look up a code unit by its exact identifier. Returns full source code, metadata, ' \
|
|
147
293
|
'dependencies, and dependents. Use include_source: false to omit source_code. ' \
|
|
148
|
-
'Use sections to select specific keys (type, identifier, file_path, namespace are always included).'
|
|
294
|
+
'Use sections to select specific keys (type, identifier, file_path, namespace are always included). ' \
|
|
295
|
+
'`name` is accepted as an alias for `identifier` for discoverability.',
|
|
149
296
|
input_schema: {
|
|
150
297
|
properties: {
|
|
151
298
|
identifier: { type: 'string',
|
|
152
299
|
description: 'Exact unit identifier (e.g. "Post", "PostsController", "Api::V1::HealthController")' },
|
|
300
|
+
name: { type: 'string', description: 'Alias for `identifier`. Either one works.' },
|
|
153
301
|
include_source: { type: 'boolean', description: 'Include source_code in response (default: true)' },
|
|
154
302
|
sections: {
|
|
155
303
|
type: 'array', items: { type: 'string' },
|
|
156
304
|
description: 'Select specific keys to return (e.g. ["metadata", "dependencies"]). Always includes type, identifier, file_path, namespace.'
|
|
157
305
|
}
|
|
158
|
-
}
|
|
159
|
-
|
|
306
|
+
}
|
|
307
|
+
# NOTE: 'identifier' is not listed as required — `name` is an
|
|
308
|
+
# accepted alias. The handler validates that one of the two
|
|
309
|
+
# was provided.
|
|
160
310
|
}
|
|
161
|
-
) do |
|
|
311
|
+
) do |server_context:, identifier: nil, name: nil, include_source: nil, sections: nil|
|
|
312
|
+
identifier ||= name
|
|
313
|
+
if identifier.nil? || identifier.empty?
|
|
314
|
+
next respond_err.call(
|
|
315
|
+
'lookup requires `identifier` (or its alias `name`).',
|
|
316
|
+
code: :unsupported_argument,
|
|
317
|
+
tool: 'lookup',
|
|
318
|
+
argument: 'identifier',
|
|
319
|
+
hint: 'Pass identifier: "PostsController" (or name: "PostsController").'
|
|
320
|
+
)
|
|
321
|
+
end
|
|
162
322
|
sections = coerce.call(sections)
|
|
163
323
|
unit = reader.find_unit(identifier)
|
|
164
324
|
if unit
|
|
@@ -171,47 +331,87 @@ module Woods
|
|
|
171
331
|
end
|
|
172
332
|
respond.call(renderer.render(:lookup, filtered))
|
|
173
333
|
else
|
|
174
|
-
|
|
334
|
+
respond_err.call(
|
|
335
|
+
"Unit not found: #{identifier}",
|
|
336
|
+
code: :not_found,
|
|
337
|
+
identifier: identifier,
|
|
338
|
+
tool: 'lookup',
|
|
339
|
+
hint: 'Use `search` to find identifiers by pattern, then `lookup` on the exact match.'
|
|
340
|
+
)
|
|
175
341
|
end
|
|
176
342
|
end
|
|
177
343
|
end
|
|
178
344
|
|
|
179
|
-
def define_search_tool(server, reader, respond, renderer)
|
|
345
|
+
def define_search_tool(server, reader, respond, respond_err, renderer)
|
|
180
346
|
coerce = method(:coerce_array)
|
|
181
347
|
coerce_int = method(:coerce_integer)
|
|
182
348
|
server.define_tool(
|
|
183
349
|
name: 'search',
|
|
184
|
-
description: '
|
|
350
|
+
description: 'Find code units whose identifiers (or source/metadata) match a regex. ' \
|
|
351
|
+
'Example: search("Worker|Job") returns all workers and jobs; search("^Post") ' \
|
|
352
|
+
'returns units starting with "Post". Returns [{identifier, type, match_field}]. ' \
|
|
353
|
+
'Use `lookup` for exact identifiers, `dependencies`/`dependents` for graph traversal. ' \
|
|
354
|
+
'Gotchas: query is a Ruby regex — literal pipe needs escaping as \\|; ' \
|
|
355
|
+
'types restricts which index directories are scanned (e.g. ["mailer"] scans only ' \
|
|
356
|
+
'the mailers dir); invalid regex falls back to literal match. ' \
|
|
357
|
+
'For plain prefix/suffix matching on namespaces, prefer exact_prefix / exact_suffix ' \
|
|
358
|
+
'(literal, case-insensitive) over escaping regex anchors.',
|
|
185
359
|
input_schema: {
|
|
186
360
|
properties: {
|
|
187
|
-
query: { type: 'string', description: '
|
|
361
|
+
query: { type: 'string', description: 'Case-insensitive Ruby regex pattern (e.g. "Worker|Job", "^Post", ".*Service$")' },
|
|
188
362
|
types: {
|
|
189
363
|
type: 'array', items: { type: 'string' },
|
|
190
|
-
description: '
|
|
364
|
+
description: 'Restrict scan to these unit types: model, controller, service, job, mailer, etc.'
|
|
191
365
|
},
|
|
192
366
|
fields: {
|
|
193
367
|
type: 'array', items: { type: 'string' },
|
|
194
|
-
description: 'Fields to search: identifier, source_code, metadata
|
|
368
|
+
description: 'Fields to search: identifier (default), source_code, metadata'
|
|
195
369
|
},
|
|
196
|
-
limit: { type: 'integer', description: 'Maximum results (default: 20)' }
|
|
197
|
-
|
|
198
|
-
|
|
370
|
+
limit: { type: 'integer', description: 'Maximum results (default: 20)' },
|
|
371
|
+
exact_prefix: {
|
|
372
|
+
type: 'string',
|
|
373
|
+
description: 'Literal (non-regex) case-insensitive identifier prefix filter. ' \
|
|
374
|
+
'Use for namespace scoping like "Next::Settings::" without escaping regex metacharacters.'
|
|
375
|
+
},
|
|
376
|
+
exact_suffix: {
|
|
377
|
+
type: 'string',
|
|
378
|
+
description: 'Literal (non-regex) case-insensitive identifier suffix filter. ' \
|
|
379
|
+
'Use for suffix matching like "Controller" without escaping regex metacharacters.'
|
|
380
|
+
}
|
|
381
|
+
}
|
|
199
382
|
}
|
|
200
|
-
) do |
|
|
383
|
+
) do |server_context:, query: nil, types: nil, fields: nil, limit: nil, exact_prefix: nil, exact_suffix: nil|
|
|
384
|
+
if (query.nil? || query.empty?) &&
|
|
385
|
+
(exact_prefix.nil? || exact_prefix.empty?) &&
|
|
386
|
+
(exact_suffix.nil? || exact_suffix.empty?)
|
|
387
|
+
next respond_err.call(
|
|
388
|
+
'search requires `query` or at least one of `exact_prefix` / `exact_suffix`.',
|
|
389
|
+
code: :unsupported_argument,
|
|
390
|
+
tool: 'search',
|
|
391
|
+
argument: 'query',
|
|
392
|
+
hint: 'Pass query: "Worker|Job" for regex matching, or exact_prefix: "Next::Settings::" for literal prefix scoping.'
|
|
393
|
+
)
|
|
394
|
+
end
|
|
201
395
|
types = coerce.call(types)
|
|
202
396
|
fields = coerce.call(fields)
|
|
203
397
|
limit = coerce_int.call(limit)
|
|
204
|
-
|
|
398
|
+
search_result = reader.search(
|
|
205
399
|
query,
|
|
206
400
|
types: types,
|
|
207
401
|
fields: fields || %w[identifier],
|
|
208
|
-
limit: limit || 20
|
|
402
|
+
limit: limit || 20,
|
|
403
|
+
exact_prefix: exact_prefix,
|
|
404
|
+
exact_suffix: exact_suffix
|
|
209
405
|
)
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
406
|
+
results = search_result[:results]
|
|
407
|
+
payload = {
|
|
408
|
+
query: query,
|
|
409
|
+
result_count: results.size,
|
|
410
|
+
results: results
|
|
411
|
+
}
|
|
412
|
+
payload[:note] = search_result[:note] if search_result[:note]
|
|
413
|
+
payload[:partial] = true if search_result[:partial]
|
|
414
|
+
respond.call(renderer.render(:search, payload))
|
|
215
415
|
end
|
|
216
416
|
end
|
|
217
417
|
|
|
@@ -228,14 +428,23 @@ module Woods
|
|
|
228
428
|
types: {
|
|
229
429
|
type: 'array', items: { type: 'string' },
|
|
230
430
|
description: 'Filter to these types'
|
|
431
|
+
},
|
|
432
|
+
via: {
|
|
433
|
+
type: 'array', items: { type: 'string' },
|
|
434
|
+
description: 'Filter by relationship type. Accepts either a single string ' \
|
|
435
|
+
"(e.g. 'code_reference') or an array " \
|
|
436
|
+
"(e.g. ['code_reference','render']); both forms are coerced to an array internally. " \
|
|
437
|
+
'Known values: link_to, redirect_to, form_action, render, code_reference, ' \
|
|
438
|
+
'belongs_to, has_many, has_one, has_and_belongs_to_many.'
|
|
231
439
|
}
|
|
232
440
|
},
|
|
233
441
|
required: ['identifier']
|
|
234
442
|
}
|
|
235
|
-
) do |identifier:, server_context:, depth: nil, types: nil|
|
|
443
|
+
) do |identifier:, server_context:, depth: nil, types: nil, via: nil|
|
|
236
444
|
types = coerce.call(types)
|
|
445
|
+
via = coerce.call(via)
|
|
237
446
|
depth = coerce_int.call(depth)
|
|
238
|
-
result = reader.send(reader_method, identifier, depth: depth || 2, types: types)
|
|
447
|
+
result = reader.send(reader_method, identifier, depth: depth || 2, types: types, via: via)
|
|
239
448
|
if result[:found] == false
|
|
240
449
|
result[:message] =
|
|
241
450
|
"Identifier '#{identifier}' not found in the index. Use 'search' to find valid identifiers."
|
|
@@ -257,7 +466,7 @@ module Woods
|
|
|
257
466
|
}
|
|
258
467
|
}
|
|
259
468
|
) do |server_context:, detail: nil|
|
|
260
|
-
result = { manifest: reader.manifest }
|
|
469
|
+
result = { manifest: reader.manifest, template_engines: reader.template_engines }
|
|
261
470
|
result[:summary] = reader.summary if (detail || 'summary') == 'full'
|
|
262
471
|
respond.call(renderer.render(:structure, result))
|
|
263
472
|
end
|
|
@@ -434,56 +643,127 @@ module Woods
|
|
|
434
643
|
end
|
|
435
644
|
end
|
|
436
645
|
|
|
437
|
-
def define_reload_tool(server, reader, respond)
|
|
646
|
+
def define_reload_tool(server, reader, respond, retriever_reloader)
|
|
438
647
|
server.define_tool(
|
|
439
648
|
name: 'reload',
|
|
440
|
-
description: 'Reload extraction data from disk. Use after re-running extraction to pick
|
|
441
|
-
'without restarting the server.'
|
|
649
|
+
description: 'Reload extraction data from disk. Use after re-running extraction or woods:embed to pick ' \
|
|
650
|
+
'up changes without restarting the server. Refreshes the JSON index (manifest, dependency ' \
|
|
651
|
+
'graph, unit cache) AND re-hydrates the retriever\'s in-memory vector/metadata/graph ' \
|
|
652
|
+
'stores from the latest dumps. Durable backends (pgvector, Qdrant) are auto-refreshed ' \
|
|
653
|
+
'externally — their counts in the response reflect the read-through state.',
|
|
442
654
|
input_schema: { type: 'object', properties: {} }
|
|
443
655
|
) do |server_context:|
|
|
444
656
|
reader.reload!
|
|
445
657
|
manifest = reader.manifest
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
658
|
+
payload = {
|
|
659
|
+
reloaded: true,
|
|
660
|
+
extracted_at: manifest['extracted_at'],
|
|
661
|
+
total_units: manifest['total_units'],
|
|
662
|
+
counts: manifest['counts']
|
|
663
|
+
}
|
|
664
|
+
if retriever_reloader
|
|
665
|
+
begin
|
|
666
|
+
payload[:retriever] = retriever_reloader.call
|
|
667
|
+
rescue StandardError => e
|
|
668
|
+
payload[:retriever] = { error: "#{e.class}: #{e.message}" }
|
|
669
|
+
end
|
|
670
|
+
end
|
|
671
|
+
respond.call(JSON.pretty_generate(payload))
|
|
452
672
|
end
|
|
453
673
|
end
|
|
454
674
|
|
|
455
|
-
def define_retrieve_tool(server, retriever, respond)
|
|
675
|
+
def define_retrieve_tool(server, retriever, respond, respond_err)
|
|
456
676
|
coerce_int = method(:coerce_integer)
|
|
677
|
+
coerce = method(:coerce_array)
|
|
457
678
|
server.define_tool(
|
|
458
679
|
name: 'codebase_retrieve',
|
|
459
|
-
description: '
|
|
460
|
-
'
|
|
680
|
+
description: 'Semantic search: retrieve relevant code units for a natural-language question. ' \
|
|
681
|
+
'Example: codebase_retrieve("how does billing work?") returns ranked source context. ' \
|
|
682
|
+
'Returns a token-budgeted context string ready to paste into a prompt. ' \
|
|
683
|
+
'Use `search` for exact name/pattern matching; use this for conceptual questions. ' \
|
|
684
|
+
'Requires an embedding provider — disabled if OPENAI_API_KEY is unset and Ollama is unreachable. ' \
|
|
685
|
+
'By default excludes test_mappings (~33% of a typical index) so spec filenames do not ' \
|
|
686
|
+
'dominate semantic rank; pass types: ["test_mapping"] to opt back in. ' \
|
|
687
|
+
'Parameter: use `budget` for the token budget (not `limit` — that means result count ' \
|
|
688
|
+
'on sibling tools, and mapping it here would silently produce a near-empty response).',
|
|
461
689
|
input_schema: {
|
|
462
690
|
properties: {
|
|
463
691
|
query: { type: 'string',
|
|
464
|
-
description: 'Natural language
|
|
465
|
-
budget: { type: 'integer',
|
|
692
|
+
description: 'Natural language question (e.g. "How does user authentication work?")' },
|
|
693
|
+
budget: { type: 'integer',
|
|
694
|
+
description: 'Token budget for context assembly (default: 8000).' },
|
|
695
|
+
types: {
|
|
696
|
+
type: 'array', items: { type: 'string' },
|
|
697
|
+
description: 'Restrict results to these unit types (model, controller, service, job, mailer, ' \
|
|
698
|
+
'rails_source, test_mapping, etc.). Overrides the default test_mapping exclusion. ' \
|
|
699
|
+
'When the unfiltered top-K has no candidate of a requested type, the retriever ' \
|
|
700
|
+
'falls back to rank-within-type so the response is populated whenever units of ' \
|
|
701
|
+
'the requested type exist in the index. The response appends a "Type rank ' \
|
|
702
|
+
'context" table with per-type: source, rank in unfiltered top-K, global_k, ' \
|
|
703
|
+
'total_of_type. Read source to tell the cases apart: in_top_k (strong match), ' \
|
|
704
|
+
'within_type_fallback (weak match surfaced by the fallback), outside_top_k ' \
|
|
705
|
+
'(index has this type but other requested types filled the result), absent ' \
|
|
706
|
+
'(zero units of this type in the index).'
|
|
707
|
+
},
|
|
708
|
+
exclude_types: {
|
|
709
|
+
type: 'array', items: { type: 'string' },
|
|
710
|
+
description: 'Additional types to exclude on top of the default test_mapping exclusion.'
|
|
711
|
+
}
|
|
466
712
|
},
|
|
467
713
|
required: ['query']
|
|
468
714
|
}
|
|
469
|
-
) do |query:, server_context:, budget: nil|
|
|
715
|
+
) do |query:, server_context:, budget: nil, limit: nil, types: nil, exclude_types: nil|
|
|
716
|
+
# `limit` isn't declared in the schema but clients still send it
|
|
717
|
+
# because sibling tools (search, recent_changes, pagerank) use
|
|
718
|
+
# `limit` as a result count. Mapping it to `budget` here would
|
|
719
|
+
# silently produce a near-empty response (limit: 10 → 10-token
|
|
720
|
+
# budget). Surface a helpful typed error instead.
|
|
721
|
+
unless limit.nil?
|
|
722
|
+
next respond_err.call(
|
|
723
|
+
'codebase_retrieve uses `budget` (token budget, default 8000), not `limit`. ' \
|
|
724
|
+
'`limit` is the result-count parameter on sibling tools (search, recent_changes, pagerank). ' \
|
|
725
|
+
"Pass `budget: #{coerce_int.call(limit)}` if you meant a #{coerce_int.call(limit)}-token context, " \
|
|
726
|
+
'or drop the kwarg entirely for the default 8000.',
|
|
727
|
+
code: :unsupported_argument,
|
|
728
|
+
tool: 'codebase_retrieve',
|
|
729
|
+
argument: 'limit',
|
|
730
|
+
hint: 'Use `budget:` for tokens. Retrieval does not cap by result count — the token budget ' \
|
|
731
|
+
'governs how many ranked units fit in the returned context.'
|
|
732
|
+
)
|
|
733
|
+
end
|
|
734
|
+
|
|
470
735
|
budget = coerce_int.call(budget)
|
|
736
|
+
types = coerce.call(types)
|
|
737
|
+
exclude_types = coerce.call(exclude_types)
|
|
471
738
|
if retriever
|
|
472
|
-
result = retriever.retrieve(
|
|
739
|
+
result = retriever.retrieve(
|
|
740
|
+
query,
|
|
741
|
+
budget: budget || 8000,
|
|
742
|
+
types: types,
|
|
743
|
+
exclude_types: exclude_types
|
|
744
|
+
)
|
|
473
745
|
respond.call(result.context)
|
|
474
746
|
else
|
|
475
|
-
|
|
476
|
-
'Semantic search is
|
|
477
|
-
'
|
|
747
|
+
respond_err.call(
|
|
748
|
+
'Semantic search is disabled — no embedding provider is configured. ' \
|
|
749
|
+
'To enable: set OPENAI_API_KEY, or run Ollama locally ' \
|
|
750
|
+
'(brew install ollama && ollama serve && ollama pull nomic-embed-text). ' \
|
|
751
|
+
'Use the `search` tool for pattern-based matching in the meantime.',
|
|
752
|
+
code: :not_configured,
|
|
753
|
+
config_key: 'embedding_provider',
|
|
754
|
+
doc_link: 'docs/RETRIEVAL_GUIDE.md#configuring-retrieval',
|
|
755
|
+
tool: 'codebase_retrieve'
|
|
478
756
|
)
|
|
479
757
|
end
|
|
480
758
|
end
|
|
481
759
|
end
|
|
482
760
|
|
|
483
|
-
def define_trace_flow_tool(server, reader, index_dir, respond, renderer)
|
|
761
|
+
def define_trace_flow_tool(server, reader, index_dir, respond, respond_err, renderer)
|
|
484
762
|
require_relative '../flow_assembler'
|
|
763
|
+
require_relative '../flow_document'
|
|
485
764
|
require_relative '../dependency_graph'
|
|
486
765
|
coerce_int = method(:coerce_integer)
|
|
766
|
+
load_precomputed = method(:load_precomputed_flow)
|
|
487
767
|
|
|
488
768
|
server.define_tool(
|
|
489
769
|
name: 'trace_flow',
|
|
@@ -503,21 +783,33 @@ module Woods
|
|
|
503
783
|
}
|
|
504
784
|
) do |entry_point:, server_context:, depth: nil|
|
|
505
785
|
max_depth = coerce_int.call(depth) || 3
|
|
506
|
-
graph = reader.dependency_graph
|
|
507
786
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
flow_doc =
|
|
787
|
+
# Prefer the precomputed flow JSON written by FlowPrecomputer during
|
|
788
|
+
# extraction (gated on `config.precompute_flows`) — it avoids
|
|
789
|
+
# re-parsing source on every request. Fall back to query-time
|
|
790
|
+
# reassembly when no precomputed document exists.
|
|
791
|
+
flow_doc = load_precomputed.call(index_dir, entry_point)
|
|
792
|
+
flow_doc ||= begin
|
|
793
|
+
graph = reader.dependency_graph
|
|
794
|
+
assembler = Woods::FlowAssembler.new(graph: graph, extracted_dir: index_dir)
|
|
795
|
+
assembler.assemble(entry_point, max_depth: max_depth)
|
|
796
|
+
end
|
|
513
797
|
|
|
514
798
|
respond.call(renderer.render(:trace_flow, flow_doc.to_h))
|
|
515
799
|
rescue StandardError => e
|
|
516
|
-
|
|
800
|
+
# Emit an MCP error so clients can detect the failure and
|
|
801
|
+
# surface it, rather than wrapping the error payload in a
|
|
802
|
+
# successful response — consistent with session_trace and
|
|
803
|
+
# codebase_retrieve.
|
|
804
|
+
respond_err.call(
|
|
805
|
+
"trace_flow failed: #{e.message}",
|
|
806
|
+
code: :internal_error,
|
|
807
|
+
data: { entry_point: entry_point, exception: e.class.name }
|
|
808
|
+
)
|
|
517
809
|
end
|
|
518
810
|
end
|
|
519
811
|
|
|
520
|
-
def define_session_trace_tool(server, reader, respond)
|
|
812
|
+
def define_session_trace_tool(server, reader, respond, respond_err)
|
|
521
813
|
coerce_int = method(:coerce_integer)
|
|
522
814
|
server.define_tool(
|
|
523
815
|
name: 'session_trace',
|
|
@@ -534,7 +826,16 @@ module Woods
|
|
|
534
826
|
budget = coerce_int.call(budget)
|
|
535
827
|
depth = coerce_int.call(depth)
|
|
536
828
|
store = Woods.configuration.session_store
|
|
537
|
-
|
|
829
|
+
unless store
|
|
830
|
+
next respond_err.call(
|
|
831
|
+
'Session tracer is not configured. Assign `session_store` (FileStore, RedisStore, or SolidCacheStore) ' \
|
|
832
|
+
'and set `session_tracer_enabled = true` in Woods.configure.',
|
|
833
|
+
code: :not_configured,
|
|
834
|
+
config_key: 'session_store',
|
|
835
|
+
doc_link: 'docs/SESSION_TRACER.md',
|
|
836
|
+
tool: 'session_trace'
|
|
837
|
+
)
|
|
838
|
+
end
|
|
538
839
|
|
|
539
840
|
require_relative '../session_tracer/session_flow_assembler'
|
|
540
841
|
|
|
@@ -544,26 +845,31 @@ module Woods
|
|
|
544
845
|
doc = assembler.assemble(session_id, budget: budget || 8000, depth: depth || 1)
|
|
545
846
|
respond.call(doc.to_markdown)
|
|
546
847
|
rescue StandardError => e
|
|
547
|
-
|
|
848
|
+
respond_err.call(
|
|
849
|
+
"Session trace failed: #{e.message}",
|
|
850
|
+
code: :internal_error,
|
|
851
|
+
tool: 'session_trace',
|
|
852
|
+
session_id: session_id
|
|
853
|
+
)
|
|
548
854
|
end
|
|
549
855
|
end
|
|
550
856
|
|
|
551
|
-
def define_operator_tools(server, operator, respond)
|
|
552
|
-
define_pipeline_extract_tool(server, operator, respond)
|
|
553
|
-
define_pipeline_embed_tool(server, operator, respond)
|
|
554
|
-
define_pipeline_status_tool(server, operator, respond)
|
|
555
|
-
define_pipeline_diagnose_tool(server, operator, respond)
|
|
556
|
-
define_pipeline_repair_tool(server, operator, respond)
|
|
857
|
+
def define_operator_tools(server, operator, respond, respond_err, op_missing)
|
|
858
|
+
define_pipeline_extract_tool(server, operator, respond, respond_err, op_missing)
|
|
859
|
+
define_pipeline_embed_tool(server, operator, respond, respond_err, op_missing)
|
|
860
|
+
define_pipeline_status_tool(server, operator, respond, respond_err, op_missing)
|
|
861
|
+
define_pipeline_diagnose_tool(server, operator, respond, respond_err, op_missing)
|
|
862
|
+
define_pipeline_repair_tool(server, operator, respond, respond_err, op_missing)
|
|
557
863
|
end
|
|
558
864
|
|
|
559
|
-
def define_feedback_tools(server, feedback_store, respond)
|
|
560
|
-
define_retrieval_rate_tool(server, feedback_store, respond)
|
|
561
|
-
define_retrieval_report_gap_tool(server, feedback_store, respond)
|
|
562
|
-
define_retrieval_explain_tool(server, feedback_store, respond)
|
|
563
|
-
define_retrieval_suggest_tool(server, feedback_store, respond)
|
|
865
|
+
def define_feedback_tools(server, feedback_store, respond, _respond_err, fb_missing)
|
|
866
|
+
define_retrieval_rate_tool(server, feedback_store, respond, fb_missing)
|
|
867
|
+
define_retrieval_report_gap_tool(server, feedback_store, respond, fb_missing)
|
|
868
|
+
define_retrieval_explain_tool(server, feedback_store, respond, fb_missing)
|
|
869
|
+
define_retrieval_suggest_tool(server, feedback_store, respond, fb_missing)
|
|
564
870
|
end
|
|
565
871
|
|
|
566
|
-
def define_pipeline_extract_tool(server, operator, respond)
|
|
872
|
+
def define_pipeline_extract_tool(server, operator, respond, respond_err, op_missing)
|
|
567
873
|
server.define_tool(
|
|
568
874
|
name: 'pipeline_extract',
|
|
569
875
|
description: 'Trigger a codebase extraction pipeline run. Checks rate limits before proceeding.',
|
|
@@ -573,11 +879,31 @@ module Woods
|
|
|
573
879
|
}
|
|
574
880
|
}
|
|
575
881
|
) do |server_context:, incremental: nil|
|
|
576
|
-
next
|
|
882
|
+
next op_missing.call('pipeline_extract') unless operator
|
|
577
883
|
|
|
578
884
|
guard = operator[:pipeline_guard]
|
|
579
|
-
|
|
885
|
+
if guard && !guard.allow?(:extraction)
|
|
886
|
+
next respond_err.call(
|
|
887
|
+
'Extraction is rate-limited. Try again later.',
|
|
888
|
+
code: :rate_limited,
|
|
889
|
+
tool: 'pipeline_extract',
|
|
890
|
+
retry_after_seconds: 300
|
|
891
|
+
)
|
|
892
|
+
end
|
|
580
893
|
|
|
894
|
+
# Acquire the in-process lock BEFORE recording to the guard.
|
|
895
|
+
# Otherwise a refused "already running" request still resets
|
|
896
|
+
# the cooldown clock and blocks the next legitimate attempt
|
|
897
|
+
# for the full 5-minute window once the current run finishes.
|
|
898
|
+
unless Woods::MCP::Server.send(:pipeline_start, :extraction)
|
|
899
|
+
next respond_err.call(
|
|
900
|
+
'Extraction pipeline is already running. Wait for it to complete.',
|
|
901
|
+
code: :already_running,
|
|
902
|
+
tool: 'pipeline_extract'
|
|
903
|
+
)
|
|
904
|
+
end
|
|
905
|
+
|
|
906
|
+
# Lock acquired — now it's safe to record the run.
|
|
581
907
|
guard&.record!(:extraction)
|
|
582
908
|
|
|
583
909
|
Thread.new do
|
|
@@ -588,6 +914,8 @@ module Woods
|
|
|
588
914
|
rescue StandardError => e
|
|
589
915
|
logger = defined?(Rails) ? Rails.logger : Logger.new($stderr)
|
|
590
916
|
logger.error("[Woods] Pipeline extract failed: #{e.message}")
|
|
917
|
+
ensure
|
|
918
|
+
Woods::MCP::Server.send(:pipeline_finish, :extraction)
|
|
591
919
|
end
|
|
592
920
|
|
|
593
921
|
respond.call(JSON.pretty_generate({
|
|
@@ -597,7 +925,7 @@ module Woods
|
|
|
597
925
|
end
|
|
598
926
|
end
|
|
599
927
|
|
|
600
|
-
def define_pipeline_embed_tool(server, operator, respond)
|
|
928
|
+
def define_pipeline_embed_tool(server, operator, respond, respond_err, op_missing)
|
|
601
929
|
server.define_tool(
|
|
602
930
|
name: 'pipeline_embed',
|
|
603
931
|
description: 'Trigger embedding generation for extracted units. Checks rate limits before proceeding.',
|
|
@@ -607,29 +935,43 @@ module Woods
|
|
|
607
935
|
}
|
|
608
936
|
}
|
|
609
937
|
) do |server_context:, incremental: nil|
|
|
610
|
-
next
|
|
938
|
+
next op_missing.call('pipeline_embed') unless operator
|
|
611
939
|
|
|
612
940
|
guard = operator[:pipeline_guard]
|
|
613
|
-
|
|
941
|
+
if guard && !guard.allow?(:embedding)
|
|
942
|
+
next respond_err.call(
|
|
943
|
+
'Embedding is rate-limited. Try again later.',
|
|
944
|
+
code: :rate_limited,
|
|
945
|
+
tool: 'pipeline_embed',
|
|
946
|
+
retry_after_seconds: 300
|
|
947
|
+
)
|
|
948
|
+
end
|
|
949
|
+
|
|
950
|
+
# Acquire the in-process lock first so a refused "already
|
|
951
|
+
# running" request doesn't burn the cooldown clock.
|
|
952
|
+
unless Woods::MCP::Server.send(:pipeline_start, :embedding)
|
|
953
|
+
next respond_err.call(
|
|
954
|
+
'Embedding pipeline is already running. Wait for it to complete.',
|
|
955
|
+
code: :already_running,
|
|
956
|
+
tool: 'pipeline_embed'
|
|
957
|
+
)
|
|
958
|
+
end
|
|
614
959
|
|
|
615
960
|
guard&.record!(:embedding)
|
|
616
961
|
|
|
617
962
|
Thread.new do
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
indexer = Woods::
|
|
624
|
-
provider: provider,
|
|
625
|
-
text_preparer: text_preparer,
|
|
626
|
-
vector_store: vector_store,
|
|
627
|
-
output_dir: config.output_dir
|
|
628
|
-
)
|
|
963
|
+
# Share the rake-task wiring so the MCP path picks up the
|
|
964
|
+
# provider-tuned TextPreparer + token-aware chunker. Without
|
|
965
|
+
# this, MCP-triggered embedding still hit Ollama's "input
|
|
966
|
+
# length exceeds context length" error after the rake path
|
|
967
|
+
# was fixed in PR #70.
|
|
968
|
+
indexer = Woods::Tasks.build_embed_indexer
|
|
629
969
|
incremental ? indexer.index_incremental : indexer.index_all
|
|
630
970
|
rescue StandardError => e
|
|
631
971
|
logger = defined?(Rails) ? Rails.logger : Logger.new($stderr)
|
|
632
972
|
logger.error("[Woods] Pipeline embed failed: #{e.message}")
|
|
973
|
+
ensure
|
|
974
|
+
Woods::MCP::Server.send(:pipeline_finish, :embedding)
|
|
633
975
|
end
|
|
634
976
|
|
|
635
977
|
respond.call(JSON.pretty_generate({
|
|
@@ -639,23 +981,50 @@ module Woods
|
|
|
639
981
|
end
|
|
640
982
|
end
|
|
641
983
|
|
|
642
|
-
|
|
984
|
+
# Acquire a pipeline-kind lock atomically. Returns false when
|
|
985
|
+
# another thread is already running that kind of pipeline (so the
|
|
986
|
+
# caller can refuse the new request instead of racing the running
|
|
987
|
+
# pipeline). Module-level state — a single MCP server process
|
|
988
|
+
# serializes its own pipelines.
|
|
989
|
+
def pipeline_start(kind)
|
|
990
|
+
@pipeline_mutex ||= Mutex.new
|
|
991
|
+
@pipeline_in_flight ||= {}
|
|
992
|
+
@pipeline_mutex.synchronize do
|
|
993
|
+
return false if @pipeline_in_flight[kind]
|
|
994
|
+
|
|
995
|
+
@pipeline_in_flight[kind] = true
|
|
996
|
+
true
|
|
997
|
+
end
|
|
998
|
+
end
|
|
999
|
+
|
|
1000
|
+
def pipeline_finish(kind)
|
|
1001
|
+
@pipeline_mutex&.synchronize { @pipeline_in_flight&.delete(kind) }
|
|
1002
|
+
end
|
|
1003
|
+
|
|
1004
|
+
def define_pipeline_status_tool(server, operator, respond, respond_err, op_missing)
|
|
643
1005
|
server.define_tool(
|
|
644
1006
|
name: 'pipeline_status',
|
|
645
1007
|
description: 'Get the current pipeline status: last extraction time, unit counts, staleness.',
|
|
646
1008
|
input_schema: { type: 'object', properties: {} }
|
|
647
1009
|
) do |server_context:|
|
|
648
|
-
next
|
|
1010
|
+
next op_missing.call('pipeline_status') unless operator
|
|
649
1011
|
|
|
650
1012
|
reporter = operator[:status_reporter]
|
|
651
|
-
|
|
1013
|
+
unless reporter
|
|
1014
|
+
next respond_err.call(
|
|
1015
|
+
'Status reporter is not configured.',
|
|
1016
|
+
code: :not_configured,
|
|
1017
|
+
config_key: 'operator.status_reporter',
|
|
1018
|
+
tool: 'pipeline_status'
|
|
1019
|
+
)
|
|
1020
|
+
end
|
|
652
1021
|
|
|
653
1022
|
status = reporter.report
|
|
654
1023
|
respond.call(JSON.pretty_generate(status))
|
|
655
1024
|
end
|
|
656
1025
|
end
|
|
657
1026
|
|
|
658
|
-
def define_pipeline_diagnose_tool(server, operator, respond)
|
|
1027
|
+
def define_pipeline_diagnose_tool(server, operator, respond, respond_err, op_missing)
|
|
659
1028
|
server.define_tool(
|
|
660
1029
|
name: 'pipeline_diagnose',
|
|
661
1030
|
description: 'Classify a recent pipeline error and suggest remediation.',
|
|
@@ -667,10 +1036,17 @@ module Woods
|
|
|
667
1036
|
required: %w[error_class error_message]
|
|
668
1037
|
}
|
|
669
1038
|
) do |error_class:, error_message:, server_context:|
|
|
670
|
-
next
|
|
1039
|
+
next op_missing.call('pipeline_diagnose') unless operator
|
|
671
1040
|
|
|
672
1041
|
escalator = operator[:error_escalator]
|
|
673
|
-
|
|
1042
|
+
unless escalator
|
|
1043
|
+
next respond_err.call(
|
|
1044
|
+
'Error escalator is not configured.',
|
|
1045
|
+
code: :not_configured,
|
|
1046
|
+
config_key: 'operator.error_escalator',
|
|
1047
|
+
tool: 'pipeline_diagnose'
|
|
1048
|
+
)
|
|
1049
|
+
end
|
|
674
1050
|
|
|
675
1051
|
error = StandardError.new(error_message)
|
|
676
1052
|
# Set the class name in the error string for pattern matching
|
|
@@ -680,7 +1056,7 @@ module Woods
|
|
|
680
1056
|
end
|
|
681
1057
|
end
|
|
682
1058
|
|
|
683
|
-
def define_pipeline_repair_tool(server, operator, respond)
|
|
1059
|
+
def define_pipeline_repair_tool(server, operator, respond, respond_err, op_missing)
|
|
684
1060
|
server.define_tool(
|
|
685
1061
|
name: 'pipeline_repair',
|
|
686
1062
|
description: 'Attempt to repair pipeline state: clear stale locks, reset rate limits.',
|
|
@@ -695,7 +1071,7 @@ module Woods
|
|
|
695
1071
|
required: ['action']
|
|
696
1072
|
}
|
|
697
1073
|
) do |action:, server_context:|
|
|
698
|
-
next
|
|
1074
|
+
next op_missing.call('pipeline_repair') unless operator
|
|
699
1075
|
|
|
700
1076
|
case action
|
|
701
1077
|
when 'clear_locks'
|
|
@@ -704,17 +1080,29 @@ module Woods
|
|
|
704
1080
|
lock.release
|
|
705
1081
|
respond.call(JSON.pretty_generate({ repaired: true, action: 'clear_locks' }))
|
|
706
1082
|
else
|
|
707
|
-
|
|
1083
|
+
respond_err.call(
|
|
1084
|
+
'Pipeline lock is not configured.',
|
|
1085
|
+
code: :not_configured,
|
|
1086
|
+
config_key: 'operator.pipeline_lock',
|
|
1087
|
+
tool: 'pipeline_repair'
|
|
1088
|
+
)
|
|
708
1089
|
end
|
|
709
1090
|
when 'reset_cooldowns'
|
|
710
1091
|
respond.call(JSON.pretty_generate({ repaired: true, action: 'reset_cooldowns' }))
|
|
711
1092
|
else
|
|
712
|
-
|
|
1093
|
+
respond_err.call(
|
|
1094
|
+
"Unknown repair action: #{action}",
|
|
1095
|
+
code: :unsupported_argument,
|
|
1096
|
+
tool: 'pipeline_repair',
|
|
1097
|
+
argument: 'action',
|
|
1098
|
+
value: action,
|
|
1099
|
+
allowed: %w[clear_locks reset_cooldowns]
|
|
1100
|
+
)
|
|
713
1101
|
end
|
|
714
1102
|
end
|
|
715
1103
|
end
|
|
716
1104
|
|
|
717
|
-
def define_retrieval_rate_tool(server, feedback_store, respond)
|
|
1105
|
+
def define_retrieval_rate_tool(server, feedback_store, respond, fb_missing)
|
|
718
1106
|
coerce_int = method(:coerce_integer)
|
|
719
1107
|
server.define_tool(
|
|
720
1108
|
name: 'retrieval_rate',
|
|
@@ -728,7 +1116,7 @@ module Woods
|
|
|
728
1116
|
required: %w[query score]
|
|
729
1117
|
}
|
|
730
1118
|
) do |query:, score:, server_context:, comment: nil|
|
|
731
|
-
next
|
|
1119
|
+
next fb_missing.call('retrieval_rate') unless feedback_store
|
|
732
1120
|
|
|
733
1121
|
score = coerce_int.call(score)
|
|
734
1122
|
feedback_store.record_rating(query: query, score: score, comment: comment)
|
|
@@ -736,7 +1124,7 @@ module Woods
|
|
|
736
1124
|
end
|
|
737
1125
|
end
|
|
738
1126
|
|
|
739
|
-
def define_retrieval_report_gap_tool(server, feedback_store, respond)
|
|
1127
|
+
def define_retrieval_report_gap_tool(server, feedback_store, respond, fb_missing)
|
|
740
1128
|
server.define_tool(
|
|
741
1129
|
name: 'retrieval_report_gap',
|
|
742
1130
|
description: 'Report a missing unit that should have appeared in retrieval results.',
|
|
@@ -749,7 +1137,7 @@ module Woods
|
|
|
749
1137
|
required: %w[query missing_unit unit_type]
|
|
750
1138
|
}
|
|
751
1139
|
) do |query:, missing_unit:, unit_type:, server_context:|
|
|
752
|
-
next
|
|
1140
|
+
next fb_missing.call('retrieval_report_gap') unless feedback_store
|
|
753
1141
|
|
|
754
1142
|
feedback_store.record_gap(query: query, missing_unit: missing_unit, unit_type: unit_type)
|
|
755
1143
|
respond.call(JSON.pretty_generate({
|
|
@@ -760,13 +1148,13 @@ module Woods
|
|
|
760
1148
|
end
|
|
761
1149
|
end
|
|
762
1150
|
|
|
763
|
-
def define_retrieval_explain_tool(server, feedback_store, respond)
|
|
1151
|
+
def define_retrieval_explain_tool(server, feedback_store, respond, fb_missing)
|
|
764
1152
|
server.define_tool(
|
|
765
1153
|
name: 'retrieval_explain',
|
|
766
1154
|
description: 'Get feedback statistics: average score, total ratings, gap count.',
|
|
767
1155
|
input_schema: { type: 'object', properties: {} }
|
|
768
1156
|
) do |server_context:|
|
|
769
|
-
next
|
|
1157
|
+
next fb_missing.call('retrieval_explain') unless feedback_store
|
|
770
1158
|
|
|
771
1159
|
ratings = feedback_store.ratings
|
|
772
1160
|
gaps = feedback_store.gaps
|
|
@@ -780,13 +1168,13 @@ module Woods
|
|
|
780
1168
|
end
|
|
781
1169
|
end
|
|
782
1170
|
|
|
783
|
-
def define_retrieval_suggest_tool(server, feedback_store, respond)
|
|
1171
|
+
def define_retrieval_suggest_tool(server, feedback_store, respond, fb_missing)
|
|
784
1172
|
server.define_tool(
|
|
785
1173
|
name: 'retrieval_suggest',
|
|
786
1174
|
description: 'Analyze feedback to suggest improvements: detect patterns in low scores and missing units.',
|
|
787
1175
|
input_schema: { type: 'object', properties: {} }
|
|
788
1176
|
) do |server_context:|
|
|
789
|
-
next
|
|
1177
|
+
next fb_missing.call('retrieval_suggest') unless feedback_store
|
|
790
1178
|
|
|
791
1179
|
require_relative '../feedback/gap_detector'
|
|
792
1180
|
detector = Woods::Feedback::GapDetector.new(feedback_store: feedback_store)
|
|
@@ -798,14 +1186,14 @@ module Woods
|
|
|
798
1186
|
end
|
|
799
1187
|
end
|
|
800
1188
|
|
|
801
|
-
def define_snapshot_tools(server, snapshot_store, respond)
|
|
802
|
-
define_list_snapshots_tool(server, snapshot_store, respond)
|
|
803
|
-
define_snapshot_diff_tool(server, snapshot_store, respond)
|
|
804
|
-
define_unit_history_tool(server, snapshot_store, respond)
|
|
805
|
-
define_snapshot_detail_tool(server, snapshot_store, respond)
|
|
1189
|
+
def define_snapshot_tools(server, snapshot_store, respond, respond_err, snap_missing)
|
|
1190
|
+
define_list_snapshots_tool(server, snapshot_store, respond, snap_missing)
|
|
1191
|
+
define_snapshot_diff_tool(server, snapshot_store, respond, snap_missing)
|
|
1192
|
+
define_unit_history_tool(server, snapshot_store, respond, snap_missing)
|
|
1193
|
+
define_snapshot_detail_tool(server, snapshot_store, respond, respond_err, snap_missing)
|
|
806
1194
|
end
|
|
807
1195
|
|
|
808
|
-
def define_list_snapshots_tool(server, snapshot_store, respond)
|
|
1196
|
+
def define_list_snapshots_tool(server, snapshot_store, respond, snap_missing)
|
|
809
1197
|
coerce_int = method(:coerce_integer)
|
|
810
1198
|
server.define_tool(
|
|
811
1199
|
name: 'list_snapshots',
|
|
@@ -817,7 +1205,7 @@ module Woods
|
|
|
817
1205
|
}
|
|
818
1206
|
}
|
|
819
1207
|
) do |server_context:, limit: nil, branch: nil|
|
|
820
|
-
next
|
|
1208
|
+
next snap_missing.call('list_snapshots') unless snapshot_store
|
|
821
1209
|
|
|
822
1210
|
limit = coerce_int.call(limit)
|
|
823
1211
|
results = snapshot_store.list(limit: limit || 20, branch: branch)
|
|
@@ -825,7 +1213,7 @@ module Woods
|
|
|
825
1213
|
end
|
|
826
1214
|
end
|
|
827
1215
|
|
|
828
|
-
def define_snapshot_diff_tool(server, snapshot_store, respond)
|
|
1216
|
+
def define_snapshot_diff_tool(server, snapshot_store, respond, snap_missing)
|
|
829
1217
|
server.define_tool(
|
|
830
1218
|
name: 'snapshot_diff',
|
|
831
1219
|
description: 'Compare two extraction snapshots by git SHA. Returns lists of added, modified, and deleted units.',
|
|
@@ -837,7 +1225,7 @@ module Woods
|
|
|
837
1225
|
required: %w[sha_a sha_b]
|
|
838
1226
|
}
|
|
839
1227
|
) do |sha_a:, sha_b:, server_context:|
|
|
840
|
-
next
|
|
1228
|
+
next snap_missing.call('snapshot_diff') unless snapshot_store
|
|
841
1229
|
|
|
842
1230
|
result = snapshot_store.diff(sha_a, sha_b)
|
|
843
1231
|
respond.call(JSON.pretty_generate({
|
|
@@ -850,7 +1238,7 @@ module Woods
|
|
|
850
1238
|
end
|
|
851
1239
|
end
|
|
852
1240
|
|
|
853
|
-
def define_unit_history_tool(server, snapshot_store, respond)
|
|
1241
|
+
def define_unit_history_tool(server, snapshot_store, respond, snap_missing)
|
|
854
1242
|
coerce_int = method(:coerce_integer)
|
|
855
1243
|
server.define_tool(
|
|
856
1244
|
name: 'unit_history',
|
|
@@ -863,7 +1251,7 @@ module Woods
|
|
|
863
1251
|
required: ['identifier']
|
|
864
1252
|
}
|
|
865
1253
|
) do |identifier:, server_context:, limit: nil|
|
|
866
|
-
next
|
|
1254
|
+
next snap_missing.call('unit_history') unless snapshot_store
|
|
867
1255
|
|
|
868
1256
|
limit = coerce_int.call(limit)
|
|
869
1257
|
entries = snapshot_store.unit_history(identifier, limit: limit || 20)
|
|
@@ -875,7 +1263,7 @@ module Woods
|
|
|
875
1263
|
end
|
|
876
1264
|
end
|
|
877
1265
|
|
|
878
|
-
def define_snapshot_detail_tool(server, snapshot_store, respond)
|
|
1266
|
+
def define_snapshot_detail_tool(server, snapshot_store, respond, respond_err, snap_missing)
|
|
879
1267
|
server.define_tool(
|
|
880
1268
|
name: 'snapshot_detail',
|
|
881
1269
|
description: 'Get full metadata for a specific extraction snapshot by git SHA.',
|
|
@@ -886,18 +1274,24 @@ module Woods
|
|
|
886
1274
|
required: ['git_sha']
|
|
887
1275
|
}
|
|
888
1276
|
) do |git_sha:, server_context:|
|
|
889
|
-
next
|
|
1277
|
+
next snap_missing.call('snapshot_detail') unless snapshot_store
|
|
890
1278
|
|
|
891
1279
|
snapshot = snapshot_store.find(git_sha)
|
|
892
1280
|
if snapshot
|
|
893
1281
|
respond.call(JSON.pretty_generate(snapshot))
|
|
894
1282
|
else
|
|
895
|
-
|
|
1283
|
+
respond_err.call(
|
|
1284
|
+
"Snapshot not found for git SHA: #{git_sha}",
|
|
1285
|
+
code: :not_found,
|
|
1286
|
+
tool: 'snapshot_detail',
|
|
1287
|
+
git_sha: git_sha,
|
|
1288
|
+
hint: 'Use `list_snapshots` to see available SHAs.'
|
|
1289
|
+
)
|
|
896
1290
|
end
|
|
897
1291
|
end
|
|
898
1292
|
end
|
|
899
1293
|
|
|
900
|
-
def define_notion_sync_tool(server, reader, index_dir, respond)
|
|
1294
|
+
def define_notion_sync_tool(server, reader, index_dir, respond, respond_err)
|
|
901
1295
|
server.define_tool(
|
|
902
1296
|
name: 'notion_sync',
|
|
903
1297
|
description: 'Sync extracted codebase data (Data Models + Columns) to Notion databases. ' \
|
|
@@ -909,11 +1303,23 @@ module Woods
|
|
|
909
1303
|
) do |server_context:|
|
|
910
1304
|
config = Woods.configuration
|
|
911
1305
|
unless config.notion_api_token
|
|
912
|
-
next
|
|
1306
|
+
next respond_err.call(
|
|
1307
|
+
'notion_api_token is not configured. Set it in Woods.configure or via the NOTION_API_TOKEN env var.',
|
|
1308
|
+
code: :not_configured,
|
|
1309
|
+
config_key: 'notion_api_token',
|
|
1310
|
+
doc_link: 'docs/NOTION_EXPORT.md',
|
|
1311
|
+
tool: 'notion_sync'
|
|
1312
|
+
)
|
|
913
1313
|
end
|
|
914
1314
|
|
|
915
1315
|
if (config.notion_database_ids || {}).empty?
|
|
916
|
-
next
|
|
1316
|
+
next respond_err.call(
|
|
1317
|
+
'notion_database_ids is not configured. Set it in Woods.configure.',
|
|
1318
|
+
code: :not_configured,
|
|
1319
|
+
config_key: 'notion_database_ids',
|
|
1320
|
+
doc_link: 'docs/NOTION_EXPORT.md',
|
|
1321
|
+
tool: 'notion_sync'
|
|
1322
|
+
)
|
|
917
1323
|
end
|
|
918
1324
|
|
|
919
1325
|
require_relative '../notion/exporter'
|
|
@@ -927,7 +1333,11 @@ module Woods
|
|
|
927
1333
|
errors: stats[:errors].first(10)
|
|
928
1334
|
}))
|
|
929
1335
|
rescue StandardError => e
|
|
930
|
-
|
|
1336
|
+
respond_err.call(
|
|
1337
|
+
"Notion sync failed: #{e.message}",
|
|
1338
|
+
code: :api_error,
|
|
1339
|
+
tool: 'notion_sync'
|
|
1340
|
+
)
|
|
931
1341
|
end
|
|
932
1342
|
end
|
|
933
1343
|
|
|
@@ -965,6 +1375,196 @@ module Woods
|
|
|
965
1375
|
]
|
|
966
1376
|
end
|
|
967
1377
|
|
|
1378
|
+
def define_woods_status_tool(server, reader, retriever, index_dir, bootstrap_state, respond)
|
|
1379
|
+
server.define_tool(
|
|
1380
|
+
name: 'woods_status',
|
|
1381
|
+
description: 'Diagnose whether the Woods index and server are healthy. Returns extraction metadata ' \
|
|
1382
|
+
'(last run, unit counts, git SHA, staleness in seconds), retriever/embedding configuration, ' \
|
|
1383
|
+
'bootstrap state (hydrated / degraded / failed + reason), feature flags, and a ready flag. ' \
|
|
1384
|
+
'Call this first on cold connect to learn what the server knows.',
|
|
1385
|
+
input_schema: { type: 'object', properties: {} }
|
|
1386
|
+
) do |server_context:|
|
|
1387
|
+
_ = server_context
|
|
1388
|
+
status = Woods::MCP::Server.build_status(
|
|
1389
|
+
reader: reader, retriever: retriever, index_dir: index_dir,
|
|
1390
|
+
bootstrap_state: bootstrap_state
|
|
1391
|
+
)
|
|
1392
|
+
respond.call(JSON.pretty_generate(status))
|
|
1393
|
+
end
|
|
1394
|
+
end
|
|
1395
|
+
|
|
1396
|
+
public
|
|
1397
|
+
|
|
1398
|
+
# Build the woods_status payload. Exposed at module level so specs (and
# future console/unified-server entry points) can assemble the same shape
# without reaching through the MCP::Server internals.
#
# +features.embedding_model+ / +features.embedding_provider+ /
# +features.vector_store+ prefer the ResolvedConfig captured at embed time
# (+bootstrap_state.resolved_config+, which is read back from +woods.json+)
# over +Woods.configuration+, whose defaults can contradict the provider
# actually in use. Without this, operators debugging "wrong provider" see
# status claiming +embedding_model: "text-embedding-3-small"+ next to
# +embedding_provider: "ollama"+ and reasonably distrust every field.
#
# @param reader [#manifest] index reader; a manifest that cannot be read is
#   treated as absent
# @param retriever [Object, nil] configured retriever, or nil when none
# @param index_dir [#to_s] directory holding the extracted index
# @param bootstrap_state [#resolved_config, #to_h, nil] optional bootstrap
#   outcome
# @return [Hash] with keys +:ready+, +:server+, +:index+, +:retriever+,
#   +:bootstrap+ and +:features+. +:ready+ is always +true+ or +false+.
def build_status(reader:, retriever:, index_dir:, bootstrap_state: nil)
  manifest = safe_manifest(reader)
  extracted_at = manifest && manifest['extracted_at']
  staleness = staleness_seconds(extracted_at)
  # Tolerate a nil Woods.configuration: specs that reset it between runs can
  # leave a transient nil window, and build_status should still produce a
  # readable payload during that window.
  config = Woods.configuration || Woods::Configuration.new
  resolved = bootstrap_state&.resolved_config

  {
    # Strict boolean: a missing manifest must serialise as +false+, not as
    # JSON +null+, so clients can branch on the flag without a nil check.
    ready: manifest ? !manifest['counts'].to_h.empty? : false,
    server: {
      name: 'woods',
      version: Woods::VERSION,
      index_dir: index_dir.to_s
    },
    index: index_section(manifest, extracted_at, staleness, index_dir),
    retriever: {
      configured: !retriever.nil?,
      class: retriever&.class&.name
    },
    bootstrap: bootstrap_state&.to_h,
    features: features_from(config, resolved)
  }
end
|
|
1435
|
+
|
|
1436
|
+
private
|
|
1437
|
+
|
|
1438
|
+
# Assemble the +index+ sub-hash of woods_status, including a staleness
# signal that compares the manifest's +git_sha+ against the current HEAD.
#
# The manifest records +git_sha+ / +gemfile_lock_sha+ / +schema_sha+ at
# extraction time. Before this comparison existed nothing checked them
# against the live working tree, so an agent asking questions after many
# changes and an MCP restart silently got pre-change answers.
#
# Reading +git_sha_matches_head+ from the result gives three outcomes:
#   - true  - manifest git_sha equals the current HEAD
#   - false - mismatch (stale)
#   - nil   - could not be resolved (not a git repo, git unavailable, or the
#             manifest has no git_sha); the key is left out of the hash
#
# +head_git_sha+ is added alongside it whenever HEAD was resolved, so
# operators can diff directly. This is an observability signal, not a hard
# gate: refusing to answer would be far more disruptive than a clearly
# visible staleness flag that agents can branch on.
#
# @param manifest [Hash, nil] manifest content, or nil when unreadable
# @param extracted_at [Object, nil] value reported as +:extracted_at+
# @param staleness [Integer, nil] value reported as +:staleness_seconds+
# @param index_dir [Object] passed to +resolve_head_sha+
# @return [Hash]
def index_section(manifest, extracted_at, staleness, index_dir)
  # Every manifest lookup tolerates a missing manifest in the same way.
  read = ->(key) { manifest && manifest[key] }
  recorded_sha = read.call('git_sha')

  section = {
    extracted_at: extracted_at,
    staleness_seconds: staleness,
    rails_version: read.call('rails_version'),
    ruby_version: read.call('ruby_version'),
    total_units: read.call('total_units'),
    counts: read.call('counts') || {},
    git_sha: recorded_sha,
    git_branch: read.call('git_branch'),
    gemfile_lock_sha: read.call('gemfile_lock_sha'),
    schema_sha: read.call('schema_sha')
  }

  # Only ask git when there is a recorded SHA to compare against.
  live_sha = recorded_sha && resolve_head_sha(index_dir)
  return section unless live_sha

  section.merge(head_git_sha: live_sha, git_sha_matches_head: recorded_sha == live_sha)
end
|
|
1477
|
+
|
|
1478
|
+
# Resolve the current HEAD SHA for the git repo containing +index_dir+.
# Returns nil when git is unavailable or +index_dir+ is not in a repo;
# callers treat nil as "can't compare" rather than "mismatch".
#
# stdout and stderr are captured separately (+Open3.capture3+). Capturing
# stderr keeps git's "fatal: not a git repository" banner from leaking
# through the MCP stdio transport, where stray lines are not tolerated.
# Keeping it apart from stdout means a warning that git prints on an
# otherwise successful run cannot end up inside the returned SHA and
# produce a false mismatch.
#
# @param index_dir [#to_s, nil] directory used as git's working directory
# @return [String, nil] the SHA printed by +git rev-parse HEAD+, or nil when
#   it cannot be determined
def resolve_head_sha(index_dir)
  return nil unless index_dir

  dir = index_dir.to_s
  return nil unless File.directory?(dir)

  stdout, _stderr, status = Open3.capture3('git', '-C', dir, 'rev-parse', 'HEAD')
  return nil unless status.success?

  sha = stdout.strip
  # A successful exit with no output still means "can't compare".
  sha.empty? ? nil : sha
rescue Errno::ENOENT, Errno::EACCES
  # git not installed or not executable on this host: equivalent to
  # "can't compare". Any other exception is a genuine bug and should
  # propagate.
  nil
end
|
|
1499
|
+
|
|
1500
|
+
# Assemble the +features+ sub-hash of woods_status, preferring the
# ResolvedConfig captured at embed time over the live
# {Woods::Configuration}.
#
# Fields read from +resolved+ when it supplies them: embedding_model,
# embedding_provider, vector_store. Everything else is host-process state
# (snapshots_enabled, notion_configured, session_tracer_enabled) and comes
# from the running config. Config readers are probed with +respond_to?+, so
# a config object lacking one yields that field's fallback instead of
# raising.
#
# +console_mcp_enabled+ is intentionally omitted: the index MCP process has
# no visibility into the host Rails app's Woods initializer, so historic
# status payloads always reported +false+ regardless of the actual console
# MCP state. Advertising a misleading field is worse than not advertising
# it at all.
#
# @param config [Object] live configuration
# @param resolved [#embedding_provider, #stores, nil] config captured at
#   embed time, if any
# @return [Hash]
def features_from(config, resolved)
  captured_provider = resolved&.embedding_provider || {}
  provider_name = resolved_provider_symbol(captured_provider[:class])
  model_name = captured_provider[:model]
  store_name = resolved&.stores&.dig(:vector_store)

  # Read one setting from the live config, or +fallback+ when it has no such reader.
  live = lambda do |reader_name, fallback = nil|
    config.respond_to?(reader_name) ? config.public_send(reader_name) : fallback
  end

  {
    embedding_model: model_name || live.call(:embedding_model),
    embedding_provider: presence(provider_name || live.call(:embedding_provider)),
    vector_store: presence(store_name || live.call(:vector_store)),
    session_tracer_enabled: live.call(:session_tracer_enabled, false),
    snapshots_enabled: live.call(:enable_snapshots, false),
    notion_configured: config.respond_to?(:notion_api_token) && !presence(config.notion_api_token).nil?
  }
end
|
|
1530
|
+
|
|
1531
|
+
# Map a fully-qualified provider class name (as serialised in woods.json,
# e.g. +"Woods::Embedding::Provider::Ollama"+) to the short symbol form
# used by +Woods.configuration.embedding_provider+ (+:ollama+, +:openai+).
#
# Matching is on the end of the name and is case-sensitive.
#
# @param class_name [String, nil]
# @return [Symbol, nil] nil when +class_name+ is absent, empty, or not a
#   recognised provider, so callers fall back to the live config value
def resolved_provider_symbol(class_name)
  return nil if class_name.nil? || class_name.empty?

  suffix_to_symbol = { /Ollama\z/ => :ollama, /OpenAI\z/ => :openai }
  _pattern, symbol = suffix_to_symbol.find { |pattern, _| pattern === class_name }
  symbol
end
|
|
1544
|
+
|
|
1545
|
+
# Fetch the manifest from +reader+ without letting a read failure escape.
#
# @param reader [#manifest]
# @return [Object, nil] whatever +reader.manifest+ returns (presumably a
#   Hash of manifest content), or nil when the call raises a StandardError
def safe_manifest(reader)
  begin
    reader.manifest
  rescue StandardError
    nil # any read failure is reported as "no manifest"
  end
end
|
|
1551
|
+
|
|
1552
|
+
# Whole seconds elapsed since extraction.
#
# A value that is not a String (for example a numeric epoch written into the
# manifest) counts as unparsable and yields nil instead of raising, so one
# malformed manifest field cannot take down the whole status payload.
#
# @param iso8601 [String, nil] extraction timestamp; parsed with +Time.parse+
# @return [Integer, nil] nil when the timestamp is missing, not a String,
#   empty, or cannot be parsed
def staleness_seconds(iso8601)
  return nil unless iso8601.is_a?(String) && !iso8601.empty?

  (Time.now - Time.parse(iso8601)).to_i
rescue ArgumentError
  # Time.parse signals an unparsable string with ArgumentError.
  nil
end
|
|
1560
|
+
|
|
1561
|
+
# Normalise an optional setting to a String for the status payload.
#
# @param value [Object, nil]
# @return [String, nil] nil when +value+ is nil or reports itself as empty;
#   otherwise +value.to_s+
def presence(value)
  blank = value.nil? || (value.respond_to?(:empty?) && value.empty?)
  blank ? nil : value.to_s
end
|
|
1567
|
+
|
|
968
1568
|
def register_resource_handler(server, reader)
|
|
969
1569
|
server.resources_read_handler do |params|
|
|
970
1570
|
uri = params[:uri]
|