woods 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +186 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +69 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +210 -0
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +771 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +163 -0
- data/lib/woods/unblocked/document_builder.rb +326 -0
- data/lib/woods/unblocked/exporter.rb +201 -0
- data/lib/woods/unblocked/rate_limiter.rb +94 -0
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +130 -6
- metadata +73 -4
data/lib/woods/mcp/server.rb
CHANGED
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'json'
|
|
3
4
|
require 'logger'
|
|
4
5
|
require 'mcp'
|
|
6
|
+
require 'open3'
|
|
7
|
+
require 'time'
|
|
5
8
|
require 'set'
|
|
9
|
+
require_relative '../tasks'
|
|
6
10
|
require_relative 'index_reader'
|
|
7
11
|
require_relative 'tool_response_renderer'
|
|
8
12
|
|
|
9
13
|
module Woods
|
|
10
14
|
module MCP
|
|
11
|
-
# Builds an MCP::Server with
|
|
12
|
-
# Woods extraction output, managing pipelines, and collecting feedback.
|
|
15
|
+
# Builds an MCP::Server with up to 29 tools, 2 resources, and 2 resource templates
|
|
16
|
+
# for querying Woods extraction output, managing pipelines, and collecting feedback.
|
|
17
|
+
# 14 tools are always registered; 15 more register conditionally based on wiring:
|
|
18
|
+
# 5 operator tools, 4 feedback tools, 4 snapshot tools, 1 session_trace tool,
|
|
19
|
+
# 1 Notion sync tool.
|
|
13
20
|
#
|
|
14
21
|
# All tools are defined inline via closures over an IndexReader instance.
|
|
15
22
|
# No Rails required at runtime — reads JSON files from disk.
|
|
@@ -27,10 +34,19 @@ module Woods
|
|
|
27
34
|
# @param retriever [Woods::Retriever, nil] Optional retriever for semantic search
|
|
28
35
|
# @param operator [Hash, nil] Optional operator config with :status_reporter, :error_escalator, :pipeline_guard, :pipeline_lock
|
|
29
36
|
# @param feedback_store [Woods::Feedback::Store, nil] Optional feedback store
|
|
37
|
+
# @param bootstrap_state [Woods::MCP::BootstrapState, nil] Optional state
|
|
38
|
+
# from the bootstrap flow. When provided, woods_status reports the
|
|
39
|
+
# hydrated/degraded/failed lifecycle plus the reason so operators can
|
|
40
|
+
# diagnose "why is semantic search disabled" without reading the Ruby
|
|
41
|
+
# source. Nil just means the caller didn't go through Bootstrapper.
|
|
42
|
+
# @param warmup [Boolean] Pre-populate the index reader's caches during build,
|
|
43
|
+
# shifting first-tool-call latency to startup. Default: true. Pass false for
|
|
44
|
+
# tests or when startup time matters more than first-query latency.
|
|
30
45
|
# @return [MCP::Server] Configured server ready for transport
|
|
31
46
|
def build(index_dir:, retriever: nil, operator: nil, feedback_store: nil, snapshot_store: nil,
|
|
32
|
-
response_format: nil)
|
|
47
|
+
bootstrap_state: nil, response_format: nil, warmup: true, retriever_reloader: nil)
|
|
33
48
|
reader = IndexReader.new(index_dir)
|
|
49
|
+
reader.warmup! if warmup
|
|
34
50
|
config = Woods.configuration
|
|
35
51
|
format = response_format || (config.respond_to?(:context_format) ? config.context_format : nil) || :markdown
|
|
36
52
|
renderer = ToolResponseRenderer.for(format)
|
|
@@ -39,6 +55,31 @@ module Woods
|
|
|
39
55
|
|
|
40
56
|
# Lambda captured by all tool blocks for building responses.
|
|
41
57
|
respond = method(:text_response)
|
|
58
|
+
respond_err = method(:error_response)
|
|
59
|
+
op_missing = lambda do |tool|
|
|
60
|
+
error_response(
|
|
61
|
+
'Pipeline operator is not configured. Pass `operator:` to Woods::MCP::Server.build ' \
|
|
62
|
+
'or use Woods::MCP::Bootstrapper to wire StatusReporter, ErrorEscalator, and PipelineGuard.',
|
|
63
|
+
code: :not_configured, config_key: 'operator',
|
|
64
|
+
doc_link: 'docs/OPERATOR_GUIDE.md', tool: tool
|
|
65
|
+
)
|
|
66
|
+
end
|
|
67
|
+
fb_missing = lambda do |tool|
|
|
68
|
+
error_response(
|
|
69
|
+
'Feedback store is not configured. Pass `feedback_store:` to Woods::MCP::Server.build ' \
|
|
70
|
+
'to enable retrieval feedback capture.',
|
|
71
|
+
code: :not_configured, config_key: 'feedback_store',
|
|
72
|
+
doc_link: 'docs/FEEDBACK_STORE.md', tool: tool
|
|
73
|
+
)
|
|
74
|
+
end
|
|
75
|
+
snap_missing = lambda do |tool|
|
|
76
|
+
error_response(
|
|
77
|
+
'Snapshot store is not configured. Set `enable_snapshots: true` in Woods.configure ' \
|
|
78
|
+
'and pass `snapshot_store:` to Woods::MCP::Server.build.',
|
|
79
|
+
code: :not_configured, config_key: 'enable_snapshots',
|
|
80
|
+
doc_link: 'docs/TEMPORAL_SNAPSHOTS.md', tool: tool
|
|
81
|
+
)
|
|
82
|
+
end
|
|
42
83
|
|
|
43
84
|
server = ::MCP::Server.new(
|
|
44
85
|
name: 'woods',
|
|
@@ -47,8 +88,8 @@ module Woods
|
|
|
47
88
|
resource_templates: resource_templates
|
|
48
89
|
)
|
|
49
90
|
|
|
50
|
-
define_lookup_tool(server, reader, respond, renderer)
|
|
51
|
-
define_search_tool(server, reader, respond, renderer)
|
|
91
|
+
define_lookup_tool(server, reader, respond, respond_err, renderer)
|
|
92
|
+
define_search_tool(server, reader, respond, respond_err, renderer)
|
|
52
93
|
define_traversal_tool(server, reader, respond, renderer,
|
|
53
94
|
name: 'dependencies',
|
|
54
95
|
description: 'Traverse forward dependencies of a unit (what it depends on). Returns a BFS tree with depth.',
|
|
@@ -61,17 +102,25 @@ module Woods
|
|
|
61
102
|
render_key: :dependents)
|
|
62
103
|
define_structure_tool(server, reader, respond, renderer)
|
|
63
104
|
define_graph_analysis_tool(server, reader, respond, renderer)
|
|
105
|
+
define_domain_clusters_tool(server, reader, respond, renderer)
|
|
64
106
|
define_pagerank_tool(server, reader, respond, renderer)
|
|
65
107
|
define_framework_tool(server, reader, respond, renderer)
|
|
66
108
|
define_recent_changes_tool(server, reader, respond, renderer)
|
|
67
|
-
define_reload_tool(server, reader, respond)
|
|
68
|
-
define_retrieve_tool(server, retriever, respond)
|
|
69
|
-
define_trace_flow_tool(server, reader, index_dir, respond, renderer)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
109
|
+
define_reload_tool(server, reader, respond, retriever_reloader)
|
|
110
|
+
define_retrieve_tool(server, retriever, respond, respond_err)
|
|
111
|
+
define_trace_flow_tool(server, reader, index_dir, respond, respond_err, renderer)
|
|
112
|
+
# Conditionally register collaborator-dependent tools. Historically
|
|
113
|
+
# all 15 stubs were registered unconditionally and returned
|
|
114
|
+
# isError: true when the wiring was missing — that added token
|
|
115
|
+
# noise to every LLM turn's tool catalog and invited the model to
|
|
116
|
+
# try tools guaranteed to fail. Only register when the collaborator
|
|
117
|
+
# is wired, so tools/list reflects what the server can actually do.
|
|
118
|
+
define_session_trace_tool(server, reader, respond, respond_err) if session_tracer_wired?
|
|
119
|
+
define_operator_tools(server, operator, respond, respond_err, op_missing) if operator
|
|
120
|
+
define_feedback_tools(server, feedback_store, respond, respond_err, fb_missing) if feedback_store
|
|
121
|
+
define_snapshot_tools(server, snapshot_store, respond, respond_err, snap_missing) if snapshot_store
|
|
122
|
+
define_notion_sync_tool(server, reader, index_dir, respond, respond_err) if notion_wired?
|
|
123
|
+
define_woods_status_tool(server, reader, retriever, index_dir, bootstrap_state, respond)
|
|
75
124
|
register_resource_handler(server, reader)
|
|
76
125
|
|
|
77
126
|
server
|
|
@@ -79,10 +128,67 @@ module Woods
|
|
|
79
128
|
|
|
80
129
|
private
|
|
81
130
|
|
|
131
|
+
# Session tracer requires a configured session_store on Woods.configuration.
|
|
132
|
+
# The tool reads the store inside its handler; skipping registration when
|
|
133
|
+
# the store is absent keeps tools/list honest.
|
|
134
|
+
#
|
|
135
|
+
# The `session_trace` handler itself only calls `store.read`. We
|
|
136
|
+
# ALSO probe `:sessions` as a defense-in-depth cheap contract
|
|
137
|
+
# check — every shipped store (File/Redis/SolidCache) implements
|
|
138
|
+
# both, so if a misconfigured store lacks `:sessions` it is almost
|
|
139
|
+
# certainly missing `:read` too, and we'd rather fail at wire-up
|
|
140
|
+
# than at first invocation. A record-only store (permitted by the
|
|
141
|
+
# middleware for backward-compatibility) will correctly drop out
|
|
142
|
+
# of tools/list here.
|
|
143
|
+
def session_tracer_wired?
|
|
144
|
+
config = Woods.configuration
|
|
145
|
+
return false unless config
|
|
146
|
+
return false unless config.respond_to?(:session_store)
|
|
147
|
+
|
|
148
|
+
store = config.session_store
|
|
149
|
+
return false if store.nil?
|
|
150
|
+
|
|
151
|
+
%i[read sessions].all? { |m| store.respond_to?(m) }
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Notion export needs both an API token and at least one database ID.
|
|
155
|
+
# NOTION_API_TOKEN env var overrides the config token (see
|
|
156
|
+
# docs/NOTION_EXPORT.md).
|
|
157
|
+
def notion_wired?
|
|
158
|
+
config = Woods.configuration
|
|
159
|
+
return false unless config
|
|
160
|
+
|
|
161
|
+
token = ENV['NOTION_API_TOKEN'] || (config.respond_to?(:notion_api_token) ? config.notion_api_token : nil)
|
|
162
|
+
ids = config.respond_to?(:notion_database_ids) ? config.notion_database_ids : nil
|
|
163
|
+
token && !token.empty? && ids && !ids.empty?
|
|
164
|
+
end
|
|
165
|
+
|
|
82
166
|
def text_response(text)
|
|
83
167
|
::MCP::Tool::Response.new([{ type: 'text', text: text }])
|
|
84
168
|
end
|
|
85
169
|
|
|
170
|
+
# Build a structured error response that carries machine-readable
|
|
171
|
+
# metadata alongside the human-readable text. Agents can branch on
|
|
172
|
+
# `_meta.error_code` (e.g. `:not_configured`, `:not_found`,
|
|
173
|
+
# `:rate_limited`, `:unsupported_argument`) without parsing the text.
|
|
174
|
+
#
|
|
175
|
+
# @param message [String] Human-readable explanation
|
|
176
|
+
# @param code [Symbol] Stable error code (machine-readable)
|
|
177
|
+
# @param config_key [String, nil] Offending configuration key when relevant
|
|
178
|
+
# @param doc_link [String, nil] Relative docs path explaining the fix
|
|
179
|
+
# @param extra [Hash] Additional meta fields (e.g., identifier:, tool:)
|
|
180
|
+
def error_response(message, code:, config_key: nil, doc_link: nil, **extra)
|
|
181
|
+
meta = { error_code: code }
|
|
182
|
+
meta[:config_key] = config_key if config_key
|
|
183
|
+
meta[:doc_link] = doc_link if doc_link
|
|
184
|
+
meta.merge!(extra) unless extra.empty?
|
|
185
|
+
::MCP::Tool::Response.new(
|
|
186
|
+
[{ type: 'text', text: message }],
|
|
187
|
+
error: true,
|
|
188
|
+
meta: meta
|
|
189
|
+
)
|
|
190
|
+
end
|
|
191
|
+
|
|
86
192
|
def truncate_section(array, limit)
|
|
87
193
|
return array unless array.is_a?(Array)
|
|
88
194
|
|
|
@@ -107,14 +213,55 @@ module Woods
|
|
|
107
213
|
value.is_a?(String) ? [value] : value
|
|
108
214
|
end
|
|
109
215
|
|
|
110
|
-
# Coerce a value to an Integer.
|
|
111
|
-
# to Integer; leaves existing Integers and nil unchanged.
|
|
112
|
-
# MCP clients may send "2" (string) instead of 2 (integer).
|
|
216
|
+
# Coerce a value to an Integer.
|
|
113
217
|
#
|
|
114
|
-
#
|
|
218
|
+
# - `nil` passes through unchanged.
|
|
219
|
+
# - `Integer` passes through unchanged.
|
|
220
|
+
# - `String` is accepted iff it represents a decimal integer with an
|
|
221
|
+
# optional leading `+`/`-`. `"abc"` and `"1abc"` used to silently
|
|
222
|
+
# coerce to `0` via `String#to_i`; that was a footgun for tools with
|
|
223
|
+
# integer bounds (limit, offset, budget, timeout) — they'd receive
|
|
224
|
+
# the wrong value without any feedback to the client. Now we raise
|
|
225
|
+
# `ArgumentError` so the MCP dispatch layer can surface a proper
|
|
226
|
+
# JSON-RPC error back to the caller.
|
|
227
|
+
# - Any other type raises `ArgumentError`.
|
|
228
|
+
#
|
|
229
|
+
# @param value [String, Integer, nil]
|
|
115
230
|
# @return [Integer, nil]
|
|
231
|
+
# @raise [ArgumentError] if `value` is not nil, Integer, or an Integer-shaped String.
|
|
232
|
+
INTEGER_STRING = /\A[+-]?\d+\z/
|
|
233
|
+
private_constant :INTEGER_STRING
|
|
116
234
|
def coerce_integer(value)
|
|
117
|
-
|
|
235
|
+
return nil if value.nil?
|
|
236
|
+
return value if value.is_a?(Integer)
|
|
237
|
+
|
|
238
|
+
return Integer(value, 10) if value.is_a?(String) && value.match?(INTEGER_STRING)
|
|
239
|
+
|
|
240
|
+
raise ArgumentError, "expected integer, got #{value.class}: #{value.inspect}"
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# Load a precomputed flow document written by FlowPrecomputer, when
|
|
244
|
+
# `config.precompute_flows` was enabled during extraction. Returns nil
|
|
245
|
+
# when the entry point is missing a method suffix, the JSON file isn't
|
|
246
|
+
# on disk, or the file can't be parsed — callers fall back to
|
|
247
|
+
# FlowAssembler.
|
|
248
|
+
#
|
|
249
|
+
# @param index_dir [String]
|
|
250
|
+
# @param entry_point [String] e.g., "PostsController#create"
|
|
251
|
+
# @return [Woods::FlowDocument, nil]
|
|
252
|
+
def load_precomputed_flow(index_dir, entry_point)
|
|
253
|
+
return nil unless entry_point.to_s.include?('#')
|
|
254
|
+
|
|
255
|
+
controller, action = entry_point.split('#', 2)
|
|
256
|
+
return nil if controller.empty? || action.empty?
|
|
257
|
+
|
|
258
|
+
filename = "#{controller.gsub('::', '__')}_#{action}.json"
|
|
259
|
+
path = File.join(index_dir, 'flows', filename)
|
|
260
|
+
return nil unless File.exist?(path)
|
|
261
|
+
|
|
262
|
+
Woods::FlowDocument.from_h(JSON.parse(File.read(path)))
|
|
263
|
+
rescue JSON::ParserError, Errno::ENOENT
|
|
264
|
+
nil
|
|
118
265
|
end
|
|
119
266
|
|
|
120
267
|
# Apply offset+limit pagination to a single section key within a container hash.
|
|
@@ -138,26 +285,40 @@ module Woods
|
|
|
138
285
|
container["#{key}_offset"] = offset if offset.positive?
|
|
139
286
|
end
|
|
140
287
|
|
|
141
|
-
def define_lookup_tool(server, reader, respond, renderer)
|
|
288
|
+
def define_lookup_tool(server, reader, respond, respond_err, renderer)
|
|
142
289
|
coerce = method(:coerce_array)
|
|
143
290
|
server.define_tool(
|
|
144
291
|
name: 'lookup',
|
|
145
292
|
description: 'Look up a code unit by its exact identifier. Returns full source code, metadata, ' \
|
|
146
293
|
'dependencies, and dependents. Use include_source: false to omit source_code. ' \
|
|
147
|
-
'Use sections to select specific keys (type, identifier, file_path, namespace are always included).'
|
|
294
|
+
'Use sections to select specific keys (type, identifier, file_path, namespace are always included). ' \
|
|
295
|
+
'`name` is accepted as an alias for `identifier` for discoverability.',
|
|
148
296
|
input_schema: {
|
|
149
297
|
properties: {
|
|
150
298
|
identifier: { type: 'string',
|
|
151
299
|
description: 'Exact unit identifier (e.g. "Post", "PostsController", "Api::V1::HealthController")' },
|
|
300
|
+
name: { type: 'string', description: 'Alias for `identifier`. Either one works.' },
|
|
152
301
|
include_source: { type: 'boolean', description: 'Include source_code in response (default: true)' },
|
|
153
302
|
sections: {
|
|
154
303
|
type: 'array', items: { type: 'string' },
|
|
155
304
|
description: 'Select specific keys to return (e.g. ["metadata", "dependencies"]). Always includes type, identifier, file_path, namespace.'
|
|
156
305
|
}
|
|
157
|
-
}
|
|
158
|
-
|
|
306
|
+
}
|
|
307
|
+
# NOTE: 'identifier' is not listed as required — `name` is an
|
|
308
|
+
# accepted alias. The handler validates that one of the two
|
|
309
|
+
# was provided.
|
|
159
310
|
}
|
|
160
|
-
) do |
|
|
311
|
+
) do |server_context:, identifier: nil, name: nil, include_source: nil, sections: nil|
|
|
312
|
+
identifier ||= name
|
|
313
|
+
if identifier.nil? || identifier.empty?
|
|
314
|
+
next respond_err.call(
|
|
315
|
+
'lookup requires `identifier` (or its alias `name`).',
|
|
316
|
+
code: :unsupported_argument,
|
|
317
|
+
tool: 'lookup',
|
|
318
|
+
argument: 'identifier',
|
|
319
|
+
hint: 'Pass identifier: "PostsController" (or name: "PostsController").'
|
|
320
|
+
)
|
|
321
|
+
end
|
|
161
322
|
sections = coerce.call(sections)
|
|
162
323
|
unit = reader.find_unit(identifier)
|
|
163
324
|
if unit
|
|
@@ -170,47 +331,87 @@ module Woods
|
|
|
170
331
|
end
|
|
171
332
|
respond.call(renderer.render(:lookup, filtered))
|
|
172
333
|
else
|
|
173
|
-
|
|
334
|
+
respond_err.call(
|
|
335
|
+
"Unit not found: #{identifier}",
|
|
336
|
+
code: :not_found,
|
|
337
|
+
identifier: identifier,
|
|
338
|
+
tool: 'lookup',
|
|
339
|
+
hint: 'Use `search` to find identifiers by pattern, then `lookup` on the exact match.'
|
|
340
|
+
)
|
|
174
341
|
end
|
|
175
342
|
end
|
|
176
343
|
end
|
|
177
344
|
|
|
178
|
-
def define_search_tool(server, reader, respond, renderer)
|
|
345
|
+
def define_search_tool(server, reader, respond, respond_err, renderer)
|
|
179
346
|
coerce = method(:coerce_array)
|
|
180
347
|
coerce_int = method(:coerce_integer)
|
|
181
348
|
server.define_tool(
|
|
182
349
|
name: 'search',
|
|
183
|
-
description: '
|
|
350
|
+
description: 'Find code units whose identifiers (or source/metadata) match a regex. ' \
|
|
351
|
+
'Example: search("Worker|Job") returns all workers and jobs; search("^Post") ' \
|
|
352
|
+
'returns units starting with "Post". Returns [{identifier, type, match_field}]. ' \
|
|
353
|
+
'Use `lookup` for exact identifiers, `dependencies`/`dependents` for graph traversal. ' \
|
|
354
|
+
'Gotchas: query is a Ruby regex — literal pipe needs escaping as \\|; ' \
|
|
355
|
+
'types restricts which index directories are scanned (e.g. ["mailer"] scans only ' \
|
|
356
|
+
'the mailers dir); invalid regex falls back to literal match. ' \
|
|
357
|
+
'For plain prefix/suffix matching on namespaces, prefer exact_prefix / exact_suffix ' \
|
|
358
|
+
'(literal, case-insensitive) over escaping regex anchors.',
|
|
184
359
|
input_schema: {
|
|
185
360
|
properties: {
|
|
186
|
-
query: { type: 'string', description: '
|
|
361
|
+
query: { type: 'string', description: 'Case-insensitive Ruby regex pattern (e.g. "Worker|Job", "^Post", ".*Service$")' },
|
|
187
362
|
types: {
|
|
188
363
|
type: 'array', items: { type: 'string' },
|
|
189
|
-
description: '
|
|
364
|
+
description: 'Restrict scan to these unit types: model, controller, service, job, mailer, etc.'
|
|
190
365
|
},
|
|
191
366
|
fields: {
|
|
192
367
|
type: 'array', items: { type: 'string' },
|
|
193
|
-
description: 'Fields to search: identifier, source_code, metadata
|
|
368
|
+
description: 'Fields to search: identifier (default), source_code, metadata'
|
|
194
369
|
},
|
|
195
|
-
limit: { type: 'integer', description: 'Maximum results (default: 20)' }
|
|
196
|
-
|
|
197
|
-
|
|
370
|
+
limit: { type: 'integer', description: 'Maximum results (default: 20)' },
|
|
371
|
+
exact_prefix: {
|
|
372
|
+
type: 'string',
|
|
373
|
+
description: 'Literal (non-regex) case-insensitive identifier prefix filter. ' \
|
|
374
|
+
'Use for namespace scoping like "Next::Settings::" without escaping regex metacharacters.'
|
|
375
|
+
},
|
|
376
|
+
exact_suffix: {
|
|
377
|
+
type: 'string',
|
|
378
|
+
description: 'Literal (non-regex) case-insensitive identifier suffix filter. ' \
|
|
379
|
+
'Use for suffix matching like "Controller" without escaping regex metacharacters.'
|
|
380
|
+
}
|
|
381
|
+
}
|
|
198
382
|
}
|
|
199
|
-
) do |
|
|
383
|
+
) do |server_context:, query: nil, types: nil, fields: nil, limit: nil, exact_prefix: nil, exact_suffix: nil|
|
|
384
|
+
if (query.nil? || query.empty?) &&
|
|
385
|
+
(exact_prefix.nil? || exact_prefix.empty?) &&
|
|
386
|
+
(exact_suffix.nil? || exact_suffix.empty?)
|
|
387
|
+
next respond_err.call(
|
|
388
|
+
'search requires `query` or at least one of `exact_prefix` / `exact_suffix`.',
|
|
389
|
+
code: :unsupported_argument,
|
|
390
|
+
tool: 'search',
|
|
391
|
+
argument: 'query',
|
|
392
|
+
hint: 'Pass query: "Worker|Job" for regex matching, or exact_prefix: "Next::Settings::" for literal prefix scoping.'
|
|
393
|
+
)
|
|
394
|
+
end
|
|
200
395
|
types = coerce.call(types)
|
|
201
396
|
fields = coerce.call(fields)
|
|
202
397
|
limit = coerce_int.call(limit)
|
|
203
|
-
|
|
398
|
+
search_result = reader.search(
|
|
204
399
|
query,
|
|
205
400
|
types: types,
|
|
206
401
|
fields: fields || %w[identifier],
|
|
207
|
-
limit: limit || 20
|
|
402
|
+
limit: limit || 20,
|
|
403
|
+
exact_prefix: exact_prefix,
|
|
404
|
+
exact_suffix: exact_suffix
|
|
208
405
|
)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
406
|
+
results = search_result[:results]
|
|
407
|
+
payload = {
|
|
408
|
+
query: query,
|
|
409
|
+
result_count: results.size,
|
|
410
|
+
results: results
|
|
411
|
+
}
|
|
412
|
+
payload[:note] = search_result[:note] if search_result[:note]
|
|
413
|
+
payload[:partial] = true if search_result[:partial]
|
|
414
|
+
respond.call(renderer.render(:search, payload))
|
|
214
415
|
end
|
|
215
416
|
end
|
|
216
417
|
|
|
@@ -227,14 +428,23 @@ module Woods
|
|
|
227
428
|
types: {
|
|
228
429
|
type: 'array', items: { type: 'string' },
|
|
229
430
|
description: 'Filter to these types'
|
|
431
|
+
},
|
|
432
|
+
via: {
|
|
433
|
+
type: 'array', items: { type: 'string' },
|
|
434
|
+
description: 'Filter by relationship type. Accepts either a single string ' \
|
|
435
|
+
"(e.g. 'code_reference') or an array " \
|
|
436
|
+
"(e.g. ['code_reference','render']); both forms are coerced to an array internally. " \
|
|
437
|
+
'Known values: link_to, redirect_to, form_action, render, code_reference, ' \
|
|
438
|
+
'belongs_to, has_many, has_one, has_and_belongs_to_many.'
|
|
230
439
|
}
|
|
231
440
|
},
|
|
232
441
|
required: ['identifier']
|
|
233
442
|
}
|
|
234
|
-
) do |identifier:, server_context:, depth: nil, types: nil|
|
|
443
|
+
) do |identifier:, server_context:, depth: nil, types: nil, via: nil|
|
|
235
444
|
types = coerce.call(types)
|
|
445
|
+
via = coerce.call(via)
|
|
236
446
|
depth = coerce_int.call(depth)
|
|
237
|
-
result = reader.send(reader_method, identifier, depth: depth || 2, types: types)
|
|
447
|
+
result = reader.send(reader_method, identifier, depth: depth || 2, types: types, via: via)
|
|
238
448
|
if result[:found] == false
|
|
239
449
|
result[:message] =
|
|
240
450
|
"Identifier '#{identifier}' not found in the index. Use 'search' to find valid identifiers."
|
|
@@ -256,7 +466,7 @@ module Woods
|
|
|
256
466
|
}
|
|
257
467
|
}
|
|
258
468
|
) do |server_context:, detail: nil|
|
|
259
|
-
result = { manifest: reader.manifest }
|
|
469
|
+
result = { manifest: reader.manifest, template_engines: reader.template_engines }
|
|
260
470
|
result[:summary] = reader.summary if (detail || 'summary') == 'full'
|
|
261
471
|
respond.call(renderer.render(:structure, result))
|
|
262
472
|
end
|
|
@@ -306,6 +516,39 @@ module Woods
|
|
|
306
516
|
end
|
|
307
517
|
end
|
|
308
518
|
|
|
519
|
+
def define_domain_clusters_tool(server, reader, respond, renderer)
|
|
520
|
+
coerce = method(:coerce_array)
|
|
521
|
+
coerce_int = method(:coerce_integer)
|
|
522
|
+
server.define_tool(
|
|
523
|
+
name: 'domain_clusters',
|
|
524
|
+
description: 'Group code units into semantic domains by namespace and graph connectivity. ' \
|
|
525
|
+
'Returns clusters with hub nodes, entry points, boundary edges, and type breakdowns. ' \
|
|
526
|
+
'Useful for understanding architectural domains and blast radius.',
|
|
527
|
+
input_schema: {
|
|
528
|
+
properties: {
|
|
529
|
+
min_size: {
|
|
530
|
+
type: 'integer',
|
|
531
|
+
description: 'Minimum units per cluster before merging into neighbors (default: 3)'
|
|
532
|
+
},
|
|
533
|
+
types: {
|
|
534
|
+
type: 'array', items: { type: 'string' },
|
|
535
|
+
description: 'Filter to these unit types (default: all). Example: ["model", "service", "job"]'
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
) do |server_context:, min_size: nil, types: nil|
|
|
540
|
+
min_size = coerce_int.call(min_size) || 3
|
|
541
|
+
types = coerce.call(types)
|
|
542
|
+
|
|
543
|
+
graph = reader.dependency_graph
|
|
544
|
+
analyzer = Woods::GraphAnalyzer.new(graph)
|
|
545
|
+
|
|
546
|
+
clusters = analyzer.domain_clusters(min_size: min_size, types: types)
|
|
547
|
+
|
|
548
|
+
respond.call(renderer.render(:domain_clusters, { clusters: clusters, total: clusters.size }))
|
|
549
|
+
end
|
|
550
|
+
end
|
|
551
|
+
|
|
309
552
|
def define_pagerank_tool(server, reader, respond, renderer)
|
|
310
553
|
coerce = method(:coerce_array)
|
|
311
554
|
coerce_int = method(:coerce_integer)
|
|
@@ -400,56 +643,127 @@ module Woods
|
|
|
400
643
|
end
|
|
401
644
|
end
|
|
402
645
|
|
|
403
|
-
def define_reload_tool(server, reader, respond)
|
|
646
|
+
def define_reload_tool(server, reader, respond, retriever_reloader)
|
|
404
647
|
server.define_tool(
|
|
405
648
|
name: 'reload',
|
|
406
|
-
description: 'Reload extraction data from disk. Use after re-running extraction to pick
|
|
407
|
-
'without restarting the server.'
|
|
649
|
+
description: 'Reload extraction data from disk. Use after re-running extraction or woods:embed to pick ' \
|
|
650
|
+
'up changes without restarting the server. Refreshes the JSON index (manifest, dependency ' \
|
|
651
|
+
'graph, unit cache) AND re-hydrates the retriever\'s in-memory vector/metadata/graph ' \
|
|
652
|
+
'stores from the latest dumps. Durable backends (pgvector, Qdrant) are auto-refreshed ' \
|
|
653
|
+
'externally — their counts in the response reflect the read-through state.',
|
|
408
654
|
input_schema: { type: 'object', properties: {} }
|
|
409
655
|
) do |server_context:|
|
|
410
656
|
reader.reload!
|
|
411
657
|
manifest = reader.manifest
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
658
|
+
payload = {
|
|
659
|
+
reloaded: true,
|
|
660
|
+
extracted_at: manifest['extracted_at'],
|
|
661
|
+
total_units: manifest['total_units'],
|
|
662
|
+
counts: manifest['counts']
|
|
663
|
+
}
|
|
664
|
+
if retriever_reloader
|
|
665
|
+
begin
|
|
666
|
+
payload[:retriever] = retriever_reloader.call
|
|
667
|
+
rescue StandardError => e
|
|
668
|
+
payload[:retriever] = { error: "#{e.class}: #{e.message}" }
|
|
669
|
+
end
|
|
670
|
+
end
|
|
671
|
+
respond.call(JSON.pretty_generate(payload))
|
|
418
672
|
end
|
|
419
673
|
end
|
|
420
674
|
|
|
421
|
-
# Registers the `codebase_retrieve` MCP tool on +server+.
#
# Handler behaviour, in order:
# 1. A non-nil +limit+ is rejected through +respond_err+ with
#    +code: :unsupported_argument+ (the tool takes +budget+, not +limit+).
# 2. +budget+, +types+ and +exclude_types+ are passed through the
#    +coerce_integer+ / +coerce_array+ helpers.
# 3. With a retriever, +retriever.retrieve+ is called (budget defaults to
#    8000) and the result's +context+ is handed to +respond+.
# 4. Without a retriever, +respond_err+ is called with +code: :not_configured+.
#
# @param server [#define_tool] tool registry the handler is attached to
# @param retriever [#retrieve, nil] semantic retriever, nil when none is available
# @param respond [#call] builds a successful tool response from text
# @param respond_err [#call] builds an error response from a message plus metadata
def define_retrieve_tool(server, retriever, respond, respond_err)
  to_int = method(:coerce_integer)
  to_list = method(:coerce_array)

  tool_description =
    'Semantic search: retrieve relevant code units for a natural-language question. ' \
    'Example: codebase_retrieve("how does billing work?") returns ranked source context. ' \
    'Returns a token-budgeted context string ready to paste into a prompt. ' \
    'Use `search` for exact name/pattern matching; use this for conceptual questions. ' \
    'Requires an embedding provider — disabled if OPENAI_API_KEY is unset and Ollama is unreachable. ' \
    'By default excludes test_mappings (~33% of a typical index) so spec filenames do not ' \
    'dominate semantic rank; pass types: ["test_mapping"] to opt back in. ' \
    'Parameter: use `budget` for the token budget (not `limit` — that means result count ' \
    'on sibling tools, and mapping it here would silently produce a near-empty response).'

  types_description =
    'Restrict results to these unit types (model, controller, service, job, mailer, ' \
    'rails_source, test_mapping, etc.). Overrides the default test_mapping exclusion. ' \
    'When the unfiltered top-K has no candidate of a requested type, the retriever ' \
    'falls back to rank-within-type so the response is populated whenever units of ' \
    'the requested type exist in the index. The response appends a "Type rank ' \
    'context" table with per-type: source, rank in unfiltered top-K, global_k, ' \
    'total_of_type. Read source to tell the cases apart: in_top_k (strong match), ' \
    'within_type_fallback (weak match surfaced by the fallback), outside_top_k ' \
    '(index has this type but other requested types filled the result), absent ' \
    '(zero units of this type in the index).'

  schema = {
    properties: {
      query: { type: 'string',
               description: 'Natural language question (e.g. "How does user authentication work?")' },
      budget: { type: 'integer',
                description: 'Token budget for context assembly (default: 8000).' },
      types: { type: 'array', items: { type: 'string' }, description: types_description },
      exclude_types: {
        type: 'array', items: { type: 'string' },
        description: 'Additional types to exclude on top of the default test_mapping exclusion.'
      }
    },
    required: ['query']
  }

  server.define_tool(
    name: 'codebase_retrieve', description: tool_description, input_schema: schema
  ) do |query:, server_context:, budget: nil, limit: nil, types: nil, exclude_types: nil|
    if !limit.nil?
      # `limit` is absent from the schema, yet clients send it because the
      # sibling tools (search, recent_changes, pagerank) use it as a result
      # count. Treating it as `budget` would quietly yield a near-empty
      # answer (limit: 10 -> 10-token budget), so reject it explicitly.
      respond_err.call(
        'codebase_retrieve uses `budget` (token budget, default 8000), not `limit`. ' \
        '`limit` is the result-count parameter on sibling tools (search, recent_changes, pagerank). ' \
        "Pass `budget: #{to_int.call(limit)}` if you meant a #{to_int.call(limit)}-token context, " \
        'or drop the kwarg entirely for the default 8000.',
        code: :unsupported_argument,
        tool: 'codebase_retrieve',
        argument: 'limit',
        hint: 'Use `budget:` for tokens. Retrieval does not cap by result count — the token budget ' \
              'governs how many ranked units fit in the returned context.'
      )
    else
      token_budget = to_int.call(budget)
      wanted = to_list.call(types)
      unwanted = to_list.call(exclude_types)

      if retriever
        found = retriever.retrieve(
          query,
          budget: token_budget || 8000,
          types: wanted,
          exclude_types: unwanted
        )
        respond.call(found.context)
      else
        respond_err.call(
          'Semantic search is disabled — no embedding provider is configured. ' \
          'To enable: set OPENAI_API_KEY, or run Ollama locally ' \
          '(brew install ollama && ollama serve && ollama pull nomic-embed-text). ' \
          'Use the `search` tool for pattern-based matching in the meantime.',
          code: :not_configured,
          config_key: 'embedding_provider',
          doc_link: 'docs/RETRIEVAL_GUIDE.md#configuring-retrieval',
          tool: 'codebase_retrieve'
        )
      end
    end
  end
end
|
|
448
760
|
|
|
449
|
-
def define_trace_flow_tool(server, reader, index_dir, respond, renderer)
|
|
761
|
+
def define_trace_flow_tool(server, reader, index_dir, respond, respond_err, renderer)
|
|
450
762
|
require_relative '../flow_assembler'
|
|
763
|
+
require_relative '../flow_document'
|
|
451
764
|
require_relative '../dependency_graph'
|
|
452
765
|
coerce_int = method(:coerce_integer)
|
|
766
|
+
load_precomputed = method(:load_precomputed_flow)
|
|
453
767
|
|
|
454
768
|
server.define_tool(
|
|
455
769
|
name: 'trace_flow',
|
|
@@ -469,21 +783,33 @@ module Woods
|
|
|
469
783
|
}
|
|
470
784
|
) do |entry_point:, server_context:, depth: nil|
|
|
471
785
|
max_depth = coerce_int.call(depth) || 3
|
|
472
|
-
graph = reader.dependency_graph
|
|
473
786
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
flow_doc =
|
|
787
|
+
# Prefer the precomputed flow JSON written by FlowPrecomputer during
|
|
788
|
+
# extraction (gated on `config.precompute_flows`) — it avoids
|
|
789
|
+
# re-parsing source on every request. Fall back to query-time
|
|
790
|
+
# reassembly when no precomputed document exists.
|
|
791
|
+
flow_doc = load_precomputed.call(index_dir, entry_point)
|
|
792
|
+
flow_doc ||= begin
|
|
793
|
+
graph = reader.dependency_graph
|
|
794
|
+
assembler = Woods::FlowAssembler.new(graph: graph, extracted_dir: index_dir)
|
|
795
|
+
assembler.assemble(entry_point, max_depth: max_depth)
|
|
796
|
+
end
|
|
479
797
|
|
|
480
798
|
respond.call(renderer.render(:trace_flow, flow_doc.to_h))
|
|
481
799
|
rescue StandardError => e
|
|
482
|
-
|
|
800
|
+
# Emit an MCP error so clients can detect the failure and
|
|
801
|
+
# surface it, rather than wrapping the error payload in a
|
|
802
|
+
# successful response — consistent with session_trace and
|
|
803
|
+
# codebase_retrieve.
|
|
804
|
+
respond_err.call(
|
|
805
|
+
"trace_flow failed: #{e.message}",
|
|
806
|
+
code: :internal_error,
|
|
807
|
+
data: { entry_point: entry_point, exception: e.class.name }
|
|
808
|
+
)
|
|
483
809
|
end
|
|
484
810
|
end
|
|
485
811
|
|
|
486
|
-
def define_session_trace_tool(server, reader, respond)
|
|
812
|
+
def define_session_trace_tool(server, reader, respond, respond_err)
|
|
487
813
|
coerce_int = method(:coerce_integer)
|
|
488
814
|
server.define_tool(
|
|
489
815
|
name: 'session_trace',
|
|
@@ -500,7 +826,16 @@ module Woods
|
|
|
500
826
|
budget = coerce_int.call(budget)
|
|
501
827
|
depth = coerce_int.call(depth)
|
|
502
828
|
store = Woods.configuration.session_store
|
|
503
|
-
|
|
829
|
+
unless store
|
|
830
|
+
next respond_err.call(
|
|
831
|
+
'Session tracer is not configured. Assign `session_store` (FileStore, RedisStore, or SolidCacheStore) ' \
|
|
832
|
+
'and set `session_tracer_enabled = true` in Woods.configure.',
|
|
833
|
+
code: :not_configured,
|
|
834
|
+
config_key: 'session_store',
|
|
835
|
+
doc_link: 'docs/SESSION_TRACER.md',
|
|
836
|
+
tool: 'session_trace'
|
|
837
|
+
)
|
|
838
|
+
end
|
|
504
839
|
|
|
505
840
|
require_relative '../session_tracer/session_flow_assembler'
|
|
506
841
|
|
|
@@ -510,26 +845,31 @@ module Woods
|
|
|
510
845
|
doc = assembler.assemble(session_id, budget: budget || 8000, depth: depth || 1)
|
|
511
846
|
respond.call(doc.to_markdown)
|
|
512
847
|
rescue StandardError => e
|
|
513
|
-
|
|
848
|
+
respond_err.call(
|
|
849
|
+
"Session trace failed: #{e.message}",
|
|
850
|
+
code: :internal_error,
|
|
851
|
+
tool: 'session_trace',
|
|
852
|
+
session_id: session_id
|
|
853
|
+
)
|
|
514
854
|
end
|
|
515
855
|
end
|
|
516
856
|
|
|
517
|
-
# Registers the operator-facing pipeline tools (extract, embed, status,
# diagnose, repair) on +server+, in that order. Every definer receives the
# same five arguments; the value of the last registration is returned.
def define_operator_tools(server, operator, respond, respond_err, op_missing)
  %i[
    define_pipeline_extract_tool
    define_pipeline_embed_tool
    define_pipeline_status_tool
    define_pipeline_diagnose_tool
    define_pipeline_repair_tool
  ].map { |definer| send(definer, server, operator, respond, respond_err, op_missing) }.last
end
|
|
524
864
|
|
|
525
|
-
# Registers the retrieval-feedback tools (rate, report_gap, explain, suggest)
# on +server+, in that order. The fourth parameter is accepted for signature
# parity with the other tool groups but is not forwarded; each definer gets
# +fb_missing+ instead. Returns the value of the last registration.
def define_feedback_tools(server, feedback_store, respond, _respond_err, fb_missing)
  shared = [server, feedback_store, respond, fb_missing]
  %i[
    define_retrieval_rate_tool
    define_retrieval_report_gap_tool
    define_retrieval_explain_tool
    define_retrieval_suggest_tool
  ].map { |definer| send(definer, *shared) }.last
end
|
|
531
871
|
|
|
532
|
-
def define_pipeline_extract_tool(server, operator, respond)
|
|
872
|
+
def define_pipeline_extract_tool(server, operator, respond, respond_err, op_missing)
|
|
533
873
|
server.define_tool(
|
|
534
874
|
name: 'pipeline_extract',
|
|
535
875
|
description: 'Trigger a codebase extraction pipeline run. Checks rate limits before proceeding.',
|
|
@@ -539,11 +879,31 @@ module Woods
|
|
|
539
879
|
}
|
|
540
880
|
}
|
|
541
881
|
) do |server_context:, incremental: nil|
|
|
542
|
-
next
|
|
882
|
+
next op_missing.call('pipeline_extract') unless operator
|
|
543
883
|
|
|
544
884
|
guard = operator[:pipeline_guard]
|
|
545
|
-
|
|
885
|
+
if guard && !guard.allow?(:extraction)
|
|
886
|
+
next respond_err.call(
|
|
887
|
+
'Extraction is rate-limited. Try again later.',
|
|
888
|
+
code: :rate_limited,
|
|
889
|
+
tool: 'pipeline_extract',
|
|
890
|
+
retry_after_seconds: 300
|
|
891
|
+
)
|
|
892
|
+
end
|
|
546
893
|
|
|
894
|
+
# Acquire the in-process lock BEFORE recording to the guard.
|
|
895
|
+
# Otherwise a refused "already running" request still resets
|
|
896
|
+
# the cooldown clock and blocks the next legitimate attempt
|
|
897
|
+
# for the full 5-minute window once the current run finishes.
|
|
898
|
+
unless Woods::MCP::Server.send(:pipeline_start, :extraction)
|
|
899
|
+
next respond_err.call(
|
|
900
|
+
'Extraction pipeline is already running. Wait for it to complete.',
|
|
901
|
+
code: :already_running,
|
|
902
|
+
tool: 'pipeline_extract'
|
|
903
|
+
)
|
|
904
|
+
end
|
|
905
|
+
|
|
906
|
+
# Lock acquired — now it's safe to record the run.
|
|
547
907
|
guard&.record!(:extraction)
|
|
548
908
|
|
|
549
909
|
Thread.new do
|
|
@@ -554,6 +914,8 @@ module Woods
|
|
|
554
914
|
rescue StandardError => e
|
|
555
915
|
logger = defined?(Rails) ? Rails.logger : Logger.new($stderr)
|
|
556
916
|
logger.error("[Woods] Pipeline extract failed: #{e.message}")
|
|
917
|
+
ensure
|
|
918
|
+
Woods::MCP::Server.send(:pipeline_finish, :extraction)
|
|
557
919
|
end
|
|
558
920
|
|
|
559
921
|
respond.call(JSON.pretty_generate({
|
|
@@ -563,7 +925,7 @@ module Woods
|
|
|
563
925
|
end
|
|
564
926
|
end
|
|
565
927
|
|
|
566
|
-
def define_pipeline_embed_tool(server, operator, respond)
|
|
928
|
+
def define_pipeline_embed_tool(server, operator, respond, respond_err, op_missing)
|
|
567
929
|
server.define_tool(
|
|
568
930
|
name: 'pipeline_embed',
|
|
569
931
|
description: 'Trigger embedding generation for extracted units. Checks rate limits before proceeding.',
|
|
@@ -573,29 +935,43 @@ module Woods
|
|
|
573
935
|
}
|
|
574
936
|
}
|
|
575
937
|
) do |server_context:, incremental: nil|
|
|
576
|
-
next
|
|
938
|
+
next op_missing.call('pipeline_embed') unless operator
|
|
577
939
|
|
|
578
940
|
guard = operator[:pipeline_guard]
|
|
579
|
-
|
|
941
|
+
if guard && !guard.allow?(:embedding)
|
|
942
|
+
next respond_err.call(
|
|
943
|
+
'Embedding is rate-limited. Try again later.',
|
|
944
|
+
code: :rate_limited,
|
|
945
|
+
tool: 'pipeline_embed',
|
|
946
|
+
retry_after_seconds: 300
|
|
947
|
+
)
|
|
948
|
+
end
|
|
949
|
+
|
|
950
|
+
# Acquire the in-process lock first so a refused "already
|
|
951
|
+
# running" request doesn't burn the cooldown clock.
|
|
952
|
+
unless Woods::MCP::Server.send(:pipeline_start, :embedding)
|
|
953
|
+
next respond_err.call(
|
|
954
|
+
'Embedding pipeline is already running. Wait for it to complete.',
|
|
955
|
+
code: :already_running,
|
|
956
|
+
tool: 'pipeline_embed'
|
|
957
|
+
)
|
|
958
|
+
end
|
|
580
959
|
|
|
581
960
|
guard&.record!(:embedding)
|
|
582
961
|
|
|
583
962
|
Thread.new do
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
indexer = Woods::
|
|
590
|
-
provider: provider,
|
|
591
|
-
text_preparer: text_preparer,
|
|
592
|
-
vector_store: vector_store,
|
|
593
|
-
output_dir: config.output_dir
|
|
594
|
-
)
|
|
963
|
+
# Share the rake-task wiring so the MCP path picks up the
|
|
964
|
+
# provider-tuned TextPreparer + token-aware chunker. Without
|
|
965
|
+
# this, MCP-triggered embedding still hit Ollama's "input
|
|
966
|
+
# length exceeds context length" error after the rake path
|
|
967
|
+
# was fixed in PR #70.
|
|
968
|
+
indexer = Woods::Tasks.build_embed_indexer
|
|
595
969
|
incremental ? indexer.index_incremental : indexer.index_all
|
|
596
970
|
rescue StandardError => e
|
|
597
971
|
logger = defined?(Rails) ? Rails.logger : Logger.new($stderr)
|
|
598
972
|
logger.error("[Woods] Pipeline embed failed: #{e.message}")
|
|
973
|
+
ensure
|
|
974
|
+
Woods::MCP::Server.send(:pipeline_finish, :embedding)
|
|
599
975
|
end
|
|
600
976
|
|
|
601
977
|
respond.call(JSON.pretty_generate({
|
|
@@ -605,23 +981,50 @@ module Woods
|
|
|
605
981
|
end
|
|
606
982
|
end
|
|
607
983
|
|
|
608
|
-
|
|
984
|
+
# Guards the in-flight table below. It is created once, when this code is
# loaded, rather than lazily inside +pipeline_start+: a lazy
# `@mutex ||= Mutex.new` is itself a check-then-set race, so two threads
# making the very first call could each build their own Mutex and both
# "acquire" the same pipeline kind.
PIPELINE_MUTEX = Mutex.new unless defined?(PIPELINE_MUTEX)

# Atomically claim the in-process slot for one kind of pipeline.
#
# Module-level state: a single MCP server process serializes its own
# pipelines per kind.
#
# @param kind [Symbol] pipeline kind, e.g. +:extraction+ or +:embedding+
# @return [Boolean] true when the slot was free and is now held by the
#   caller; false when that kind is already in flight, so the caller can
#   refuse the new request instead of racing the running pipeline
def pipeline_start(kind)
  PIPELINE_MUTEX.synchronize do
    @pipeline_in_flight ||= {}
    next false if @pipeline_in_flight[kind]

    @pipeline_in_flight[kind] = true
    true
  end
end

# Release the slot claimed by +pipeline_start+. Safe to call for a kind that
# was never started.
#
# @param kind [Symbol] pipeline kind to release
# @return [true, nil] the removed marker, or nil when nothing was held
def pipeline_finish(kind)
  PIPELINE_MUTEX.synchronize { @pipeline_in_flight&.delete(kind) }
end
|
|
1003
|
+
|
|
1004
|
+
# Registers the `pipeline_status` MCP tool on +server+.
#
# The handler answers in one of three ways:
# - no +operator+: delegates to +op_missing+ with the tool name;
# - operator without a +:status_reporter+ entry: +respond_err+ with
#   +code: :not_configured+;
# - otherwise: the reporter's +report+ rendered as pretty JSON via +respond+.
#
# @param server [#define_tool] tool registry the handler is attached to
# @param operator [#[], nil] operator components, looked up by Symbol key
# @param respond [#call] builds a successful tool response from text
# @param respond_err [#call] builds an error response from a message plus metadata
# @param op_missing [#call] builds the "operator not available" response for a tool name
def define_pipeline_status_tool(server, operator, respond, respond_err, op_missing)
  server.define_tool(
    name: 'pipeline_status',
    description: 'Get the current pipeline status: last extraction time, unit counts, staleness.',
    input_schema: { type: 'object', properties: {} }
  ) do |server_context:|
    if !operator
      op_missing.call('pipeline_status')
    elsif (status_source = operator[:status_reporter])
      respond.call(JSON.pretty_generate(status_source.report))
    else
      respond_err.call(
        'Status reporter is not configured.',
        code: :not_configured,
        config_key: 'operator.status_reporter',
        tool: 'pipeline_status'
      )
    end
  end
end
|
|
623
1026
|
|
|
624
|
-
def define_pipeline_diagnose_tool(server, operator, respond)
|
|
1027
|
+
def define_pipeline_diagnose_tool(server, operator, respond, respond_err, op_missing)
|
|
625
1028
|
server.define_tool(
|
|
626
1029
|
name: 'pipeline_diagnose',
|
|
627
1030
|
description: 'Classify a recent pipeline error and suggest remediation.',
|
|
@@ -633,10 +1036,17 @@ module Woods
|
|
|
633
1036
|
required: %w[error_class error_message]
|
|
634
1037
|
}
|
|
635
1038
|
) do |error_class:, error_message:, server_context:|
|
|
636
|
-
next
|
|
1039
|
+
next op_missing.call('pipeline_diagnose') unless operator
|
|
637
1040
|
|
|
638
1041
|
escalator = operator[:error_escalator]
|
|
639
|
-
|
|
1042
|
+
unless escalator
|
|
1043
|
+
next respond_err.call(
|
|
1044
|
+
'Error escalator is not configured.',
|
|
1045
|
+
code: :not_configured,
|
|
1046
|
+
config_key: 'operator.error_escalator',
|
|
1047
|
+
tool: 'pipeline_diagnose'
|
|
1048
|
+
)
|
|
1049
|
+
end
|
|
640
1050
|
|
|
641
1051
|
error = StandardError.new(error_message)
|
|
642
1052
|
# Set the class name in the error string for pattern matching
|
|
@@ -646,7 +1056,7 @@ module Woods
|
|
|
646
1056
|
end
|
|
647
1057
|
end
|
|
648
1058
|
|
|
649
|
-
def define_pipeline_repair_tool(server, operator, respond)
|
|
1059
|
+
def define_pipeline_repair_tool(server, operator, respond, respond_err, op_missing)
|
|
650
1060
|
server.define_tool(
|
|
651
1061
|
name: 'pipeline_repair',
|
|
652
1062
|
description: 'Attempt to repair pipeline state: clear stale locks, reset rate limits.',
|
|
@@ -661,7 +1071,7 @@ module Woods
|
|
|
661
1071
|
required: ['action']
|
|
662
1072
|
}
|
|
663
1073
|
) do |action:, server_context:|
|
|
664
|
-
next
|
|
1074
|
+
next op_missing.call('pipeline_repair') unless operator
|
|
665
1075
|
|
|
666
1076
|
case action
|
|
667
1077
|
when 'clear_locks'
|
|
@@ -670,17 +1080,29 @@ module Woods
|
|
|
670
1080
|
lock.release
|
|
671
1081
|
respond.call(JSON.pretty_generate({ repaired: true, action: 'clear_locks' }))
|
|
672
1082
|
else
|
|
673
|
-
|
|
1083
|
+
respond_err.call(
|
|
1084
|
+
'Pipeline lock is not configured.',
|
|
1085
|
+
code: :not_configured,
|
|
1086
|
+
config_key: 'operator.pipeline_lock',
|
|
1087
|
+
tool: 'pipeline_repair'
|
|
1088
|
+
)
|
|
674
1089
|
end
|
|
675
1090
|
when 'reset_cooldowns'
|
|
676
1091
|
respond.call(JSON.pretty_generate({ repaired: true, action: 'reset_cooldowns' }))
|
|
677
1092
|
else
|
|
678
|
-
|
|
1093
|
+
respond_err.call(
|
|
1094
|
+
"Unknown repair action: #{action}",
|
|
1095
|
+
code: :unsupported_argument,
|
|
1096
|
+
tool: 'pipeline_repair',
|
|
1097
|
+
argument: 'action',
|
|
1098
|
+
value: action,
|
|
1099
|
+
allowed: %w[clear_locks reset_cooldowns]
|
|
1100
|
+
)
|
|
679
1101
|
end
|
|
680
1102
|
end
|
|
681
1103
|
end
|
|
682
1104
|
|
|
683
|
-
def define_retrieval_rate_tool(server, feedback_store, respond)
|
|
1105
|
+
def define_retrieval_rate_tool(server, feedback_store, respond, fb_missing)
|
|
684
1106
|
coerce_int = method(:coerce_integer)
|
|
685
1107
|
server.define_tool(
|
|
686
1108
|
name: 'retrieval_rate',
|
|
@@ -694,7 +1116,7 @@ module Woods
|
|
|
694
1116
|
required: %w[query score]
|
|
695
1117
|
}
|
|
696
1118
|
) do |query:, score:, server_context:, comment: nil|
|
|
697
|
-
next
|
|
1119
|
+
next fb_missing.call('retrieval_rate') unless feedback_store
|
|
698
1120
|
|
|
699
1121
|
score = coerce_int.call(score)
|
|
700
1122
|
feedback_store.record_rating(query: query, score: score, comment: comment)
|
|
@@ -702,7 +1124,7 @@ module Woods
|
|
|
702
1124
|
end
|
|
703
1125
|
end
|
|
704
1126
|
|
|
705
|
-
def define_retrieval_report_gap_tool(server, feedback_store, respond)
|
|
1127
|
+
def define_retrieval_report_gap_tool(server, feedback_store, respond, fb_missing)
|
|
706
1128
|
server.define_tool(
|
|
707
1129
|
name: 'retrieval_report_gap',
|
|
708
1130
|
description: 'Report a missing unit that should have appeared in retrieval results.',
|
|
@@ -715,7 +1137,7 @@ module Woods
|
|
|
715
1137
|
required: %w[query missing_unit unit_type]
|
|
716
1138
|
}
|
|
717
1139
|
) do |query:, missing_unit:, unit_type:, server_context:|
|
|
718
|
-
next
|
|
1140
|
+
next fb_missing.call('retrieval_report_gap') unless feedback_store
|
|
719
1141
|
|
|
720
1142
|
feedback_store.record_gap(query: query, missing_unit: missing_unit, unit_type: unit_type)
|
|
721
1143
|
respond.call(JSON.pretty_generate({
|
|
@@ -726,13 +1148,13 @@ module Woods
|
|
|
726
1148
|
end
|
|
727
1149
|
end
|
|
728
1150
|
|
|
729
|
-
def define_retrieval_explain_tool(server, feedback_store, respond)
|
|
1151
|
+
def define_retrieval_explain_tool(server, feedback_store, respond, fb_missing)
|
|
730
1152
|
server.define_tool(
|
|
731
1153
|
name: 'retrieval_explain',
|
|
732
1154
|
description: 'Get feedback statistics: average score, total ratings, gap count.',
|
|
733
1155
|
input_schema: { type: 'object', properties: {} }
|
|
734
1156
|
) do |server_context:|
|
|
735
|
-
next
|
|
1157
|
+
next fb_missing.call('retrieval_explain') unless feedback_store
|
|
736
1158
|
|
|
737
1159
|
ratings = feedback_store.ratings
|
|
738
1160
|
gaps = feedback_store.gaps
|
|
@@ -746,13 +1168,13 @@ module Woods
|
|
|
746
1168
|
end
|
|
747
1169
|
end
|
|
748
1170
|
|
|
749
|
-
def define_retrieval_suggest_tool(server, feedback_store, respond)
|
|
1171
|
+
def define_retrieval_suggest_tool(server, feedback_store, respond, fb_missing)
|
|
750
1172
|
server.define_tool(
|
|
751
1173
|
name: 'retrieval_suggest',
|
|
752
1174
|
description: 'Analyze feedback to suggest improvements: detect patterns in low scores and missing units.',
|
|
753
1175
|
input_schema: { type: 'object', properties: {} }
|
|
754
1176
|
) do |server_context:|
|
|
755
|
-
next
|
|
1177
|
+
next fb_missing.call('retrieval_suggest') unless feedback_store
|
|
756
1178
|
|
|
757
1179
|
require_relative '../feedback/gap_detector'
|
|
758
1180
|
detector = Woods::Feedback::GapDetector.new(feedback_store: feedback_store)
|
|
@@ -764,14 +1186,14 @@ module Woods
|
|
|
764
1186
|
end
|
|
765
1187
|
end
|
|
766
1188
|
|
|
767
|
-
# Registers the snapshot tools (list, diff, unit history, detail) on +server+,
# in that order. Only the detail tool additionally receives +respond_err+;
# the other three get +snap_missing+ as their sole error hook. Returns the
# value of the detail-tool registration.
def define_snapshot_tools(server, snapshot_store, respond, respond_err, snap_missing)
  shared = [server, snapshot_store, respond]
  %i[
    define_list_snapshots_tool
    define_snapshot_diff_tool
    define_unit_history_tool
  ].each { |definer| send(definer, *shared, snap_missing) }
  define_snapshot_detail_tool(*shared, respond_err, snap_missing)
end
|
|
773
1195
|
|
|
774
|
-
def define_list_snapshots_tool(server, snapshot_store, respond)
|
|
1196
|
+
def define_list_snapshots_tool(server, snapshot_store, respond, snap_missing)
|
|
775
1197
|
coerce_int = method(:coerce_integer)
|
|
776
1198
|
server.define_tool(
|
|
777
1199
|
name: 'list_snapshots',
|
|
@@ -783,7 +1205,7 @@ module Woods
|
|
|
783
1205
|
}
|
|
784
1206
|
}
|
|
785
1207
|
) do |server_context:, limit: nil, branch: nil|
|
|
786
|
-
next
|
|
1208
|
+
next snap_missing.call('list_snapshots') unless snapshot_store
|
|
787
1209
|
|
|
788
1210
|
limit = coerce_int.call(limit)
|
|
789
1211
|
results = snapshot_store.list(limit: limit || 20, branch: branch)
|
|
@@ -791,7 +1213,7 @@ module Woods
|
|
|
791
1213
|
end
|
|
792
1214
|
end
|
|
793
1215
|
|
|
794
|
-
def define_snapshot_diff_tool(server, snapshot_store, respond)
|
|
1216
|
+
def define_snapshot_diff_tool(server, snapshot_store, respond, snap_missing)
|
|
795
1217
|
server.define_tool(
|
|
796
1218
|
name: 'snapshot_diff',
|
|
797
1219
|
description: 'Compare two extraction snapshots by git SHA. Returns lists of added, modified, and deleted units.',
|
|
@@ -803,7 +1225,7 @@ module Woods
|
|
|
803
1225
|
required: %w[sha_a sha_b]
|
|
804
1226
|
}
|
|
805
1227
|
) do |sha_a:, sha_b:, server_context:|
|
|
806
|
-
next
|
|
1228
|
+
next snap_missing.call('snapshot_diff') unless snapshot_store
|
|
807
1229
|
|
|
808
1230
|
result = snapshot_store.diff(sha_a, sha_b)
|
|
809
1231
|
respond.call(JSON.pretty_generate({
|
|
@@ -816,7 +1238,7 @@ module Woods
|
|
|
816
1238
|
end
|
|
817
1239
|
end
|
|
818
1240
|
|
|
819
|
-
def define_unit_history_tool(server, snapshot_store, respond)
|
|
1241
|
+
def define_unit_history_tool(server, snapshot_store, respond, snap_missing)
|
|
820
1242
|
coerce_int = method(:coerce_integer)
|
|
821
1243
|
server.define_tool(
|
|
822
1244
|
name: 'unit_history',
|
|
@@ -829,7 +1251,7 @@ module Woods
|
|
|
829
1251
|
required: ['identifier']
|
|
830
1252
|
}
|
|
831
1253
|
) do |identifier:, server_context:, limit: nil|
|
|
832
|
-
next
|
|
1254
|
+
next snap_missing.call('unit_history') unless snapshot_store
|
|
833
1255
|
|
|
834
1256
|
limit = coerce_int.call(limit)
|
|
835
1257
|
entries = snapshot_store.unit_history(identifier, limit: limit || 20)
|
|
@@ -841,7 +1263,7 @@ module Woods
|
|
|
841
1263
|
end
|
|
842
1264
|
end
|
|
843
1265
|
|
|
844
|
-
def define_snapshot_detail_tool(server, snapshot_store, respond)
|
|
1266
|
+
def define_snapshot_detail_tool(server, snapshot_store, respond, respond_err, snap_missing)
|
|
845
1267
|
server.define_tool(
|
|
846
1268
|
name: 'snapshot_detail',
|
|
847
1269
|
description: 'Get full metadata for a specific extraction snapshot by git SHA.',
|
|
@@ -852,18 +1274,24 @@ module Woods
|
|
|
852
1274
|
required: ['git_sha']
|
|
853
1275
|
}
|
|
854
1276
|
) do |git_sha:, server_context:|
|
|
855
|
-
next
|
|
1277
|
+
next snap_missing.call('snapshot_detail') unless snapshot_store
|
|
856
1278
|
|
|
857
1279
|
snapshot = snapshot_store.find(git_sha)
|
|
858
1280
|
if snapshot
|
|
859
1281
|
respond.call(JSON.pretty_generate(snapshot))
|
|
860
1282
|
else
|
|
861
|
-
|
|
1283
|
+
respond_err.call(
|
|
1284
|
+
"Snapshot not found for git SHA: #{git_sha}",
|
|
1285
|
+
code: :not_found,
|
|
1286
|
+
tool: 'snapshot_detail',
|
|
1287
|
+
git_sha: git_sha,
|
|
1288
|
+
hint: 'Use `list_snapshots` to see available SHAs.'
|
|
1289
|
+
)
|
|
862
1290
|
end
|
|
863
1291
|
end
|
|
864
1292
|
end
|
|
865
1293
|
|
|
866
|
-
def define_notion_sync_tool(server, reader, index_dir, respond)
|
|
1294
|
+
def define_notion_sync_tool(server, reader, index_dir, respond, respond_err)
|
|
867
1295
|
server.define_tool(
|
|
868
1296
|
name: 'notion_sync',
|
|
869
1297
|
description: 'Sync extracted codebase data (Data Models + Columns) to Notion databases. ' \
|
|
@@ -875,11 +1303,23 @@ module Woods
|
|
|
875
1303
|
) do |server_context:|
|
|
876
1304
|
config = Woods.configuration
|
|
877
1305
|
unless config.notion_api_token
|
|
878
|
-
next
|
|
1306
|
+
next respond_err.call(
|
|
1307
|
+
'notion_api_token is not configured. Set it in Woods.configure or via the NOTION_API_TOKEN env var.',
|
|
1308
|
+
code: :not_configured,
|
|
1309
|
+
config_key: 'notion_api_token',
|
|
1310
|
+
doc_link: 'docs/NOTION_EXPORT.md',
|
|
1311
|
+
tool: 'notion_sync'
|
|
1312
|
+
)
|
|
879
1313
|
end
|
|
880
1314
|
|
|
881
1315
|
if (config.notion_database_ids || {}).empty?
|
|
882
|
-
next
|
|
1316
|
+
next respond_err.call(
|
|
1317
|
+
'notion_database_ids is not configured. Set it in Woods.configure.',
|
|
1318
|
+
code: :not_configured,
|
|
1319
|
+
config_key: 'notion_database_ids',
|
|
1320
|
+
doc_link: 'docs/NOTION_EXPORT.md',
|
|
1321
|
+
tool: 'notion_sync'
|
|
1322
|
+
)
|
|
883
1323
|
end
|
|
884
1324
|
|
|
885
1325
|
require_relative '../notion/exporter'
|
|
@@ -893,7 +1333,11 @@ module Woods
|
|
|
893
1333
|
errors: stats[:errors].first(10)
|
|
894
1334
|
}))
|
|
895
1335
|
rescue StandardError => e
|
|
896
|
-
|
|
1336
|
+
respond_err.call(
|
|
1337
|
+
"Notion sync failed: #{e.message}",
|
|
1338
|
+
code: :api_error,
|
|
1339
|
+
tool: 'notion_sync'
|
|
1340
|
+
)
|
|
897
1341
|
end
|
|
898
1342
|
end
|
|
899
1343
|
|
|
@@ -931,6 +1375,196 @@ module Woods
|
|
|
931
1375
|
]
|
|
932
1376
|
end
|
|
933
1377
|
|
|
1378
|
+
# Registers the `woods_status` MCP tool on +server+.
#
# The handler takes no input; it asks +Woods::MCP::Server.build_status+ for
# the status payload, passing along the reader, retriever, index directory
# and bootstrap state captured here, and hands the pretty-printed JSON to
# +respond+.
#
# @param server [#define_tool] tool registry the handler is attached to
# @param reader [Object] index reader forwarded to +build_status+
# @param retriever [Object, nil] retriever forwarded to +build_status+
# @param index_dir [Object] index directory forwarded to +build_status+
# @param bootstrap_state [Object, nil] bootstrap state forwarded to +build_status+
# @param respond [#call] builds a successful tool response from text
def define_woods_status_tool(server, reader, retriever, index_dir, bootstrap_state, respond)
  about = 'Diagnose whether the Woods index and server are healthy. Returns extraction metadata ' \
          '(last run, unit counts, git SHA, staleness in seconds), retriever/embedding configuration, ' \
          'bootstrap state (hydrated / degraded / failed + reason), feature flags, and a ready flag. ' \
          'Call this first on cold connect to learn what the server knows.'
  status_inputs = {
    reader: reader,
    retriever: retriever,
    index_dir: index_dir,
    bootstrap_state: bootstrap_state
  }

  server.define_tool(
    name: 'woods_status', description: about, input_schema: { type: 'object', properties: {} }
  ) do |server_context:|
    _ = server_context # required by the handler signature, otherwise unused
    report = Woods::MCP::Server.build_status(**status_inputs)
    respond.call(JSON.pretty_generate(report))
  end
end
|
|
1395
|
+
|
|
1396
|
+
public
|
|
1397
|
+
|
|
1398
|
+
# Build the woods_status payload. Exposed at module level so specs (and future
|
|
1399
|
+
# console/unified-server entry points) can assemble the same shape without
|
|
1400
|
+
# reaching through the MCP::Server internals.
|
|
1401
|
+
#
|
|
1402
|
+
# +features.embedding_model+ / +features.embedding_provider+ /
|
|
1403
|
+
# +features.vector_store+ prefer the ResolvedConfig captured at embed time
|
|
1404
|
+
# (+bootstrap_state.resolved_config+, which is read back from +woods.json+)
|
|
1405
|
+
# over +Woods.configuration+, whose defaults can contradict the actual
|
|
1406
|
+
# provider in use. Without this, operators debugging "wrong provider" see
|
|
1407
|
+
# status claiming +embedding_model: "text-embedding-3-small"+ next to
|
|
1408
|
+
# +embedding_provider: "ollama"+ and reasonably distrust every field.
|
|
1409
|
+
# Assemble the woods_status payload.
#
# @param reader [#manifest] index reader; its manifest is read via safe_manifest
# @param retriever [Object, nil] reported as +retriever.configured+ / +retriever.class+
# @param index_dir [#to_s] reported under +server.index_dir+ and passed to index_section
# @param bootstrap_state [#resolved_config, #to_h, nil] optional bootstrap snapshot
# @return [Hash] with keys +:ready+, +:server+, +:index+, +:retriever+,
#   +:bootstrap+ and +:features+
def build_status(reader:, retriever:, index_dir:, bootstrap_state: nil)
  manifest = safe_manifest(reader)
  extracted_at = manifest && manifest['extracted_at']
  # Tolerate a nil Woods.configuration — specs that reset it between
  # runs can leave a transient nil window, and build_status should
  # still produce a readable payload during that window.
  config = Woods.configuration || Woods::Configuration.new
  resolved = bootstrap_state&.resolved_config
  # +ready+ is advertised as a flag, so it must be a strict boolean. The
  # former +manifest && ...+ form produced nil (JSON null) whenever the
  # manifest was unreadable, which a client checking +ready == false+ misses.
  ready = manifest ? !manifest['counts'].to_h.empty? : false

  {
    ready: ready,
    server: {
      name: 'woods',
      version: Woods::VERSION,
      index_dir: index_dir.to_s
    },
    index: index_section(manifest, extracted_at, staleness_seconds(extracted_at), index_dir),
    retriever: {
      configured: !retriever.nil?,
      class: retriever&.class&.name
    },
    bootstrap: bootstrap_state&.to_h,
    features: features_from(config, resolved)
  }
end
|
|
1435
|
+
|
|
1436
|
+
private
|
|
1437
|
+
|
|
1438
|
+
# Assemble the +index+ sub-hash of woods_status, including a staleness
|
|
1439
|
+
# gate that compares +manifest.git_sha+ against the current HEAD. The
|
|
1440
|
+
# manifest captures +git_sha+ / +gemfile_lock_sha+ / +schema_sha+ at
|
|
1441
|
+
# extraction time; until this change nothing compared them against the
|
|
1442
|
+
# live working tree, so an agent asking questions after 40 uncommitted
|
|
1443
|
+
# changes and an MCP restart silently got pre-change answers.
|
|
1444
|
+
#
|
|
1445
|
+
# +git_sha_matches_head+ is a tri-state:
|
|
1446
|
+
# - true — manifest.git_sha == current HEAD
|
|
1447
|
+
# - false — mismatch (stale)
|
|
1448
|
+
# - nil — couldn't resolve; the key is omitted (not a git repo, git unavailable,
|
|
1449
|
+
# or manifest has no git_sha)
|
|
1450
|
+
#
|
|
1451
|
+
# Whenever HEAD resolves, +head_git_sha+ carries the live HEAD so operators can
|
|
1452
|
+
# diff directly. This is an observability signal, not a hard gate —
|
|
1453
|
+
# hard-refusing responses would be much more disruptive than a loudly-
|
|
1454
|
+
# visible staleness flag that agents can branch on.
|
|
1455
|
+
# Build the +index+ portion of the status payload.
#
# Manifest fields are copied through (nil when there is no manifest, with
# +counts+ defaulting to an empty Hash). When the manifest records a
# +git_sha+ and resolve_head_sha returns a value, the result additionally
# carries +:head_git_sha+ and +:git_sha_matches_head+; otherwise both keys
# are left out.
#
# @param manifest [Hash, nil] parsed manifest with String keys
# @param extracted_at [Object, nil] copied to +:extracted_at+
# @param staleness [Object, nil] copied to +:staleness_seconds+
# @param index_dir [Object] passed to resolve_head_sha
# @return [Hash]
def index_section(manifest, extracted_at, staleness, index_dir)
  # Nil-tolerant manifest lookup shared by every field below.
  field = ->(key) { manifest && manifest[key] }

  section = {
    extracted_at: extracted_at,
    staleness_seconds: staleness,
    rails_version: field.call('rails_version'),
    ruby_version: field.call('ruby_version'),
    total_units: field.call('total_units'),
    counts: field.call('counts') || {},
    git_sha: field.call('git_sha'),
    git_branch: field.call('git_branch'),
    gemfile_lock_sha: field.call('gemfile_lock_sha'),
    schema_sha: field.call('schema_sha')
  }

  recorded_sha = field.call('git_sha')
  return section unless recorded_sha

  live_sha = resolve_head_sha(index_dir)
  return section unless live_sha

  section.merge(head_git_sha: live_sha, git_sha_matches_head: recorded_sha == live_sha)
end
|
|
1477
|
+
|
|
1478
|
+
# Resolve the current HEAD SHA for the git repo containing +index_dir+.
|
|
1479
|
+
# Returns nil when git is unavailable or +index_dir+ is not in a repo —
|
|
1480
|
+
# callers treat nil as "can't compare" rather than "mismatch".
|
|
1481
|
+
#
|
|
1482
|
+
# Uses +capture2e+ so git's "fatal: not a git repository" stderr banner
|
|
1483
|
+
# does not leak through the MCP stdio transport. MCP clients that parse
|
|
1484
|
+
# stderr for protocol framing can't tolerate stray lines.
|
|
1485
|
+
# Ask git for the HEAD commit of the repository containing +index_dir+.
#
# @param index_dir [#to_s, nil] directory handed to +git -C+
# @return [String, nil] the full hexadecimal object id, or nil when
#   +index_dir+ is nil or not a directory, git is missing or not
#   executable, the command fails, or its output holds no object id
def resolve_head_sha(index_dir)
  return nil unless index_dir

  dir = index_dir.to_s
  return nil unless File.directory?(dir)

  output, status = Open3.capture2e('git', '-C', dir, 'rev-parse', 'HEAD')
  return nil unless status.success?

  # capture2e folds stderr into the captured text, so a successful run may
  # still carry warning lines next to the hash. Returning the raw text
  # would turn such a run into a false "mismatch"; keep only a line that
  # is a complete SHA-1 (40 hex) or SHA-256 (64 hex) object id.
  output.each_line.map(&:strip).find { |line| line.match?(/\A(?:\h{40}|\h{64})\z/) }
rescue Errno::ENOENT, Errno::EACCES
  # git not installed or not executable on this host — equivalent to
  # "can't compare". Any other exception is a genuine bug and should
  # propagate.
  nil
end
|
|
1499
|
+
|
|
1500
|
+
# Assemble the +features+ sub-hash of woods_status, preferring the
|
|
1501
|
+
# ResolvedConfig captured at embed time over live {Woods::Configuration}.
|
|
1502
|
+
#
|
|
1503
|
+
# Fields that read from +resolved+ (when present): embedding_model,
|
|
1504
|
+
# embedding_provider, vector_store. Everything else is host-process
|
|
1505
|
+
# state (snapshots_enabled, notion_configured, session_tracer_enabled)
|
|
1506
|
+
# and comes from the running config.
|
|
1507
|
+
#
|
|
1508
|
+
# +console_mcp_enabled+ is intentionally omitted — the index MCP process
|
|
1509
|
+
# has no visibility into the host Rails app's Woods initializer, so
|
|
1510
|
+
# historic status payloads always reported +false+ regardless of the
|
|
1511
|
+
# actual console MCP state. Advertising a misleading field is worse
|
|
1512
|
+
# than not advertising it at all.
|
|
1513
|
+
# Build the +features+ portion of the status payload.
#
# Embedding model, embedding provider and vector store come from +resolved+
# when it supplies them and otherwise from +config+. The remaining flags are
# read from +config+ only. Attributes +config+ does not respond to are
# treated as absent (nil, or false for the boolean flags).
#
# @param config [Object] running configuration, probed via +respond_to?+
# @param resolved [#embedding_provider, #stores, nil] configuration snapshot
# @return [Hash] keys +:embedding_model+, +:embedding_provider+,
#   +:vector_store+, +:session_tracer_enabled+, +:snapshots_enabled+,
#   +:notion_configured+
def features_from(config, resolved)
  # Read an optional attribute from +config+, or +fallback+ when it is not defined.
  setting = lambda do |attr, fallback = nil|
    config.respond_to?(attr) ? config.public_send(attr) : fallback
  end

  embed_info = resolved&.embedding_provider || {}
  provider_from_index = resolved_provider_symbol(embed_info[:class])
  store_from_index = resolved&.stores&.dig(:vector_store)

  {
    embedding_model: embed_info[:model] || setting.call(:embedding_model),
    embedding_provider: presence(provider_from_index || setting.call(:embedding_provider)),
    vector_store: presence(store_from_index || setting.call(:vector_store)),
    session_tracer_enabled: setting.call(:session_tracer_enabled, false),
    snapshots_enabled: setting.call(:enable_snapshots, false),
    notion_configured: !presence(setting.call(:notion_api_token)).nil?
  }
end
|
|
1530
|
+
|
|
1531
|
+
# Convert a fully-qualified provider class name (as serialised in
|
|
1532
|
+
# woods.json — e.g. +"Woods::Embedding::Provider::Ollama"+) into the
|
|
1533
|
+
# short symbol form used by +Woods.configuration.embedding_provider+
|
|
1534
|
+
# (+:ollama+, +:openai+). Returns nil when +class_name+ is unknown or
|
|
1535
|
+
# absent so callers fall back to the live config value.
|
|
1536
|
+
# Map a provider class name to its short symbol by suffix.
#
# @param class_name [String, nil] e.g. "Woods::Embedding::Provider::Ollama"
# @return [Symbol, nil] +:ollama+ or +:openai+; nil when +class_name+ is nil,
#   empty, or ends in neither suffix
def resolved_provider_symbol(class_name)
  return if class_name.nil? || class_name.empty?

  # Suffix patterns in priority order; +===+ keeps case/when matching semantics.
  { /Ollama\z/ => :ollama, /OpenAI\z/ => :openai }.each do |suffix, provider|
    return provider if suffix === class_name
  end
  nil
end
|
|
1544
|
+
|
|
1545
|
+
# Return a Hash of manifest content, or nil if unreadable.
|
|
1546
|
+
# Best-effort manifest read.
#
# @param reader [#manifest] index reader
# @return [Object, nil] whatever +reader.manifest+ returns, or nil when the
#   call raises a StandardError
def safe_manifest(reader)
  manifest = reader.manifest
rescue StandardError
  # Any failure while reading is reported as "no manifest".
  nil
else
  manifest
end
|
|
1551
|
+
|
|
1552
|
+
# Seconds since extraction. Returns nil if timestamp is missing or unparsable.
|
|
1553
|
+
# Whole seconds elapsed between the given timestamp and now.
#
# @param iso8601 [String, nil] timestamp text understood by +Time.parse+
# @return [Integer, nil] elapsed seconds, or nil when the timestamp is
#   missing, blank, not a String, or unparsable
def staleness_seconds(iso8601)
  # A manifest is external data: a non-String value (e.g. a numeric epoch)
  # counts as unparsable instead of raising NoMethodError on +empty?+.
  return nil unless iso8601.is_a?(String)
  return nil if iso8601.strip.empty?

  (Time.now - Time.parse(iso8601)).to_i
rescue ArgumentError
  # Time.parse could not find usable time information in the text.
  nil
end
|
|
1560
|
+
|
|
1561
|
+
# Normalise an optional value to a String.
#
# @param value [Object, nil]
# @return [String, nil] +value.to_s+, or nil when +value+ is nil or responds
#   to +empty?+ and is empty
def presence(value)
  blank = value.nil? || (value.respond_to?(:empty?) && value.empty?)
  blank ? nil : value.to_s
end
|
|
1567
|
+
|
|
934
1568
|
def register_resource_handler(server, reader)
|
|
935
1569
|
server.resources_read_handler do |params|
|
|
936
1570
|
uri = params[:uri]
|