woods 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +169 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +15 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +3 -4
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +737 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +1 -1
- data/lib/woods/unblocked/document_builder.rb +35 -10
- data/lib/woods/unblocked/exporter.rb +1 -1
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +126 -6
- metadata +69 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 69b5f822b28adb68fa962350e44b8721811c62bfa25b0350ab8a46b8b121f3d4
|
|
4
|
+
data.tar.gz: 6e1e42f994fd57f5de592f662d451e73b967fd4c84744b1638d94e833a044852
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a83cb96217695d13bba825a2fb25dcf257bb1de94565c2005fb559771d0670b628db70a19651c7626d02f2baf030a98e116bd7c971f22823ce4300642dc1bd73
|
|
7
|
+
data.tar.gz: 8d420c40672e99f2395a410b4f83cf8df761c7f06b42e8558199b7c9d26db9b1ffd9aa866520f3d245a2fcd3bdd2cea9f007c01cfd3ed970d509fe6aebd80eab
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,175 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [1.3.0] - 2026-05-13
|
|
11
|
+
|
|
12
|
+
### Upgrade Notes
|
|
13
|
+
|
|
14
|
+
Two behavior changes are worth pre-reading before you bump:
|
|
15
|
+
|
|
16
|
+
- **The MCP Index Server (`woods-mcp`, `woods-mcp-http`) no longer silently degrades to pattern-only search when it can't find a real index.** Hosts that ran 1.2.0 without writing `woods.json` got an empty-store retriever that quietly served degraded results. After this release, the bootstrapper raises `Woods::MCP::MissingArtifact` at boot unless one of these is true: (a) you've run `rake woods:embed` against this checkout (writes `woods.json` + dumps under `output_dir`), or (b) you explicitly opt into the legacy env-var auto-detect path with `WOODS_ALLOW_AUTODETECT=1`. The Shape-2 ("shared filesystem") preset documented under `docs/CONFIGURATION_REFERENCE.md#deployment-shapes` is the supported way to ship pre-built indices alongside a separate MCP process.
|
|
17
|
+
- **Console MCP is opt-in.** The Index Server is unaffected. Hosts that mounted `Woods::Console::RackMiddleware` in 1.2.0 already saw it short-circuit at the entry points (Console MCP was disabled in 1.2.0 after an audit). To re-enable it under the new five-layer defense stack, set `config.console_mcp_enabled = true` in your Woods initializer and review the threat-model walkthrough in `docs/CONSOLE_MCP_SETUP.md`. No host automatically re-enables Console MCP on upgrade.
|
|
18
|
+
|
|
19
|
+
The `mcp` runtime gem is now pinned to `>= 0.9.2, < 1.0` (was `~> 0.6`) — see the `### Security` block below.
|
|
20
|
+
|
|
21
|
+
### Added — `console_eval` opt-in (backlog B-053, issue #87)
|
|
22
|
+
|
|
23
|
+
- **Embedded `console_eval` is now opt-in and runs the full five-control contract.** Previously refused unconditionally at dispatch. Opting in requires `WOODS_CONSOLE_UNSAFE_EVAL=true` (or `config.console_unsafe_eval_enabled = true`) AND a `Woods::Console::Confirmation` collaborator AND a JSONL audit-log path. Any missing collaborator raises `Woods::ConfigurationError` at boot — fail-closed by design. The flag still refuses to boot in `Rails.env.production?`.
|
|
24
|
+
- **Execution path: EvalGuard → Confirmation → SafeContext → Timeout → AuditLogger.** `EmbeddedExecutor#handle_eval` invokes each control in order. `EvalGuard.check!` refuses credential/reflection/shell/network payloads before parsing completes. `Confirmation#request_confirmation` delegates to the host-provided callback. The code runs inside the `SafeContext` rolled-back transaction and is wrapped in `Timeout.timeout(1..30s)`. Every outcome (guard-refused, denied, ok, error) writes exactly one audit entry with `CredentialScanner`-redacted params.
|
|
25
|
+
- **New config attrs.** `Woods.configuration.console_unsafe_eval_confirmation` and `console_unsafe_eval_audit_log_path` — host-level defaults for the two required collaborators. Explicit kwargs on `Server.build_embedded` / `Woods::Console::RackMiddleware` take precedence.
|
|
26
|
+
- **Updated operator banner.** The stderr banner now reads "console_eval is LIVE on this process" (previously said "scaffolding is active. Execution is STILL NOT IMPLEMENTED") — it reflects that execution is wired.
|
|
27
|
+
|
|
28
|
+
### Added — Persistence & Bootstrap arc (PRs #73–#79)
|
|
29
|
+
|
|
30
|
+
- **Shape-2 (shared filesystem) support.** A new `:shared_filesystem` preset makes the "rake embed writes to `output_dir`, separate `woods-mcp` server reads from disk" shape a first-class deployment option. All stores are in-memory at runtime; persistence is handled by the Snapshotter's atomic dumps. No `sqlite3` gem required — works on MySQL- and Postgres-only hosts. See `docs/CONFIGURATION_REFERENCE.md#deployment-shapes` and `docs/BACKEND_MATRIX.md#persistence-story`.
|
|
31
|
+
- **Typed MCP exception hierarchy.** `Woods::MCP::BootstrapError` with subclasses `MissingCredential`, `MissingArtifact`, `ConfigMismatch`, `DimensionMismatch`, `UnsupportedArtifact`. `Woods::MCP::ProviderUnreachable` lands as a sibling (recoverable, caught internally for degraded start). `Woods::Storage::InapplicableBackend` signals Snapshotter misuse on durable backends. Host apps rescuing `Woods::Error` continue to catch everything; `ConfigurationError` only catches declared-config-shape problems.
|
|
32
|
+
- **On-disk dump format.** `output_dir/dumps/<ISO8601>/` directories with atomic `latest` pointer flipped last. Vectors are packed float32 in a `WVF1` binary format (magic + schema_version + dimension + vector_count + gem_version + model_name header, followed by the float blob and a `vectors.idx` sidecar). Metadata is streaming MessagePack in a `WMD1` format. Both formats are schema-versioned and refuse newer-than-supported artifacts with `UnsupportedArtifact`.
|
|
33
|
+
- **`output_dir/woods.json` resolved config snapshot.** The embed run writes the resolved provider (class, model, host, dimension, gem_version) so the MCP server boots against the same config without re-reading env vars. `Woods::ResolvedConfig` validates schema_version on load and exposes `#matches?`, `#assert_compatible!` for drift detection.
|
|
34
|
+
- **`Woods::Storage::Snapshotter::Vector` / `::Metadata`.** Dump/load seams that the Indexer calls on embed completion and the Bootstrapper calls at MCP boot. `Snapshotter` is its own namespace so persistence stays off the `Storage::*::Interface` contracts — pgvector / Qdrant / SQLite adapters remain persistence-free.
|
|
35
|
+
- **`Woods::IndexArtifact`** — Whole Value wrapping `output_dir` path semantics: `config_path`, `dumps_root`, `latest_dump_path`, `fresh?`, `new_dump_dir`, `promote`, atomic `write_config`. Centralises previously-scattered path knowledge.
|
|
36
|
+
- **`Woods::MCP::ConfigResolver`** — extracted resolver that reads `woods.json` (if present), validates compatibility against the live host config, or raises `MissingArtifact` when no snapshot exists and `WOODS_ALLOW_AUTODETECT=1` isn't set. Returns `(config, source)` where source is `:snapshot`, `:host_config`, `:autodetect`, or `:none`.
|
|
37
|
+
- **`Woods::MCP::ProviderProbe.reachable!`** — pure predicate raising `ProviderUnreachable` on Ollama / OpenAI probe failure. Carries `url` and `reason` (e.g. `"connection_refused"`, `"timeout"`, `"unauthorized"`) for grep-friendly diagnosis.
|
|
38
|
+
- **`Woods::MCP::BootstrapState`** — thread-safe `:initializing → :hydrating → :hydrated | :degraded | :failed` state machine. Exposed by the `woods_status` MCP tool under a new `bootstrap:` block so operators can answer "why is semantic search disabled?" with one tool call.
|
|
39
|
+
- **`Configuration#dump_retention_count`** — number of `dumps/<ISO8601>/` directories to keep after a successful embed. Default 3. Older dumps are removed; the directory currently referenced by `latest` is always preserved.
|
|
40
|
+
- **`WOODS_ALLOW_AUTODETECT=1`** — opt-in env flag for the legacy env-var auto-detect path when `woods.json` is absent. Without the flag (and without a snapshot), the MCP server raises `MissingArtifact` at boot rather than silently degrading.
|
|
41
|
+
- **`exe/woods-mcp` / `exe/woods-mcp-http` top-level rescue.** Typed `BootstrapError` subclasses are caught and printed as `<ClassName>: <message>` to stderr before `exit 2`. Grep-friendly for ops dashboards.
|
|
42
|
+
- **`msgpack` added as a runtime gem dependency** (required for `WMD1` metadata serialization). Only loaded when the Snapshotter's metadata path is reached; pgvector / Qdrant users don't pay the load cost.
|
|
43
|
+
- **`bench/vector_query_and_serialization.rb`** — standalone Phase-0 benchmark harness measuring cosine kernel latency + allocation count and serialization round-trip for `pack("e*")` vs Marshal vs MessagePack vs JSON. Writes `tmp/bench_results/phase0.json`.
|
|
44
|
+
- **`spec/performance/vector_search_latency_spec.rb`** — opt-in (`:perf` tag) wall-clock regression guard for the kernel. Excluded from the default suite; runs via `rspec --tag perf` or `WOODS_RUN_PERF_SPECS=1`.
|
|
45
|
+
- **`docs/design/PERSISTENCE_AND_BOOTSTRAP.md`** — full design doc for the arc. Documents decision log (MessagePack rejected for vectors, `persistent?` on Interface rejected, fail-loud vs degraded-start split, streaming append deferred) and explicit out-of-scope items.
|
|
46
|
+
|
|
47
|
+
### Changed — Persistence & Bootstrap arc
|
|
48
|
+
|
|
49
|
+
- **`VectorStore::InMemory` flat-buffer backing.** `@ids` + `@vectors_flat` (single contiguous `Array<Float>`) + `@metadata` + `@id_to_index` + `@tombstones`, replacing the old hash-of-hashes. Strided cosine access; tombstone-based deletes preserve iteration stability across dumps. First-store sets dimension; subsequent stores assert compatibility.
|
|
50
|
+
- **Cosine kernel is now a while loop over Array indices** rather than `zip.sum`. Per-query allocations drop from ~9.8M to 2 on a 12k-vector corpus; wall-clock halves. Numerical equivalence is guarded against the reference `zip.sum` implementation (1e-12 tolerance).
|
|
51
|
+
- **`VectorStore::Interface` + `MetadataStore::Interface`** gain `#each_entry` and `#bulk_load`. The Snapshotter consumes these; durable backends (pgvector, Qdrant, SQLite) aren't required to implement them — they never see the Snapshotter path.
|
|
52
|
+
- **`Builder.build_retriever` accepts `vector_store:` / `metadata_store:` kwargs** so the Bootstrapper can inject hydrated stores. Default behaviour (no kwargs) is unchanged — fresh empties are constructed from config.
|
|
53
|
+
- **`Builder.build_metadata_store` / `build_graph_store` are now public** (already-public `build_vector_store` had been the odd one out). `Tasks.build_embed_indexer` now wires them through.
|
|
54
|
+
- **`Builder.build_embedding_provider` strips `SNAPSHOT_ONLY_KEYS` from `embedding_options`** before splatting into the provider constructor. `:dimension` lives in `embedding_options` for `ResolvedConfig`'s sake but isn't part of the Ollama/OpenAI API — without the filter, any config declaring a dimension raised `ArgumentError` at boot.
|
|
55
|
+
- **`Bootstrapper#build_retriever` no longer silently degrades.** Missing config + no `WOODS_ALLOW_AUTODETECT=1` now raises `MissingArtifact`. Unreachable provider starts the server in the `:degraded` state (retries on first query), visible in `woods_status`. Silent fallback to pattern-based search is gone — it was the core failure mode the arc set out to eliminate.
|
|
56
|
+
- **`MetadataStore::InMemory` stringifies symbol keys on store** so `Snapshotter::Metadata` round-trips (MessagePack doesn't preserve Ruby Symbol type on deserialization).
|
|
57
|
+
- **`Bootstrapper` hydrates `VectorStore::InMemory` metadata from the `MetadataStore` after load** — `vectors.bin` carries only the float blob to stay mmap-friendly; per-vector metadata lives in `metadata.msgpack`. The back-fill step keeps `VectorStore#search`'s filter predicates working after a dump/reload cycle.
|
|
58
|
+
|
|
59
|
+
### Fixed — Persistence & Bootstrap arc
|
|
60
|
+
|
|
61
|
+
- **`Tasks.build_embed_indexer` now wires `resolved_config`, `metadata_store`, `dump_retention_count`.** Without these, `Indexer#persist_snapshot` wrote `vectors.bin` + `latest` but never wrote `woods.json` — breaking the standalone `woods-mcp` boot path entirely. The spec suite missed this because Indexer specs used doubles for the Snapshotter.
|
|
62
|
+
- **`Bootstrapper` hydrates the retriever from Snapshotter dumps at boot** (PR #79). The previous stub returned a retriever with empty stores regardless of the dumps on disk — the entire Shape-2 payoff was unrealized until this fix landed.
|
|
63
|
+
- **`ConfigResolver.populate_from_stored` reads `OPENAI_API_KEY` from env** when the stored config says OpenAI. `woods.json` deliberately omits credentials; without this lookup, the MCP server crashed with a raw `ArgumentError: missing keyword: api_key` that the top-level `BootstrapError` rescue didn't catch. Now raises typed `MissingCredential` with an actionable message.
|
|
64
|
+
- **`Snapshotter::Vector` header parsing raises `UnsupportedArtifact` on truncated files** rather than the `NoMethodError: undefined method 'unpack' for nil` it produced on a 21-byte file that passed the old 20-byte guard. Minimum-length check raised to 28 bytes plus incremental checks before each unpack.
|
|
65
|
+
- **`ResolvedConfig.from_hash` schema-version check uses `<=` not `==`** to match the binary Snapshotters. A `schema_version = 1` file loaded by a future `SUPPORTED_SCHEMA_VERSION = 2` gem now loads cleanly. `schema_version = 0` is still rejected (positive-version guard).
|
|
66
|
+
- **`Bootstrapper.hydrated_vector_store` / `hydrated_metadata_store` propagate `ArgumentError`** from Snapshotter dump_dir validation instead of silently returning nil. Misconfigured `output_dir` is a config bug, not a transient I/O issue — operators need to see it.
|
|
67
|
+
|
|
68
|
+
### Fixed — Ollama embedding (PRs #68–#72)
|
|
69
|
+
|
|
70
|
+
- **Ollama embedding no longer fails with `400 "the input length exceeds the context length"`.** Two issues compounded into a single runtime failure when indexing real Rails codebases against Ollama: (a) the provider hard-coded `num_ctx = 8192` for every model, but Ollama's `/api/embed` enforces each model's *native* context length regardless of `options.num_ctx` ([ollama/ollama#14186](https://github.com/ollama/ollama/issues/14186)) — `nomic-embed-text`'s native ceiling is 2048, and any request larger than that was rejected outright; (b) the indexer's "does this unit need chunking?" check was based on a chars/token estimate that under-counts dense Ruby source (CamelCase constants, callback DSLs, symbol-heavy code), so chunks that looked safe by char count still exceeded the token budget.
|
|
71
|
+
|
|
72
|
+
### Added
|
|
73
|
+
|
|
74
|
+
- **Per-model context-length registry (`Woods::Embedding::Provider::Ollama::MODEL_CONTEXT_LENGTHS`).** `num_ctx` is now auto-selected from the configured model name: `nomic-embed-text` → 2048, `bge-m3` → 8192, `snowflake-arctic-embed2` → 8192, `mxbai-embed-large` → 512, `snowflake-arctic-embed` → 512, `all-minilm` → 256. Unknown models fall back to 2048 (Ollama's embedding default). Explicit `num_ctx:` overrides continue to win when set. `Provider::Ollama#max_input_tokens` reports the selected value so the chunker can size inputs correctly.
|
|
75
|
+
- **`Woods::Embedding::TokenCounter` — optional exact-token accounting via the `tokenizers` gem.** Loads the `bert-base-uncased` WordPiece tokenizer (the base every BERT-family embedding model uses) and re-verifies every chunk client-side. Catches the 10–20% gap between char-based estimates and Ollama's internal count on dense Ruby source. Falls back to a 1.5 chars/token ratio when the gem isn't installed, so Woods works unchanged without it — `gem 'tokenizers', '~> 0.5'` is recommended for any Ollama setup.
|
|
76
|
+
- **Token-aware `Indexer#needs_chunking?`.** When a `TokenCounter` is present, the indexer consults it before deciding to chunk — a char-count-safe but token-count-over-budget unit now gets split instead of sent to Ollama and rejected.
|
|
77
|
+
- **New `docs/EMBEDDING_MODELS.md`** — comparison of the five supported Ollama embedding models (context, dimensions, disk size), instructions for switching models (including the dimension-change re-index requirement), a walkthrough of the `num_ctx` regression and how Woods works around it, and the procedure for adding a new model to the context-length registry.
|
|
78
|
+
|
|
79
|
+
### Changed
|
|
80
|
+
|
|
81
|
+
- **Ollama embedding configuration — `base_url:` keyword corrected to `host:` in user docs.** `Woods::Embedding::Provider::Ollama#initialize` has always accepted `host:` (never `base_url:`), but several doc examples showed `base_url:` — following them would raise `ArgumentError` on boot. `CONFIGURATION_REFERENCE.md`, `TROUBLESHOOTING.md`, `FAQ.md`, `GETTING_STARTED.md`, and top-level `README.md` snippets are corrected. No code change — this is documentation catching up to long-standing code.
|
|
82
|
+
- **`BACKEND_MATRIX.md`** — Ollama section expanded to a full model table with native context, dimensions, and disk weights for each supported model; adds a "Self-hosted + large units" selection-guidance row pointing to `bge-m3`.
|
|
83
|
+
|
|
84
|
+
### Security
|
|
85
|
+
|
|
86
|
+
- **`mcp` gem bumped from `~> 0.6` to `>= 0.9.2, < 1.0` to close [CVE-2026-33946](https://github.com/anthropics/mcp/security/advisories) (HIGH).** The vulnerability is in the upstream `mcp` gem's STDIO transport, not in Woods' use of it, but every Woods install transitively depended on the affected versions. Hosts running `bundle update woods` will pick up the fixed `mcp` release automatically; Gemfile.lock pins on older `mcp` versions need to be regenerated. No API change required in host code.
|
|
87
|
+
- **Console MCP re-enabled behind a five-layer defense-in-depth stack.** The feature was previously disabled at its entry points after an audit flagged a Stripe Connect credential leak via the `authorizations` EAV table. It now ships gated on a new `console_mcp_enabled` config flag (default `false`) and runs through five independent safety layers, so a single misconfigured layer cannot leak secrets:
|
|
88
|
+
- **Layer 0 — feature gate.** `exe/woods-console-mcp`, `exe/woods-console`, and `Woods::Console::RackMiddleware` all short-circuit with a helpful "disabled" notice (stderr + exit 1 for stdio, `410 Gone` with JSON body for HTTP) when `Woods.configuration.console_mcp_enabled` is false. Hosts that have mounted the middleware see no change in behavior until they opt in.
|
|
89
|
+
- **Layer 1 — blocked tables (`console_blocked_tables`).** Rejects a tool call at dispatch time — before the executor is invoked — when any `:model`, `:table`, or `:sql` argument resolves to a configured blocked table. Built on `Woods::Console::TableGate`. Embedded transports now pass a `model_tables` registry so model-scoped tools (`console_find`, `console_sample`, etc.) can resolve model names to their tables without a database round-trip.
|
|
90
|
+
- **Layer 2 — credential scanner.** `Woods::Console::CredentialScanner` walks the final response tree and replaces credential-shaped substrings (Stripe `sk_live_*` / `sk_test_*`, AWS `AKIA*`, GitHub `ghp_*` / `github_pat_*`, GCP service-account private keys, generic high-entropy tokens) with `[REDACTED]`. This catches leaks regardless of where the value landed in the response shape — a row, a sub-hash, a positional array — and regardless of whether the column name looked sensitive. Individual rules can be disabled per-deployment via `console_disabled_scanner_patterns` (array of pattern symbols). Pass `%i[all]` to disable the scanner entirely.
|
|
91
|
+
- **Layer 3 — column + EAV redaction (`console_redacted_columns`, `console_redacted_key_values`).** Identity-based redaction for columns and key/value rows. Preserved verbatim from the prior release. See the two entries below for the shape-aware descent logic and EAV pattern contract.
|
|
92
|
+
- **Layer 4 — SqlValidator deny-list + SafeContext rollback.** Unchanged from prior releases. `console_sql` still rejects DML/DDL at the string level before any database interaction, and every request runs inside a transaction that is always rolled back.
|
|
93
|
+
- **Observability.** Layer 1 rejections emit a `console.table_gate.rejected` structured log line (level `warn`, includes tool name and model). Layer 2 hits emit `console.credential_scan.hits` with per-pattern counts, so operators can see when the net caught something rather than relying on in-band MCP response metadata. The logger is pluggable through `Woods::Observability::StructuredLogger` — operator logging pipelines can consume it without parsing the MCP wire format.
|
|
94
|
+
- **Upgrade path.** Hosts running on the disabled release that mounted `Woods::Console::RackMiddleware`: set `config.console_mcp_enabled = true` in your Woods initializer once you've configured the layers that apply to your threat model. The flag is opt-in by design — no host automatically re-enables the feature on upgrade. See `docs/CONSOLE_MCP_SETUP.md` for the full posture walkthrough and per-layer tuning guidance.
|
|
95
|
+
- **Scope.** The Index MCP server (`woods-mcp`, `woods-mcp-http`) and every extraction workflow remain unaffected — they were never in scope for the audit and ship unchanged.
|
|
96
|
+
|
|
97
|
+
- **`console_redacted_columns` now covers every tool that returns row data.** Redaction previously only walked top-level hash keys, so `console_sample`, `console_recent`, `console_find`, `console_pluck`, `console_sql`, and `console_query` returned configured credential columns in the clear — records were nested under `records` / `record`, and rows were positional arrays under `rows` / `values`. The server-level redaction pass is now shape-aware: it descends into `record` / `records` hashes and uses the `columns` header to redact positional rows. `console_pluck` now also includes a `columns` field in its response so positional redaction can key off of it. Affects every transport (stdio, Rack, bridge).
|
|
98
|
+
- **`console_redacted_key_values` for EAV (key-value) credential storage.** Column-name redaction cannot protect tables that store sensitive values in a generically named column (e.g. a Stripe Connect `authorizations` row of `{key: "stripe_access_token", value: "sk_live_..."}`): adding `value` to `console_redacted_columns` over-redacts every unrelated row. The new `console_redacted_key_values` config accepts one or more `{key_column:, value_column:, sensitive_keys: []}` patterns — when a row's `key_column` cell matches one of `sensitive_keys`, the same row's `value_column` cell is replaced with `[REDACTED]`. Applies across every response shape (`record`, `records`, positional `rows` / `values`) and every transport. Empty by default — configure it in `Woods.configure` to cover the EAV credential tables specific to your app.
|
|
99
|
+
- **TableGate now resolves `joins:` and `association:` arguments through model reflections.** `console_query` (via `joins:`) and `console_association_count` (via `association:`) previously bypassed Layer 1 entirely — an agent could reach `authorizations` rows by joining through a non-blocked model. The gate now accepts a `model_reflections` registry (association name → target table, built at boot from `reflect_on_all_associations`) and rejects any join or association whose target is on `console_blocked_tables`. Polymorphic and reflection-raising associations are skipped gracefully. Exposed via new `TableGate#check_joins!` and `#check_association!` entry points.
|
|
100
|
+
- **TableGate now catches ANSI-89 comma-joins.** `SELECT * FROM users, authorizations WHERE …` previously slipped past the gate because the old regex only matched the first identifier after `FROM` and explicit `JOIN` tokens. The gate now walks every `FROM` clause, splits on top-level commas (parenthesis-depth aware, so subqueries don't mislead it), and rejects a blocked table in any position of the list. Case, schema prefix, and quoted identifiers (`"authorizations"`, `` `authorizations` ``) are all handled.
|
|
101
|
+
- **TableGate now catches blocked tables inside CTE bodies, UNION branches, and FROM-clause subqueries.** The non-greedy `FROM_CLAUSE` regex previously terminated on `WHERE`/`JOIN`/`;`/`)` — but not on a nested `FROM` — so `SELECT * FROM (SELECT * FROM authorizations) AS a`, `WITH a AS (SELECT * FROM authorizations) SELECT * FROM a`, and `SELECT id FROM users UNION SELECT id FROM authorizations` would consume the outer clause and never re-scan the inner table. Treating `\bFROM\b` as a terminator makes every `FROM` occurrence its own independent `.scan` match, closing the H-3 bypass. Specs cover all three shapes.
|
|
102
|
+
- **Safer-by-default column redaction list.** `console_redacted_columns` previously defaulted to `[]`, so a host that enabled Console MCP without configuring Layer 3 got zero column redaction. The gem now seeds `console_redacted_columns` with a curated list of ~30 credential columns that appear across Devise, Doorkeeper, Rodauth, has_secure_password, devise-two-factor, and hand-rolled auth code: `password`, `password_digest`, `encrypted_password`, `crypted_password`, `salt`, `otp_secret`, `encrypted_otp_secret`, `two_factor_secret`, `backup_codes`, `reset_password_token`, `confirmation_token`, `unlock_token`, `remember_token`, `invitation_token`, `access_token`, `refresh_token`, `auth_token`, `api_token`, `api_key`, `bearer_token`, `client_secret`, `webhook_secret`, `signing_secret`, `session_secret`, `private_key`, `encrypted_private_key`, `key_hash`, `token`, `secret`, plus `password_salt`/`consumed_timestep`. Exposed via `Woods::DEFAULT_CONSOLE_REDACTED_COLUMNS` so hosts can extend (`Woods::DEFAULT_CONSOLE_REDACTED_COLUMNS + %w[extra]`) or override (`%w[only these]`). Intentionally excludes `key` (ActiveStorage blob keys, EAV key columns) and PII columns (org-specific compliance).
|
|
103
|
+
- **CredentialScanner ships with 8 additional gateway patterns.** The Layer 2 content scanner now catches `github_pat_` fine-grained PATs, SendGrid API keys (`SG.xxx.yyy`), Mailgun API keys (`key-<32 hex>`), Anthropic API keys (`sk-ant-api**-***`), OpenAI API keys (`sk-` and `sk-proj-`), Shopify access tokens (`shpat_`, `shpca_`, `shpss_`, `shppa_`), Square access tokens (`sq0xxx-***`), and PayPal access tokens (`access_token$production$…$…`). Pattern order is specific-before-generic so Anthropic hits increment `:anthropic_api_key` rather than falling through to `:openai_api_key`. Total active patterns: 17.
|
|
104
|
+
- **TableGate now strips PostgreSQL dollar-quoted literals before scanning.** `SELECT $tag$FROM authorizations$tag$ …` would previously trigger a false match on the literal's contents; the gate now collapses `$…$…$…$` and `$tag$…$tag$` pairs to an empty string in the same pre-scan pass as SQL comments and single-quoted strings. Stripping order matters: dollar-quotes are removed before single-quotes so a stray apostrophe inside a dollar-quoted literal cannot fool the single-quote scanner.
|
|
105
|
+
- **One-time observability warning when the structured logger fails.** `Woods::Console::Server` previously swallowed every `StructuredLogger` exception silently — an operator misconfiguring the log sink would see no signal that Layer 1 rejections and Layer 2 hits were being lost. The first failure now prints a single `[woods-console]` warning to stderr naming the exception class and message; subsequent failures remain silent so a broken sink cannot flood the log. Behavior on a working logger is unchanged.
|
|
106
|
+
- **Credential scanner docstring uses an obvious placeholder.** The `@example` block in `Woods::Console::CredentialScanner` previously contained a Stripe-shaped value that matched its own pattern. Replaced with a clearly synthetic example so the doc cannot be mistaken for a real token during audits.
|
|
107
|
+
- **TableGate now catches blocked tables written as quoted schema-qualified identifiers.** `SELECT * FROM "public"."authorizations"` and `` SELECT * FROM `app`.`authorizations` `` previously slipped past Layer 1 because the regex captured only the first quoted segment (`"public"`) and the second (`"authorizations"`) was discarded. Both `LEAD_IDENT` and `JOIN_REFERENCE` now capture an optional quoted-schema prefix separately, and the joined `schema.table` form is passed to `#blocked?` so a configured entry of either `"authorizations"` (bare) or `"public.authorizations"` (qualified) matches as the operator expects. Closes a TableGate bypass on PostgreSQL and MySQL.
|
|
108
|
+
- **TableGate now recognizes MySQL `STRAIGHT_JOIN` as a join keyword.** `SELECT * FROM users STRAIGHT_JOIN authorizations …` previously slipped past Layer 1 because the `\bJOIN` boundary in `JOIN_REFERENCE` doesn't fire inside the `STRAIGHT_JOIN` token (the `_J` boundary is between two word characters). The join scanner now matches `\b(?:STRAIGHT_)?JOIN`, and `STRAIGHT_JOIN` is added to the `FROM_CLAUSE` terminator alternation so the FROM clause stops before it instead of swallowing the joined table. Closes a TableGate bypass on MySQL.
|
|
109
|
+
- **`blocked_tables` now treats schema-qualified entries symmetrically.** Configuring `blocked_tables: ["audit.authorizations"]` previously matched nothing because `#blocked?` schema-stripped *incoming* identifiers but never the configured set. Bare entries (`"authorizations"`) continue to behave as a wildcard across every schema; schema-qualified entries (`"audit.authorizations"`) now match only references that carry the same schema prefix — including quoted variants `"audit"."authorizations"` and `` `audit`.`authorizations` ``. A reference to `public.authorizations` is *not* blocked when only `audit.authorizations` is on the list, so operators can scope blocks to a specific schema.
|
|
110
|
+
- **TableGate now catches blocked tables behind PostgreSQL `FROM ONLY` and mixed-quoting schema prefixes.** `FROM ONLY authorizations` and `JOIN ONLY authorizations` previously evaded Layer 1 because the `ONLY` inheritance opt-out keyword sat between the join keyword and the table identifier — the regex captured `ONLY` as the table name and the actual table was discarded. Mixed-quoting forms `FROM public."authorizations"` and `` JOIN `app`."authorizations" `` slipped past for the same reason in reverse: the schema-prefix branch in `LEAD_IDENT` and `JOIN_REFERENCE` only recognized fully quoted (`"public"."authorizations"`) or fully bare (`public.authorizations`) prefixes, so a bare-then-quoted combination fell through to the table-only branch and the schema chunk hid the identifier. `JOIN_REFERENCE` now consumes an optional `(?:ONLY\s+)?` after the join keyword and adds a `(?<jschema_bare>\w+)` alternative; `LEAD_IDENT` strips a leading `ONLY ` via the new `ONLY_PREFIX` constant before matching and adds a `(?<schema_bare>\w+)` alternative. Closes two more TableGate bypasses on PostgreSQL and MySQL.
|
|
111
|
+
- **SafeContext statement timeout is now transaction-scoped on PostgreSQL.** The previous `SET statement_timeout = '5000ms'` was a session-level setting that survived the rolled-back transaction and bled into the next consumer of the pooled connection — a host app web request or background job picking up the same connection would inherit the Console MCP timeout. Switched to `SET LOCAL statement_timeout` so the value is scoped to the surrounding transaction and discarded on rollback (which `SafeContext` always does). MySQL's `SET max_execution_time` is left as-is — it already applies only to the next SELECT and doesn't need a `LOCAL` equivalent.
|
|
112
|
+
- **SafeContext now leases a fresh connection from the pool per request.** Construction-time connection capture (`SafeContext.new(connection: ActiveRecord::Base.connection)`) reused the same connection across every Console MCP request for the lifetime of the embedded server, defeating the point of `with_connection` and risking cross-request state leakage in multi-DB / sharded hosts. `SafeContext` now accepts an optional `pool:` kwarg; when set, every `#execute` call wraps the body in `pool.with_connection { |conn| … }` so the connection is leased for the duration of the rolled-back transaction and returned immediately after. The leased connection is published to `Thread.current[:woods_console_leased_connection]` so dispatch handlers (`EmbeddedExecutor#active_connection`) thread it through without re-leasing. The `connection:` form remains for tests and callers managing their own lifecycle. Resolves the `WOODS-CONSOLE-PERREQ-CONN` follow-up tracked alongside the Rails 8.0 deprecation fix below.
|
|
113
|
+
- **`Woods::Console::EvalGuard` — parse-time refusal layer for `console_eval`.** A new checked-method class that walks the normalized `Woods::Ast::Parser` tree of every proposed eval payload and raises `ForbiddenExpressionError` when the snippet reaches a credential or reflection escape. Hardcoded denials (no DSL) cover `Rails.application.credentials.*`, `Rails.application.secrets.*`, `Rails::Secrets.*`, `Devise.secret_key`, every `ENV` form (`ENV['x']`, `ENV.fetch`, bare `ENV`), reflection escapes (`eval`, `instance_eval`, `class_eval`, `module_eval`, `send`, `public_send`, `const_get`, `binding`), and credential-file reads (`File.read` / `IO.read` / `Pathname.new` whose argument source contains `master.key`, `credentials.yml.enc`, `credentials/`, `secrets.yml`, `secrets.yml.enc`). Refuses on parse failure too — a payload that won't parse can't be reasoned about. Adds `prism ~> 1.4` as a runtime dependency (stdlib on Ruby 3.3+, gem on 3.0–3.2) so the AST path is available across the support matrix.
|
|
114
|
+
- **`EvalGuard` is now wired into `console_eval` dispatch.** `Woods::Console::Server.define_eval` instantiates an `EvalGuard` (gated on a new `console_credential_defense_enabled` config flag, default `true`) and passes it to `Tools::Tier4.console_eval` as `guard:`. Forbidden payloads raise `ForbiddenExpressionError` *before* the bridge request is built, and `define_console_tool` now rescues that alongside `SqlValidationError` so the LLM sees a clean MCP error response (`error: true`, message in `text`) instead of a transport-level exception. Hosts can opt out by setting `config.console_credential_defense_enabled = false` in their Woods initializer if the parse-time layer ever interferes with a legitimate workflow — the bridge-side enforcement remains in place either way.
|
|
115
|
+
- **`Woods::Console::CredentialIndex` — boot-time index of the host app's actual secrets.** A new value object that walks `Rails.application.credentials.config` once at server boot, collects every string leaf with length ≥ 12, and holds them in a frozen `Set` plus a precompiled `Regexp.union` for one-pass `gsub` substitution. The pattern-based `CredentialScanner` only catches *known credential shapes*; this index closes the gap for hand-rolled HMAC secrets, Twilio auth tokens, third-party webhook signing keys, and any other value whose format the scanner doesn't recognize but whose exact contents Rails already knows. `match?(str)`, `redact(str)`, and `empty?` are the only public API surface. `.build(rails_app:)` catches `ActiveSupport::EncryptedConfiguration::MissingKeyError`, `ActiveSupport::EncryptedFile::MissingKeyError`, and `ActiveSupport::MessageEncryptor::InvalidMessage` *by class name* (no constant references) so apps without `config/master.key` still boot — the index just stays empty and the other defense layers continue to apply.
|
|
116
|
+
- **CredentialScanner ships with 13 additional Tier 1 gateway patterns.** Layer 2 now catches Stripe Connect account IDs (`acct_*` — PII per Stripe ToS), Klaviyo private API keys (bare `pk_<34 alnum>` keys, which previously slipped past the Stripe publishable regex and which grant full Klaviyo tenant access), Salesforce session/access tokens (`00D<15-org-id>!<base64 payload>`), LaunchDarkly SDK keys (`sdk-<UUID>`) and mobile keys (`mob-<UUID>`), HubSpot private app tokens (`pat-<region>-<UUID>`), Brevo API keys (`xkeysib-<64 hex>-<16 alnum>`) and SMTP keys (`xsmtpsib-…`), Kit (ConvertKit) API keys (`kit_*`), and three Twilio identifier shapes (`AC` account SIDs, `SK` API key SIDs, `VA` Verify service SIDs). Also extends the existing `shopify_access_token` alternation with `shprt_` (refresh) and `shpua_` (user-access) prefixes, both previously missed. Order is specific-before-generic — the Klaviyo regex sits after `stripe_publishable_key` so a real `pk_live_*` Stripe key still increments `:stripe_publishable_key` rather than falling through to `:klaviyo_private_key`. Total active patterns: 30 (was 17). Closes documented critical misses from the credential-leakage research brief.
|
|
117
|
+
- **`CredentialIndex` is now wired into `CredentialScanner` and the Console MCP server.** `CredentialScanner.new` accepts an optional `secret_index:` kwarg; when set, every scanned string runs through the index *before* the shape-pattern pass, and matched substrings are replaced with `[REDACTED:credential]` (a marker distinct from the pattern scanner's `[REDACTED]` so audit output can tell which layer caught a leak). Counts emit under a new `:credential_index` key alongside the per-pattern counters. `Woods::Console::Server.build_response_context` lazy-builds the index from `Rails.application` whenever `console_credential_defense_enabled` is true and a Rails application is reachable — non-Rails specs and CI environments without a `master.key` continue to work unchanged. Multi-DB / sharded hosts are explicitly out of scope: the index reflects only the credentials available to the Rails process that boots the Console MCP server. Use Layer 3 (`console_redacted_columns` / `console_redacted_key_values`) for credentials stored in a separate database.
|
|
118
|
+
|
|
119
|
+
### Changed
|
|
120
|
+
|
|
121
|
+
- **`console_credential_scanning_enabled` removed; `:all` sentinel in `console_disabled_scanner_patterns` takes its place.** The boolean flag added earlier on this unreleased branch is gone — one knob instead of two, no divergent ways to turn off the same layer. Hosts that want the scanner fully disabled now set `config.console_disabled_scanner_patterns = %i[all]`; per-pattern opt-outs continue to work as before (`%i[stripe_publishable_key]`, etc.). No migration needed for anyone on v1.2.0 — the flag never shipped. `Woods::Console::Server.build_response_context` skips constructing the scanner entirely when `:all` is present, so there's no per-response overhead from the disabled path.
|
|
122
|
+
- **`Woods::Console::ResponseContext` is now a Parameter Object + Null Object.** Previously a plain `Struct` data-bag, `ResponseContext` now exposes tell-don't-ask commands — `enforce!(args)`, `redact(result)`, `scan(value)`, `present?` — that bundle the three response-safety layers the Console MCP server threads through every tool call. `.build` returns a `NullResponseContext` singleton (same public surface, no-op bodies) when every layer is absent, so dispatch sites no longer need `ctx&.` safe-navigation chains. The column + EAV redaction logic moves to a new `Woods::Console::Redactor` module — pure, stateless, and unit-testable without constructing a server. `Server#send_to_bridge`, `#scan_for_credentials`, and the tool-dispatch block lose three `ctx&.<layer>` guards and ~80 lines of inline redaction plumbing. No behavior change; the server's `apply_redaction` class-level entry point is retained as a thin delegate to `Redactor.apply` so existing spec calls still work.
|
|
123
|
+
- **`Woods::Console::DispatchPipeline` owns the per-tool dispatch flow.** The integer coercion, Layer 1 gate enforcement, bridge/executor send, Layer 3 redaction, Layer 2 credential scanning, and MCP response rendering that previously lived inline in `Server.register` — wired together through four `method(:send_to_bridge)`-style captures closing over module-level methods — now live on a single per-tool object. Each `define_tool` block is a one-liner that calls `pipeline.call(args)`. Table-gate rejection and credential-scan-hit logging happen inside the pipeline against a pluggable `logger:` (the `StructuredLogger` in production). No behavior change; the refactor collapses the server's dispatch surface enough to make the pipeline a first-class thing to test — see `spec/console/dispatch_pipeline_spec.rb`.
|
|
124
|
+
- **Console `check_*!` / `validate_*!` / `request_confirmation` / `EvalGuard#check!` methods are now commands, not predicates.** Per Avdi Grimm's Confident Ruby guidance, bang methods that enforce a precondition should either complete or raise — they shouldn't return a truthy value callers are tempted to branch on. `TableGate#check_sql!`, `#check_table!`, `#check_joins!`, `#check_association!`, `SqlValidator#validate!`, `ModelValidator#validate_columns!`, `Confirmation#request_confirmation`, and `EvalGuard#check!` now return `nil` on success (unchanged raise behavior on failure). Removed the eight `# rubocop:disable Naming/PredicateMethod` pragmas that were silencing the cop. Specs that asserted `.to be(true)` on return values have been rewritten to use `expect { … }.not_to raise_error`. Internal callers already used these for side effects only — no behavioral change.
|
|
125
|
+
|
|
126
|
+
- **`CredentialScanner#walk` now scans Hash keys as well as values.** Previously, only Hash values were checked for credential shapes; keys were passed through untouched. In EAV row shapes where the key column itself carries the credential name (e.g. `{"sk_live_51..." => "some_value"}`), a credential-shaped key would slip through Layer 2. The scanner now coerces each key to String, runs the pattern and index pass, and restores the original key type — a Symbol key that carries a credential shape is emitted as a Symbol after redaction (e.g. `:"[REDACTED]"`); String keys stay Strings; non-String/non-Symbol keys (Integer, etc.) are untouched. Closes backlog item `credential-scanner-hash-keys-not-scanned` (PR #34 review low #5).
|
|
127
|
+
|
|
128
|
+
- **`CredentialIndex` documents restart-required behavior and exposes a rebuild hook.** The boot-time credential index has always been built once at process start and held for the lifetime of the MCP process — rotating Rails credentials without restarting left the old secrets in the index. This is now documented explicitly in `CredentialIndex.build`'s YARD doc and in `docs/CONSOLE_MCP_SETUP.md`. Two new capabilities close the gap:
|
|
129
|
+
- **`Woods::Console::Server.rebuild_credential_index(rails_app:)`** — rebuilds the index from fresh Rails credentials and hot-swaps it into the active scanner without restarting the process. Returns the new `CredentialIndex`, or `nil` when credential defense is disabled or no server has been built yet. Existing callers of `Server.build` / `Server.build_embedded` are unaffected (additive API, no required signature changes).
|
|
130
|
+
- **Boot-time rotation warning** — at server build time, Woods checks whether any credentials file (`config/credentials.yml.enc`, `config/credentials/<env>.yml.enc`) was modified after the process started. If so, it emits a `console.credential_index.stale` warn-level structured log line (file path, mtime, process start, and a hint). Opt out with `config.console_credential_rotation_warning = false`.
|
|
131
|
+
Closes backlog item `credential-index-rebuild-on-rotation` (PR #34 review low #8).
|
|
132
|
+
|
|
133
|
+
### Fixed
|
|
134
|
+
|
|
135
|
+
- **Console MCP middleware boots cleanly on Rails 8.0.** Replaced `ActiveRecord::Base.connection` with `ActiveRecord::Base.connection_pool.with_connection { |conn| … }` in `RackMiddleware#build_embedded_server` and the `EmbeddedExecutor#active_connection` fallback. `ActiveRecord::Base.connection` is deprecated in Rails 7.2 and removed in 8.0; `with_connection` is the supported cross-version API (works 6.1 → 8.x). Single-pool behavior is preserved — converting `SafeContext` to per-request connection acquisition (multi-DB / sharded hosts) is tracked separately as `WOODS-CONSOLE-PERREQ-CONN`.
|
|
136
|
+
- **Console renderer no longer collapses row data to `"N items"`.** `ConsoleResponseRenderer#render_hash` was summarizing every Array-valued key to a count, which silently elided the actual data from `console_sql`, `console_query`, `console_pluck`, `console_sample`, `console_recent`, and `console_find` responses — the MCP payload carried the rows, but the rendered text agents see only named the shape. Array values now recurse through `render_array` (Array<Hash> → Markdown table, scalar array → bullet list). When `rows` or `values` appears alongside a sibling `columns` array, the renderer emits a positional Markdown table using the columns as headers so sql / query / pluck output is scannable. Metadata-shaped responses (`count`, `aggregate`, `schema`, etc.) are unchanged.
|
|
137
|
+
- **MCP `search` tool no longer destroys regex patterns.** `index_reader` was wrapping queries in `Regexp.escape`, turning `User|Account` into a literal-only match. Now compiles raw with `IGNORECASE` and falls back to the escaped form only on `RegexpError`.
|
|
138
|
+
- **Auto-detect Ollama probe reliability.** The bootstrapper now probes `GET /api/tags` (the documented list-models endpoint) instead of `HEAD /`, which returned 404 on some Ollama versions. Any non-5xx response now marks Ollama as reachable.
|
|
139
|
+
- **`WOODS_SEARCH_MAX_SCAN=""` no longer disables phase-2 search.** Empty and whitespace-only values fall back to the default cap of 500 instead of coercing to 0.
|
|
140
|
+
- **Self-describing error for unsupported tools in embedded mode.** `console_sql` / `console_query` rejections now point at `embedded_read_tools: true` and the setup doc. Other Tier 2–4 rejections still point at the bridge architecture. Replaces the generic "Not yet implemented in embedded mode" message.
|
|
141
|
+
- **`console_embedded_read_tools` configuration flag.** Flows through `Woods.configure` to both the Rack middleware (Option C) and the stdio transports (Options A and B) — previously only the Rack mount accepted `embedded_read_tools:` directly, so stdio deployments had no way to unlock `console_sql` / `console_query` without patching the executable.
|
|
142
|
+
|
|
143
|
+
### Added
|
|
144
|
+
|
|
145
|
+
- **Ollama auto-detection in the MCP bootstrapper.** When no embedding provider is configured and no `OPENAI_API_KEY` is present, the bootstrapper probes `OLLAMA_BASE_URL` (default `http://localhost:11434`) and auto-enables semantic search if reachable. A one-line STDERR banner at startup reports the active provider.
|
|
146
|
+
- **`WOODS_SEARCH_MAX_SCAN` env var.** Caps phase-2 scan volume during `search`. Default 500.
|
|
147
|
+
- **Ransack-style scope predicates** for console data tools — `scope` hashes in `console_count`, `console_sample`, `console_pluck`, `console_aggregate`, `console_association_count`, and `console_recent` now accept suffixed keys (`_eq`, `_not_eq`, `_gt`, `_gteq`, `_lt`, `_lteq`, `_in`, `_not_in`, `_null`, `_not_null`, `_present`, `_blank`, `_matches`). Column names are validated before Arel predicates are built — no string interpolation, no SQL injection surface.
|
|
148
|
+
- **`count` function in `console_aggregate`** — the `column` argument is optional when `function: "count"`, making it easy to count rows matching a scope in a single tool call.
|
|
149
|
+
- **`embedded_read_tools` flag** on `Woods::Console::RackMiddleware` — opts `console_sql` and `console_query` into the embedded executor, with `SqlValidator` + `SafeContext` rollback + per-request connection pooling enforcing read-only safety.
|
|
150
|
+
- **MCP worktree setup guide** (`docs/MCP_WORKTREE_SETUP.md`) — multi-worktree MCP configuration for simultaneous Claude Code sessions across branches.
|
|
151
|
+
- **`pg_query` spike doc** (`docs/PG_QUERY_SPIKE.md`) — evaluation of an optional `pg_query`-backed AST identifier extractor alongside the existing regex `SqlTableScanner`. PostgreSQL hosts that opt into the gem would get AST-grade table extraction; MySQL and gem-less hosts continue on the regex path unchanged. Design-only — no implementation yet.
|
|
152
|
+
|
|
153
|
+
### Changed
|
|
154
|
+
|
|
155
|
+
- **MCP `search` response shape.** `search` now returns `{ results: [...], note?: String, partial?: Boolean }` instead of a bare `Array`. `note` flags broad patterns (>50% of a directory matched). `partial: true` indicates the phase-2 scan cap was reached — set `WOODS_SEARCH_MAX_SCAN` to raise it.
|
|
156
|
+
- **MCP `search` and `codebase_retrieve` descriptions** rewritten in Figma-MCP style (purpose → example → returns → when to use alternatives → gotchas). Fallback message for `codebase_retrieve` now includes exact fix commands.
|
|
157
|
+
- **Scope tool descriptions** updated to reference the supported predicate suffixes, so agents discover the richer filtering surface without reading the cookbook.
|
|
158
|
+
- **Tool descriptions for `console_sql` and `console_query`** rewritten in Figma-MCP style (purpose → safety → requirement → alternatives) so agents understand when to reach for each and how to enable them.
|
|
159
|
+
- **Docs:** `docs/CONSOLE_MCP_SETUP.md` now covers `embedded_read_tools: true` as an alternative to switching to the bridge architecture, and the Troubleshooting entry for Tier 2–4 tools distinguishes the two read tools from everything else.
|
|
160
|
+
|
|
161
|
+
### Documentation
|
|
162
|
+
|
|
163
|
+
- **MySQL vector-pairing constraint surfaced where readers actually hit it** (#83 docs subset, PR #122). `docs/BACKEND_MATRIX.md` gains a "Database compatibility" subsection at the top of `## Vector Stores` with a MySQL-first table mapping primary database → supported vector stores, and a one-paragraph explanation of why MySQL stacks must pair with Qdrant / Pinecone / FAISS. `docs/TROUBLESHOOTING.md` gains "Configuring vector search on MySQL" under Embedding Problems, showing the MySQL + Qdrant initializer first and the Postgres + pgvector equivalent below for contrast. The `:mysql_qdrant` preset and dedicated integration spec from #83 remain open as post-1.3.0 follow-ups.
|
|
164
|
+
|
|
165
|
+
### Dependencies
|
|
166
|
+
|
|
167
|
+
- **Runtime gems added.**
|
|
168
|
+
- `msgpack` (`>= 1.5`) — required for the `WMD1` streaming metadata snapshot format introduced by the Persistence & Bootstrap arc. Only loaded on the Snapshotter's metadata path; pgvector / Qdrant users don't pay the load cost.
|
|
169
|
+
- `prism` (`~> 1.4`) — backs `Woods::Console::EvalGuard`'s AST inspection. Ships in stdlib on Ruby 3.3+; the gem dependency guarantees the Prism path on 3.0–3.2 so the guard's behavior stays consistent across the support matrix.
|
|
170
|
+
- **Runtime gem version constraint tightened.**
|
|
171
|
+
- `mcp`: `~> 0.6` → `>= 0.9.2, < 1.0` (CVE-2026-33946 HIGH; see `### Security` above).
|
|
172
|
+
- **GitHub Actions bumps.**
|
|
173
|
+
- `ruby/setup-ruby` 1.295.0 → 1.307.0 (PRs #91 + #121).
|
|
174
|
+
- `softprops/action-gh-release` 2.2.2 → 3.0.0 (PR #21 — Node 24 runtime; GitHub-hosted runners support this today).
|
|
175
|
+
- `rubygems/configure-rubygems-credentials` 1.0.0 → 2.0.0 (PR #120 — internal-only changes, no `with:` keys affected).
|
|
176
|
+
|
|
8
177
|
## [1.2.0] - 2026-03-27
|
|
9
178
|
|
|
10
179
|
### Added
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@ The AI isn't bad — it just can't see what Rails is doing. Your 40-line model f
|
|
|
22
22
|
|
|
23
23
|
**Woods fixes this by running inside Rails and extracting what's actually there.**
|
|
24
24
|
|
|
25
|
-
See [Why Woods?](docs/
|
|
25
|
+
See [Why Woods?](docs/WHY_WOODS.md) for detailed before/after examples.
|
|
26
26
|
|
|
27
27
|
---
|
|
28
28
|
|
|
@@ -111,6 +111,8 @@ Controller source gets a route map prepended showing the real HTTP verb + path +
|
|
|
111
111
|
|
|
112
112
|
34 extractors build a bidirectional graph: what each unit depends on, and what depends on it. Change a concern and trace every model it touches. Refactor a service and see every controller that calls it. PageRank scoring identifies the most important nodes in your codebase.
|
|
113
113
|
|
|
114
|
+
Navigation edges (`link_to`, `redirect_to`, `form_action`) trace UI user journeys through the graph — filter with the `via` parameter on `dependencies`/`dependents` tools to isolate navigation paths from code references.
|
|
115
|
+
|
|
114
116
|
### Callback Side-Effect Analysis
|
|
115
117
|
|
|
116
118
|
`CallbackAnalyzer` detects what actually happens inside callbacks — which columns get written, which jobs get enqueued, which services get called, which mailers fire. This is the #1 source of unexpected bugs in Rails, and the #1 thing AI tools get wrong.
|
|
@@ -214,6 +216,16 @@ Use the MCP `dependencies` tool to trace what a service triggers:
|
|
|
214
216
|
|
|
215
217
|
Returns all job units reachable from `CheckoutService` within 2 hops — including jobs triggered indirectly via model callbacks (e.g., `CheckoutService` → `Order` → `OrderConfirmationJob`).
|
|
216
218
|
|
|
219
|
+
### Tracing UI Navigation
|
|
220
|
+
|
|
221
|
+
Use the MCP `dependents` tool with `via` filtering to find what links to a controller:
|
|
222
|
+
|
|
223
|
+
```json
|
|
224
|
+
{ "tool": "dependents", "params": { "identifier": "OrdersController", "depth": 1, "via": ["link_to", "form_action"] } }
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
Returns view templates that navigate to `OrdersController` via `link_to` helpers or form submissions — isolating UI navigation edges from code references and other relationship types.
|
|
228
|
+
|
|
217
229
|
### Runtime-Generated Method Detection
|
|
218
230
|
|
|
219
231
|
Because Woods runs inside the booted Rails process, it captures every method Rails generates dynamically — enum predicates, association builders, attribute accessors, and scope methods that static analysis tools cannot see:
|
|
@@ -239,7 +251,7 @@ Woods ships two MCP servers. Most users only need the **Index Server**.
|
|
|
239
251
|
|
|
240
252
|
### Index Server — Reads Pre-Extracted Data (No Rails Required)
|
|
241
253
|
|
|
242
|
-
|
|
254
|
+
29 tools for code lookup, dependency traversal, semantic search, graph analysis, and more (14 always-on + 15 that register based on wiring: 5 operator / 4 feedback / 4 snapshot / 1 session-trace / 1 Notion). Reads static JSON from disk — fast, no Rails boot needed.
|
|
243
255
|
|
|
244
256
|
**Claude Code** — add to `.mcp.json` in your project root:
|
|
245
257
|
|
|
@@ -298,7 +310,7 @@ See [MCP Servers](docs/MCP_SERVERS.md) for the full tool catalog and [MCP Tool C
|
|
|
298
310
|
| **Models** | Schema, associations, validations, scopes, callbacks, enums | Concerns inlined, callback side-effects analyzed |
|
|
299
311
|
| **Controllers** | Actions, filters, permitted params, response formats | Route map prepended, per-action filter chains |
|
|
300
312
|
| **Services & Jobs** | Entry points, dependencies, retry config, queue names | Includes services, interactors, operations, commands |
|
|
301
|
-
| **Views & Components** | ERB templates, Phlex components, ViewComponents | Partial references, slot definitions, prop interfaces |
|
|
313
|
+
| **Views & Components** | ERB templates, Phlex components, ViewComponents | Partial references, slot definitions, prop interfaces, navigation edges (link_to, form_action) |
|
|
302
314
|
| **Routes & Middleware** | Full route table, middleware stack order | Constraint resolution, engine mount points |
|
|
303
315
|
| **GraphQL** | Types, mutations, resolvers, fields | Relay connections, argument definitions |
|
|
304
316
|
| **Background Work** | Jobs, mailers, Action Cable channels, scheduled tasks | Queue configuration, retry policies |
|
|
@@ -388,7 +400,7 @@ Woods.configure do |config|
|
|
|
388
400
|
config.embedding_options = { api_key: ENV['OPENAI_API_KEY'] }
|
|
389
401
|
else
|
|
390
402
|
config.embedding_provider = :ollama
|
|
391
|
-
config.embedding_options = {
|
|
403
|
+
config.embedding_options = { model: 'nomic-embed-text', host: 'http://localhost:11434' }
|
|
392
404
|
end
|
|
393
405
|
end
|
|
394
406
|
```
|
|
@@ -496,7 +508,7 @@ tmp/woods/
|
|
|
496
508
|
│ │
|
|
497
509
|
│ ┌────────────┐ ┌─────────────┐ ┌──────────────────────┐ │
|
|
498
510
|
│ │ Extract │───>│ Resolve │───>│ Write JSON │ │
|
|
499
|
-
│ │
|
|
511
|
+
│ │ 34 types │ │ graph + │ │ per unit │ │
|
|
500
512
|
│ │ │ │ git data │ │ │ │
|
|
501
513
|
│ └────────────┘ └─────────────┘ └──────────────────────┘ │
|
|
502
514
|
└──────────────────────────────────────────────────────────────────┘
|
|
@@ -508,7 +520,7 @@ tmp/woods/
|
|
|
508
520
|
│ │
|
|
509
521
|
│ ┌────────────┐ ┌─────────────┐ ┌──────────────────────┐ │
|
|
510
522
|
│ │ Embed │───>│ Vector Store│ │ MCP Index Server │ │
|
|
511
|
-
│ │ OpenAI / │ │ pgvector / │ │
|
|
523
|
+
│ │ OpenAI / │ │ pgvector / │ │ 29 tools │ │
|
|
512
524
|
│ │ Ollama │ │ Qdrant │ │ No Rails required │ │
|
|
513
525
|
│ └────────────┘ └─────────────┘ └──────────────────────┘ │
|
|
514
526
|
│ │
|
|
@@ -551,7 +563,7 @@ See [Architecture](docs/ARCHITECTURE.md) for the deep dive — extraction phases
|
|
|
551
563
|
| [Extractor Reference](docs/EXTRACTOR_REFERENCE.md) | Deep dive | What each of the 34 extractors captures |
|
|
552
564
|
| [Architecture](docs/ARCHITECTURE.md) | Contributors | Pipeline stages, graph internals, retrieval |
|
|
553
565
|
| [Backend Matrix](docs/BACKEND_MATRIX.md) | Infrastructure | Supported database, vector, and embedding combos |
|
|
554
|
-
| [Why Woods?](docs/
|
|
566
|
+
| [Why Woods?](docs/WHY_WOODS.md) | Evaluation | Detailed before/after comparisons |
|
|
555
567
|
|
|
556
568
|
---
|
|
557
569
|
|
|
@@ -566,7 +578,7 @@ Works with MySQL, PostgreSQL, and SQLite. No additional infrastructure required
|
|
|
566
578
|
|
|
567
579
|
```bash
|
|
568
580
|
bin/setup # Install dependencies
|
|
569
|
-
bundle exec rake spec # Run tests (~
|
|
581
|
+
bundle exec rake spec # Run tests (~3300 examples)
|
|
570
582
|
bundle exec rubocop # Lint
|
|
571
583
|
```
|
|
572
584
|
|
data/exe/woods-console
CHANGED
|
@@ -12,6 +12,11 @@
|
|
|
12
12
|
# The rake task captures stdout before Rails boots and passes the fd via
|
|
13
13
|
# $woods_protocol_out. When run via rails runner, this script
|
|
14
14
|
# captures stdout itself to keep MCP protocol clean.
|
|
15
|
+
#
|
|
16
|
+
# Disabled by default. Set Woods.configuration.console_mcp_enabled = true
|
|
17
|
+
# in the host application's Woods initializer to enable. Layer 1 blocked
|
|
18
|
+
# tables, Layer 2 credential scanning, and Layer 3 column/EAV redaction are
|
|
19
|
+
# all configured through Woods::Configuration — see docs/CONSOLE_MCP_SETUP.md.
|
|
15
20
|
|
|
16
21
|
# Check if the rake task already captured stdout for us.
|
|
17
22
|
protocol_out = $woods_protocol_out # rubocop:disable Style/GlobalVars
|
|
@@ -24,6 +29,13 @@ end
|
|
|
24
29
|
|
|
25
30
|
require 'woods/console/server'
|
|
26
31
|
|
|
32
|
+
unless Woods.configuration.console_mcp_enabled
|
|
33
|
+
warn 'Woods Console MCP is disabled. Set ' \
|
|
34
|
+
'Woods.configuration.console_mcp_enabled = true to enable. ' \
|
|
35
|
+
'See docs/CONSOLE_MCP_SETUP.md for the full security posture.'
|
|
36
|
+
exit 1
|
|
37
|
+
end
|
|
38
|
+
|
|
27
39
|
# Ensure all application models are loaded for the registry.
|
|
28
40
|
Rails.application.eager_load!
|
|
29
41
|
|
|
@@ -36,19 +48,52 @@ rescue StandardError
|
|
|
36
48
|
next
|
|
37
49
|
end
|
|
38
50
|
|
|
51
|
+
model_tables = ActiveRecord::Base.descendants.each_with_object({}) do |model, hash|
|
|
52
|
+
next if model.abstract_class?
|
|
53
|
+
next unless model.table_exists?
|
|
54
|
+
|
|
55
|
+
hash[model.name] = model.table_name
|
|
56
|
+
rescue StandardError
|
|
57
|
+
next
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Association-name → target-table map so TableGate can resolve `joins:` and
|
|
61
|
+
# `association:` arguments before the executor loads data. Polymorphic
|
|
62
|
+
# associations and anything that raises during reflection are skipped.
|
|
63
|
+
model_reflections = ActiveRecord::Base.descendants.each_with_object({}) do |model, hash|
|
|
64
|
+
next if model.abstract_class?
|
|
65
|
+
next unless model.table_exists?
|
|
66
|
+
|
|
67
|
+
assoc_map = {}
|
|
68
|
+
model.reflect_on_all_associations.each do |reflection|
|
|
69
|
+
next if reflection.polymorphic?
|
|
70
|
+
|
|
71
|
+
klass = reflection.klass
|
|
72
|
+
assoc_map[reflection.name.to_s] = klass.table_name if klass.respond_to?(:table_name)
|
|
73
|
+
rescue StandardError
|
|
74
|
+
next
|
|
75
|
+
end
|
|
76
|
+
hash[model.name] = assoc_map
|
|
77
|
+
rescue StandardError
|
|
78
|
+
next
|
|
79
|
+
end
|
|
80
|
+
|
|
39
81
|
validator = Woods::Console::ModelValidator.new(registry: registry)
|
|
40
82
|
safe_context = Woods::Console::SafeContext.new(connection: ActiveRecord::Base.connection)
|
|
41
83
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
end
|
|
84
|
+
config = Woods.configuration
|
|
85
|
+
redacted_columns = Array(config.console_redacted_columns)
|
|
86
|
+
redacted_key_values = Array(config.console_redacted_key_values)
|
|
87
|
+
read_tools_enabled = config.console_embedded_read_tools
|
|
47
88
|
|
|
48
89
|
server = Woods::Console::Server.build_embedded(
|
|
49
90
|
model_validator: validator,
|
|
50
91
|
safe_context: safe_context,
|
|
51
|
-
redacted_columns: redacted_columns
|
|
92
|
+
redacted_columns: redacted_columns,
|
|
93
|
+
redacted_key_values: redacted_key_values,
|
|
94
|
+
read_tools_enabled: read_tools_enabled,
|
|
95
|
+
model_tables: model_tables,
|
|
96
|
+
model_reflections: model_reflections
|
|
52
97
|
)
|
|
53
98
|
|
|
54
99
|
# Restore the protocol output for MCP transport.
|
data/exe/woods-console-mcp
CHANGED
|
@@ -10,17 +10,37 @@
|
|
|
10
10
|
# Connects to a Rails application via a bridge process (Docker exec, direct,
|
|
11
11
|
# or SSH) and exposes read-only query tools via the Model Context Protocol
|
|
12
12
|
# (stdio transport).
|
|
13
|
+
#
|
|
14
|
+
# Disabled by default. Set Woods.configuration.console_mcp_enabled = true
|
|
15
|
+
# in the host application's Woods initializer to enable. Layer 1 blocked
|
|
16
|
+
# tables, Layer 2 credential scanning, and Layer 3 column/EAV redaction are
|
|
17
|
+
# all configured through Woods::Configuration — see docs/CONSOLE_MCP_SETUP.md.
|
|
18
|
+
|
|
19
|
+
# Suppress json-schema MultiJSON deprecation notice that would otherwise pollute
|
|
20
|
+
# stderr during MCP stdio transport. The deprecation fires the first time
|
|
21
|
+
# +json-schema+ is required, so this block must run before any Woods require that could
|
|
22
|
+
# transitively pull it in. MCP clients parsing stderr can't tolerate banners.
|
|
23
|
+
begin
|
|
24
|
+
require 'json-schema'
|
|
25
|
+
JSON::Validator.use_multi_json = false if defined?(JSON::Validator) && JSON::Validator.respond_to?(:use_multi_json=)
|
|
26
|
+
rescue LoadError
|
|
27
|
+
# json-schema isn't in every bundle.
|
|
28
|
+
end
|
|
13
29
|
|
|
14
30
|
require 'yaml'
|
|
31
|
+
require 'woods'
|
|
15
32
|
require_relative '../lib/woods/console/server'
|
|
16
33
|
|
|
34
|
+
unless Woods.configuration.console_mcp_enabled
|
|
35
|
+
warn 'Woods Console MCP is disabled. Set ' \
|
|
36
|
+
'Woods.configuration.console_mcp_enabled = true to enable. ' \
|
|
37
|
+
'See docs/CONSOLE_MCP_SETUP.md for the full security posture.'
|
|
38
|
+
exit 1
|
|
39
|
+
end
|
|
40
|
+
|
|
17
41
|
config_path = ENV.fetch('WOODS_CONSOLE_CONFIG', File.expand_path('~/.woods/console.yml'))
|
|
18
42
|
config = File.exist?(config_path) ? YAML.safe_load_file(config_path) : {}
|
|
19
43
|
|
|
20
|
-
# Suppress json-schema MultiJSON deprecation notice that would pollute stderr
|
|
21
|
-
# during MCP stdio transport. The notice fires when multi_json is in the bundle.
|
|
22
|
-
JSON::Validator.use_multi_json = false if defined?(JSON::Validator) && JSON::Validator.respond_to?(:use_multi_json=)
|
|
23
|
-
|
|
24
44
|
server = Woods::Console::Server.build(config: config)
|
|
25
45
|
transport = MCP::Server::Transports::StdioTransport.new(server)
|
|
26
46
|
transport.open
|
data/exe/woods-mcp
CHANGED
|
@@ -11,6 +11,20 @@
|
|
|
11
11
|
# them via the Model Context Protocol (stdio transport).
|
|
12
12
|
# Does NOT require Rails — only reads pre-extracted data.
|
|
13
13
|
|
|
14
|
+
# Suppress json-schema MultiJSON deprecation notice that would otherwise pollute
|
|
15
|
+
# stderr during MCP stdio transport. The deprecation fires the first time
|
|
16
|
+
# +json-schema+ is required — earlier revisions set the flag *after* the
|
|
17
|
+
# +require_relative '../lib/woods'+ chain, which lost the race. Any MCP client
|
|
18
|
+
# that parses stderr would trip on the banner. Require +json-schema+ explicitly,
|
|
19
|
+
# set the flag, *then* load Woods.
|
|
20
|
+
begin
|
|
21
|
+
require 'json-schema'
|
|
22
|
+
JSON::Validator.use_multi_json = false if defined?(JSON::Validator) && JSON::Validator.respond_to?(:use_multi_json=)
|
|
23
|
+
rescue LoadError
|
|
24
|
+
# json-schema isn't in every bundle — when it is absent, nothing emits the
|
|
25
|
+
# deprecation notice, so there is nothing to suppress.
|
|
26
|
+
end
|
|
27
|
+
|
|
14
28
|
require_relative '../lib/woods'
|
|
15
29
|
require_relative '../lib/woods/dependency_graph'
|
|
16
30
|
require_relative '../lib/woods/graph_analyzer'
|
|
@@ -19,15 +33,24 @@ require_relative '../lib/woods/mcp/bootstrapper'
|
|
|
19
33
|
require_relative '../lib/woods/embedding/text_preparer'
|
|
20
34
|
require_relative '../lib/woods/embedding/indexer'
|
|
21
35
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
36
|
+
begin
|
|
37
|
+
index_dir = Woods::MCP::Bootstrapper.resolve_index_dir(ARGV)
|
|
38
|
+
retriever, bootstrap_state = Woods::MCP::Bootstrapper.build_retriever(index_dir: index_dir)
|
|
39
|
+
snapshot_store = Woods::MCP::Bootstrapper.build_snapshot_store(index_dir)
|
|
40
|
+
rescue Woods::MCP::BootstrapError => e
|
|
41
|
+
warn "[woods-mcp] #{e.class.name.split('::').last}: #{e.message}"
|
|
42
|
+
warn "[woods-mcp] details: #{e.details.inspect}" if e.respond_to?(:details) && !e.details.empty?
|
|
43
|
+
exit 2
|
|
44
|
+
end
|
|
25
45
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
46
|
+
retriever_reloader = lambda do
|
|
47
|
+
Woods::MCP::Bootstrapper.reload_stores!(retriever, index_dir: index_dir)
|
|
48
|
+
end
|
|
29
49
|
|
|
30
|
-
server = Woods::MCP::Server.build(
|
|
50
|
+
server = Woods::MCP::Server.build(
|
|
51
|
+
index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store,
|
|
52
|
+
bootstrap_state: bootstrap_state, retriever_reloader: retriever_reloader
|
|
53
|
+
)
|
|
31
54
|
|
|
32
55
|
# Pin protocol version for broad client compatibility (Claude Code, Cursor, etc.)
|
|
33
56
|
if ENV['MCP_PROTOCOL_VERSION']
|
data/exe/woods-mcp-http
CHANGED
|
@@ -11,27 +11,68 @@
|
|
|
11
11
|
# them via the Model Context Protocol (Streamable HTTP transport).
|
|
12
12
|
# Requires the `rackup` gem and a Rack-compatible server (e.g., puma).
|
|
13
13
|
|
|
14
|
+
# Suppress json-schema MultiJSON deprecation before any require chain that
|
|
15
|
+
# could transitively load json-schema. See exe/woods-mcp for the full rationale.
|
|
16
|
+
begin
|
|
17
|
+
require 'json-schema'
|
|
18
|
+
JSON::Validator.use_multi_json = false if defined?(JSON::Validator) && JSON::Validator.respond_to?(:use_multi_json=)
|
|
19
|
+
rescue LoadError
|
|
20
|
+
# json-schema isn't in every bundle.
|
|
21
|
+
end
|
|
22
|
+
|
|
14
23
|
require 'rackup'
|
|
15
24
|
require_relative '../lib/woods'
|
|
16
25
|
require_relative '../lib/woods/dependency_graph'
|
|
17
26
|
require_relative '../lib/woods/graph_analyzer'
|
|
18
27
|
require_relative '../lib/woods/mcp/server'
|
|
19
28
|
require_relative '../lib/woods/mcp/bootstrapper'
|
|
29
|
+
require_relative '../lib/woods/mcp/bearer_auth'
|
|
30
|
+
require_relative '../lib/woods/mcp/origin_guard'
|
|
20
31
|
require_relative '../lib/woods/embedding/text_preparer'
|
|
21
32
|
require_relative '../lib/woods/embedding/indexer'
|
|
22
33
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
34
|
+
begin
|
|
35
|
+
index_dir = Woods::MCP::Bootstrapper.resolve_index_dir(ARGV)
|
|
36
|
+
retriever, bootstrap_state = Woods::MCP::Bootstrapper.build_retriever(index_dir: index_dir)
|
|
37
|
+
snapshot_store = Woods::MCP::Bootstrapper.build_snapshot_store(index_dir)
|
|
38
|
+
rescue Woods::MCP::BootstrapError => e
|
|
39
|
+
warn "[woods-mcp-http] #{e.class.name.split('::').last}: #{e.message}"
|
|
40
|
+
warn "[woods-mcp-http] details: #{e.details.inspect}" if e.respond_to?(:details) && !e.details.empty?
|
|
41
|
+
exit 2
|
|
42
|
+
end
|
|
26
43
|
|
|
27
44
|
port = (ENV['PORT'] || 9292).to_i
|
|
28
45
|
host = ENV['HOST'] || 'localhost'
|
|
46
|
+
token = ENV.fetch('WOODS_MCP_HTTP_TOKEN', nil)
|
|
47
|
+
token = nil if token && token.empty?
|
|
48
|
+
|
|
49
|
+
loopback = %w[localhost 127.0.0.1 ::1].include?(host)
|
|
50
|
+
if !loopback && token.nil?
|
|
51
|
+
abort "[woods-mcp-http] Refusing to bind #{host} without WOODS_MCP_HTTP_TOKEN. " \
|
|
52
|
+
'Either set HOST=localhost, set WOODS_MCP_HTTP_TOKEN, or see docs/MCP_HTTP_TRANSPORT.md#security.'
|
|
53
|
+
end
|
|
54
|
+
if loopback && token.nil?
|
|
55
|
+
warn '[woods-mcp-http] WARNING: running on loopback without a token; local processes can reach this server.'
|
|
56
|
+
end
|
|
29
57
|
|
|
30
|
-
|
|
58
|
+
retriever_reloader = lambda do
|
|
59
|
+
Woods::MCP::Bootstrapper.reload_stores!(retriever, index_dir: index_dir)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
server = Woods::MCP::Server.build(
|
|
63
|
+
index_dir: index_dir, retriever: retriever, snapshot_store: snapshot_store,
|
|
64
|
+
bootstrap_state: bootstrap_state, retriever_reloader: retriever_reloader
|
|
65
|
+
)
|
|
31
66
|
transport = MCP::Server::Transports::StreamableHTTPTransport.new(server)
|
|
32
67
|
server.transport = transport
|
|
33
68
|
|
|
34
|
-
|
|
69
|
+
allowed_origins = ENV.fetch('WOODS_MCP_HTTP_ALLOWED_ORIGINS', '').split(',').map(&:strip).reject(&:empty?)
|
|
70
|
+
|
|
71
|
+
inner = proc { |env| transport.handle_request(Rack::Request.new(env)) }
|
|
72
|
+
app = token ? Woods::MCP::BearerAuth.new(inner, token: token) : inner
|
|
73
|
+
app = Woods::MCP::OriginGuard.new(app, allowed_origins: allowed_origins)
|
|
35
74
|
|
|
36
|
-
|
|
75
|
+
origin_summary = allowed_origins.empty? ? 'loopback' : allowed_origins.join(',')
|
|
76
|
+
auth_mode = token ? 'bearer' : 'none'
|
|
77
|
+
warn "Woods MCP HTTP server starting on http://#{host}:#{port} (auth: #{auth_mode}, origins: #{origin_summary})"
|
|
37
78
|
Rackup::Handler.default.run(app, Port: port, Host: host)
|