woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +169 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +15 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +3 -4
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +737 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +1 -1
  102. data/lib/woods/unblocked/document_builder.rb +35 -10
  103. data/lib/woods/unblocked/exporter.rb +1 -1
  104. data/lib/woods/util/host_guard.rb +61 -0
  105. data/lib/woods/version.rb +1 -1
  106. data/lib/woods.rb +126 -6
  107. metadata +69 -4
@@ -29,28 +29,76 @@ module Woods
29
29
  end
30
30
  end
31
31
 
32
+ # Class-like unit types that MethodChunker handles — anything
33
+ # shaped as "class or module with public methods, maybe privates,
34
+ # maybe callbacks/filters." Extend this list when new extractors
35
+ # produce units of similar structure.
36
+ METHOD_CHUNKABLE_TYPES = %i[
37
+ service job mailer concern policy pundit_policy serializer
38
+ decorator presenter interactor query_object value_object
39
+ component view_component action_cable_channel channel
40
+ graphql_resolver graphql_type helper validator api_client poro
41
+ manager configuration
42
+ ].freeze
43
+
32
44
  # Splits ExtractedUnits into semantic chunks based on unit type.
33
45
  #
34
46
  # Models are split by: summary, associations, validations, callbacks,
35
47
  # scopes, methods. Controllers are split by: summary (filters), per-action.
36
- # Other types use whole-unit or method-level splitting based on size.
48
+ # Class-like types (services, jobs, mailers, concerns, policies, …) split
49
+ # by summary + per-public-method + bundled privates via MethodChunker.
50
+ # Other types stay whole.
51
+ #
52
+ # Any chunk that still exceeds `max_chars` after semantic splitting is
53
+ # sliced into line-balanced sub-chunks so no single chunk is ever larger
54
+ # than the embedding provider's input budget.
37
55
  #
38
56
  # Units below the token threshold are returned as a single :whole chunk.
39
57
  #
40
58
  # @example
41
- # chunker = SemanticChunker.new(threshold: 200)
59
+ # chunker = SemanticChunker.new(threshold: 200, max_chars: 20_480)
42
60
  # chunks = chunker.chunk(extracted_unit)
43
61
  # chunks.map(&:chunk_type) # => [:summary, :associations, :validations, :methods]
44
62
  #
45
- class SemanticChunker
63
+ class SemanticChunker # rubocop:disable Metrics/ClassLength
46
64
  # Default token threshold below which units stay whole.
47
65
  DEFAULT_THRESHOLD = 200
48
66
 
67
+ # Minimum chars-per-slice budget during tokenizer-driven recursive
68
+ # splitting. Prevents unbounded halving on pathological content
69
+ # (e.g., a single 2000-char regex line that tokenizes into 6000
70
+ # tokens because BERT WordPiece fragments every `\w+` boundary).
71
+ MIN_SLICE_CHARS = 256
72
+ private_constant :MIN_SLICE_CHARS
73
+
49
74
  # @param threshold [Integer] Token count threshold for chunking
50
- def initialize(threshold: DEFAULT_THRESHOLD)
75
+ # @param max_chars [Integer, nil] Hard character ceiling for any single
76
+ # chunk. When set, any chunk larger than this is sliced into
77
+ # line-balanced sub-chunks. `nil` disables the safety net.
78
+ # @param token_counter [Woods::Embedding::TokenCounter, nil] Optional
79
+ # exact-token counter. When both this and `max_tokens` are set,
80
+ # oversize detection uses the real tokenizer rather than the
81
+ # char-length estimate, and post-slice verification recursively
82
+ # re-splits any piece that still exceeds `max_tokens`.
83
+ # @param max_tokens [Integer, nil] Token budget used with
84
+ # `token_counter` for the authoritative oversize check.
85
+ def initialize(threshold: DEFAULT_THRESHOLD, max_chars: nil,
86
+ token_counter: nil, max_tokens: nil)
51
87
  @threshold = threshold
88
+ @max_chars = max_chars
89
+ @token_counter = token_counter
90
+ @max_tokens = max_tokens
52
91
  end
53
92
 
93
+ # @return [Woods::Embedding::TokenCounter, nil]
94
+ attr_reader :token_counter
95
+
96
+ # @return [Integer, nil]
97
+ attr_reader :max_tokens
98
+
99
+ # @return [Integer, nil]
100
+ attr_reader :max_chars
101
+
54
102
  # Split an ExtractedUnit into semantic chunks.
55
103
  #
56
104
  # @param unit [ExtractedUnit] The unit to chunk
@@ -59,15 +107,76 @@ module Woods
59
107
  return [] if unit.source_code.nil? || unit.source_code.strip.empty?
60
108
  return [build_whole_chunk(unit)] if unit.estimated_tokens <= @threshold
61
109
 
110
+ enforce_char_limit(chunks_for(unit), unit)
111
+ end
112
+
113
+ # Enforce {@max_chars} on a unit's already-populated `chunks` array
114
+ # (hashes produced by extraction or a prior chunking pass). Oversize
115
+ # chunks are split into line-balanced siblings with `_part_N` chunk
116
+ # types; small chunks pass through unchanged. No-op when `@max_chars`
117
+ # is unset or `unit.chunks` is empty.
118
+ #
119
+ # Exists so the Indexer can apply the same ceiling to pre-chunked
120
+ # units (e.g. `rails_source`) that extraction already sliced — the
121
+ # extractor's own chunker is unaware of the embedding provider's
122
+ # budget and can emit chunks larger than the ceiling we'd pick here.
123
+ #
124
+ # @param unit [ExtractedUnit]
125
+ # @return [void]
126
+ def enforce_chunk_limits!(unit)
127
+ return unless enforcement_active?
128
+ return if unit.chunks.nil? || unit.chunks.empty?
129
+
130
+ unit.chunks = unit.chunks.flat_map { |chunk| split_oversize_hash_chunk(chunk) }
131
+ end
132
+
133
+ private
134
+
135
+ # True when either the char ceiling or the token-based verifier is
136
+ # wired up.
137
+ def enforcement_active?
138
+ @max_chars || (@token_counter && @max_tokens)
139
+ end
140
+
141
+ # Token-authoritative oversize check, falling back to char length.
142
+ def oversize?(content)
143
+ return false if content.nil? || content.empty?
144
+ return @token_counter.count(content) > @max_tokens if tokenizer_active?
145
+
146
+ @max_chars && content.length > @max_chars
147
+ end
148
+
149
+ def tokenizer_active?
150
+ @token_counter && @max_tokens
151
+ end
152
+
153
+ # @param chunk [Hash] a unit-chunk hash (symbol or string keys)
154
+ # @return [Array<Hash>]
155
+ def split_oversize_hash_chunk(chunk)
156
+ content = chunk[:content] || chunk['content']
157
+ return [chunk] if content.nil? || !oversize?(content)
158
+
159
+ chunk_type = chunk[:chunk_type] || chunk['chunk_type'] || :whole
160
+ verified_slices(content).each_with_index.map do |slice, idx|
161
+ { content: slice, chunk_type: :"#{chunk_type}_part_#{idx}" }
162
+ end
163
+ end
164
+
165
+ # Dispatch to the type-appropriate chunker.
166
+ #
167
+ # @param unit [ExtractedUnit]
168
+ # @return [Array<Chunk>]
169
+ def chunks_for(unit)
62
170
  case unit.type
63
171
  when :model then ModelChunker.new(unit).chunk
64
172
  when :controller then ControllerChunker.new(unit).chunk
65
- else [build_whole_chunk(unit)]
173
+ else
174
+ return MethodChunker.new(unit).chunk if METHOD_CHUNKABLE_TYPES.include?(unit.type)
175
+
176
+ [build_whole_chunk(unit)]
66
177
  end
67
178
  end
68
179
 
69
- private
70
-
71
180
  # Build a single :whole chunk for small units.
72
181
  #
73
182
  # @param unit [ExtractedUnit]
@@ -80,6 +189,107 @@ module Woods
80
189
  parent_type: unit.type
81
190
  )
82
191
  end
192
+
193
+ # Slice any chunk whose content exceeds the active budget into
194
+ # line-balanced sub-chunks. Preserves chunk_type with a `_part_N`
195
+ # suffix so downstream consumers can see they came from the same
196
+ # section.
197
+ #
198
+ # When `@max_chars` is nil but a token verifier is wired up, we
199
+ # still need to walk every chunk — `oversize?` will fall through
200
+ # to the token check. Skipping when only `@max_chars` is missing
201
+ # leaves the token-based path unreachable from this method.
202
+ #
203
+ # @param chunks [Array<Chunk>]
204
+ # @param unit [ExtractedUnit]
205
+ # @return [Array<Chunk>]
206
+ def enforce_char_limit(chunks, unit)
207
+ return chunks unless enforcement_active?
208
+
209
+ chunks.flat_map { |chunk| split_oversize_chunk(chunk, unit) }
210
+ end
211
+
212
+ # @param chunk [Chunk]
213
+ # @param unit [ExtractedUnit]
214
+ # @return [Array<Chunk>]
215
+ def split_oversize_chunk(chunk, unit)
216
+ return [chunk] unless oversize?(chunk.content)
217
+
218
+ verified_slices(chunk.content).each_with_index.map do |slice, idx|
219
+ Chunk.new(
220
+ content: slice,
221
+ chunk_type: :"#{chunk.chunk_type}_part_#{idx}",
222
+ parent_identifier: unit.identifier,
223
+ parent_type: unit.type
224
+ )
225
+ end
226
+ end
227
+
228
+ # Slice by lines, then (when a tokenizer is wired in) recursively
229
+ # re-split any slice whose real token count still exceeds the
230
+ # budget. Char-based slicing alone is unreliable on dense Rails
231
+ # source because BERT WordPiece tokenizes `::`-heavy constants at
232
+ # a chars-per-token ratio far below our estimate; the verifier catches those cases.
233
+ #
234
+ # @param content [String]
235
+ # @return [Array<String>]
236
+ def verified_slices(content)
237
+ limit = @max_chars || estimated_char_budget
238
+ slices = slice_by_lines(content, limit)
239
+ return slices unless tokenizer_active?
240
+
241
+ slices.flat_map { |slice| verify_slice(slice, limit) }
242
+ end
243
+
244
+ # Ensure a single post-line-split slice fits the token budget.
245
+ # Halves the char limit and reslices if it doesn't. Stops at
246
+ # {MIN_SLICE_CHARS} to avoid unbounded recursion on content that
247
+ # cannot be split line-wise (minified output, huge regex literals).
248
+ #
249
+ # @param slice [String]
250
+ # @param char_limit [Integer]
251
+ # @return [Array<String>]
252
+ def verify_slice(slice, char_limit)
253
+ return [slice] unless @token_counter.count(slice) > @max_tokens
254
+
255
+ smaller = char_limit / 2
256
+ return [slice] if smaller < MIN_SLICE_CHARS
257
+
258
+ slice_by_lines(slice, smaller).flat_map { |sub| verify_slice(sub, smaller) }
259
+ end
260
+
261
+ # Conservative char budget used when no explicit `max_chars` was
262
+ # given but the tokenizer is active. Uses a permissive estimate
263
+ # because the verifier will halve this further as needed.
264
+ def estimated_char_budget
265
+ return 0 unless @max_tokens
266
+
267
+ @max_tokens * 2
268
+ end
269
+
270
+ # Greedy line-based slicing that respects a supplied `limit`.
271
+ # Lines longer than `limit` are hard-cut (lossy — but such lines
272
+ # are already pathological: minified JSON dumps, long regexes).
273
+ #
274
+ # @param content [String]
275
+ # @param limit [Integer]
276
+ # @return [Array<String>]
277
+ def slice_by_lines(content, limit = @max_chars)
278
+ slices = []
279
+ current = String.new
280
+ content.each_line do |line|
281
+ line_parts = line.length > limit ? line.scan(/.{1,#{limit}}/m) : [line]
282
+ line_parts.each do |part|
283
+ if current.length + part.length > limit && !current.empty?
284
+ slices << current
285
+ current = String.new
286
+ end
287
+ current << part
288
+ end
289
+ end
290
+ slices << current unless current.empty?
291
+ slices
292
+ end
83
293
  end
84
294
 
85
295
  # Chunks a model unit by semantic sections: summary, associations,
@@ -291,5 +501,122 @@ module Woods
291
501
  chunks
292
502
  end
293
503
  end
504
+
505
+ # Generic method-aware chunker for class-like unit types.
506
+ #
507
+ # Splits into:
508
+ # - `:summary` — class/module declaration, includes, constants,
509
+ # attr_* DSL, class-level method calls, and any class-level code
510
+ # before the first public method.
511
+ # - `:method_<name>` — one chunk per public instance method.
512
+ # - `:private_methods` — all private/protected methods bundled
513
+ # together (they're usually implementation helpers and rarely
514
+ # queried individually).
515
+ #
516
+ # Used for services, jobs, mailers, concerns, policies, serializers,
517
+ # decorators, presenters, interactors, form objects, components,
518
+ # GraphQL resolvers, helpers, validators, and other class-like units.
519
+ #
520
+ # @api private
521
+ class MethodChunker
522
+ include ChunkBuilder
523
+
524
+ # @param unit [ExtractedUnit]
525
+ def initialize(unit)
526
+ @unit = unit
527
+ end
528
+
529
+ # @return [Array<Chunk>]
530
+ def chunk
531
+ state = parse_lines(@unit.source_code.lines)
532
+ build_chunks(state).reject(&:empty?)
533
+ end
534
+
535
+ private
536
+
537
+ # Parse lines into summary + per-public-method + private buffers.
538
+ #
539
+ # @param lines [Array<String>]
540
+ # @return [Hash]
541
+ def parse_lines(lines)
542
+ state = {
543
+ summary: [], methods: {}, private_methods: [],
544
+ current_method: nil, depth: 0, in_private: false
545
+ }
546
+ lines.each do |line|
547
+ if state[:current_method]
548
+ track_method_line(state, line)
549
+ else
550
+ classify_top_level_line(state, line)
551
+ end
552
+ end
553
+
554
+ state
555
+ end
556
+
557
+ # While inside a method body, collect every line and track depth so
558
+ # we know when the method closes. Blocks (`do...end`, `if...end`)
559
+ # nest inside methods and must be balanced before the method's own
560
+ # `end` line counts.
561
+ def track_method_line(state, line)
562
+ target = state[:in_private] ? state[:private_methods] : state[:methods][state[:current_method]]
563
+ target << line
564
+
565
+ state[:depth] += 1 if line.match?(/\bdo\b/) && !line.match?(/\bend\b/)
566
+ return unless line.strip.match?(/^end\s*$/)
567
+
568
+ state[:depth] -= 1
569
+ return unless state[:depth] <= 0
570
+
571
+ state[:current_method] = nil
572
+ state[:depth] = 0
573
+ end
574
+
575
+ # Classify a class-body line: privacy marker, method start, or
576
+ # summary content (DSL calls, attrs, comments, includes).
577
+ def classify_top_level_line(state, line)
578
+ if line.match?(PRIVATE_PATTERN)
579
+ state[:in_private] = true
580
+ state[:private_methods] << line
581
+ elsif line.match?(METHOD_PATTERN)
582
+ start_method(state, line)
583
+ elsif state[:in_private]
584
+ state[:private_methods] << line
585
+ else
586
+ state[:summary] << line
587
+ end
588
+ end
589
+
590
+ def start_method(state, line)
591
+ method_name = line[/def\s+(?:self\.)?(\w+)/, 1]
592
+ state[:current_method] = method_name
593
+ state[:depth] = 1
594
+
595
+ if state[:in_private]
596
+ state[:private_methods] << line
597
+ else
598
+ # Preserve insertion order — Hash does this by default, but we
599
+ # initialize the entry here so `build_chunks` below walks
600
+ # methods in source order.
601
+ state[:methods][method_name] = [line]
602
+ end
603
+ end
604
+
605
+ # Build the final chunk array from the parse state.
606
+ #
607
+ # @param state [Hash]
608
+ # @return [Array<Chunk>]
609
+ def build_chunks(state)
610
+ chunks = []
611
+ chunks << build_chunk(:summary, state[:summary].join) if state[:summary].any?
612
+
613
+ state[:methods].each do |method_name, lines|
614
+ chunks << build_chunk(:"method_#{method_name}", lines.join)
615
+ end
616
+
617
+ chunks << build_chunk(:private_methods, state[:private_methods].join) if state[:private_methods].any?
618
+ chunks
619
+ end
620
+ end
294
621
  end
295
622
  end
@@ -34,10 +34,14 @@ module Woods
34
34
 
35
35
  # Find a job by its ID.
36
36
  #
37
- # @param id [Object] Job ID
37
+ # A nil id is dropped from the bridge request so downstream tools see
38
+ # a missing parameter rather than an explicit `nil` — symmetric with
39
+ # `CacheAdapter.stats(namespace: nil)`.
40
+ #
41
+ # @param id [Object, nil] Job ID
38
42
  # @return [Hash] Bridge request
39
43
  def find_job(id:)
40
- { tool: "#{prefix}_find_job", params: { id: id } }
44
+ { tool: "#{prefix}_find_job", params: { id: id }.compact }
41
45
  end
42
46
 
43
47
  # List scheduled jobs.
@@ -51,10 +55,12 @@ module Woods
51
55
 
52
56
  # Retry a failed job.
53
57
  #
54
- # @param id [Object] Job ID
58
+ # A nil id is dropped from the bridge request — see #find_job.
59
+ #
60
+ # @param id [Object, nil] Job ID
55
61
  # @return [Hash] Bridge request
56
62
  def retry_job(id:)
57
- { tool: "#{prefix}_retry_job", params: { id: id } }
63
+ { tool: "#{prefix}_retry_job", params: { id: id }.compact }
58
64
  end
59
65
 
60
66
  private
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'json'
4
+ require 'time'
4
5
  require 'fileutils'
6
+ require_relative 'credential_scanner'
5
7
 
6
8
  module Woods
7
9
  module Console
@@ -10,6 +12,10 @@ module Woods
10
12
  # Each line is a JSON object with: tool name, params, timestamp,
11
13
  # confirmation status, and result summary.
12
14
  #
15
+ # Params and result summaries are passed through {CredentialScanner} so
16
+ # credentials an agent pastes inline into `console_eval` (or any other
17
+ # tool) do not land in audit logs unredacted.
18
+ #
13
19
  # @example
14
20
  # logger = AuditLogger.new(path: 'log/console_audit.jsonl')
15
21
  # logger.log(tool: 'console_eval', params: { code: '1+1' },
@@ -17,9 +23,15 @@ module Woods
17
23
  # logger.entries # => [{ "tool" => "console_eval", ... }]
18
24
  #
19
25
  class AuditLogger
26
+ # Soft cap on any single logged field. Stops an attacker with Tier-4
27
+ # access from filling disk via arbitrarily long params.
28
+ MAX_FIELD_CHARS = 16_384
29
+
20
30
  # @param path [String] Path to the JSONL audit log file
21
- def initialize(path:)
31
+ # @param scanner [#scan, nil] CredentialScanner override (mostly for tests).
32
+ def initialize(path:, scanner: nil)
22
33
  @path = path
34
+ @scanner = scanner || CredentialScanner.new
23
35
  end
24
36
 
25
37
  # Write an audit entry.
@@ -34,15 +46,75 @@ module Woods
34
46
 
35
47
  entry = {
36
48
  tool: tool,
37
- params: params,
49
+ params: redact(truncate_deep(params)),
38
50
  confirmed: confirmed,
39
- result_summary: result_summary,
51
+ result_summary: redact(truncate_value(result_summary)),
40
52
  timestamp: Time.now.utc.iso8601
41
53
  }
42
54
 
43
- File.open(@path, 'a') { |f| f.puts(JSON.generate(entry)) }
55
+ # Exclusive flock around the append — concurrent Tier-4 invocations
56
+ # across Puma threads would otherwise interleave bytes and produce
57
+ # malformed JSONL lines (integrity hit on audit review).
58
+ File.open(@path, File::WRONLY | File::APPEND | File::CREAT, 0o644) do |f|
59
+ f.flock(File::LOCK_EX)
60
+ f.puts(JSON.generate(sanitize_controls(entry)))
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ # Run a value through CredentialScanner. The scanner returns
67
+ # `[redacted_value, match_counts]`; the audit log wants only the
68
+ # redacted payload. nil scanner means pass-through (tests).
69
+ def redact(value)
70
+ return value unless @scanner && value
71
+
72
+ redacted, _counts = @scanner.scan(value)
73
+ redacted
74
+ rescue StandardError
75
+ # Never let redaction failure block audit writes — drop the value
76
+ # to a safe sentinel rather than logging raw content.
77
+ '[REDACTION_FAILED]'
44
78
  end
45
79
 
80
+ # Recursively cap strings at MAX_FIELD_CHARS. Arrays/hashes preserve
81
+ # shape; scalars other than String pass through unchanged.
82
+ def truncate_deep(value)
83
+ case value
84
+ when Hash then value.transform_values { |v| truncate_deep(v) }
85
+ when Array then value.map { |v| truncate_deep(v) }
86
+ else truncate_value(value)
87
+ end
88
+ end
89
+
90
+ def truncate_value(value)
91
+ return value unless value.is_a?(String) && value.length > MAX_FIELD_CHARS
92
+
93
+ "#{value[0, MAX_FIELD_CHARS]}… [truncated #{value.length - MAX_FIELD_CHARS} chars]"
94
+ end
95
+
96
+ # Defense-in-depth against log injection: strip ASCII control characters
97
+ # (NUL through US + DEL, except TAB) from every string in the entry
98
+ # before it reaches `JSON.generate`. `JSON.generate` already escapes
99
+ # these in string values, but (a) some downstream log readers parse
100
+ # JSONL by splitting on literal `\n` before JSON-parsing, and (b) a
101
+ # future consumer that decodes and reprints values (e.g. a terminal
102
+ # audit UI) would re-expose injection vectors.
103
+ CONTROL_CHARS = /[\x00-\x08\x0A-\x1F\x7F]/
104
+ private_constant :CONTROL_CHARS
105
+
106
+ def sanitize_controls(value)
107
+ case value
108
+ when String then value.gsub(CONTROL_CHARS, '')
109
+ when Hash then value.transform_keys { |k| sanitize_controls(k) }
110
+ .transform_values { |v| sanitize_controls(v) }
111
+ when Array then value.map { |v| sanitize_controls(v) }
112
+ else value
113
+ end
114
+ end
115
+
116
+ public
117
+
46
118
  # Read all audit entries.
47
119
  #
48
120
  # @return [Array<Hash>] Parsed JSONL entries
@@ -1,31 +1,64 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'json'
4
+ require_relative 'bridge_protocol'
4
5
  require_relative 'model_validator'
5
6
  require_relative 'safe_context'
6
7
 
7
8
  module Woods
8
9
  module Console
9
- # JSON-lines protocol bridge between MCP server and Rails environment.
10
+ # **PROTOCOL SCAFFOLD does not execute real queries.** Every handler
11
+ # below returns static empty data (`{ 'count' => 0 }`, `{ 'records' =>
12
+ # [] }`, etc.). Real in-process execution lives in
13
+ # {Woods::Console::EmbeddedExecutor}; the eventual real bridge process
14
+ # for Option D will replace this scaffold with a class that performs
15
+ # actual ActiveRecord queries.
16
+ #
17
+ # The scaffold pins the JSON-lines wire protocol — request envelope,
18
+ # response envelope, supported-tools list, error shape — so other
19
+ # components (EmbeddedExecutor, ConnectionManager, Server) can be
20
+ # built and tested against a stable contract before the real
21
+ # bridge-process implementation lands. Treat this class the way you'd
22
+ # treat a Sinatra fake of a third-party API in tests: it satisfies
23
+ # the protocol, nothing more.
24
+ #
25
+ # ## Why the name carries "Stub"
26
+ #
27
+ # Round-1 audit Track H-4 flagged this class as a "critical SafeContext
28
+ # bypass" because `handle_request` doesn't wrap calls in `SafeContext`.
29
+ # That finding wasn't exploitable — no live code path executes through
30
+ # this class in the shipped gem — but the bare name `Bridge` made the
31
+ # scaffold status invisible to auditors. The `Stub` prefix removes the
32
+ # ambiguity. When the real bridge-process implementation is delivered,
33
+ # it should claim the `Bridge` name; this class will either be deleted
34
+ # (if the protocol is fully owned by the real bridge) or renamed to
35
+ # `BridgeProtocol` and reduced to a constants module.
36
+ #
37
+ # ## Protocol
10
38
  #
11
39
  # Reads JSON-lines requests from an input IO, validates model/column names,
12
- # dispatches to tool handlers, and writes JSON-lines responses to an output IO.
40
+ # dispatches to (stub) tool handlers, and writes JSON-lines responses to
41
+ # an output IO.
13
42
  #
14
43
  # Protocol:
15
44
  # Request: {"id":"req_1","tool":"count","params":{"model":"Order","scope":{"status":"pending"}}}
16
45
  # Response: {"id":"req_1","ok":true,"result":{"count":1847},"timing_ms":12.3}
17
46
  # Error: {"id":"req_1","ok":false,"error":"Model not found","error_type":"validation"}
18
47
  #
19
- # @example
20
- # bridge = Bridge.new(input: $stdin, output: $stdout,
21
- # model_validator: validator, safe_context: ctx)
48
+ # @example Wiring against a fake input/output (testing only — handlers return empty data)
49
+ # bridge = StubBridge.new(input: $stdin, output: $stdout,
50
+ # model_validator: validator, safe_context: ctx)
22
51
  # bridge.run
23
52
  #
24
- class Bridge
25
- SUPPORTED_TOOLS = %w[count sample find pluck aggregate association_count schema recent status].freeze
26
- # Alias used by EmbeddedExecutor to avoid duplicating the list.
27
- TIER1_TOOLS = SUPPORTED_TOOLS
28
- TOOL_HANDLERS = SUPPORTED_TOOLS.to_h { |t| [t, :"handle_#{t}"] }.freeze
53
+ class StubBridge
54
+ # Protocol constants live on {BridgeProtocol} so the real executor
55
+ # (EmbeddedExecutor) and a future real bridge-process class can
56
+ # reference them without importing the scaffold. These top-level
57
+ # aliases keep `StubBridge::SUPPORTED_TOOLS` working for existing
58
+ # callers and specs.
59
+ SUPPORTED_TOOLS = BridgeProtocol::SUPPORTED_TOOLS
60
+ TIER1_TOOLS = BridgeProtocol::TIER1_TOOLS
61
+ TOOL_HANDLERS = BridgeProtocol::TOOL_HANDLERS
29
62
 
30
63
  # @param input [IO] Input stream (reads JSON-lines)
31
64
  # @param output [IO] Output stream (writes JSON-lines)
@@ -115,10 +148,10 @@ module Woods
115
148
  @model_validator.validate_model!(model)
116
149
  end
117
150
 
118
- # Stub handlers below return empty/zero data by design.
119
- # This Bridge class is a protocol scaffold — real execution happens
120
- # in EmbeddedExecutor (in-process) or a live Rails bridge process.
121
- # The stubs satisfy the protocol contract for testing and offline use.
151
+ # Stub handlers below return empty/zero data by design — see the
152
+ # class-level docstring. Real in-process execution happens in
153
+ # EmbeddedExecutor; the eventual Option-D bridge process will replace
154
+ # this class entirely.
122
155
 
123
156
  def handle_count(_params)
124
157
  { 'count' => 0 }
@@ -134,7 +167,7 @@ module Woods
134
167
 
135
168
  def handle_pluck(params)
136
169
  @model_validator.validate_columns!(params['model'], params['columns']) if params['columns']
137
- { 'values' => [] }
170
+ { 'columns' => Array(params['columns']), 'values' => [] }
138
171
  end
139
172
 
140
173
  def handle_aggregate(params)
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Console
5
+ # Canonical console-tool protocol contract shared by {StubBridge}
6
+ # (the JSON-lines scaffold) and {EmbeddedExecutor} (the in-process
7
+ # executor that ships today). The eventual real bridge-process
8
+ # implementation (Option D — see `docs/design/CONSOLE_SERVER.md`)
9
+ # will also reference this module so every executor that speaks the
10
+ # protocol agrees on the tool vocabulary.
11
+ #
12
+ # Three constants live here:
13
+ #
14
+ # - {SUPPORTED_TOOLS} — the canonical Tier 1 tool list.
15
+ # - {TIER1_TOOLS} — alias, kept as a distinct name for call
16
+ # sites that reason about tier semantics rather than the whole
17
+ # supported set.
18
+ # - {TOOL_HANDLERS} — tool → `handle_<tool>` method-symbol map.
19
+ #
20
+ # Previously these lived on {StubBridge} and {EmbeddedExecutor}
21
+ # borrowed them with `TIER1_TOOLS = StubBridge::TIER1_TOOLS`, which
22
+ # reads as "the real executor borrows constants from the stub" —
23
+ # backwards. Extracting the protocol here lets the real executor
24
+ # (and a future non-stub `Bridge` class) claim the contract without
25
+ # importing the scaffold.
26
+ module BridgeProtocol
27
+ SUPPORTED_TOOLS = %w[
28
+ count
29
+ sample
30
+ find
31
+ pluck
32
+ aggregate
33
+ association_count
34
+ schema
35
+ recent
36
+ status
37
+ ].freeze
38
+
39
+ TIER1_TOOLS = SUPPORTED_TOOLS
40
+
41
+ TOOL_HANDLERS = SUPPORTED_TOOLS.to_h { |t| [t, :"handle_#{t}"] }.freeze
42
+ end
43
+ end
44
+ end