RubyGems - woods - Versions diffs - 1.2.0 → 1.3.0 - Mend

woods 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +169 -0
data/README.md +20 -8
data/exe/woods-console +51 -6
data/exe/woods-console-mcp +24 -4
data/exe/woods-mcp +30 -7
data/exe/woods-mcp-http +47 -6
data/lib/generators/woods/install_generator.rb +13 -4
data/lib/generators/woods/templates/woods.rb.tt +155 -0
data/lib/tasks/woods.rake +15 -50
data/lib/woods/builder.rb +174 -9
data/lib/woods/cache/cache_middleware.rb +360 -31
data/lib/woods/chunking/semantic_chunker.rb +334 -7
data/lib/woods/console/adapters/job_adapter.rb +10 -4
data/lib/woods/console/audit_logger.rb +76 -4
data/lib/woods/console/bridge.rb +48 -15
data/lib/woods/console/bridge_protocol.rb +44 -0
data/lib/woods/console/confirmation.rb +3 -4
data/lib/woods/console/console_response_renderer.rb +56 -18
data/lib/woods/console/credential_index.rb +201 -0
data/lib/woods/console/credential_scanner.rb +302 -0
data/lib/woods/console/dispatch_pipeline.rb +138 -0
data/lib/woods/console/embedded_executor.rb +682 -35
data/lib/woods/console/eval_guard.rb +319 -0
data/lib/woods/console/model_validator.rb +1 -3
data/lib/woods/console/rack_middleware.rb +185 -29
data/lib/woods/console/redactor.rb +161 -0
data/lib/woods/console/response_context.rb +127 -0
data/lib/woods/console/safe_context.rb +220 -23
data/lib/woods/console/scope_predicate_parser.rb +131 -0
data/lib/woods/console/server.rb +417 -486
data/lib/woods/console/sql_noise_stripper.rb +87 -0
data/lib/woods/console/sql_table_scanner.rb +213 -0
data/lib/woods/console/sql_validator.rb +81 -31
data/lib/woods/console/table_gate.rb +93 -0
data/lib/woods/console/tool_specs.rb +552 -0
data/lib/woods/console/tools/tier1.rb +3 -3
data/lib/woods/console/tools/tier4.rb +7 -1
data/lib/woods/dependency_graph.rb +66 -7
data/lib/woods/embedding/indexer.rb +190 -6
data/lib/woods/embedding/openai.rb +40 -4
data/lib/woods/embedding/provider.rb +104 -8
data/lib/woods/embedding/text_preparer.rb +23 -3
data/lib/woods/embedding/token_counter.rb +133 -0
data/lib/woods/evaluation/baseline_runner.rb +20 -2
data/lib/woods/evaluation/metrics.rb +4 -1
data/lib/woods/extracted_unit.rb +1 -0
data/lib/woods/extractor.rb +7 -1
data/lib/woods/extractors/controller_extractor.rb +6 -0
data/lib/woods/extractors/mailer_extractor.rb +16 -2
data/lib/woods/extractors/model_extractor.rb +6 -1
data/lib/woods/extractors/phlex_extractor.rb +13 -4
data/lib/woods/extractors/rails_source_extractor.rb +2 -0
data/lib/woods/extractors/route_helper_resolver.rb +130 -0
data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
data/lib/woods/extractors/view_component_extractor.rb +12 -1
data/lib/woods/extractors/view_engines/base.rb +141 -0
data/lib/woods/extractors/view_engines/erb.rb +145 -0
data/lib/woods/extractors/view_template_extractor.rb +92 -133
data/lib/woods/flow_assembler.rb +23 -15
data/lib/woods/flow_precomputer.rb +21 -2
data/lib/woods/graph_analyzer.rb +3 -4
data/lib/woods/index_artifact.rb +173 -0
data/lib/woods/mcp/bearer_auth.rb +45 -0
data/lib/woods/mcp/bootstrap_state.rb +94 -0
data/lib/woods/mcp/bootstrapper.rb +337 -16
data/lib/woods/mcp/config_resolver.rb +288 -0
data/lib/woods/mcp/errors.rb +134 -0
data/lib/woods/mcp/index_reader.rb +265 -30
data/lib/woods/mcp/origin_guard.rb +132 -0
data/lib/woods/mcp/provider_probe.rb +166 -0
data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
data/lib/woods/mcp/server.rb +737 -137
data/lib/woods/model_name_cache.rb +78 -2
data/lib/woods/notion/client.rb +25 -2
data/lib/woods/notion/mappers/model_mapper.rb +36 -2
data/lib/woods/railtie.rb +55 -15
data/lib/woods/resilience/circuit_breaker.rb +9 -2
data/lib/woods/resilience/retryable_provider.rb +40 -3
data/lib/woods/resolved_config.rb +299 -0
data/lib/woods/retrieval/context_assembler.rb +112 -5
data/lib/woods/retrieval/query_classifier.rb +1 -1
data/lib/woods/retrieval/ranker.rb +55 -6
data/lib/woods/retrieval/search_executor.rb +42 -13
data/lib/woods/retriever.rb +330 -24
data/lib/woods/session_tracer/middleware.rb +35 -1
data/lib/woods/storage/graph_store.rb +39 -0
data/lib/woods/storage/inapplicable_backend.rb +14 -0
data/lib/woods/storage/metadata_store.rb +129 -1
data/lib/woods/storage/pgvector.rb +70 -8
data/lib/woods/storage/qdrant.rb +196 -5
data/lib/woods/storage/snapshotter/metadata.rb +172 -0
data/lib/woods/storage/snapshotter/vector.rb +238 -0
data/lib/woods/storage/snapshotter.rb +24 -0
data/lib/woods/storage/vector_store.rb +184 -35
data/lib/woods/tasks.rb +85 -0
data/lib/woods/temporal/snapshot_store.rb +49 -1
data/lib/woods/token_utils.rb +44 -5
data/lib/woods/unblocked/client.rb +1 -1
data/lib/woods/unblocked/document_builder.rb +35 -10
data/lib/woods/unblocked/exporter.rb +1 -1
data/lib/woods/util/host_guard.rb +61 -0
data/lib/woods/version.rb +1 -1
data/lib/woods.rb +126 -6
metadata +69 -4

data/lib/woods/mcp/index_reader.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'active_support/core_ext/object/blank'
 require 'active_support/core_ext/string/inflections'
 require 'digest'
 require 'json'
@@ -52,6 +53,34 @@ module Woods
         @identifier_map = nil
       end
+      # Pre-populate cached state so the first MCP tool call doesn't pay
+      # for disk reads + JSON parsing.
+      #
+      # Touches every lazy accessor: manifest, summary, dependency_graph,
+      # graph_analysis, and the identifier_map (which reads all _index.json
+      # files). Each step is individually rescued so a missing optional
+      # artefact (e.g. graph_analysis.json) never blocks the rest.
+      #
+      # Safe to call multiple times — lazy accessors short-circuit on the
+      # memoized value.
+      # NOTE(review): the accessors visible in this file memoize with `||=`,
+      # which does not retain a nil result (e.g. `summary` is documented to
+      # return nil when SUMMARY.md is absent), so such a step is presumably
+      # re-evaluated on every call — confirm this is acceptable.
+      #
+      # @return [Hash] Per-step outcome: `{step => true | Exception}`
+      def warmup!
+        # Step name => zero-argument lambda that invokes the lazy accessor.
+        steps = {
+          manifest: -> { manifest },
+          summary: -> { summary },
+          dependency_graph: -> { dependency_graph },
+          graph_analysis: -> { graph_analysis },
+          identifier_map: -> { identifier_map }
+        }
+        # each_with_object returns the accumulator hash, which becomes the
+        # method's return value.
+        steps.each_with_object({}) do |(step, runner), result|
+          runner.call
+          result[step] = true
+        rescue StandardError => e
+          # Record the failure for this step only; the loop continues with
+          # the remaining steps.
+          result[step] = e
+        end
+      end
       # Clear all cached state so the next access re-reads from disk.
       #
       # @return [void]
@@ -65,6 +94,7 @@ module Woods
         @dependency_graph = nil
         @graph_analysis = nil
         @raw_graph_data = nil
+        @normalized_graph_edges = nil
       end
       # @return [Hash] Parsed manifest.json
@@ -72,6 +102,17 @@ module Woods
         @manifest ||= parse_json('manifest.json')
       end
+      # Template engines the extraction pipeline currently understands.
+      # Delegates to {ViewTemplateExtractor.supported_template_engines} so
+      # the list stays honest as engines are added or removed. Surfaced by
+      # the MCP `structure` tool (#86).
+      #
+      # @return [Array<Symbol>]
+      def template_engines
+        # Loaded on first call rather than at file load time, so the
+        # extractor is only required when this method is actually used.
+        require_relative '../extractors/view_template_extractor'
+        # Hand back a copy so callers cannot mutate the extractor's own list.
+        Woods::Extractors::ViewTemplateExtractor.supported_template_engines.dup
+      end
       # @return [String, nil] SUMMARY.md content, or nil if not present
       def summary
         @summary ||= begin
@@ -119,19 +160,57 @@ module Woods
         dirs.flat_map { |dir| read_index(dir) }
       end
+      # Default maximum number of unit files to load during phase-2 search.
+      # Override with WOODS_SEARCH_MAX_SCAN env var.
+      DEFAULT_SEARCH_MAX_SCAN = 500
       # Search units by case-insensitive pattern.
       #
       # Phase 1: match identifiers from index files (cheap).
       # Phase 2: lazy-load unit files for metadata/source_code matching.
       #
-      # @param query [String] Search pattern (treated as case-insensitive regex)
+      # The query is compiled as a raw Ruby regex with IGNORECASE. If the pattern
+      # is invalid, it falls back to an escaped literal match.
+      #
+      # A "broad" pattern is one that matches more than 50% of the entries in a
+      # type directory. Broad patterns still run but the result includes a :note.
+      #
+      # Phase-2 scan is capped at WOODS_SEARCH_MAX_SCAN unit files (default 500).
+      # When the cap is reached the result includes :partial => true.
+      #
+      # The optional +exact_prefix+ / +exact_suffix+ filters restrict results to
+      # identifiers whose start/end matches the given string literally (case-
+      # insensitive). They are ANDed with the +query+ regex and are safer than
+      # hand-escaping regex anchors — metacharacters like +::+ are treated as
+      # literal text.
+      #
+      # @param query [String, nil] Search pattern (case-insensitive regex). Optional when
+      #   +exact_prefix+ or +exact_suffix+ is provided; otherwise required.
       # @param types [Array<String>, nil] Filter to these singular type names
       # @param fields [Array<String>] Fields to search: "identifier", "metadata", "source_code"
       # @param limit [Integer] Maximum results to return
-      # @return [Array<Hash>] Matches with :identifier, :type, :match_field keys
-      def search(query, types: nil, fields: %w[identifier], limit: 20)
-        pattern = Regexp.new(Regexp.escape(query), Regexp::IGNORECASE)
+      # @param exact_prefix [String, nil] Literal identifier prefix filter (case-insensitive)
+      # @param exact_suffix [String, nil] Literal identifier suffix filter (case-insensitive)
+      # @return [Hash] { results: Array<Hash>, note: String|nil, partial: Boolean }
+      # @raise [ArgumentError] when all of query, exact_prefix, and exact_suffix are blank
+      def search(query = nil, types: nil, fields: %w[identifier], limit: 20, exact_prefix: nil, exact_suffix: nil)
+        prefix = exact_prefix.blank? ? nil : exact_prefix.downcase
+        suffix = exact_suffix.blank? ? nil : exact_suffix.downcase
+        if query.blank? && !prefix && !suffix
+          raise ArgumentError, 'search requires a query or exact_prefix/exact_suffix filter'
+        end
+        # When only prefix/suffix are provided, the regex acts as a match-all
+        # wildcard so the existing phase-1/phase-2 pipeline still works.
+        pattern = compile_search_pattern(query.to_s.empty? ? '.*' : query)
+        max_scan_env = ENV.fetch('WOODS_SEARCH_MAX_SCAN', '').to_s.strip
+        max_scan = max_scan_env.empty? ? DEFAULT_SEARCH_MAX_SCAN : max_scan_env.to_i
+        max_scan = DEFAULT_SEARCH_MAX_SCAN if max_scan <= 0
         results = []
+        notes = []
+        phase2_scanned = 0
+        partial = false
         dirs = if types
                  types.filter_map { |t| TYPE_TO_DIR[t] }
@@ -139,36 +218,85 @@ module Woods
                  TYPE_DIRS
                end
+        # Phase 2 candidates are collected per-dir and then scanned in
+        # round-robin across dirs. Exhausting the per-run scan cap linearly
+        # down TYPE_DIRS order would starve later types (`concerns` at pos
+        # 13, `test_mappings` at pos 31) on any codebase where the earlier
+        # dirs together exceed max_scan entries. Interleaving guarantees
+        # every type contributes to the scanned set.
+        phase2_queues = {}
         dirs.each do |dir|
           type_name = DIR_TO_TYPE[dir]
           entries = read_index(dir)
-          entries.each do |entry|
-            break if results.size >= limit
+          # Broad-match detection: warn when pattern matches >50% of dir entries
+          if entries.size > 1
+            matching_count = entries.count do |e|
+              identifier_passes_filters?(e['identifier'], pattern, prefix, suffix)
+            end
+            if matching_count > entries.size / 2.0
+              notes << "broad pattern matched #{matching_count}/#{entries.size} entries in #{dir}"
+            end
+          end
+          entries.each do |entry|
             id = entry['identifier']
+            next unless identifier_passes_prefix_suffix?(id, prefix, suffix)
-            # Phase 1: identifier matching
+            # Phase 1: identifier matching (still in-order per dir)
             if fields.include?('identifier') && pattern.match?(id)
+              next if results.size >= limit
               results << { identifier: id, type: type_name, match_field: 'identifier' }
               next
             end
-            # Phase 2: metadata/source_code matching (requires loading full unit)
+            # Phase 2 is only reached when the caller opted into deeper fields.
             next unless fields.include?('metadata') || fields.include?('source_code')
-            unit = find_unit(id)
-            next unless unit
+            (phase2_queues[dir] ||= []) << [type_name, id]
+          end
+        end
+        if results.size < limit && phase2_queues.any?
+          queues = phase2_queues.values.map(&:dup)
+          catch(:phase2_done) do
+            loop do
+              progressed = false
+              queues.each do |queue|
+                next if queue.empty?
+                throw :phase2_done if results.size >= limit
+                if phase2_scanned >= max_scan
+                  partial = true
+                  throw :phase2_done
+                end
+                type_name, id = queue.shift
+                progressed = true
-            if fields.include?('source_code') && unit['source_code'] && pattern.match?(unit['source_code'])
-              results << { identifier: id, type: type_name, match_field: 'source_code' }
-            elsif fields.include?('metadata') && unit['metadata'] && pattern.match?(unit['metadata'].to_json)
-              results << { identifier: id, type: type_name, match_field: 'metadata' }
+                unit = find_unit(id)
+                next unless unit
+                phase2_scanned += 1
+                if fields.include?('source_code') && unit['source_code'] && pattern.match?(unit['source_code'])
+                  results << { identifier: id, type: type_name, match_field: 'source_code' }
+                elsif fields.include?('metadata') && unit['metadata'] && pattern.match?(unit['metadata'].to_json)
+                  results << { identifier: id, type: type_name, match_field: 'metadata' }
+                end
+              end
+              break unless progressed
             end
           end
         end
-        results.first(limit)
+        response = { results: results.first(limit) }
+        response[:note] = notes.join('; ') unless notes.empty?
+        response[:partial] = true if partial
+        response
       end
       # BFS traversal of forward dependencies.
@@ -176,9 +304,10 @@ module Woods
       # @param identifier [String] Starting unit identifier
       # @param depth [Integer] Maximum traversal depth
       # @param types [Array<String>, nil] Filter to these singular type names
+      # @param via [Array<String>, nil] Filter to these relationship types (e.g. ["link_to", "redirect_to"])
       # @return [Hash] { root:, nodes: { id => { type:, depth:, deps: [] } } }
-      def traverse_dependencies(identifier, depth: 2, types: nil)
-        traverse(identifier, depth: depth, types: types, direction: :forward)
+      def traverse_dependencies(identifier, depth: 2, types: nil, via: nil)
+        traverse(identifier, depth: depth, types: types, via: via, direction: :forward)
       end
       # BFS traversal of reverse dependencies (dependents).
@@ -186,9 +315,10 @@ module Woods
       # @param identifier [String] Starting unit identifier
       # @param depth [Integer] Maximum traversal depth
       # @param types [Array<String>, nil] Filter to these singular type names
+      # @param via [Array<String>, nil] Filter to these relationship types (e.g. ["link_to", "redirect_to"])
       # @return [Hash] { root:, nodes: { id => { type:, depth:, deps: [] } } }
-      def traverse_dependents(identifier, depth: 2, types: nil)
-        traverse(identifier, depth: depth, types: types, direction: :reverse)
+      def traverse_dependents(identifier, depth: 2, types: nil, via: nil)
+        traverse(identifier, depth: depth, types: types, via: via, direction: :reverse)
       end
       # Search rails_source units by concept keyword.
@@ -200,7 +330,12 @@ module Woods
       # @param limit [Integer] Maximum results to return
       # @return [Array<Hash>] Matching rails_source unit summaries
       def framework_sources(keyword, limit: 20)
-        pattern = Regexp.new(Regexp.escape(keyword), Regexp::IGNORECASE)
+        # Multi-word keywords ("ActiveRecord callbacks") are split on
+        # whitespace and ANDed. Single-word queries behave as before.
+        tokens = keyword.to_s.strip.split(/\s+/)
+        return [] if tokens.empty?
+        patterns = tokens.map { |t| Regexp.new(Regexp.escape(t), Regexp::IGNORECASE) }
         results = []
         entries = read_index('rails_source')
@@ -211,9 +346,12 @@ module Woods
           unit = find_unit(id)
           next unless unit
-          matched = pattern.match?(id) ||
-                    (unit['source_code'] && pattern.match?(unit['source_code'])) ||
-                    (unit['metadata'] && pattern.match?(unit['metadata'].to_json))
+          metadata_json = unit['metadata']&.to_json
+          matched = patterns.all? do |pat|
+            pat.match?(id) ||
+              (unit['source_code'] && pat.match?(unit['source_code'])) ||
+              (metadata_json && pat.match?(metadata_json))
+          end
           next unless matched
@@ -259,7 +397,8 @@ module Woods
               identifier: id,
               type: DIR_TO_TYPE[dir],
               file_path: unit['file_path'],
-              last_modified: last_modified
+              last_modified: last_modified,
+              author: unit.dig('metadata', 'git', 'last_author')
             }
           end
         end
@@ -277,6 +416,46 @@ module Woods
       private
+      # Compile a case-insensitive regex from a query string.
+      #
+      # Treats the query as a raw Ruby regex pattern. When the pattern does not
+      # compile (RegexpError), falls back to an escaped literal match. The
+      # fallback is silent: this method records no note about it.
+      # NOTE(review): the query is compiled unescaped, so an expensive pattern
+      # can backtrack heavily — confirm callers bound it if queries come from
+      # untrusted clients.
+      #
+      # @param query [String] Raw regex pattern
+      # @return [Regexp] Compiled case-insensitive pattern
+      def compile_search_pattern(query)
+        Regexp.new(query, Regexp::IGNORECASE)
+      rescue RegexpError
+        Regexp.new(Regexp.escape(query), Regexp::IGNORECASE)
+      end
+      # Case-insensitive literal prefix/suffix check on an identifier.
+      # Nil filters are treated as "no restriction".
+      #
+      # The identifier is downcased here, but +prefix+ and +suffix+ are
+      # compared as given, so they must already be lowercase for the check to
+      # be case-insensitive.
+      #
+      # @param identifier [String, nil] Identifier to test; nil never passes
+      # @param prefix [String, nil] Lowercase literal the identifier must start with
+      # @param suffix [String, nil] Lowercase literal the identifier must end with
+      # @return [Boolean] true when every non-nil filter matches
+      def identifier_passes_prefix_suffix?(identifier, prefix, suffix)
+        return false unless identifier
+        downcased = identifier.downcase
+        return false if prefix && !downcased.start_with?(prefix)
+        return false if suffix && !downcased.end_with?(suffix)
+        true
+      end
+      # Combined regex + prefix/suffix check used only by broad-match detection,
+      # which reports how many identifiers would actually surface.
+      #
+      # @param identifier [String, nil] Identifier to test; nil never passes
+      # @param pattern [Regexp] Compiled search pattern, matched against the
+      #   identifier as-is (not downcased)
+      # @param prefix [String, nil] Lowercase literal prefix filter
+      # @param suffix [String, nil] Lowercase literal suffix filter
+      # @return [Boolean] true when the prefix/suffix filters and the regex all match
+      def identifier_passes_filters?(identifier, pattern, prefix, suffix)
+        # The prefix/suffix check runs first and also rejects a nil identifier,
+        # so the regex below only ever sees a non-nil value.
+        return false unless identifier_passes_prefix_suffix?(identifier, prefix, suffix)
+        pattern.match?(identifier)
+      end
+      # Memoized normalized edges — converts bare strings (old format) to hashes once.
+      # Cleared by reload! alongside raw_graph_data.
+      #
+      # A graph without an 'edges' key is treated as having no edges.
+      #
+      # @return [Hash] source identifier => Array of edge hashes
+      def normalized_graph_edges
+        @normalized_graph_edges ||= normalize_all_edges(raw_graph_data['edges'] || {})
+      end
       # Build identifier → { type_dir, filename } map from all _index.json files.
       def identifier_map
         @identifier_map ||= build_identifier_map
@@ -340,13 +519,28 @@ module Woods
       end
       # BFS traversal in either direction.
-      def traverse(identifier, depth:, types:, direction:)
+      #
+      # Edges may be stored as bare strings (old format) or as
+      # +{"target" => "...", "via" => "..."}+ hashes (new format).
+      # This method handles both transparently.
+      #
+      # @param identifier [String] Starting unit identifier
+      # @param depth [Integer] Maximum traversal depth
+      # @param types [Array<String>, nil] Filter to these unit type names
+      # @param via [Array<String>, nil] Filter to these relationship types
+      # @param direction [:forward, :reverse] Traversal direction
+      # @return [Hash]
+      def traverse(identifier, depth:, types:, via:, direction:)
         graph_data = raw_graph_data
         nodes_data = graph_data['nodes'] || {}
         return { root: identifier, found: false, nodes: {} } unless nodes_data.key?(identifier)
+        # Normalize edges once per graph load — memoized alongside raw_graph_data
+        normalized_edges = normalized_graph_edges
         type_set = types&.to_set
+        via_set = via&.to_set
         visited = Set.new([identifier])
         queue = [[identifier, 0]]
         result_nodes = {}
@@ -355,12 +549,12 @@ module Woods
           current, current_depth = queue.shift
           neighbors = if direction == :forward
-                        (graph_data['edges'] || {})[current] || []
+                        resolve_forward_neighbors(normalized_edges, current, via_set)
                       else
-                        (graph_data['reverse'] || {})[current] || []
+                        resolve_reverse_neighbors(graph_data, normalized_edges, current, via_set)
                       end
-          # Filter by type if requested
+          # Filter by node type if requested
           filtered = if type_set
                        neighbors.select do |n|
                          node_meta = nodes_data[n]
@@ -370,14 +564,18 @@ module Woods
                        neighbors
                      end
+          # At max depth, record the node with empty deps so the renderer
+          # doesn't emit an extra level of unexpanded neighbors. The parent
+          # node's deps list already shows this node as a child.
+          will_expand = current_depth < depth
           node_meta = nodes_data[current]
           result_nodes[current] = {
             type: node_meta&.dig('type'),
             depth: current_depth,
-            deps: filtered
+            deps: will_expand ? filtered : []
           }
-          next if current_depth >= depth
+          next unless will_expand
           filtered.each do |neighbor|
             unless visited.include?(neighbor)
@@ -389,6 +587,43 @@ module Woods
         { root: identifier, found: true, nodes: result_nodes }
       end
+      # Normalize all edge arrays once, converting bare strings to hashes.
+      #
+      # NOTE: This uses string keys ('target', 'via') because IndexReader
+      # operates on parsed JSON. DependencyGraph.normalize_edges uses symbol
+      # keys (:target, :via) for in-memory Ruby objects. The two normalizers
+      # are intentionally separate — do not merge them.
+      #
+      # Entries that are already hashes are kept as the same objects; any
+      # other entry is wrapped as { 'target' => entry } with no 'via' key.
+      #
+      # @param raw_edges [Hash] Raw edges from graph JSON
+      # @return [Hash] Edges with all entries as { 'target' => ..., 'via' => ... } hashes
+      def normalize_all_edges(raw_edges)
+        raw_edges.transform_values do |entries|
+          entries.map { |e| e.is_a?(Hash) ? e : { 'target' => e } }
+        end
+      end
+      # Extract forward neighbor identifiers, optionally filtered by via type.
+      # Expects pre-normalized edges (all entries are hashes).
+      #
+      # Edges normalized from the old bare-string format carry no 'via' key,
+      # so their 'via' reads as nil and they only survive a via filter whose
+      # set itself contains nil.
+      #
+      # @param normalized_edges [Hash] source identifier => Array of edge hashes
+      # @param identifier [String] Unit whose outgoing edges are wanted
+      # @param via_set [Set, nil] Allowed relationship types; nil disables filtering
+      # @return [Array] Target identifiers, in stored edge order
+      def resolve_forward_neighbors(normalized_edges, identifier, via_set)
+        edges = normalized_edges[identifier] || []
+        edges = edges.select { |e| via_set.include?(e['via']) } if via_set
+        edges.map { |e| e['target'] }
+      end
+      # Extract reverse neighbor identifiers, optionally filtered by via type.
+      # Reverse edges are stored as bare identifier arrays. When via filtering
+      # is requested, checks each dependent's pre-normalized forward edges to
+      # find those pointing at +identifier+ with a matching via type.
+      #
+      # @param graph_data [Hash] Parsed graph JSON; only its 'reverse' key is read
+      # @param normalized_edges [Hash] source identifier => Array of edge hashes
+      # @param identifier [String] Unit whose dependents are wanted
+      # @param via_set [Set, nil] Allowed relationship types; nil disables filtering
+      # @return [Array] Dependent identifiers
+      def resolve_reverse_neighbors(graph_data, normalized_edges, identifier, via_set)
+        dependents = (graph_data['reverse'] || {})[identifier] || []
+        # Without a via filter the stored array is returned as-is (not a copy).
+        return dependents unless via_set
+        dependents.select do |dep|
+          forward = normalized_edges[dep] || []
+          forward.any? { |e| e['target'] == identifier && via_set.include?(e['via']) }
+        end
+      end
     end
   end
 end

data/lib/woods/mcp/origin_guard.rb ADDED Viewed

@@ -0,0 +1,132 @@
+# frozen_string_literal: true
+require 'json'
+require_relative '../util/host_guard'
+module Woods
+  module MCP
+    # Rack middleware that rejects browser-origin requests from unexpected sources.
+    #
+    # Defends against DNS rebinding and cross-site request forgery against a
+    # locally-bound MCP HTTP server. Defaults to loopback-only origins; operators
+    # can widen via WOODS_MCP_HTTP_ALLOWED_ORIGINS (comma-separated) or by passing
+    # :allowed_origins. Requests without an Origin header (curl, server-to-server,
+    # MCP stdio clients) are allowed through — bearer auth still gates them.
+    #
+    # Host header validation defends against the residual DNS-rebinding surface:
+    # an attacker who controls a hostname they can point at the server's IP can
+    # pass the Origin check (the browser sends their origin, which we might
+    # allow-list for some deployments) while Host carries their hostname. By
+    # also requiring Host to appear in the allow-list (or to be a loopback
+    # address), we close that gap even when Rails is bound to 0.0.0.0.
+    #
+    # Port-matching: an allow-list entry WITHOUT a port (`http://localhost`)
+    # matches that host on any port. An entry WITH a port (`http://localhost:3000`)
+    # requires an exact port match. Specify explicit ports when port isolation
+    # matters.
+    #
+    # Also answers CORS preflight (OPTIONS) with the matching allow-list.
+    class OriginGuard
+      # Origins accepted when the caller supplies no allow-list of its own.
+      DEFAULT_ALLOWED = %w[
+        http://localhost http://127.0.0.1 http://[::1]
+        https://localhost https://127.0.0.1 https://[::1]
+      ].freeze
+      # Hosts that always pass the Host-header check even without an explicit
+      # allow-list entry — they resolve to loopback by definition and cannot
+      # be rebound to an attacker-controlled address.
+      LOOPBACK_HOSTS = %w[localhost 127.0.0.1 ::1 [::1]].freeze
+      # Values sent in the CORS allow-methods / allow-headers response headers.
+      ALLOWED_METHODS = 'GET, POST, DELETE, OPTIONS'
+      ALLOWED_HEADERS = 'Authorization, Content-Type, Mcp-Session-Id'
+      # Response bodies are emitted as constants so the rejected Origin /
+      # Host value is NEVER echoed back to the caller — preventing a
+      # stored-XSS / log-injection surface where an attacker-supplied
+      # header ended up embedded in the JSON error.
+      FORBIDDEN_BODY = { jsonrpc: '2.0', error: { code: -32_002, message: 'Origin not allowed' }, id: nil }.to_json.freeze
+      FORBIDDEN_HOST_BODY = { jsonrpc: '2.0', error: { code: -32_002, message: 'Host not allowed' }, id: nil }.to_json.freeze
+      # Build the middleware.
+      #
+      # Blank and nil entries are dropped and the rest are normalized
+      # (lowercased, one trailing slash removed). An empty result falls back
+      # to DEFAULT_ALLOWED. The host allow-list is derived from the origin
+      # allow-list by stripping the scheme.
+      # NOTE(review): this class does not read any environment variable
+      # itself; presumably the caller resolves WOODS_MCP_HTTP_ALLOWED_ORIGINS
+      # and passes the result as +allowed_origins+ — confirm.
+      #
+      # @param app [#call] Downstream Rack application
+      # @param allowed_origins [Array<String>, String, nil] Origins to accept
+      def initialize(app, allowed_origins: nil)
+        @app = app
+        @allowed = Array(allowed_origins).compact.reject { |o| o.to_s.strip.empty? }.map { |o| normalize(o) }
+        @allowed = DEFAULT_ALLOWED.dup if @allowed.empty?
+        @allowed_hosts = @allowed.map { |o| extract_host(o) }.compact.uniq
+      end
+      # Rack entry point.
+      #
+      # Checks run in this order: a present but disallowed Origin is rejected,
+      # then a present but disallowed Host, then OPTIONS is answered as a
+      # preflight without reaching the app. Requests missing either header
+      # skip the corresponding check.
+      #
+      # @param env [Hash] Rack environment
+      # @return [Array] Rack response triple [status, headers, body]
+      def call(env)
+        origin = env['HTTP_ORIGIN']
+        method = env['REQUEST_METHOD']
+        host = env['HTTP_HOST']
+        return forbidden if origin && !origin_allowed?(origin)
+        return forbidden_host if host && !host_allowed?(host)
+        return preflight(origin) if method == 'OPTIONS'
+        status, headers, body = @app.call(env)
+        # CORS headers are the merge receiver, so a header the app sets under
+        # the same key (e.g. 'vary') replaces the CORS value.
+        headers = cors_headers(origin).merge(headers) if origin && origin_allowed?(origin)
+        [status, headers, body]
+      end
+      private
+      # Lowercase an origin and drop a single trailing slash.
+      #
+      # @param origin [#to_s]
+      # @return [String]
+      def normalize(origin)
+        origin.to_s.sub(%r{/\z}, '').downcase
+      end
+      # Reduce an origin to its host part: strips a leading http:// or
+      # https:// and everything from the first remaining slash onward, then
+      # lowercases. Any port is kept.
+      #
+      # @param origin [#to_s]
+      # @return [String, nil] Host (with port, if any), or nil when nothing is left
+      def extract_host(origin)
+        host = origin.to_s.sub(%r{\Ahttps?://}, '').sub(%r{/.*\z}, '').downcase
+        host.empty? ? nil : host
+      end
+      # Decide whether a Host header value is acceptable.
+      #
+      # @param host [String] Raw Host header value
+      # @return [Boolean]
+      def host_allowed?(host)
+        # Canonicalize (strip port, trailing dot, IPv6 brackets) via the
+        # shared helper so Qdrant and OriginGuard stay in sync on bypass
+        # notations. `normalized` keeps the port for literal allow-list
+        # lookups; `bare` drops it for loopback matching.
+        normalized = host.to_s.downcase.sub(/\.\z/, '')
+        bare = Util::HostGuard.canonicalize(host)
+        # Reject non-canonical numeric hosts. Net::HTTP / getaddrinfo
+        # would happily resolve `0x7f000001` or `2130706433` to 127.0.0.1,
+        # bypassing the loopback allow-list.
+        return false if Util::HostGuard.suspicious_numeric_host?(bare)
+        return true if LOOPBACK_HOSTS.include?(bare)
+        @allowed_hosts.include?(normalized) || @allowed_hosts.include?(bare)
+      end
+      # Decide whether an Origin header value is acceptable.
+      #
+      # Any value containing a control character is rejected outright.
+      # Otherwise the normalized origin must appear in the allow-list either
+      # exactly or after removing a trailing ":<digits>" port.
+      #
+      # @param origin [String] Raw Origin header value
+      # @return [Boolean]
+      def origin_allowed?(origin)
+        return false if origin.match?(/[[:cntrl:]]/)
+        @allowed.include?(normalize(origin)) || @allowed.include?(normalize(origin).sub(/:\d+\z/, ''))
+      end
+      # Answer an OPTIONS request with 204 and an empty body. CORS headers
+      # are attached only when an allowed Origin is present.
+      #
+      # @param origin [String, nil] Raw Origin header value
+      # @return [Array] Rack response triple
+      def preflight(origin)
+        headers = origin && origin_allowed?(origin) ? cors_headers(origin) : {}
+        [204, headers, []]
+      end
+      # CORS response headers for an allowed origin. The origin is echoed
+      # exactly as received (not normalized).
+      #
+      # @param origin [String] Origin header value
+      # @return [Hash] Lowercase header name => value
+      def cors_headers(origin)
+        {
+          'access-control-allow-origin' => origin,
+          'access-control-allow-methods' => ALLOWED_METHODS,
+          'access-control-allow-headers' => ALLOWED_HEADERS,
+          'access-control-expose-headers' => 'Mcp-Session-Id',
+          'vary' => 'Origin'
+        }
+      end
+      # 403 response for a rejected Origin, with a constant JSON body.
+      #
+      # @return [Array] Rack response triple
+      def forbidden
+        [403, { 'content-type' => 'application/json' }, [FORBIDDEN_BODY]]
+      end
+      # 403 response for a rejected Host, with a constant JSON body.
+      #
+      # @return [Array] Rack response triple
+      def forbidden_host
+        [403, { 'content-type' => 'application/json' }, [FORBIDDEN_HOST_BODY]]
+      end
+    end
+  end
+end