RubyGems - woods - Versions diffs - 1.1.0 → 1.3.0 - Mend

woods 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +186 -0
data/README.md +20 -8
data/exe/woods-console +51 -6
data/exe/woods-console-mcp +24 -4
data/exe/woods-mcp +30 -7
data/exe/woods-mcp-http +47 -6
data/lib/generators/woods/install_generator.rb +13 -4
data/lib/generators/woods/templates/woods.rb.tt +155 -0
data/lib/tasks/woods.rake +69 -50
data/lib/woods/builder.rb +174 -9
data/lib/woods/cache/cache_middleware.rb +360 -31
data/lib/woods/chunking/semantic_chunker.rb +334 -7
data/lib/woods/console/adapters/job_adapter.rb +10 -4
data/lib/woods/console/audit_logger.rb +76 -4
data/lib/woods/console/bridge.rb +48 -15
data/lib/woods/console/bridge_protocol.rb +44 -0
data/lib/woods/console/confirmation.rb +3 -4
data/lib/woods/console/console_response_renderer.rb +56 -18
data/lib/woods/console/credential_index.rb +201 -0
data/lib/woods/console/credential_scanner.rb +302 -0
data/lib/woods/console/dispatch_pipeline.rb +138 -0
data/lib/woods/console/embedded_executor.rb +682 -35
data/lib/woods/console/eval_guard.rb +319 -0
data/lib/woods/console/model_validator.rb +1 -3
data/lib/woods/console/rack_middleware.rb +185 -29
data/lib/woods/console/redactor.rb +161 -0
data/lib/woods/console/response_context.rb +127 -0
data/lib/woods/console/safe_context.rb +220 -23
data/lib/woods/console/scope_predicate_parser.rb +131 -0
data/lib/woods/console/server.rb +417 -486
data/lib/woods/console/sql_noise_stripper.rb +87 -0
data/lib/woods/console/sql_table_scanner.rb +213 -0
data/lib/woods/console/sql_validator.rb +81 -31
data/lib/woods/console/table_gate.rb +93 -0
data/lib/woods/console/tool_specs.rb +552 -0
data/lib/woods/console/tools/tier1.rb +3 -3
data/lib/woods/console/tools/tier4.rb +7 -1
data/lib/woods/dependency_graph.rb +66 -7
data/lib/woods/embedding/indexer.rb +190 -6
data/lib/woods/embedding/openai.rb +40 -4
data/lib/woods/embedding/provider.rb +104 -8
data/lib/woods/embedding/text_preparer.rb +23 -3
data/lib/woods/embedding/token_counter.rb +133 -0
data/lib/woods/evaluation/baseline_runner.rb +20 -2
data/lib/woods/evaluation/metrics.rb +4 -1
data/lib/woods/extracted_unit.rb +1 -0
data/lib/woods/extractor.rb +7 -1
data/lib/woods/extractors/controller_extractor.rb +6 -0
data/lib/woods/extractors/mailer_extractor.rb +16 -2
data/lib/woods/extractors/model_extractor.rb +6 -1
data/lib/woods/extractors/phlex_extractor.rb +13 -4
data/lib/woods/extractors/rails_source_extractor.rb +2 -0
data/lib/woods/extractors/route_helper_resolver.rb +130 -0
data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
data/lib/woods/extractors/view_component_extractor.rb +12 -1
data/lib/woods/extractors/view_engines/base.rb +141 -0
data/lib/woods/extractors/view_engines/erb.rb +145 -0
data/lib/woods/extractors/view_template_extractor.rb +92 -133
data/lib/woods/flow_assembler.rb +23 -15
data/lib/woods/flow_precomputer.rb +21 -2
data/lib/woods/graph_analyzer.rb +210 -0
data/lib/woods/index_artifact.rb +173 -0
data/lib/woods/mcp/bearer_auth.rb +45 -0
data/lib/woods/mcp/bootstrap_state.rb +94 -0
data/lib/woods/mcp/bootstrapper.rb +337 -16
data/lib/woods/mcp/config_resolver.rb +288 -0
data/lib/woods/mcp/errors.rb +134 -0
data/lib/woods/mcp/index_reader.rb +265 -30
data/lib/woods/mcp/origin_guard.rb +132 -0
data/lib/woods/mcp/provider_probe.rb +166 -0
data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
data/lib/woods/mcp/server.rb +771 -137
data/lib/woods/model_name_cache.rb +78 -2
data/lib/woods/notion/client.rb +25 -2
data/lib/woods/notion/mappers/model_mapper.rb +36 -2
data/lib/woods/railtie.rb +55 -15
data/lib/woods/resilience/circuit_breaker.rb +9 -2
data/lib/woods/resilience/retryable_provider.rb +40 -3
data/lib/woods/resolved_config.rb +299 -0
data/lib/woods/retrieval/context_assembler.rb +112 -5
data/lib/woods/retrieval/query_classifier.rb +1 -1
data/lib/woods/retrieval/ranker.rb +55 -6
data/lib/woods/retrieval/search_executor.rb +42 -13
data/lib/woods/retriever.rb +330 -24
data/lib/woods/session_tracer/middleware.rb +35 -1
data/lib/woods/storage/graph_store.rb +39 -0
data/lib/woods/storage/inapplicable_backend.rb +14 -0
data/lib/woods/storage/metadata_store.rb +129 -1
data/lib/woods/storage/pgvector.rb +70 -8
data/lib/woods/storage/qdrant.rb +196 -5
data/lib/woods/storage/snapshotter/metadata.rb +172 -0
data/lib/woods/storage/snapshotter/vector.rb +238 -0
data/lib/woods/storage/snapshotter.rb +24 -0
data/lib/woods/storage/vector_store.rb +184 -35
data/lib/woods/tasks.rb +85 -0
data/lib/woods/temporal/snapshot_store.rb +49 -1
data/lib/woods/token_utils.rb +44 -5
data/lib/woods/unblocked/client.rb +163 -0
data/lib/woods/unblocked/document_builder.rb +326 -0
data/lib/woods/unblocked/exporter.rb +201 -0
data/lib/woods/unblocked/rate_limiter.rb +94 -0
data/lib/woods/util/host_guard.rb +61 -0
data/lib/woods/version.rb +1 -1
data/lib/woods.rb +130 -6
metadata +73 -4

data/lib/woods/mcp/provider_probe.rb ADDED Viewed

@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+require 'net/http'
+require 'uri'
+module Woods
+  module MCP
+    # Probes an embedding provider's HTTP endpoint to confirm it is reachable
+    # before the MCP server commits to a fully-hydrated start.
+    #
+    # A probe is pure: input → result-or-raise. No logging, no stderr writes,
+    # no side effects. The caller decides what to do with a failure.
+    #
+    # Raises {Woods::MCP::ProviderUnreachable} on any network failure with
+    # structured +url:+ and +reason:+ fields so callers can pattern-match on
+    # the reason string. Raises +ArgumentError+ for unknown provider classes —
+    # that is a programming error, not a runtime condition.
+    #
+    # @example
+    #   Woods::MCP::ProviderProbe.reachable!(provider)  # → provider or raises
+    module ProviderProbe
+      # Connect timeout for Ollama probes (LAN/localhost — fail fast).
+      OLLAMA_OPEN_TIMEOUT = 0.5
+      # Read timeout for Ollama probes.
+      OLLAMA_READ_TIMEOUT = 0.5
+      # Connect timeout for OpenAI probes (WAN — allow for latency).
+      OPENAI_OPEN_TIMEOUT = 2.0
+      # Read timeout for OpenAI probes.
+      OPENAI_READ_TIMEOUT = 2.0
+      # Probe +provider+ and return it if reachable.
+      #
+      # Dispatches on the provider's concrete class:
+      # - {Woods::Embedding::Provider::Ollama} → +GET /api/tags+ on the
+      #   configured host. Any non-5xx response is treated as reachable.
+      # - {Woods::Embedding::Provider::OpenAI} → unauthenticated +GET /v1/models+
+      #   on +api.openai.com:443+. A 401, 403, or 5xx response raises
+      #   +ProviderUnreachable+ with +reason: "unauthorized"+, +"forbidden"+, or
+      #   +"http_500"+ respectively; network failures raise with the appropriate
+      #   reason string.
+      # - Any other class → raises +ArgumentError+.
+      #
+      # @param provider [Woods::Embedding::Provider::Ollama,
+      #   Woods::Embedding::Provider::OpenAI] a concrete embedding provider
+      # @return [Object] the same +provider+ if reachable
+      # @raise [Woods::MCP::ProviderUnreachable] if the endpoint is unreachable,
+      #   times out, returns 5xx, or (for OpenAI) returns 401 or 403
+      # @raise [ArgumentError] if +provider+ is not a recognised provider class
+      def self.reachable!(provider)
+        case provider
+        when Woods::Embedding::Provider::Ollama
+          probe_ollama!(provider)
+        when Woods::Embedding::Provider::OpenAI
+          probe_openai!(provider)
+        else
+          raise ArgumentError,
+                "#{self}.reachable! does not know how to probe #{provider.class} — " \
+                'add a provider-specific probe method or implement #probe_url'
+        end
+        provider
+      end
+      # Probe the Ollama instance backing +provider+.
+      #
+      # @param provider [Woods::Embedding::Provider::Ollama]
+      # @raise [Woods::MCP::ProviderUnreachable]
+      # @api private
+      def self.probe_ollama!(provider)
+        base_url = provider.instance_variable_get(:@host)
+        http_get!(base_url, '/api/tags',
+                  open_timeout: OLLAMA_OPEN_TIMEOUT,
+                  read_timeout: OLLAMA_READ_TIMEOUT,
+                  use_ssl: URI.parse(base_url).scheme == 'https') do |response|
+          if response.is_a?(Net::HTTPServerError)
+            raise Woods::MCP::ProviderUnreachable.new(
+              url: base_url,
+              reason: 'http_500'
+            )
+          end
+        end
+      end
+      private_class_method :probe_ollama!
+      # Probe the OpenAI API endpoint.
+      #
+      # The probe sends an unauthenticated +GET /v1/models+ so it deliberately
+      # expects a 401 from a healthy OpenAI. The responses below raise
+      # +ProviderUnreachable+; any other response is treated as reachable:
+      #
+      # - +401 Unauthorized+ → +reason: "unauthorized"+. The expected response
+      #   for an unauthed probe; starts :degraded so the first real query
+      #   carries the API key and surfaces credential errors precisely.
+      # - +403 Forbidden+ → +reason: "forbidden"+. Seen when the edge
+      #   intercepts the request before OpenAI's auth layer (geoblock,
+      #   corporate proxy). Subsequent embed calls will 403 too, so treating
+      #   this as reachable would give operators a false-green status.
+      # - +5xx+ → +reason: "http_500"+.
+      #
+      # @param provider [Woods::Embedding::Provider::OpenAI]
+      # @raise [Woods::MCP::ProviderUnreachable]
+      # @api private
+      def self.probe_openai!(_provider)
+        base_url = 'https://api.openai.com'
+        http_get!(base_url, '/v1/models',
+                  open_timeout: OPENAI_OPEN_TIMEOUT,
+                  read_timeout: OPENAI_READ_TIMEOUT,
+                  use_ssl: true) do |response|
+          reason = openai_unreachable_reason(response)
+          next unless reason
+          raise Woods::MCP::ProviderUnreachable.new(url: base_url, reason: reason)
+        end
+      end
+      private_class_method :probe_openai!
+      # Map an HTTP response from +GET /v1/models+ to a ProviderUnreachable
+      # reason string, or nil when the response signals a healthy provider.
+      #
+      # Uses +is_a?+ (not +case/when+) so RSpec stubs via
+      # +allow(response).to receive(:is_a?).with(...)+ compose cleanly —
+      # +case/when+ goes through +Module#===+ which some mocks don't round-trip.
+      def self.openai_unreachable_reason(response)
+        return 'unauthorized' if response.is_a?(Net::HTTPUnauthorized)
+        return 'forbidden' if response.is_a?(Net::HTTPForbidden)
+        return 'http_500' if response.is_a?(Net::HTTPServerError)
+        nil
+      end
+      private_class_method :openai_unreachable_reason
+      # Execute +GET path+ against +base_url+ and yield the response to the
+      # caller's block for provider-specific checks.
+      #
+      # Connection-refused/reset, timeout, and DNS (+SocketError+) failures are
+      # translated into {Woods::MCP::ProviderUnreachable} with a machine-readable
+      # reason string before propagating; other exceptions pass through unchanged.
+      #
+      # @param base_url [String] scheme + host + optional port
+      # @param path [String] request path
+      # @param open_timeout [Numeric]
+      # @param read_timeout [Numeric]
+      # @param use_ssl [Boolean]
+      # @yieldparam response [Net::HTTPResponse]
+      # @raise [Woods::MCP::ProviderUnreachable]
+      # @api private
+      def self.http_get!(base_url, path, open_timeout:, read_timeout:, use_ssl:)
+        uri = URI.parse(base_url)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.open_timeout = open_timeout
+        http.read_timeout = read_timeout
+        http.use_ssl = use_ssl
+        response = http.start { |h| h.get(path) }
+        yield response
+      rescue Errno::ECONNREFUSED, Errno::ECONNRESET
+        raise Woods::MCP::ProviderUnreachable.new(url: base_url, reason: 'connection_refused')
+      rescue Net::OpenTimeout, Errno::ETIMEDOUT, Net::ReadTimeout
+        raise Woods::MCP::ProviderUnreachable.new(url: base_url, reason: 'timeout')
+      rescue SocketError
+        raise Woods::MCP::ProviderUnreachable.new(url: base_url, reason: 'dns_failure')
+      end
+      private_class_method :http_get!
+    end
+  end
+end

data/lib/woods/mcp/renderers/claude_renderer.rb CHANGED Viewed

@@ -59,6 +59,12 @@ module Woods
           wrap_xml('recent_changes', super)
         end
+        def render_trace_flow(data, **)
+          content = super
+          entry_point = data[:entry_point] || data['entry_point']
+          wrap_xml('trace_flow', content, entry_point: entry_point)
+        end
         def render_default(data)
           wrap_xml('result', super)
         end

data/lib/woods/mcp/renderers/markdown_renderer.rb CHANGED Viewed

@@ -102,7 +102,11 @@ module Woods
           %w[rails_version ruby_version git_branch git_sha extracted_at].each do |key|
             lines << "- **#{key.tr('_', ' ').capitalize}:** #{manifest[key]}" if manifest[key]
           end
-          lines << "- **Total units:** #{manifest['total_units']}" if manifest['total_units']
+          lines << "- **Total units indexed:** #{manifest['total_units']}" if manifest['total_units']
+          template_engines = fetch_key(data, :template_engines)
+          if template_engines.is_a?(Array) && template_engines.any?
+            lines << "- **Supported template engines:** #{template_engines.join(', ')}"
+          end
           lines << ''
           counts = manifest['counts']
@@ -120,11 +124,32 @@ module Woods
             lines << '### Summary'
             lines << ''
             lines << summary
+            lines << ''
           end
+          lines << structure_denominators_glossary
           lines.join("\n").rstrip
         end
+        # Canonical glossary of the three index denominators that differ
+        # across Woods' tools. Surfaced once in the structure tool so
+        # readers don't have to cross-reference other tools' outputs to
+        # understand why the numbers disagree. Resolves #105.
+        def structure_denominators_glossary
+          <<~GLOSSARY
+            ### Denominators
+            - **units_indexed** (manifest.json, `structure` tool) — total
+              ExtractedUnits written by the extractor. Canonical count.
+            - **graph_nodes** (`pagerank`, `dependencies`, `dependents`) —
+              units present in the dependency graph. Excludes orphans
+              that have no incoming or outgoing edges.
+            - **searchable_entries** (`codebase_retrieve`) — retriever-store
+              entries, including per-chunk rows for units long enough to
+              be chunked. Always ≥ units_indexed.
+          GLOSSARY
+        end
         # ── graph_analysis ──────────────────────────────────────────
         # @param data [Hash] Graph analysis with section arrays and stats
@@ -147,7 +172,9 @@ module Woods
             lines << "### #{section.tr('_', ' ').capitalize}"
             lines << ''
             items.each do |item|
-              lines << if item.is_a?(Hash)
+              lines << if item.is_a?(Hash) && item.key?('score')
+                         "- **#{item['identifier']}** (#{item['type']}) — score: #{item['score']}"
+                       elsif item.is_a?(Hash)
                          "- **#{item['identifier']}** (#{item['type']}) — #{item['dependent_count']} dependents"
                        else
                          "- #{item}"
@@ -165,6 +192,67 @@ module Woods
           lines.join("\n").rstrip
         end
+        # ── domain_clusters ────────────────────────────────────────
+        # @param data [Hash] Domain cluster data with :clusters and :total
+        # @return [String] Markdown domain cluster overview
+        def render_domain_clusters(data, **)
+          clusters = fetch_key(data, :clusters) || []
+          total = fetch_key(data, :total) || clusters.size
+          lines = []
+          lines << '## Domain Clusters'
+          lines << ''
+          lines << "#{total} domains detected."
+          lines << ''
+          clusters.each do |cluster|
+            name = cluster[:name] || cluster['name']
+            member_count = cluster[:member_count] || cluster['member_count'] || 0
+            hub = cluster[:hub] || cluster['hub']
+            lines << "### #{name} (#{member_count} units)"
+            lines << ''
+            lines << "**Hub:** #{hub}" if hub
+            lines << ''
+            # Type breakdown
+            types = cluster[:types] || cluster['types']
+            if types.is_a?(Hash) && types.any?
+              type_parts = types.sort_by { |_, count| -count }.map { |type, count| "#{count} #{type}s" }
+              lines << "**Types:** #{type_parts.join(', ')}"
+            end
+            # Entry points
+            entry_points = cluster[:entry_points] || cluster['entry_points'] || []
+            lines << "**Entry points:** #{entry_points.first(10).join(', ')}" if entry_points.any?
+            # Members (show first 15)
+            members = cluster[:members] || cluster['members'] || []
+            if members.any?
+              lines << ''
+              lines << '**Members:**'
+              members.first(15).each { |m| lines << "- #{m}" }
+              lines << "- _... and #{members.size - 15} more_" if members.size > 15
+            end
+            # Boundary edges (show first 10)
+            boundaries = cluster[:boundary_edges] || cluster['boundary_edges'] || []
+            if boundaries.any?
+              lines << ''
+              lines << '**Boundary connections:**'
+              boundaries.first(10).each do |edge|
+                from = edge[:from] || edge['from']
+                to = edge[:to] || edge['to']
+                via = edge[:via] || edge['via']
+                lines << "- #{from} → #{to} (#{via})"
+              end
+            end
+            lines << ''
+          end
+          lines.join("\n").rstrip
+        end
         # ── pagerank ────────────────────────────────────────────────
         # @param data [Hash] PageRank data with :total_nodes and :results
@@ -173,7 +261,7 @@ module Woods
           lines = []
           lines << '## PageRank Scores'
           lines << ''
-          lines << "#{fetch_key(data, :total_nodes)} nodes in graph."
+          lines << "Ranking #{fetch_key(data, :total_nodes)} nodes in the dependency graph."
           lines << ''
           lines << '| Rank | Identifier | Type | Score |'
           lines << '|------|-----------|------|-------|'
@@ -240,6 +328,15 @@ module Woods
           lines.join("\n").rstrip
         end
+        # ── trace_flow ──────────────────────────────────────────────
+        # @param data [Hash] Serialized FlowDocument
+        # @return [String] Markdown flow document with a step-by-step operations table
+        def render_trace_flow(data, **)
+          require_relative '../../flow_document'
+          Woods::FlowDocument.from_h(data).to_markdown
+        end
         # ── Default fallback ────────────────────────────────────────
         # @param data [Object] Any data

data/lib/woods/mcp/renderers/plain_renderer.rb CHANGED Viewed

@@ -89,9 +89,14 @@ module Woods
           lines << 'Codebase Structure'
           lines << DIVIDER
-          %w[rails_version ruby_version git_branch git_sha extracted_at total_units].each do |key|
+          %w[rails_version ruby_version git_branch git_sha extracted_at].each do |key|
             lines << "  #{key}: #{manifest[key]}" if manifest[key]
           end
+          lines << "  units_indexed: #{manifest['total_units']}" if manifest['total_units']
+          template_engines = fetch_key(data, :template_engines)
+          if template_engines.is_a?(Array) && template_engines.any?
+            lines << "  template_engines: #{template_engines.join(', ')}"
+          end
           counts = manifest['counts']
           if counts.is_a?(Hash) && counts.any?
@@ -107,6 +112,15 @@ module Woods
             lines << summary
           end
+          lines << ''
+          lines << DIVIDER
+          lines << 'Denominators:'
+          lines << '  units_indexed     (manifest, structure): total ExtractedUnits written.'
+          lines << '  graph_nodes       (pagerank, dependencies, dependents): units in the graph'
+          lines << '                    (excludes orphans with no incoming/outgoing edges).'
+          lines << '  searchable_entries (codebase_retrieve): retriever-store entries including'
+          lines << '                    per-chunk rows. Always >= units_indexed.'
           lines.join("\n").rstrip
         end
@@ -144,7 +158,7 @@ module Woods
         def render_pagerank(data, **)
           lines = []
-          lines << "PageRank Scores (#{fetch_key(data, :total_nodes)} nodes)"
+          lines << "PageRank Scores (ranking #{fetch_key(data, :total_nodes)} graph nodes)"
           lines << DIVIDER
           results = fetch_key(data, :results, [])