woods 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/tasks/woods.rake +54 -0
- data/lib/woods/graph_analyzer.rb +211 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +61 -0
- data/lib/woods/mcp/server.rb +34 -0
- data/lib/woods/unblocked/client.rb +163 -0
- data/lib/woods/unblocked/document_builder.rb +301 -0
- data/lib/woods/unblocked/exporter.rb +201 -0
- data/lib/woods/unblocked/rate_limiter.rb +94 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +4 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 927abae1f4f641405384261569e1d25f94a672ca986d1c50093b3f6a56b7db38
|
|
4
|
+
data.tar.gz: fa35b4320669d195a8e4f377400b6999e735aebf5447071ee3353eaa8856840b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5ae6ef3436f6aa6b936b46103480e797a8a6e0fb4250f5dcc8bc721c2b9b911739e5d5aebd5b8b97c6788d58dcd19e9dbd5c6211a3400283e60084ce80c6d031
|
|
7
|
+
data.tar.gz: 6b38946aca86d407ab6d516d32dda4c5797adfcd27249b1685bdebb249ff34e71d62e3eabb266c991d1b32ad2df815e2a56dc924615c1def9df0c4c6754cd629
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.2.0] - 2026-03-27
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **Unblocked Documents API exporter** — sync extraction data to an Unblocked collection for code review and Q&A context
|
|
13
|
+
- `Woods::Unblocked::Client` — REST client with retry and daily budget rate limiting
|
|
14
|
+
- `Woods::Unblocked::DocumentBuilder` — type-specific Markdown formatters optimized for review context (blast radius, entry points, associations, side effects)
|
|
15
|
+
- `Woods::Unblocked::Exporter` — full/partial sync orchestrator with priority ordering
|
|
16
|
+
- `Woods::Unblocked::RateLimiter` — daily budget tracking (1000 calls/day)
|
|
17
|
+
- New rake tasks: `woods:unblocked_sync` (alias: `woods:relay`)
|
|
18
|
+
- New config: `unblocked_api_token`, `unblocked_collection_id`, `unblocked_repo_url`
|
|
19
|
+
- Integration guide: `docs/UNBLOCKED_INTEGRATION.md`
|
|
20
|
+
- **Domain cluster detection** in `GraphAnalyzer` — groups code units into semantic domains using namespace prefixes and graph connectivity
|
|
21
|
+
- `GraphAnalyzer#domain_clusters` — hybrid namespace + graph clustering with hub identification, entry point detection, and boundary edge mapping
|
|
22
|
+
- New MCP tool: `domain_clusters` with `min_size` and `types` filters
|
|
23
|
+
- New renderer: `render_domain_clusters` in MarkdownRenderer
|
|
24
|
+
|
|
8
25
|
## [0.3.1] - 2026-03-04
|
|
9
26
|
|
|
10
27
|
### Fixed
|
data/lib/tasks/woods.rake
CHANGED
|
@@ -618,4 +618,58 @@ namespace :woods do
|
|
|
618
618
|
|
|
619
619
|
desc 'Send findings from the field — sync to Notion (alias for notion_sync)'
|
|
620
620
|
task send: :notion_sync
|
|
621
|
+
|
|
622
|
+
desc 'Sync extraction data to Unblocked collection (Documents API)'
|
|
623
|
+
task unblocked_sync: :environment do
|
|
624
|
+
require 'woods/unblocked/exporter'
|
|
625
|
+
|
|
626
|
+
config = Woods.configuration
|
|
627
|
+
config.unblocked_api_token = ENV.fetch('UNBLOCKED_API_TOKEN', nil) || config.unblocked_api_token
|
|
628
|
+
config.unblocked_collection_id = ENV.fetch('UNBLOCKED_COLLECTION_ID', nil) || config.unblocked_collection_id
|
|
629
|
+
config.unblocked_repo_url = ENV.fetch('UNBLOCKED_REPO_URL', nil) || config.unblocked_repo_url
|
|
630
|
+
|
|
631
|
+
unless config.unblocked_api_token
|
|
632
|
+
puts 'ERROR: Unblocked API token not configured.'
|
|
633
|
+
puts 'Set UNBLOCKED_API_TOKEN env var or configure unblocked_api_token in Woods.configure.'
|
|
634
|
+
exit 1
|
|
635
|
+
end
|
|
636
|
+
|
|
637
|
+
unless config.unblocked_collection_id
|
|
638
|
+
puts 'ERROR: Unblocked collection ID not configured.'
|
|
639
|
+
puts 'Set UNBLOCKED_COLLECTION_ID env var or configure unblocked_collection_id in Woods.configure.'
|
|
640
|
+
exit 1
|
|
641
|
+
end
|
|
642
|
+
|
|
643
|
+
unless config.unblocked_repo_url
|
|
644
|
+
puts 'ERROR: Repository URL not configured.'
|
|
645
|
+
puts 'Set UNBLOCKED_REPO_URL env var or configure unblocked_repo_url in Woods.configure.'
|
|
646
|
+
puts 'Example: https://github.com/your-org/your-repo'
|
|
647
|
+
exit 1
|
|
648
|
+
end
|
|
649
|
+
|
|
650
|
+
output_dir = ENV.fetch('WOODS_OUTPUT', config.output_dir)
|
|
651
|
+
|
|
652
|
+
puts 'Syncing extraction data to Unblocked...'
|
|
653
|
+
puts " Output dir: #{output_dir}"
|
|
654
|
+
puts " Collection: #{config.unblocked_collection_id}"
|
|
655
|
+
puts " Repo URL: #{config.unblocked_repo_url}"
|
|
656
|
+
puts
|
|
657
|
+
|
|
658
|
+
exporter = Woods::Unblocked::Exporter.new(index_dir: output_dir)
|
|
659
|
+
stats = exporter.sync_all
|
|
660
|
+
|
|
661
|
+
puts
|
|
662
|
+
puts 'Sync complete!'
|
|
663
|
+
puts " Documents synced: #{stats[:synced]}"
|
|
664
|
+
puts " Documents skipped: #{stats[:skipped]}"
|
|
665
|
+
|
|
666
|
+
if stats[:errors].any?
|
|
667
|
+
puts " Errors: #{stats[:errors].size}"
|
|
668
|
+
stats[:errors].first(5).each { |e| puts " - #{e}" }
|
|
669
|
+
puts " ... and #{stats[:errors].size - 5} more" if stats[:errors].size > 5
|
|
670
|
+
end
|
|
671
|
+
end
|
|
672
|
+
|
|
673
|
+
desc 'Relay findings to Unblocked (alias for unblocked_sync)'
|
|
674
|
+
task relay: :unblocked_sync
|
|
621
675
|
end
|
data/lib/woods/graph_analyzer.rb
CHANGED
|
@@ -154,6 +154,52 @@ module Woods
|
|
|
154
154
|
end
|
|
155
155
|
end
|
|
156
156
|
|
|
157
|
+
# Group units into semantic domains using namespace prefixes and graph connectivity.
|
|
158
|
+
#
|
|
159
|
+
# Strategy:
|
|
160
|
+
# 1. Seed clusters from top-level namespace prefixes (e.g., ShippingProfile::*, Order::*)
|
|
161
|
+
# 2. Assign unnamespaced units to their most-connected cluster
|
|
162
|
+
# 3. Merge small clusters (< min_size) into their most-connected neighbor
|
|
163
|
+
# 4. For each cluster, identify the hub (highest PageRank) and entry points
|
|
164
|
+
# 5. Compute boundary edges between clusters
|
|
165
|
+
#
|
|
166
|
+
# @param min_size [Integer] Minimum units per cluster before merging (default: 3)
|
|
167
|
+
# @param types [Array<String>, nil] Filter to these unit types (default: all)
|
|
168
|
+
# @return [Array<Hash>] Clusters sorted by member count descending.
|
|
169
|
+
# Each hash: { name:, hub:, members:, member_count:, entry_points:, boundary_edges:, types: }
|
|
170
|
+
def domain_clusters(min_size: 3, types: nil)
|
|
171
|
+
nodes = graph_nodes
|
|
172
|
+
return [] if nodes.empty?
|
|
173
|
+
|
|
174
|
+
# Filter by types if specified
|
|
175
|
+
filtered_ids = if types
|
|
176
|
+
type_set = types.map(&:to_s)
|
|
177
|
+
nodes.select { |_, meta| type_set.include?(meta[:type].to_s) }.keys
|
|
178
|
+
else
|
|
179
|
+
nodes.keys
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
return [] if filtered_ids.empty?
|
|
183
|
+
|
|
184
|
+
# Step 1: Seed clusters from namespace prefixes
|
|
185
|
+
clusters = seed_namespace_clusters(filtered_ids, nodes)
|
|
186
|
+
|
|
187
|
+
# Step 2: Assign unnamespaced/root units to most-connected cluster
|
|
188
|
+
assign_orphaned_units(clusters, filtered_ids, nodes)
|
|
189
|
+
|
|
190
|
+
# Step 3: Merge small clusters
|
|
191
|
+
merge_small_clusters(clusters, min_size)
|
|
192
|
+
|
|
193
|
+
# Step 4: Enrich each cluster with hub, entry points, boundary edges
|
|
194
|
+
pagerank_scores = @graph.pagerank
|
|
195
|
+
enrich_clusters(clusters, nodes, pagerank_scores)
|
|
196
|
+
|
|
197
|
+
# Sort by member count descending
|
|
198
|
+
clusters.values
|
|
199
|
+
.select { |c| c[:members].any? }
|
|
200
|
+
.sort_by { |c| -c[:member_count] }
|
|
201
|
+
end
|
|
202
|
+
|
|
157
203
|
# Full analysis report combining all structural metrics.
|
|
158
204
|
#
|
|
159
205
|
# @return [Hash] Complete analysis with :orphans, :dead_ends, :hubs,
|
|
@@ -182,6 +228,171 @@ module Woods
|
|
|
182
228
|
|
|
183
229
|
private
|
|
184
230
|
|
|
231
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
232
|
+
# Domain Cluster Helpers
|
|
233
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
234
|
+
|
|
235
|
+
# Extract the top-level namespace prefix for clustering.
|
|
236
|
+
# "ShippingProfile::Setting" => "ShippingProfile"
|
|
237
|
+
# "Order::Transactions::Refund" => "Order"
|
|
238
|
+
# "Account" => nil (no namespace)
|
|
239
|
+
def cluster_prefix(identifier)
|
|
240
|
+
parts = identifier.to_s.split('::')
|
|
241
|
+
parts.size > 1 ? parts.first : nil
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Seed initial clusters from namespace prefixes.
|
|
245
|
+
def seed_namespace_clusters(filtered_ids, _nodes)
|
|
246
|
+
clusters = {}
|
|
247
|
+
|
|
248
|
+
filtered_ids.each do |id|
|
|
249
|
+
prefix = cluster_prefix(id)
|
|
250
|
+
next unless prefix
|
|
251
|
+
|
|
252
|
+
clusters[prefix] ||= { name: prefix, members: [], member_set: Set.new }
|
|
253
|
+
clusters[prefix][:members] << id
|
|
254
|
+
clusters[prefix][:member_set].add(id)
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
clusters
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
# Assign units with no namespace prefix to their most-connected cluster.
|
|
261
|
+
def assign_orphaned_units(clusters, filtered_ids, _nodes)
|
|
262
|
+
return if clusters.empty?
|
|
263
|
+
|
|
264
|
+
unassigned = filtered_ids.select { |id| cluster_prefix(id).nil? }
|
|
265
|
+
|
|
266
|
+
unassigned.each do |id|
|
|
267
|
+
best_cluster = find_most_connected_cluster(id, clusters)
|
|
268
|
+
next unless best_cluster
|
|
269
|
+
|
|
270
|
+
clusters[best_cluster][:members] << id
|
|
271
|
+
clusters[best_cluster][:member_set].add(id)
|
|
272
|
+
end
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
# Find which cluster a unit has the most connections to.
|
|
276
|
+
def find_most_connected_cluster(identifier, clusters)
|
|
277
|
+
connections = Hash.new(0)
|
|
278
|
+
|
|
279
|
+
# Check forward edges (dependencies)
|
|
280
|
+
@graph.dependencies_of(identifier).each do |dep|
|
|
281
|
+
clusters.each do |name, cluster|
|
|
282
|
+
connections[name] += 1 if cluster[:member_set].include?(dep)
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
# Check reverse edges (dependents)
|
|
287
|
+
@graph.dependents_of(identifier).each do |dep|
|
|
288
|
+
clusters.each do |name, cluster|
|
|
289
|
+
connections[name] += 1 if cluster[:member_set].include?(dep)
|
|
290
|
+
end
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
return nil if connections.empty?
|
|
294
|
+
|
|
295
|
+
connections.max_by { |_, count| count }.first
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
# Merge clusters smaller than min_size into their most-connected neighbor.
|
|
299
|
+
def merge_small_clusters(clusters, min_size)
|
|
300
|
+
loop do
|
|
301
|
+
small = clusters.select { |_, c| c[:members].size < min_size }
|
|
302
|
+
break if small.empty?
|
|
303
|
+
|
|
304
|
+
# Merge the smallest cluster first
|
|
305
|
+
name, cluster = small.min_by { |_, c| c[:members].size }
|
|
306
|
+
|
|
307
|
+
# Find which other cluster this one connects to most
|
|
308
|
+
target = find_merge_target(cluster, clusters, name)
|
|
309
|
+
|
|
310
|
+
if target
|
|
311
|
+
clusters[target][:members].concat(cluster[:members])
|
|
312
|
+
cluster[:members].each { |id| clusters[target][:member_set].add(id) }
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
clusters.delete(name)
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
# Find the best cluster to merge into (most cross-cluster edges).
|
|
320
|
+
def find_merge_target(cluster, all_clusters, exclude_name)
|
|
321
|
+
connections = Hash.new(0)
|
|
322
|
+
|
|
323
|
+
cluster[:members].each do |id|
|
|
324
|
+
(@graph.dependencies_of(id) + @graph.dependents_of(id)).each do |connected|
|
|
325
|
+
all_clusters.each do |name, other|
|
|
326
|
+
next if name == exclude_name
|
|
327
|
+
|
|
328
|
+
connections[name] += 1 if other[:member_set].include?(connected)
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
end
|
|
332
|
+
|
|
333
|
+
return nil if connections.empty?
|
|
334
|
+
|
|
335
|
+
connections.max_by { |_, count| count }.first
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
# Enrich clusters with hub, entry points, boundary edges, and type breakdown.
|
|
339
|
+
def enrich_clusters(clusters, nodes, pagerank_scores)
|
|
340
|
+
clusters.each_value do |cluster|
|
|
341
|
+
members = cluster[:members]
|
|
342
|
+
member_set = cluster[:member_set]
|
|
343
|
+
|
|
344
|
+
# Hub: highest PageRank within the cluster
|
|
345
|
+
hub_id = members.max_by { |id| pagerank_scores[id] || 0 }
|
|
346
|
+
cluster[:hub] = hub_id
|
|
347
|
+
|
|
348
|
+
# Entry points: controllers and GraphQL resolvers in the cluster's dependents
|
|
349
|
+
entry_types = %w[controller graphql_resolver graphql_mutation graphql_query]
|
|
350
|
+
entry_points = Set.new
|
|
351
|
+
members.each do |id|
|
|
352
|
+
@graph.dependents_of(id).each do |dep|
|
|
353
|
+
meta = nodes[dep]
|
|
354
|
+
entry_points.add(dep) if meta && entry_types.include?(meta[:type].to_s)
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
cluster[:entry_points] = entry_points.to_a
|
|
358
|
+
|
|
359
|
+
# Boundary edges: connections that cross cluster boundaries
|
|
360
|
+
boundary = []
|
|
361
|
+
members.each do |id|
|
|
362
|
+
@graph.dependencies_of(id).each do |dep|
|
|
363
|
+
next if member_set.include?(dep)
|
|
364
|
+
|
|
365
|
+
dep_meta = nodes[dep]
|
|
366
|
+
next unless dep_meta
|
|
367
|
+
|
|
368
|
+
boundary << { from: id, to: dep, via: 'dependency' }
|
|
369
|
+
end
|
|
370
|
+
|
|
371
|
+
@graph.dependents_of(id).each do |dep|
|
|
372
|
+
next if member_set.include?(dep)
|
|
373
|
+
|
|
374
|
+
dep_meta = nodes[dep]
|
|
375
|
+
next unless dep_meta
|
|
376
|
+
|
|
377
|
+
boundary << { from: dep, to: id, via: 'dependent' }
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
# Deduplicate and limit boundary edges
|
|
381
|
+
cluster[:boundary_edges] = boundary.uniq { |e| [e[:from], e[:to]] }.first(20)
|
|
382
|
+
|
|
383
|
+
# Type breakdown
|
|
384
|
+
type_counts = members.each_with_object(Hash.new(0)) do |id, counts|
|
|
385
|
+
meta = nodes[id]
|
|
386
|
+
counts[meta[:type].to_s] += 1 if meta
|
|
387
|
+
end
|
|
388
|
+
cluster[:types] = type_counts
|
|
389
|
+
|
|
390
|
+
# Final shape
|
|
391
|
+
cluster[:member_count] = members.size
|
|
392
|
+
cluster.delete(:member_set) # Internal tracking, not part of output
|
|
393
|
+
end
|
|
394
|
+
end
|
|
395
|
+
|
|
185
396
|
# ──────────────────────────────────────────────────────────────────────
|
|
186
397
|
# Graph Accessors
|
|
187
398
|
# ──────────────────────────────────────────────────────────────────────
|
|
@@ -165,6 +165,67 @@ module Woods
|
|
|
165
165
|
lines.join("\n").rstrip
|
|
166
166
|
end
|
|
167
167
|
|
|
168
|
+
# ── domain_clusters ────────────────────────────────────────
|
|
169
|
+
|
|
170
|
+
# @param data [Hash] Domain cluster data with :clusters and :total
|
|
171
|
+
# @return [String] Markdown domain cluster overview
|
|
172
|
+
def render_domain_clusters(data, **)
|
|
173
|
+
clusters = fetch_key(data, :clusters) || []
|
|
174
|
+
total = fetch_key(data, :total) || clusters.size
|
|
175
|
+
lines = []
|
|
176
|
+
lines << '## Domain Clusters'
|
|
177
|
+
lines << ''
|
|
178
|
+
lines << "#{total} domains detected."
|
|
179
|
+
lines << ''
|
|
180
|
+
|
|
181
|
+
clusters.each do |cluster|
|
|
182
|
+
name = cluster[:name] || cluster['name']
|
|
183
|
+
member_count = cluster[:member_count] || cluster['member_count'] || 0
|
|
184
|
+
hub = cluster[:hub] || cluster['hub']
|
|
185
|
+
lines << "### #{name} (#{member_count} units)"
|
|
186
|
+
lines << ''
|
|
187
|
+
lines << "**Hub:** #{hub}" if hub
|
|
188
|
+
lines << ''
|
|
189
|
+
|
|
190
|
+
# Type breakdown
|
|
191
|
+
types = cluster[:types] || cluster['types']
|
|
192
|
+
if types.is_a?(Hash) && types.any?
|
|
193
|
+
type_parts = types.sort_by { |_, count| -count }.map { |type, count| "#{count} #{type}s" }
|
|
194
|
+
lines << "**Types:** #{type_parts.join(', ')}"
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
# Entry points
|
|
198
|
+
entry_points = cluster[:entry_points] || cluster['entry_points'] || []
|
|
199
|
+
lines << "**Entry points:** #{entry_points.first(10).join(', ')}" if entry_points.any?
|
|
200
|
+
|
|
201
|
+
# Members (show first 15)
|
|
202
|
+
members = cluster[:members] || cluster['members'] || []
|
|
203
|
+
if members.any?
|
|
204
|
+
lines << ''
|
|
205
|
+
lines << '**Members:**'
|
|
206
|
+
members.first(15).each { |m| lines << "- #{m}" }
|
|
207
|
+
lines << "- _... and #{members.size - 15} more_" if members.size > 15
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# Boundary edges (show first 10)
|
|
211
|
+
boundaries = cluster[:boundary_edges] || cluster['boundary_edges'] || []
|
|
212
|
+
if boundaries.any?
|
|
213
|
+
lines << ''
|
|
214
|
+
lines << '**Boundary connections:**'
|
|
215
|
+
boundaries.first(10).each do |edge|
|
|
216
|
+
from = edge[:from] || edge['from']
|
|
217
|
+
to = edge[:to] || edge['to']
|
|
218
|
+
via = edge[:via] || edge['via']
|
|
219
|
+
lines << "- #{from} → #{to} (#{via})"
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
lines << ''
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
lines.join("\n").rstrip
|
|
227
|
+
end
|
|
228
|
+
|
|
168
229
|
# ── pagerank ────────────────────────────────────────────────
|
|
169
230
|
|
|
170
231
|
# @param data [Hash] PageRank data with :total_nodes and :results
|
data/lib/woods/mcp/server.rb
CHANGED
|
@@ -61,6 +61,7 @@ module Woods
|
|
|
61
61
|
render_key: :dependents)
|
|
62
62
|
define_structure_tool(server, reader, respond, renderer)
|
|
63
63
|
define_graph_analysis_tool(server, reader, respond, renderer)
|
|
64
|
+
define_domain_clusters_tool(server, reader, respond, renderer)
|
|
64
65
|
define_pagerank_tool(server, reader, respond, renderer)
|
|
65
66
|
define_framework_tool(server, reader, respond, renderer)
|
|
66
67
|
define_recent_changes_tool(server, reader, respond, renderer)
|
|
@@ -306,6 +307,39 @@ module Woods
|
|
|
306
307
|
end
|
|
307
308
|
end
|
|
308
309
|
|
|
310
|
+
def define_domain_clusters_tool(server, reader, respond, renderer)
|
|
311
|
+
coerce = method(:coerce_array)
|
|
312
|
+
coerce_int = method(:coerce_integer)
|
|
313
|
+
server.define_tool(
|
|
314
|
+
name: 'domain_clusters',
|
|
315
|
+
description: 'Group code units into semantic domains by namespace and graph connectivity. ' \
|
|
316
|
+
'Returns clusters with hub nodes, entry points, boundary edges, and type breakdowns. ' \
|
|
317
|
+
'Useful for understanding architectural domains and blast radius.',
|
|
318
|
+
input_schema: {
|
|
319
|
+
properties: {
|
|
320
|
+
min_size: {
|
|
321
|
+
type: 'integer',
|
|
322
|
+
description: 'Minimum units per cluster before merging into neighbors (default: 3)'
|
|
323
|
+
},
|
|
324
|
+
types: {
|
|
325
|
+
type: 'array', items: { type: 'string' },
|
|
326
|
+
description: 'Filter to these unit types (default: all). Example: ["model", "service", "job"]'
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
) do |server_context:, min_size: nil, types: nil|
|
|
331
|
+
min_size = coerce_int.call(min_size) || 3
|
|
332
|
+
types = coerce.call(types)
|
|
333
|
+
|
|
334
|
+
graph = reader.dependency_graph
|
|
335
|
+
analyzer = Woods::GraphAnalyzer.new(graph)
|
|
336
|
+
|
|
337
|
+
clusters = analyzer.domain_clusters(min_size: min_size, types: types)
|
|
338
|
+
|
|
339
|
+
respond.call(renderer.render(:domain_clusters, { clusters: clusters, total: clusters.size }))
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
|
|
309
343
|
def define_pagerank_tool(server, reader, respond, renderer)
|
|
310
344
|
coerce = method(:coerce_array)
|
|
311
345
|
coerce_int = method(:coerce_integer)
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'net/http'
|
|
5
|
+
require 'uri'
|
|
6
|
+
require_relative 'rate_limiter'
|
|
7
|
+
|
|
8
|
+
module Woods
|
|
9
|
+
module Unblocked
|
|
10
|
+
# REST client for the Unblocked API v1.
|
|
11
|
+
#
|
|
12
|
+
# Handles document and collection CRUD with rate limiting, retries,
|
|
13
|
+
# and error handling. Uses Net::HTTP for zero external dependencies.
|
|
14
|
+
#
|
|
15
|
+
# @example
|
|
16
|
+
# client = Client.new(api_token: "ubk_...")
|
|
17
|
+
# client.put_document(
|
|
18
|
+
# collection_id: "uuid",
|
|
19
|
+
# title: "Order (model)",
|
|
20
|
+
# body: "# Order\n...",
|
|
21
|
+
# uri: "https://github.com/org/repo/blob/main/app/models/order.rb"
|
|
22
|
+
# )
|
|
23
|
+
#
|
|
24
|
+
class Client
|
|
25
|
+
BASE_URL = 'https://getunblocked.com/api/v1'
|
|
26
|
+
MAX_RETRIES = 3
|
|
27
|
+
DEFAULT_TIMEOUT = 30
|
|
28
|
+
|
|
29
|
+
# @param api_token [String] Unblocked API token (Personal or Team)
|
|
30
|
+
# @param rate_limiter [RateLimiter] Rate limiter instance
|
|
31
|
+
# @raise [ArgumentError] if api_token is nil or empty
|
|
32
|
+
def initialize(api_token:, rate_limiter: RateLimiter.new)
|
|
33
|
+
raise ArgumentError, 'api_token is required' if api_token.nil? || api_token.to_s.strip.empty?
|
|
34
|
+
|
|
35
|
+
@api_token = api_token
|
|
36
|
+
@rate_limiter = rate_limiter
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Create or update a document (upsert by URI).
|
|
40
|
+
#
|
|
41
|
+
# Documents are unique by `uri` across the organization. If a document
|
|
42
|
+
# with the given URI exists, it is updated; otherwise it is created.
|
|
43
|
+
# Documents become available for queries within ~1 minute.
|
|
44
|
+
#
|
|
45
|
+
# @param collection_id [String] Target collection UUID
|
|
46
|
+
# @param title [String] Document title (plain text)
|
|
47
|
+
# @param body [String] Document body (Markdown preferred)
|
|
48
|
+
# @param uri [String] Source URL (used as unique identifier and citation link)
|
|
49
|
+
# @return [Hash] { "id" => "document-uuid" }
|
|
50
|
+
def put_document(collection_id:, title:, body:, uri:)
|
|
51
|
+
request(:put, 'documents', {
|
|
52
|
+
collectionId: collection_id,
|
|
53
|
+
title: title,
|
|
54
|
+
body: body,
|
|
55
|
+
uri: uri
|
|
56
|
+
})
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Create a new collection.
|
|
60
|
+
#
|
|
61
|
+
# @param name [String] Collection name (1-32 chars)
|
|
62
|
+
# @param description [String] Collection description (1-4096 chars)
|
|
63
|
+
# @param icon_url [String, nil] Optional icon URL
|
|
64
|
+
# @return [Hash] { "id" => "collection-uuid", "name" => "...", ... }
|
|
65
|
+
def create_collection(name:, description:, icon_url: nil)
|
|
66
|
+
body = { name: name, description: description }
|
|
67
|
+
body[:iconUrl] = icon_url if icon_url
|
|
68
|
+
request(:post, 'collections', body)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# List all collections.
|
|
72
|
+
#
|
|
73
|
+
# @return [Array<Hash>] Collection objects
|
|
74
|
+
def list_collections
|
|
75
|
+
result = request(:get, 'collections')
|
|
76
|
+
result['items'] || result['data'] || [result].flatten.compact
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Delete a document by ID.
|
|
80
|
+
#
|
|
81
|
+
# @param document_id [String] Document UUID
|
|
82
|
+
# @return [Hash] API response
|
|
83
|
+
def delete_document(document_id:)
|
|
84
|
+
request(:delete, "documents/#{document_id}")
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
private
|
|
88
|
+
|
|
89
|
+
def request(method, path, body = nil)
|
|
90
|
+
retries = 0
|
|
91
|
+
|
|
92
|
+
loop do
|
|
93
|
+
response = @rate_limiter.track { execute_http(method, path, body) }
|
|
94
|
+
|
|
95
|
+
return parse_response(response) if response.is_a?(Net::HTTPSuccess)
|
|
96
|
+
|
|
97
|
+
if response.code == '429' && retries < MAX_RETRIES
|
|
98
|
+
retries += 1
|
|
99
|
+
wait_time = (response['Retry-After'] || (retries * 2)).to_f
|
|
100
|
+
sleep(wait_time)
|
|
101
|
+
next
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
raise_api_error(response)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def execute_http(method, path, body)
|
|
109
|
+
attempts = 0
|
|
110
|
+
begin
|
|
111
|
+
uri = URI("#{BASE_URL}/#{path}")
|
|
112
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
113
|
+
http.use_ssl = true
|
|
114
|
+
http.open_timeout = DEFAULT_TIMEOUT
|
|
115
|
+
http.read_timeout = DEFAULT_TIMEOUT
|
|
116
|
+
|
|
117
|
+
req = build_request(method, uri, body)
|
|
118
|
+
http.request(req)
|
|
119
|
+
rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED => e
|
|
120
|
+
attempts += 1
|
|
121
|
+
raise Woods::Error, "Network error after #{attempts} retries: #{e.message}" if attempts >= MAX_RETRIES
|
|
122
|
+
|
|
123
|
+
sleep(2**attempts)
|
|
124
|
+
retry
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def build_request(method, uri, body)
|
|
129
|
+
req = case method
|
|
130
|
+
when :put then Net::HTTP::Put.new(uri)
|
|
131
|
+
when :post then Net::HTTP::Post.new(uri)
|
|
132
|
+
when :get then Net::HTTP::Get.new(uri)
|
|
133
|
+
when :delete then Net::HTTP::Delete.new(uri)
|
|
134
|
+
else raise ArgumentError, "Unsupported HTTP method: #{method}"
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
req['Authorization'] = "Bearer #{@api_token}"
|
|
138
|
+
req['Content-Type'] = 'application/json'
|
|
139
|
+
req.body = JSON.generate(body) if body
|
|
140
|
+
|
|
141
|
+
req
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def parse_response(response)
|
|
145
|
+
return {} if response.body.nil? || response.body.strip.empty?
|
|
146
|
+
|
|
147
|
+
JSON.parse(response.body)
|
|
148
|
+
rescue JSON::ParserError
|
|
149
|
+
{}
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def raise_api_error(response)
|
|
153
|
+
parsed = begin
|
|
154
|
+
JSON.parse(response.body)
|
|
155
|
+
rescue JSON::ParserError, TypeError
|
|
156
|
+
{ 'message' => response.body&.slice(0, 200) || 'Unknown error' }
|
|
157
|
+
end
|
|
158
|
+
message = parsed['message'] || parsed['error'] || 'Unknown error'
|
|
159
|
+
raise Woods::Error, "Unblocked API error #{response.code}: #{message}"
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
|
|
4
|
+
module Unblocked
|
|
5
|
+
# Converts extracted unit JSON into condensed Markdown documents
|
|
6
|
+
# optimized for Unblocked's code review and Q&A context.
|
|
7
|
+
#
|
|
8
|
+
# Each unit type has a specialized formatting strategy that emphasizes
|
|
9
|
+
# what matters for code review: associations, blast radius, entry points,
|
|
10
|
+
# side effects, and structural complexity.
|
|
11
|
+
#
|
|
12
|
+
# @example
|
|
13
|
+
# builder = DocumentBuilder.new(repo_url: "https://github.com/bigcartel/admin")
|
|
14
|
+
# doc = builder.build(unit_data)
|
|
15
|
+
# # => { title: "Order (model)", body: "# Order (model)\n...", uri: "https://..." }
|
|
16
|
+
#
|
|
17
|
+
class DocumentBuilder
|
|
18
|
+
# @param repo_url [String] GitHub repo base URL for citation URIs
|
|
19
|
+
def initialize(repo_url:)
|
|
20
|
+
@repo_url = repo_url.chomp('/')
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Build a document hash from a unit's extracted data.
|
|
24
|
+
#
|
|
25
|
+
# @param unit_data [Hash] Parsed unit JSON (from IndexReader)
|
|
26
|
+
# @return [Hash] { title:, body:, uri: }
|
|
27
|
+
def build(unit_data)
|
|
28
|
+
type = unit_data['type']
|
|
29
|
+
identifier = unit_data['identifier']
|
|
30
|
+
file_path = unit_data['file_path']
|
|
31
|
+
|
|
32
|
+
{
|
|
33
|
+
title: "#{identifier} (#{type})",
|
|
34
|
+
body: build_body(unit_data),
|
|
35
|
+
uri: build_uri(file_path)
|
|
36
|
+
}
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def build_uri(file_path)
|
|
42
|
+
return @repo_url unless file_path
|
|
43
|
+
|
|
44
|
+
"#{@repo_url}/blob/main/#{file_path}"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def build_body(unit_data)
|
|
48
|
+
type = unit_data['type']
|
|
49
|
+
case type
|
|
50
|
+
when 'model' then build_model_body(unit_data)
|
|
51
|
+
when 'controller' then build_controller_body(unit_data)
|
|
52
|
+
when 'service', 'job', 'mailer', 'manager', 'decorator', 'concern'
|
|
53
|
+
build_generic_body(unit_data)
|
|
54
|
+
when 'graphql', 'graphql_type', 'graphql_mutation', 'graphql_resolver', 'graphql_query'
|
|
55
|
+
build_graphql_body(unit_data)
|
|
56
|
+
else build_generic_body(unit_data)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# ── Model formatting ─────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
def build_model_body(unit)
|
|
63
|
+
meta = unit['metadata'] || {}
|
|
64
|
+
sections = []
|
|
65
|
+
|
|
66
|
+
sections << model_header(unit, meta)
|
|
67
|
+
sections << model_associations(meta)
|
|
68
|
+
sections << model_dependents(unit)
|
|
69
|
+
sections << model_entry_points(unit)
|
|
70
|
+
sections << model_schema_highlights(meta)
|
|
71
|
+
sections << model_side_effects(unit)
|
|
72
|
+
|
|
73
|
+
sections.compact.join("\n\n")
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def model_header(unit, meta)
|
|
77
|
+
parts = ["# #{unit['identifier']} (model)"]
|
|
78
|
+
file_info = ["**File:** `#{unit['file_path']}`"]
|
|
79
|
+
file_info << "**LOC:** #{meta['loc']}" if meta['loc']
|
|
80
|
+
file_info << "**Table:** #{meta['table_name']}" if meta['table_name']
|
|
81
|
+
column_count = meta['column_count'] || (meta['columns'] || []).size
|
|
82
|
+
file_info << "(#{column_count} columns)" if column_count&.positive?
|
|
83
|
+
parts << file_info.join(' | ')
|
|
84
|
+
parts.join("\n")
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def model_associations(meta)
|
|
88
|
+
assocs = meta['associations'] || []
|
|
89
|
+
return nil if assocs.empty?
|
|
90
|
+
|
|
91
|
+
grouped = assocs.group_by { |a| a['type'] }
|
|
92
|
+
lines = ["## Associations (#{assocs.size})"]
|
|
93
|
+
|
|
94
|
+
%w[belongs_to has_many has_one has_and_belongs_to_many].each do |type|
|
|
95
|
+
items = grouped[type]
|
|
96
|
+
next unless items&.any?
|
|
97
|
+
|
|
98
|
+
targets = items.map do |a|
|
|
99
|
+
name = a['target'] || a['name']
|
|
100
|
+
dep = a.dig('options', 'dependent')
|
|
101
|
+
dep ? "#{name} (#{dep})" : name
|
|
102
|
+
end
|
|
103
|
+
lines << "**#{type}:** #{targets.join(', ')}"
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
lines.join("\n")
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def model_dependents(unit)
|
|
110
|
+
deps = unit['dependents'] || []
|
|
111
|
+
return nil if deps.empty?
|
|
112
|
+
|
|
113
|
+
grouped = deps.group_by { |d| d['type'] }
|
|
114
|
+
summary_parts = grouped.map { |type, items| "#{items.size} #{type}s" }
|
|
115
|
+
|
|
116
|
+
lines = ["## Dependents (#{deps.size} units)"]
|
|
117
|
+
lines << summary_parts.join(', ')
|
|
118
|
+
|
|
119
|
+
# Blast radius assessment
|
|
120
|
+
if deps.size > 50
|
|
121
|
+
lines << '**High blast radius** — changes here affect many parts of the codebase'
|
|
122
|
+
elsif deps.size > 20
|
|
123
|
+
lines << '**Moderate blast radius** — changes may ripple to dependent code'
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
lines.join("\n")
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def model_entry_points(unit)
|
|
130
|
+
deps = unit['dependents'] || []
|
|
131
|
+
controllers = deps.select { |d| d['type'] == 'controller' }
|
|
132
|
+
graphql = deps.select { |d| d['type']&.start_with?('graphql') }
|
|
133
|
+
jobs = deps.select { |d| d['type'] == 'job' }
|
|
134
|
+
|
|
135
|
+
return nil if controllers.empty? && graphql.empty?
|
|
136
|
+
|
|
137
|
+
lines = ['## Entry Points']
|
|
138
|
+
lines << "**Controllers:** #{controllers.map { |c| c['identifier'] }.join(', ')}" if controllers.any?
|
|
139
|
+
lines << "**GraphQL:** #{graphql.map { |g| g['identifier'] }.join(', ')}" if graphql.any?
|
|
140
|
+
lines << "**Jobs:** #{jobs.map { |j| j['identifier'] }.join(', ')}" if jobs.any?
|
|
141
|
+
|
|
142
|
+
lines.join("\n")
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Render a "## Schema Highlights" section summarizing enums, scopes,
# inlined concerns, and callbacks from a model's metadata hash.
# Malformed/absent entries are skipped; returns nil when nothing remains.
def model_schema_highlights(meta)
  highlights = []

  enums = meta['enums']
  if enums.is_a?(Hash) && enums.any?
    formatted = enums.map { |name, values| "#{name} (#{format_enum_values(values)})" }
    highlights << "**Enums:** #{formatted.join('; ')}"
  end

  scopes = meta['scopes']
  if scopes.is_a?(Array) && scopes.any?
    highlights << "**Scopes:** #{scopes.map { |s| s['name'] }.join(', ')}"
  end

  concerns = meta['inlined_concerns']
  if concerns.is_a?(Array) && concerns.any?
    highlights << "**Concerns:** #{concerns.join(', ')}"
  end

  callbacks = meta['callbacks']
  if callbacks.is_a?(Array) && callbacks.any?
    highlights << "**Callbacks (#{callbacks.size}):** #{format_callbacks(callbacks)}"
  end

  return nil if highlights.empty?

  (['## Schema Highlights'] + highlights).join("\n")
end
|
|
169
|
+
|
|
170
|
+
# Render a "## Side Effects" section listing the background jobs and
# mailers that depend on this unit. Returns nil when there are neither.
def model_side_effects(unit)
  by_type = (unit['dependents'] || []).group_by { |d| d['type'] }
  jobs = by_type.fetch('job', [])
  mailers = by_type.fetch('mailer', [])

  return nil if jobs.empty? && mailers.empty?

  section = ['## Side Effects']
  section << "**Jobs:** #{jobs.map { |j| j['identifier'] }.join(', ')}" unless jobs.empty?
  section << "**Mailers:** #{mailers.map { |m| m['identifier'] }.join(', ')}" unless mailers.empty?

  section.join("\n")
end
|
|
183
|
+
|
|
184
|
+
# ── Controller formatting ────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
# Assemble the full Markdown body for a controller unit: header, file
# path, inheritance chain, routes, model dependencies, and view templates.
# Section helpers return nil when empty and are compacted away.
def build_controller_body(unit)
  meta = unit['metadata'] || {}

  parts = ["# #{unit['identifier']} (controller)", "**File:** `#{unit['file_path']}`"]

  ancestors = meta['ancestors']
  if ancestors.is_a?(Array) && ancestors.size > 1
    # ancestors[0] is presumably the class itself — only up to three
    # superclasses are shown (TODO confirm against the extractor).
    parts << "**Inherits:** #{ancestors[1..3]&.join(' → ')}"
  end

  parts << controller_routes(meta)
  parts << controller_dependencies(unit)
  parts << controller_dependents(unit)

  parts.compact.join("\n\n")
end
|
|
202
|
+
|
|
203
|
+
# Render a "## Routes" section from controller metadata.
#
# Expects meta['routes'] to map action names to arrays of route hashes
# ('verb'/'path'); malformed entries are ignored. Output is capped at 20
# lines (header + 19 routes). Returns nil when no valid routes are found.
def controller_routes(meta)
  routes = meta['routes']
  return nil unless routes.is_a?(Hash) && routes.any?

  entries = routes.flat_map do |action, route_list|
    next [] unless route_list.is_a?(Array)

    route_list.grep(Hash).map { |route| "- `#{route['verb']} #{route['path']}` (#{action})" }
  end

  return nil if entries.empty?

  (['## Routes'] + entries).first(20).join("\n")
end
|
|
220
|
+
|
|
221
|
+
# Render a "## Dependencies" section listing the models a controller
# touches. Returns nil when the unit has no model dependencies.
def controller_dependencies(unit)
  dependencies = unit['dependencies'] || []
  return nil if dependencies.empty?

  # Single pass: collect 'target' of every model-typed dependency
  # (targets may be nil; that matches the original select+map behavior).
  model_names = dependencies.each_with_object([]) do |dep, acc|
    acc << dep['target'] if dep['type'] == 'model'
  end
  return nil if model_names.empty?

  "## Dependencies\n**Models:** #{model_names.join(', ')}"
end
|
|
230
|
+
|
|
231
|
+
# Render a "## Views" section listing up to ten view templates that
# depend on this controller. Returns nil when there are none.
def controller_dependents(unit)
  view_units = (unit['dependents'] || []).select { |d| d['type'] == 'view_template' }
  return nil if view_units.empty?

  bullets = view_units.first(10).map { |v| "- `#{v['identifier']}`" }
  "## Views\n#{bullets.join("\n")}"
end
|
|
238
|
+
|
|
239
|
+
# ── GraphQL formatting ───────────────────────────────────────────
|
|
240
|
+
|
|
241
|
+
# Assemble the Markdown body for a GraphQL unit: header, file path,
# referenced models, and a count of dependent units.
def build_graphql_body(unit)
  body = ["# #{unit['identifier']} (#{unit['type']})", "**File:** `#{unit['file_path']}`"]

  model_targets = (unit['dependencies'] || [])
                  .select { |d| d['type'] == 'model' }
                  .map { |d| d['target'] }
  body << "**Models:** #{model_targets.join(', ')}" unless model_targets.empty?

  dependent_count = (unit['dependents'] || []).size
  body << "**Referenced by:** #{dependent_count} units" if dependent_count.positive?

  body.compact.join("\n\n")
end
|
|
256
|
+
|
|
257
|
+
# ── Generic formatting (services, jobs, mailers, etc.) ──────────
|
|
258
|
+
|
|
259
|
+
# Assemble the Markdown body for a generic unit (service, job, mailer,
# etc.): header, file path, LOC, dependencies grouped by type, and a
# dependent-count summary.
def build_generic_body(unit)
  meta = unit['metadata'] || {}

  body = ["# #{unit['identifier']} (#{unit['type']})", "**File:** `#{unit['file_path']}`"]
  body << "**LOC:** #{meta['loc']}" if meta['loc']

  deps = unit['dependencies'] || []
  unless deps.empty?
    grouped_lines = deps.group_by { |d| d['type'] }.map do |type, group|
      "#{type}: #{group.map { |d| d['target'] }.join(', ')}"
    end
    body << "## Dependencies\n#{grouped_lines.join("\n")}"
  end

  dependents = unit['dependents'] || []
  unless dependents.empty?
    counts = dependents.group_by { |d| d['type'] }.map { |type, group| "#{group.size} #{type}s" }
    body << "## Dependents (#{dependents.size})\n#{counts.join(', ')}"
  end

  body.compact.join("\n\n")
end
|
|
283
|
+
|
|
284
|
+
# ── Helpers ──────────────────────────────────────────────────────
|
|
285
|
+
|
|
286
|
+
# Condense an enum's values for display: at most five keys (Hash) or
# elements (Array), comma-separated; anything else is stringified.
def format_enum_values(values)
  if values.is_a?(Hash)
    values.keys.first(5).join(', ')
  elsif values.is_a?(Array)
    values.first(5).join(', ')
  else
    values.to_s
  end
end
|
|
293
|
+
|
|
294
|
+
# Summarize up to five callbacks as "type: filter" pairs.
def format_callbacks(callbacks)
  callbacks.first(5).map { |cb| "#{cb['type']}: #{cb['filter']}" }.join(', ')
end
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'woods'
|
|
4
|
+
require_relative 'client'
|
|
5
|
+
require_relative 'rate_limiter'
|
|
6
|
+
require_relative 'document_builder'
|
|
7
|
+
|
|
8
|
+
module Woods
|
|
9
|
+
module Unblocked
|
|
10
|
+
# Orchestrates syncing Woods extraction data to an Unblocked collection.
|
|
11
|
+
#
|
|
12
|
+
# Reads extraction output from disk via IndexReader, converts units to
|
|
13
|
+
# condensed Markdown documents, and pushes via the Unblocked Documents API.
|
|
14
|
+
# All syncs are idempotent — documents are upserted by URI.
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# exporter = Exporter.new(index_dir: "tmp/woods")
|
|
18
|
+
# stats = exporter.sync_all
|
|
19
|
+
# # => { synced: 940, skipped: 5060, errors: [] }
|
|
20
|
+
#
|
|
21
|
+
class Exporter
  MAX_ERRORS = 100

  # Unit types to sync, in priority order.
  # All units are synced for these types.
  FULL_SYNC_TYPES = %w[
    model controller service job mailer manager decorator concern serializer
    graphql graphql_type graphql_mutation graphql_resolver graphql_query
  ].freeze

  # Unit types where only the most-connected units are synced.
  # Each entry: [type, max_count]
  PARTIAL_SYNC_TYPES = [
    ['poro', 100],
    ['lib', 50]
  ].freeze

  # @param index_dir [String] Path to extraction output directory
  # @param config [Configuration] Woods configuration (default: global config)
  # @param client [Client, nil] Unblocked API client (auto-created from config if nil)
  # @param reader [Object, nil] IndexReader instance (auto-created if nil)
  # @param output [IO] Progress output stream (default: $stdout)
  # @raise [ConfigurationError] if required config is missing
  def initialize(index_dir:, config: Woods.configuration, client: nil, reader: nil, output: $stdout)
    @collection_id = config.unblocked_collection_id
    raise ConfigurationError, 'unblocked_collection_id is required' unless @collection_id

    repo_url = config.unblocked_repo_url
    raise ConfigurationError, 'unblocked_repo_url is required' unless repo_url

    api_token = config.unblocked_api_token
    raise ConfigurationError, 'unblocked_api_token is required' unless api_token

    # Build the rate-limited client only when none was injected; previously
    # a RateLimiter was constructed (and discarded) even for injected clients.
    @client = client || build_client(api_token)
    @reader = reader || build_reader(index_dir)
    @builder = DocumentBuilder.new(repo_url: repo_url)
    @output = output
  end

  # Sync all configured unit types to the Unblocked collection.
  #
  # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
  def sync_all
    totals = empty_stats

    FULL_SYNC_TYPES.each { |type| accumulate(totals, sync_type(type)) }
    PARTIAL_SYNC_TYPES.each { |type, max_count| accumulate(totals, sync_type_partial(type, max_count)) }

    totals.merge(errors: cap_errors(totals[:errors]))
  end

  # Sync all units of a given type.
  #
  # @param type [String] Unit type (e.g. "model", "controller")
  # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
  def sync_type(type)
    units = @reader.list_units(type: type)
    log " #{type}: #{units.size} units"

    sync_units(units)
  end

  # Sync the top N most-connected units of a type (by dependent count).
  #
  # @param type [String] Unit type
  # @param max_count [Integer] Maximum units to sync
  # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
  def sync_type_partial(type, max_count)
    units = @reader.list_units(type: type)
    return empty_stats if units.empty?

    # Load full data so units can be ranked by dependent count; entries
    # whose data is missing are dropped here and counted as skipped below.
    units_with_data = units.filter_map do |entry|
      data = @reader.find_unit(entry['identifier'])
      next unless data

      { entry: entry, data: data, dep_count: (data['dependents'] || []).size }
    end

    top_units = units_with_data.sort_by { |u| -u[:dep_count] }.first(max_count)
    # Everything not selected — capped off OR missing data — counts as
    # skipped. (Was `units.size - max_count`, which under-counted whenever
    # fewer than max_count units had loadable data.)
    skipped_count = units.size - top_units.size

    log " #{type}: #{top_units.size}/#{units.size} units (top by dependents)"

    result = sync_unit_data(top_units.map { |u| [u[:entry], u[:data]] })
    result[:skipped] += skipped_count
    result
  end

  private

  # Sync index entries, loading each unit's full data on demand.
  # Entries whose data cannot be found are counted as skipped.
  def sync_units(units)
    stats = empty_stats

    units.each do |entry|
      unit_data = @reader.find_unit(entry['identifier'])
      if unit_data
        break unless push_and_record(entry, unit_data, stats)
      else
        stats[:skipped] += 1
      end
    rescue StandardError => e
      # Failures while *loading* a unit; push failures are handled inside
      # push_and_record.
      stats[:errors] << "#{entry['identifier']}: #{e.message}"
    end

    stats
  end

  # Sync [entry, unit_data] pairs whose data is already loaded.
  def sync_unit_data(entries_with_data)
    stats = empty_stats

    entries_with_data.each do |entry, unit_data|
      break unless push_and_record(entry, unit_data, stats)
    end

    stats
  end

  # Push one unit's document and fold the outcome into stats.
  # Shared by sync_units and sync_unit_data (previously duplicated).
  #
  # @return [Boolean] false when the daily API budget is exhausted and the
  #   caller should stop iterating; true otherwise (recoverable per-unit
  #   errors are recorded but do not abort the run).
  def push_and_record(entry, unit_data, stats)
    push_document(unit_data)
    stats[:synced] += 1
    true
  rescue Woods::Error => e
    stats[:errors] << "#{entry['identifier']}: #{e.message}"
    !e.message.include?('daily budget exhausted')
  rescue StandardError => e
    stats[:errors] << "#{entry['identifier']}: #{e.message}"
    true
  end

  # Fold one type's result hash into the running totals (mutates totals).
  def accumulate(totals, result)
    totals[:synced] += result[:synced]
    totals[:skipped] += result[:skipped]
    totals[:errors].concat(result[:errors])
  end

  # Build the default client with a rate limiter sized from the environment.
  def build_client(api_token)
    budget = ENV.fetch('UNBLOCKED_DAILY_BUDGET', RateLimiter::DEFAULT_BUDGET).to_i
    Client.new(api_token: api_token, rate_limiter: RateLimiter.new(daily_budget: budget))
  end

  # Upsert a single unit as a Markdown document (idempotent by URI).
  def push_document(unit_data)
    doc = @builder.build(unit_data)
    @client.put_document(
      collection_id: @collection_id,
      title: doc[:title],
      body: doc[:body],
      uri: doc[:uri]
    )
  end

  # Lazily require the MCP index reader to avoid a hard load-time dependency.
  def build_reader(index_dir)
    require_relative '../mcp/index_reader'
    Woods::MCP::IndexReader.new(index_dir)
  end

  def empty_stats
    { synced: 0, skipped: 0, errors: [] }
  end

  # Truncate very long error lists, noting how many entries were elided.
  def cap_errors(errors)
    return errors if errors.size <= MAX_ERRORS

    errors.first(MAX_ERRORS) + ["... and #{errors.size - MAX_ERRORS} more errors"]
  end

  def log(message)
    @output&.puts(message)
  end
end
|
|
200
|
+
end
|
|
201
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
|
|
4
|
+
module Unblocked
|
|
5
|
+
# Daily budget-based rate limiter for the Unblocked API (1000 calls/day).
|
|
6
|
+
#
|
|
7
|
+
# Unlike Notion's per-second throttling, Unblocked limits by daily call count.
|
|
8
|
+
# Tracks usage against a configurable budget, warns when approaching the limit,
|
|
9
|
+
# and raises when exhausted.
|
|
10
|
+
#
|
|
11
|
+
# @example
|
|
12
|
+
# limiter = RateLimiter.new(daily_budget: 1000)
|
|
13
|
+
# limiter.track { client.put_document(...) } # => result
|
|
14
|
+
# limiter.remaining # => 999
|
|
15
|
+
#
|
|
16
|
+
class RateLimiter
  DEFAULT_BUDGET = 1000
  WARN_THRESHOLD = 0.8 # Warn once usage crosses 80% of the budget

  # @param daily_budget [Integer] Maximum API calls per day
  # @param warn_io [IO] Where to write warnings (default: $stderr)
  # @raise [ArgumentError] unless daily_budget is a positive Integer
  def initialize(daily_budget: DEFAULT_BUDGET, warn_io: $stderr)
    raise ArgumentError, 'daily_budget must be positive' unless daily_budget.is_a?(Integer) && daily_budget.positive?

    @daily_budget = daily_budget
    @used = 0
    @warn_io = warn_io
    @warned = false
    @mutex = Mutex.new
  end

  # Execute a block, counting one API call against the daily budget.
  # The counter is updated under the mutex; the block itself runs outside
  # it so slow API calls do not serialize each other.
  #
  # @yield The API call to execute
  # @return [Object] The block's return value
  # @raise [Woods::Error] if the daily budget is already spent
  def track(&api_call)
    raise ArgumentError, 'block required' unless api_call

    @mutex.synchronize do
      raise Woods::Error, exhausted_message if @used >= @daily_budget

      @used += 1
      warn_if_approaching_limit
    end

    api_call.call
  end

  # Number of API calls remaining in the daily budget.
  #
  # @return [Integer]
  def remaining
    @daily_budget - @used
  end

  # Number of API calls used today.
  #
  # @return [Integer]
  def used
    @used
  end

  # Reset the counter and the one-shot warning (for testing or manual reset).
  #
  # @return [void]
  def reset!
    @mutex.synchronize do
      @used = 0
      @warned = false
    end
  end

  private

  # Message raised when the budget is exhausted (matched by callers via
  # the 'daily budget exhausted' substring).
  def exhausted_message
    "Unblocked API daily budget exhausted (#{@daily_budget} calls). " \
      'Budget resets at midnight PST. Use UNBLOCKED_DAILY_BUDGET to adjust.'
  end

  # Emit a single warning once usage crosses WARN_THRESHOLD of the budget.
  def warn_if_approaching_limit
    return if @warned || @used < (@daily_budget * WARN_THRESHOLD).to_i

    @warned = true
    @warn_io&.puts(
      "WARNING: Unblocked API usage at #{@used}/#{@daily_budget} " \
      "(#{remaining} calls remaining)"
    )
  end
end
|
|
93
|
+
end
|
|
94
|
+
end
|
data/lib/woods/version.rb
CHANGED
data/lib/woods.rb
CHANGED
|
@@ -43,6 +43,7 @@ module Woods
|
|
|
43
43
|
:session_tracer_enabled, :session_store, :session_id_proc, :session_exclude_paths,
|
|
44
44
|
:console_mcp_enabled, :console_mcp_path, :console_redacted_columns,
|
|
45
45
|
:notion_api_token, :notion_database_ids,
|
|
46
|
+
:unblocked_api_token, :unblocked_collection_id, :unblocked_repo_url,
|
|
46
47
|
:cache_store, :cache_options
|
|
47
48
|
attr_reader :max_context_tokens, :similarity_threshold, :extractors, :pretty_json, :context_format,
|
|
48
49
|
:cache_enabled
|
|
@@ -70,6 +71,9 @@ module Woods
|
|
|
70
71
|
@console_redacted_columns = []
|
|
71
72
|
@notion_api_token = nil
|
|
72
73
|
@notion_database_ids = {}
|
|
74
|
+
@unblocked_api_token = nil
|
|
75
|
+
@unblocked_collection_id = nil
|
|
76
|
+
@unblocked_repo_url = nil
|
|
73
77
|
@cache_enabled = false
|
|
74
78
|
@cache_store = nil # :redis, :solid_cache, :memory, or a CacheStore instance
|
|
75
79
|
@cache_options = {} # { redis: client, cache: store, ttl: { embeddings: 86400, ... } }
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: woods
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Leah Armstrong
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mcp
|
|
@@ -237,6 +237,10 @@ files:
|
|
|
237
237
|
- lib/woods/temporal/json_snapshot_store.rb
|
|
238
238
|
- lib/woods/temporal/snapshot_store.rb
|
|
239
239
|
- lib/woods/token_utils.rb
|
|
240
|
+
- lib/woods/unblocked/client.rb
|
|
241
|
+
- lib/woods/unblocked/document_builder.rb
|
|
242
|
+
- lib/woods/unblocked/exporter.rb
|
|
243
|
+
- lib/woods/unblocked/rate_limiter.rb
|
|
240
244
|
- lib/woods/version.rb
|
|
241
245
|
homepage: https://github.com/lost-in-the/woods
|
|
242
246
|
licenses:
|