woods 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'woods'
4
+ require_relative 'client'
5
+ require_relative 'rate_limiter'
6
+ require_relative 'document_builder'
7
+
8
module Woods
  module Unblocked
    # Orchestrates syncing Woods extraction data to an Unblocked collection.
    #
    # Reads extraction output through a reader object (+list_units+ /
    # +find_unit+), converts each unit to a document via DocumentBuilder, and
    # pushes it with the client's +put_document+.
    # All syncs are idempotent — documents are upserted by URI.
    #
    # @example
    #   exporter = Exporter.new(index_dir: "tmp/woods")
    #   stats = exporter.sync_all
    #   # => { synced: 940, skipped: 5060, errors: [] }
    #
    class Exporter
      # Maximum number of error messages returned by #sync_all; the rest are
      # summarised in a single trailing entry.
      MAX_ERRORS = 100

      # Substring of a Woods::Error message that signals the API call budget
      # is used up. No dedicated error class is visible here, so the message
      # is the only available signal.
      BUDGET_EXHAUSTED_MARKER = 'daily budget exhausted'
      private_constant :BUDGET_EXHAUSTED_MARKER

      # Unit types to sync, in priority order.
      # All units are synced for these types.
      FULL_SYNC_TYPES = %w[
        model controller service job mailer manager decorator concern serializer
        graphql graphql_type graphql_mutation graphql_resolver graphql_query
      ].freeze

      # Unit types where only the most-connected units are synced.
      # Each entry: [type, max_count]
      PARTIAL_SYNC_TYPES = [
        ['poro', 100],
        ['lib', 50]
      ].freeze

      # @param index_dir [String] Path to extraction output directory
      # @param config [Configuration] Woods configuration (default: global config)
      # @param client [Client, nil] Unblocked API client (auto-created from config if nil)
      # @param reader [Object, nil] IndexReader instance (auto-created if nil)
      # @param output [IO, nil] Progress output stream (default: $stdout); nil silences logging
      # @raise [ConfigurationError] if required config is missing
      def initialize(index_dir:, config: Woods.configuration, client: nil, reader: nil, output: $stdout)
        @collection_id = config.unblocked_collection_id
        raise ConfigurationError, 'unblocked_collection_id is required' unless @collection_id

        repo_url = config.unblocked_repo_url
        raise ConfigurationError, 'unblocked_repo_url is required' unless repo_url

        api_token = config.unblocked_api_token
        raise ConfigurationError, 'unblocked_api_token is required' unless api_token

        # The rate limiter (and the UNBLOCKED_DAILY_BUDGET lookup) is only
        # needed for the default client, so it is built lazily: an injected
        # client must not fail because of an unrelated environment variable.
        @client = client || build_client(api_token)
        @reader = reader || build_reader(index_dir)
        @builder = DocumentBuilder.new(repo_url: repo_url)
        @output = output
        @budget_exhausted = false
      end

      # Sync all configured unit types to the Unblocked collection.
      #
      # Stops visiting further types as soon as a push reports that the daily
      # budget is exhausted: every later push would fail the same way, and
      # partial types would load all of their units from the reader first.
      #
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      #   where +errors+ is capped at MAX_ERRORS entries plus a summary line
      def sync_all
        totals = empty_stats
        @budget_exhausted = false

        FULL_SYNC_TYPES.each do |type|
          break if @budget_exhausted

          accumulate(totals, sync_type(type))
        end

        PARTIAL_SYNC_TYPES.each do |type, max_count|
          break if @budget_exhausted

          accumulate(totals, sync_type_partial(type, max_count))
        end

        { synced: totals[:synced], skipped: totals[:skipped], errors: cap_errors(totals[:errors]) }
      end

      # Sync all units of a given type.
      #
      # @param type [String] Unit type (e.g. "model", "controller")
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_type(type)
        units = @reader.list_units(type: type)
        log " #{type}: #{units.size} units"

        sync_units(units)
      end

      # Sync the top N most-connected units of a type (by dependent count).
      #
      # Units whose data cannot be found, and units ranked below +max_count+,
      # are reported as skipped.
      #
      # @param type [String] Unit type
      # @param max_count [Integer] Maximum units to sync
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_type_partial(type, max_count)
        units = @reader.list_units(type: type)
        return empty_stats if units.empty?

        # Load full data up front: the dependent count lives in the unit data,
        # not in the listing entry.
        loaded = units.filter_map do |entry|
          data = @reader.find_unit(entry['identifier'])
          [entry, data] if data
        end

        # Rank by dependent count (descending). The listing position is the
        # tie-breaker so the selection is stable across runs.
        ranked = loaded.each_with_index.sort_by do |(_entry, data), position|
          [-dependent_count(data), position]
        end
        top_units = ranked.first(max_count).map(&:first)

        log " #{type}: #{top_units.size}/#{units.size} units (top by dependents)"

        result = sync_unit_data(top_units)
        # Everything listed but not selected was skipped — this covers both
        # units past the cutoff and units with no data on disk.
        result[:skipped] += units.size - top_units.size
        result
      end

      private

      # Look up and push each listed unit; units without data count as skipped.
      #
      # @param units [Array<Hash>] listing entries with an 'identifier' key
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_units(units)
        stats = empty_stats

        units.each do |entry|
          keep_going = sync_one(entry, stats) { @reader.find_unit(entry['identifier']) }
          break unless keep_going
        end

        stats
      end

      # Push units whose data has already been loaded.
      #
      # @param entries_with_data [Array<Array(Hash, Hash)>] [entry, unit_data] pairs
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_unit_data(entries_with_data)
        stats = empty_stats

        entries_with_data.each do |entry, unit_data|
          keep_going = sync_one(entry, stats) { unit_data }
          break unless keep_going
        end

        stats
      end

      # Push a single unit and record the outcome in +stats+.
      #
      # The block supplies the unit data so that lookup failures are captured
      # by the same rescue clauses as push failures.
      #
      # @param entry [Hash] listing entry with an 'identifier' key
      # @param stats [Hash] accumulator mutated in place
      # @yieldreturn [Hash, nil] unit data, or nil when the unit has none
      # @return [Boolean] false when the daily budget is exhausted and the
      #   caller should stop, true otherwise
      def sync_one(entry, stats)
        unit_data = yield

        if unit_data
          push_document(unit_data)
          stats[:synced] += 1
        else
          stats[:skipped] += 1
        end

        true
      rescue Woods::Error => e
        stats[:errors] << "#{entry['identifier']}: #{e.message}"
        return true unless e.message.include?(BUDGET_EXHAUSTED_MARKER)

        @budget_exhausted = true
        false
      rescue StandardError => e
        stats[:errors] << "#{entry['identifier']}: #{e.message}"
        true
      end

      # Build a document from the unit data and upsert it into the collection.
      def push_document(unit_data)
        doc = @builder.build(unit_data)
        @client.put_document(
          collection_id: @collection_id,
          title: doc[:title],
          body: doc[:body],
          uri: doc[:uri]
        )
      end

      # Default client, rate-limited by UNBLOCKED_DAILY_BUDGET (falls back to
      # RateLimiter::DEFAULT_BUDGET when the variable is unset).
      def build_client(api_token)
        budget = ENV.fetch('UNBLOCKED_DAILY_BUDGET', RateLimiter::DEFAULT_BUDGET).to_i
        Client.new(api_token: api_token, rate_limiter: RateLimiter.new(daily_budget: budget))
      end

      # Default reader; the require is deferred so the MCP reader is only
      # loaded when no reader is injected.
      def build_reader(index_dir)
        require_relative '../mcp/index_reader'
        Woods::MCP::IndexReader.new(index_dir)
      end

      # Number of dependents recorded on a unit (0 when the key is absent).
      def dependent_count(data)
        (data['dependents'] || []).size
      end

      # Add one per-type result into the running totals.
      def accumulate(totals, result)
        totals[:synced] += result[:synced]
        totals[:skipped] += result[:skipped]
        totals[:errors].concat(result[:errors])
      end

      def empty_stats
        { synced: 0, skipped: 0, errors: [] }
      end

      # Keep at most MAX_ERRORS messages and summarise the remainder.
      def cap_errors(errors)
        return errors if errors.size <= MAX_ERRORS

        errors.first(MAX_ERRORS) + ["... and #{errors.size - MAX_ERRORS} more errors"]
      end

      def log(message)
        @output&.puts(message)
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module Woods
  module Unblocked
    # Daily budget-based rate limiter for the Unblocked API (1000 calls/day).
    #
    # Unlike Notion's per-second throttling, Unblocked limits by daily call count.
    # Tracks usage against a configurable budget, warns once when approaching
    # the limit, and raises when exhausted.
    #
    # The counter lives in memory on this instance and is only cleared by
    # #reset!; nothing here resets it when the calendar day changes.
    #
    # @example
    #   limiter = RateLimiter.new(daily_budget: 1000)
    #   limiter.track { client.put_document(...) }  # => result
    #   limiter.remaining  # => 999
    #
    class RateLimiter
      DEFAULT_BUDGET = 1000
      WARN_THRESHOLD = 0.8 # Warn at 80% usage

      # @param daily_budget [Integer] Maximum API calls per day
      # @param warn_io [IO, nil] Where to write warnings (default: $stderr); nil disables them
      # @raise [ArgumentError] if daily_budget is not a positive Integer
      def initialize(daily_budget: DEFAULT_BUDGET, warn_io: $stderr)
        unless daily_budget.is_a?(Integer) && daily_budget.positive?
          raise ArgumentError, 'daily_budget must be positive'
        end

        @daily_budget = daily_budget
        @calls_today = 0
        @warn_io = warn_io
        @warned = false
        @mutex = Mutex.new
      end

      # Execute a block, tracking the API call against the daily budget.
      #
      # The call is counted before the block runs, so a block that raises
      # still consumes budget. Only the bookkeeping happens under the mutex;
      # the block itself runs outside it.
      #
      # @yield The API call to execute
      # @return [Object] The block's return value
      # @raise [ArgumentError] if no block is given
      # @raise [Woods::Error] if daily budget is exhausted
      def track
        raise ArgumentError, 'block required' unless block_given?

        @mutex.synchronize do
          if @calls_today >= @daily_budget
            raise Woods::Error,
                  "Unblocked API daily budget exhausted (#{@daily_budget} calls). " \
                  'Budget resets at midnight PST. Use UNBLOCKED_DAILY_BUDGET to adjust.'
          end

          @calls_today += 1
          warn_if_approaching_limit
        end

        yield
      end

      # Number of API calls remaining in the daily budget.
      #
      # @return [Integer]
      def remaining
        @daily_budget - @calls_today
      end

      # Number of API calls used today.
      #
      # @return [Integer]
      def used
        @calls_today
      end

      # Reset the daily counter (for testing or manual reset).
      #
      # Also re-arms the one-time usage warning.
      #
      # @return [void]
      def reset!
        @mutex.synchronize do
          @calls_today = 0
          @warned = false
        end
      end

      private

      # Emit the usage warning once, the first time usage reaches
      # WARN_THRESHOLD of the budget. Called with the mutex held.
      def warn_if_approaching_limit
        return if @warned
        # Round the threshold up: truncating would warn below 80% for budgets
        # that are not a multiple of 5 (e.g. at 2 of 3 calls).
        return unless @calls_today >= (@daily_budget * WARN_THRESHOLD).ceil

        @warned = true
        @warn_io&.puts(
          "WARNING: Unblocked API usage at #{@calls_today}/#{@daily_budget} " \
          "(#{remaining} calls remaining)"
        )
      end
    end
  end
end
data/lib/woods/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Woods
4
- VERSION = '1.0.0'
4
+ VERSION = '1.2.0'
5
5
  end
data/lib/woods.rb CHANGED
@@ -43,6 +43,7 @@ module Woods
43
43
  :session_tracer_enabled, :session_store, :session_id_proc, :session_exclude_paths,
44
44
  :console_mcp_enabled, :console_mcp_path, :console_redacted_columns,
45
45
  :notion_api_token, :notion_database_ids,
46
+ :unblocked_api_token, :unblocked_collection_id, :unblocked_repo_url,
46
47
  :cache_store, :cache_options
47
48
  attr_reader :max_context_tokens, :similarity_threshold, :extractors, :pretty_json, :context_format,
48
49
  :cache_enabled
@@ -70,6 +71,9 @@ module Woods
70
71
  @console_redacted_columns = []
71
72
  @notion_api_token = nil
72
73
  @notion_database_ids = {}
74
+ @unblocked_api_token = nil
75
+ @unblocked_collection_id = nil
76
+ @unblocked_repo_url = nil
73
77
  @cache_enabled = false
74
78
  @cache_store = nil # :redis, :solid_cache, :memory, or a CacheStore instance
75
79
  @cache_options = {} # { redis: client, cache: store, ttl: { embeddings: 86400, ... } }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: woods
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leah Armstrong
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-13 00:00:00.000000000 Z
11
+ date: 2026-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mcp
@@ -237,6 +237,10 @@ files:
237
237
  - lib/woods/temporal/json_snapshot_store.rb
238
238
  - lib/woods/temporal/snapshot_store.rb
239
239
  - lib/woods/token_utils.rb
240
+ - lib/woods/unblocked/client.rb
241
+ - lib/woods/unblocked/document_builder.rb
242
+ - lib/woods/unblocked/exporter.rb
243
+ - lib/woods/unblocked/rate_limiter.rb
240
244
  - lib/woods/version.rb
241
245
  homepage: https://github.com/lost-in-the/woods
242
246
  licenses: