woods 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +175 -2
- data/exe/woods-console-mcp +4 -0
- data/exe/woods-mcp +4 -0
- data/lib/tasks/woods.rake +54 -0
- data/lib/woods/extractors/model_extractor.rb +4 -1
- data/lib/woods/graph_analyzer.rb +211 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +61 -0
- data/lib/woods/mcp/server.rb +34 -0
- data/lib/woods/unblocked/client.rb +163 -0
- data/lib/woods/unblocked/document_builder.rb +301 -0
- data/lib/woods/unblocked/exporter.rb +201 -0
- data/lib/woods/unblocked/rate_limiter.rb +94 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +4 -0
- metadata +6 -2
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'woods'
|
|
4
|
+
require_relative 'client'
|
|
5
|
+
require_relative 'rate_limiter'
|
|
6
|
+
require_relative 'document_builder'
|
|
7
|
+
|
|
8
|
+
module Woods
  module Unblocked
    # Orchestrates syncing Woods extraction data to an Unblocked collection.
    #
    # Reads extraction output from disk via IndexReader, converts units to
    # condensed Markdown documents, and pushes via the Unblocked Documents API.
    # All syncs are idempotent — documents are upserted by URI.
    #
    # When the exporter builds its own client, the daily call budget is read
    # from the UNBLOCKED_DAILY_BUDGET environment variable
    # (default: RateLimiter::DEFAULT_BUDGET).
    #
    # @example
    #   exporter = Exporter.new(index_dir: "tmp/woods")
    #   stats = exporter.sync_all
    #   # => { synced: 940, skipped: 5060, errors: [] }
    #
    class Exporter
      # Maximum number of individual error messages returned by #sync_all.
      MAX_ERRORS = 100

      # Substring of the error message that signals the API budget is used up;
      # once seen, pushing further units of the current batch is pointless.
      BUDGET_EXHAUSTED_MARKER = 'daily budget exhausted'

      # Unit types to sync, in priority order.
      # All units are synced for these types.
      FULL_SYNC_TYPES = %w[
        model controller service job mailer manager decorator concern serializer
        graphql graphql_type graphql_mutation graphql_resolver graphql_query
      ].freeze

      # Unit types where only the most-connected units are synced.
      # Each entry: [type, max_count]
      PARTIAL_SYNC_TYPES = [
        ['poro', 100],
        ['lib', 50]
      ].freeze

      # @param index_dir [String] Path to extraction output directory
      # @param config [Configuration] Woods configuration (default: global config)
      # @param client [Client, nil] Unblocked API client (auto-created from config if nil)
      # @param reader [Object, nil] IndexReader instance (auto-created if nil)
      # @param output [IO] Progress output stream (default: $stdout)
      # @raise [ConfigurationError] if required config is missing
      def initialize(index_dir:, config: Woods.configuration, client: nil, reader: nil, output: $stdout)
        @collection_id = config.unblocked_collection_id
        raise ConfigurationError, 'unblocked_collection_id is required' unless @collection_id

        repo_url = config.unblocked_repo_url
        raise ConfigurationError, 'unblocked_repo_url is required' unless repo_url

        api_token = config.unblocked_api_token
        raise ConfigurationError, 'unblocked_api_token is required' unless api_token

        # An injected client is used as-is; the rate limiter (and the env
        # var it depends on) only matters for a client we create ourselves.
        @client = client || build_client(api_token)
        @reader = reader || build_reader(index_dir)
        @builder = DocumentBuilder.new(repo_url: repo_url)
        @output = output
      end

      # Sync all configured unit types to the Unblocked collection.
      #
      # The returned error list is capped at MAX_ERRORS entries plus one
      # summary line when more errors occurred.
      #
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_all
        totals = empty_stats

        FULL_SYNC_TYPES.each { |type| merge_stats(totals, sync_type(type)) }
        PARTIAL_SYNC_TYPES.each do |type, max_count|
          merge_stats(totals, sync_type_partial(type, max_count))
        end

        totals[:errors] = cap_errors(totals[:errors])
        totals
      end

      # Sync all units of a given type.
      #
      # @param type [String] Unit type (e.g. "model", "controller")
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_type(type)
        units = @reader.list_units(type: type)
        log " #{type}: #{units.size} units"

        sync_units(units)
      end

      # Sync the top N most-connected units of a type (by dependent count).
      #
      # Every listed unit that is not pushed counts as skipped: both units
      # ranked below the top +max_count+ and units whose data could not be
      # loaded from the index.
      #
      # @param type [String] Unit type
      # @param max_count [Integer] Maximum units to sync
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_type_partial(type, max_count)
        units = @reader.list_units(type: type)
        return empty_stats if units.empty?

        # Load full data to sort by dependent count; entries without data are dropped.
        ranked = units.filter_map do |entry|
          data = @reader.find_unit(entry['identifier'])
          next unless data

          { entry: entry, data: data, dep_count: (data['dependents'] || []).size }
        end

        top_units = ranked.sort_by { |u| -u[:dep_count] }.first(max_count)

        log " #{type}: #{top_units.size}/#{units.size} units (top by dependents)"

        result = sync_unit_data(top_units.map { |u| [u[:entry], u[:data]] })
        # Count against what was actually selected, so units with missing
        # data are reported as skipped instead of silently disappearing.
        result[:skipped] += units.size - top_units.size
        result
      end

      private

      # Push every listed unit, loading its full data from the reader first.
      # Entries whose data cannot be found are counted as skipped.
      #
      # @param units [Array<Hash>] Index entries (each with an 'identifier' key)
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_units(units)
        stats = empty_stats
        units.each do |entry|
          break unless record_push(entry, stats) { @reader.find_unit(entry['identifier']) }
        end
        stats
      end

      # Push units whose full data has already been loaded.
      #
      # @param entries_with_data [Array<Array(Hash, Hash)>] [entry, unit_data] pairs
      # @return [Hash] { synced: Integer, skipped: Integer, errors: Array<String> }
      def sync_unit_data(entries_with_data)
        stats = empty_stats
        entries_with_data.each do |entry, unit_data|
          break unless record_push(entry, stats) { unit_data }
        end
        stats
      end

      # Push a single unit and record the outcome in +stats+.
      #
      # The block supplies the unit data; a nil result counts the entry as
      # skipped. Any StandardError (or Woods::Error) raised while loading,
      # building or pushing is recorded as an error message instead of
      # aborting the sync.
      #
      # @param entry [Hash] Index entry, used for the identifier in error messages
      # @param stats [Hash] Mutable stats hash updated in place
      # @yieldreturn [Hash, nil] Full unit data, or nil to skip
      # @return [Boolean] false when the budget is exhausted and the caller should stop
      def record_push(entry, stats)
        unit_data = yield
        if unit_data
          push_document(unit_data)
          stats[:synced] += 1
        else
          stats[:skipped] += 1
        end
        true
      rescue Woods::Error, StandardError => e
        stats[:errors] << "#{entry['identifier']}: #{e.message}"
        !(e.is_a?(Woods::Error) && e.message.include?(BUDGET_EXHAUSTED_MARKER))
      end

      # Build the document for a unit and upsert it into the collection.
      def push_document(unit_data)
        doc = @builder.build(unit_data)
        @client.put_document(
          collection_id: @collection_id,
          title: doc[:title],
          body: doc[:body],
          uri: doc[:uri]
        )
      end

      # Create the default API client with a budget-based rate limiter.
      # The budget comes from UNBLOCKED_DAILY_BUDGET when set.
      def build_client(api_token)
        budget = ENV.fetch('UNBLOCKED_DAILY_BUDGET', RateLimiter::DEFAULT_BUDGET).to_i
        Client.new(api_token: api_token, rate_limiter: RateLimiter.new(daily_budget: budget))
      end

      # Lazily load the MCP index reader so it is only required when needed.
      def build_reader(index_dir)
        require_relative '../mcp/index_reader'
        Woods::MCP::IndexReader.new(index_dir)
      end

      # @return [Hash] A fresh, zeroed stats hash
      def empty_stats
        { synced: 0, skipped: 0, errors: [] }
      end

      # Add +result+ into +totals+ in place.
      def merge_stats(totals, result)
        totals[:synced] += result[:synced]
        totals[:skipped] += result[:skipped]
        totals[:errors].concat(result[:errors])
        totals
      end

      # Truncate the error list to MAX_ERRORS entries, appending a summary
      # line with the number of omitted errors.
      def cap_errors(errors)
        return errors if errors.size <= MAX_ERRORS

        errors.first(MAX_ERRORS) + ["... and #{errors.size - MAX_ERRORS} more errors"]
      end

      # Write a progress line; a nil output stream silences logging.
      def log(message)
        @output&.puts(message)
      end
    end
  end
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
  module Unblocked
    # Budget-based rate limiter for the Unblocked API (1000 calls/day).
    #
    # Unblocked caps usage by daily call count rather than by per-second
    # throttling (as Notion does). This class counts calls against a
    # configurable budget, emits a single warning once usage reaches the
    # warning threshold, and raises once the budget is used up.
    #
    # @example
    #   limiter = RateLimiter.new(daily_budget: 1000)
    #   limiter.track { client.put_document(...) }  # => result
    #   limiter.remaining  # => 999
    #
    class RateLimiter
      DEFAULT_BUDGET = 1000
      WARN_THRESHOLD = 0.8 # Warn at 80% usage

      # @param daily_budget [Integer] Maximum API calls per day
      # @param warn_io [IO] Where to write warnings (default: $stderr)
      # @raise [ArgumentError] unless daily_budget is a positive Integer
      def initialize(daily_budget: DEFAULT_BUDGET, warn_io: $stderr)
        valid_budget = daily_budget.is_a?(Integer) && daily_budget.positive?
        raise ArgumentError, 'daily_budget must be positive' unless valid_budget

        @mutex = Mutex.new
        @warn_io = warn_io
        @daily_budget = daily_budget
        @calls_today = 0
        @warned = false
      end

      # Run the given block as one API call counted against the budget.
      #
      # The call is counted before the block runs, so a block that raises
      # still consumes one unit of budget.
      #
      # @yield The API call to execute
      # @return [Object] The block's return value
      # @raise [ArgumentError] if no block is given
      # @raise [Woods::Error] if daily budget is exhausted
      def track
        raise ArgumentError, 'block required' unless block_given?

        # Only the bookkeeping is serialized; the block itself runs outside the lock.
        @mutex.synchronize { reserve_call }

        yield
      end

      # How many API calls are still available in the budget.
      #
      # @return [Integer]
      def remaining
        @daily_budget - @calls_today
      end

      # How many API calls have been counted so far.
      #
      # @return [Integer]
      def used
        @calls_today
      end

      # Clear the counter and re-arm the warning (for testing or manual reset).
      #
      # @return [void]
      def reset!
        @mutex.synchronize do
          @warned = false
          @calls_today = 0
        end
      end

      private

      # Count one call against the budget, or raise when none is left.
      # Callers must hold @mutex.
      def reserve_call
        if @calls_today >= @daily_budget
          raise Woods::Error,
                "Unblocked API daily budget exhausted (#{@daily_budget} calls). " \
                'Budget resets at midnight PST. Use UNBLOCKED_DAILY_BUDGET to adjust.'
        end

        @calls_today += 1
        warn_if_approaching_limit
      end

      # Emit the usage warning once, the first time usage reaches the threshold.
      def warn_if_approaching_limit
        warn_at = (@daily_budget * WARN_THRESHOLD).to_i
        return if @warned || @calls_today < warn_at

        @warned = true
        @warn_io&.puts(
          "WARNING: Unblocked API usage at #{@calls_today}/#{@daily_budget} " \
          "(#{remaining} calls remaining)"
        )
      end
    end
  end
end
|
data/lib/woods/version.rb
CHANGED
data/lib/woods.rb
CHANGED
|
@@ -43,6 +43,7 @@ module Woods
|
|
|
43
43
|
:session_tracer_enabled, :session_store, :session_id_proc, :session_exclude_paths,
|
|
44
44
|
:console_mcp_enabled, :console_mcp_path, :console_redacted_columns,
|
|
45
45
|
:notion_api_token, :notion_database_ids,
|
|
46
|
+
:unblocked_api_token, :unblocked_collection_id, :unblocked_repo_url,
|
|
46
47
|
:cache_store, :cache_options
|
|
47
48
|
attr_reader :max_context_tokens, :similarity_threshold, :extractors, :pretty_json, :context_format,
|
|
48
49
|
:cache_enabled
|
|
@@ -70,6 +71,9 @@ module Woods
|
|
|
70
71
|
@console_redacted_columns = []
|
|
71
72
|
@notion_api_token = nil
|
|
72
73
|
@notion_database_ids = {}
|
|
74
|
+
@unblocked_api_token = nil
|
|
75
|
+
@unblocked_collection_id = nil
|
|
76
|
+
@unblocked_repo_url = nil
|
|
73
77
|
@cache_enabled = false
|
|
74
78
|
@cache_store = nil # :redis, :solid_cache, :memory, or a CacheStore instance
|
|
75
79
|
@cache_options = {} # { redis: client, cache: store, ttl: { embeddings: 86400, ... } }
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: woods
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Leah Armstrong
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: mcp
|
|
@@ -237,6 +237,10 @@ files:
|
|
|
237
237
|
- lib/woods/temporal/json_snapshot_store.rb
|
|
238
238
|
- lib/woods/temporal/snapshot_store.rb
|
|
239
239
|
- lib/woods/token_utils.rb
|
|
240
|
+
- lib/woods/unblocked/client.rb
|
|
241
|
+
- lib/woods/unblocked/document_builder.rb
|
|
242
|
+
- lib/woods/unblocked/exporter.rb
|
|
243
|
+
- lib/woods/unblocked/rate_limiter.rb
|
|
240
244
|
- lib/woods/version.rb
|
|
241
245
|
homepage: https://github.com/lost-in-the/woods
|
|
242
246
|
licenses:
|