woods 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +169 -0
- data/README.md +20 -8
- data/exe/woods-console +51 -6
- data/exe/woods-console-mcp +24 -4
- data/exe/woods-mcp +30 -7
- data/exe/woods-mcp-http +47 -6
- data/lib/generators/woods/install_generator.rb +13 -4
- data/lib/generators/woods/templates/woods.rb.tt +155 -0
- data/lib/tasks/woods.rake +15 -50
- data/lib/woods/builder.rb +174 -9
- data/lib/woods/cache/cache_middleware.rb +360 -31
- data/lib/woods/chunking/semantic_chunker.rb +334 -7
- data/lib/woods/console/adapters/job_adapter.rb +10 -4
- data/lib/woods/console/audit_logger.rb +76 -4
- data/lib/woods/console/bridge.rb +48 -15
- data/lib/woods/console/bridge_protocol.rb +44 -0
- data/lib/woods/console/confirmation.rb +3 -4
- data/lib/woods/console/console_response_renderer.rb +56 -18
- data/lib/woods/console/credential_index.rb +201 -0
- data/lib/woods/console/credential_scanner.rb +302 -0
- data/lib/woods/console/dispatch_pipeline.rb +138 -0
- data/lib/woods/console/embedded_executor.rb +682 -35
- data/lib/woods/console/eval_guard.rb +319 -0
- data/lib/woods/console/model_validator.rb +1 -3
- data/lib/woods/console/rack_middleware.rb +185 -29
- data/lib/woods/console/redactor.rb +161 -0
- data/lib/woods/console/response_context.rb +127 -0
- data/lib/woods/console/safe_context.rb +220 -23
- data/lib/woods/console/scope_predicate_parser.rb +131 -0
- data/lib/woods/console/server.rb +417 -486
- data/lib/woods/console/sql_noise_stripper.rb +87 -0
- data/lib/woods/console/sql_table_scanner.rb +213 -0
- data/lib/woods/console/sql_validator.rb +81 -31
- data/lib/woods/console/table_gate.rb +93 -0
- data/lib/woods/console/tool_specs.rb +552 -0
- data/lib/woods/console/tools/tier1.rb +3 -3
- data/lib/woods/console/tools/tier4.rb +7 -1
- data/lib/woods/dependency_graph.rb +66 -7
- data/lib/woods/embedding/indexer.rb +190 -6
- data/lib/woods/embedding/openai.rb +40 -4
- data/lib/woods/embedding/provider.rb +104 -8
- data/lib/woods/embedding/text_preparer.rb +23 -3
- data/lib/woods/embedding/token_counter.rb +133 -0
- data/lib/woods/evaluation/baseline_runner.rb +20 -2
- data/lib/woods/evaluation/metrics.rb +4 -1
- data/lib/woods/extracted_unit.rb +1 -0
- data/lib/woods/extractor.rb +7 -1
- data/lib/woods/extractors/controller_extractor.rb +6 -0
- data/lib/woods/extractors/mailer_extractor.rb +16 -2
- data/lib/woods/extractors/model_extractor.rb +6 -1
- data/lib/woods/extractors/phlex_extractor.rb +13 -4
- data/lib/woods/extractors/rails_source_extractor.rb +2 -0
- data/lib/woods/extractors/route_helper_resolver.rb +130 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
- data/lib/woods/extractors/view_component_extractor.rb +12 -1
- data/lib/woods/extractors/view_engines/base.rb +141 -0
- data/lib/woods/extractors/view_engines/erb.rb +145 -0
- data/lib/woods/extractors/view_template_extractor.rb +92 -133
- data/lib/woods/flow_assembler.rb +23 -15
- data/lib/woods/flow_precomputer.rb +21 -2
- data/lib/woods/graph_analyzer.rb +3 -4
- data/lib/woods/index_artifact.rb +173 -0
- data/lib/woods/mcp/bearer_auth.rb +45 -0
- data/lib/woods/mcp/bootstrap_state.rb +94 -0
- data/lib/woods/mcp/bootstrapper.rb +337 -16
- data/lib/woods/mcp/config_resolver.rb +288 -0
- data/lib/woods/mcp/errors.rb +134 -0
- data/lib/woods/mcp/index_reader.rb +265 -30
- data/lib/woods/mcp/origin_guard.rb +132 -0
- data/lib/woods/mcp/provider_probe.rb +166 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +39 -3
- data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
- data/lib/woods/mcp/server.rb +737 -137
- data/lib/woods/model_name_cache.rb +78 -2
- data/lib/woods/notion/client.rb +25 -2
- data/lib/woods/notion/mappers/model_mapper.rb +36 -2
- data/lib/woods/railtie.rb +55 -15
- data/lib/woods/resilience/circuit_breaker.rb +9 -2
- data/lib/woods/resilience/retryable_provider.rb +40 -3
- data/lib/woods/resolved_config.rb +299 -0
- data/lib/woods/retrieval/context_assembler.rb +112 -5
- data/lib/woods/retrieval/query_classifier.rb +1 -1
- data/lib/woods/retrieval/ranker.rb +55 -6
- data/lib/woods/retrieval/search_executor.rb +42 -13
- data/lib/woods/retriever.rb +330 -24
- data/lib/woods/session_tracer/middleware.rb +35 -1
- data/lib/woods/storage/graph_store.rb +39 -0
- data/lib/woods/storage/inapplicable_backend.rb +14 -0
- data/lib/woods/storage/metadata_store.rb +129 -1
- data/lib/woods/storage/pgvector.rb +70 -8
- data/lib/woods/storage/qdrant.rb +196 -5
- data/lib/woods/storage/snapshotter/metadata.rb +172 -0
- data/lib/woods/storage/snapshotter/vector.rb +238 -0
- data/lib/woods/storage/snapshotter.rb +24 -0
- data/lib/woods/storage/vector_store.rb +184 -35
- data/lib/woods/tasks.rb +85 -0
- data/lib/woods/temporal/snapshot_store.rb +49 -1
- data/lib/woods/token_utils.rb +44 -5
- data/lib/woods/unblocked/client.rb +1 -1
- data/lib/woods/unblocked/document_builder.rb +35 -10
- data/lib/woods/unblocked/exporter.rb +1 -1
- data/lib/woods/util/host_guard.rb +61 -0
- data/lib/woods/version.rb +1 -1
- data/lib/woods.rb +126 -6
- metadata +69 -4
|
@@ -7,12 +7,25 @@ module Woods
|
|
|
7
7
|
# Avoids O(n*m) per-extractor iteration of ActiveRecord::Base.descendants.
|
|
8
8
|
# Invalidated per extraction run (call .reset! before a new run).
|
|
9
9
|
#
|
|
10
|
+
# Provides two resolution layers:
|
|
11
|
+
# 1. {.model_names_regex} — whole-word match against every fully-qualified
|
|
12
|
+
# model name. Catches `User`, `Library::Book`, and `"Library::Book"`
|
|
13
|
+
# (as a string literal) because `\b` treats `:` and `"` as boundaries.
|
|
14
|
+
# 2. {.resolve_short_name} — when source references the bare inner name
|
|
15
|
+
# (e.g. `Book.new` inside `module Library`), resolve it back to its
|
|
16
|
+
# fully-qualified owner when the short name is unambiguous. Needed
|
|
17
|
+
# because the cache holds `Library::Book` but the source writes
|
|
18
|
+
# `Book` after a `module Library` opens.
|
|
19
|
+
#
|
|
10
20
|
# @example
|
|
11
21
|
# Woods::ModelNameCache.model_names
|
|
12
|
-
# # => ["User", "
|
|
22
|
+
# # => ["User", "Library::Book", ...]
|
|
13
23
|
#
|
|
14
24
|
# Woods::ModelNameCache.model_names_regex
|
|
15
|
-
# # => /\b(?:User|
|
|
25
|
+
# # => /\b(?:User|Library::Book|...)\b/
|
|
26
|
+
#
|
|
27
|
+
# Woods::ModelNameCache.resolve_short_name("Book")
|
|
28
|
+
# # => "Library::Book" (or nil when ambiguous)
|
|
16
29
|
#
|
|
17
30
|
module ModelNameCache
|
|
18
31
|
class << self
|
|
@@ -26,10 +39,40 @@ module Woods
|
|
|
26
39
|
@model_names_regex ||= build_regex
|
|
27
40
|
end
|
|
28
41
|
|
|
42
|
+
# Short-name → fully-qualified owner mapping. Ambiguous short names
|
|
43
|
+
# (two different models sharing the same inner name) map to nil so
|
|
44
|
+
# callers can detect the collision and skip the edge rather than
|
|
45
|
+
# guess.
|
|
46
|
+
#
|
|
47
|
+
# @return [Hash{String => String, nil}]
|
|
48
|
+
def short_name_map
  # Memoized: the mapping is built on first access and reused until the
  # instance variable is cleared again.
  return @short_name_map if @short_name_map

  @short_name_map = build_short_name_map
end
|
|
51
|
+
|
|
52
|
+
# Resolve a bare short name (e.g. `Book`) to its fully-qualified
|
|
53
|
+
# owner (`Library::Book`) when unambiguous. Returns nil otherwise.
|
|
54
|
+
#
|
|
55
|
+
# @param short [String]
|
|
56
|
+
# @return [String, nil]
|
|
57
|
+
def resolve_short_name(short)
  # Keys in the map are Strings, so the argument is normalised with #to_s
  # before the lookup. Unknown and ambiguous names both come back as nil.
  lookup = short_name_map
  lookup[short.to_s]
end
|
|
60
|
+
|
|
61
|
+
# Regex matching bare short names of namespaced models. Used by the
|
|
62
|
+
# dependency scanner to surface references like `Book.new`
|
|
63
|
+
# inside the `Library` module, which the full-name regex misses.
|
|
64
|
+
#
|
|
65
|
+
# @return [Regexp]
|
|
66
|
+
def short_names_regex
  # Memoized: compiled once on first access and reused until the instance
  # variable is cleared again.
  return @short_names_regex if @short_names_regex

  @short_names_regex = build_short_names_regex
end
|
|
69
|
+
|
|
29
70
|
# Clear cache (call at the start of each extraction run)
|
|
30
71
|
def reset!
  # Drop every memoized value so the next reader rebuilds it from scratch.
  %i[@model_names @model_names_regex @short_name_map @short_names_regex].each do |ivar|
    instance_variable_set(ivar, nil)
  end
  nil
end
|
|
34
77
|
|
|
35
78
|
private
|
|
@@ -46,6 +89,39 @@ module Woods
|
|
|
46
89
|
|
|
47
90
|
/\b(?:#{names.map { |n| Regexp.escape(n) }.join('|')})\b/
|
|
48
91
|
end
|
|
92
|
+
|
|
93
|
+
# Build short-name → full-name mapping. A short name that appears on
|
|
94
|
+
# multiple fully-qualified models resolves to nil so ambiguity bubbles
|
|
95
|
+
# up (instead of silently picking one). Bare top-level names
|
|
96
|
+
# (no `::`) map to themselves.
|
|
97
|
+
def build_short_name_map
  model_names.each_with_object({}) do |qualified, lookup|
    # The short name is the last `::`-separated segment; a name without
    # `::` is its own short name.
    leaf = qualified.split('::').last

    # A second, different owner for the same short name marks the entry
    # ambiguous (nil). Once nil, it stays nil for any later owner as well.
    collision = lookup.key?(leaf) && lookup[leaf] != qualified
    lookup[leaf] = collision ? nil : qualified
  end
end
|
|
109
|
+
|
|
110
|
+
def build_short_names_regex
  # Keep only short names that resolve to exactly one owner AND differ from
  # that owner, i.e. the model is namespaced. Ambiguous entries (nil) and
  # top-level models (short == full) are dropped.
  candidates = short_name_map.reject { |short, full| !full || short == full }.keys

  # `(?!)` is an empty negative lookahead: a regex that can never match.
  return /(?!)/ if candidates.empty?

  # A short name matches only when it is:
  #   - NOT preceded by `:`, `.`, or a word character, so the tail of
  #     `Library::Book` and the inside of `RareBook` are both rejected;
  #   - followed (after optional whitespace) by one of: `.`, `::`, `(`,
  #     `,`, `)`, `]`, a single `=` that is not part of `==`, or
  #     end-of-line.
  # Plain mentions such as "update Book later" therefore do not match.
  alternation = candidates.map { |name| Regexp.escape(name) }.join('|')
  /(?<![:.\w])(?:#{alternation})\b(?=\s*(?:\.|::|\(|,|\)|\]|=(?!=)|$))/
end
|
|
49
125
|
end
|
|
50
126
|
end
|
|
51
127
|
end
|
data/lib/woods/notion/client.rb
CHANGED
|
@@ -146,6 +146,11 @@ module Woods
|
|
|
146
146
|
|
|
147
147
|
# Execute HTTP with rate limiting and network error retry.
|
|
148
148
|
#
|
|
149
|
+
# Any message from an underlying network error is run through
|
|
150
|
+
# {#redact_token} before being re-raised — a malformed reflected
|
|
151
|
+
# URL or request dump from the stdlib must not leak the bearer
|
|
152
|
+
# token into logs or backtraces.
|
|
153
|
+
#
|
|
149
154
|
# @return [Net::HTTPResponse]
|
|
150
155
|
# @raise [Woods::Error] on persistent network failures
|
|
151
156
|
def execute_with_retry(method, path, body)
|
|
@@ -154,7 +159,10 @@ module Woods
|
|
|
154
159
|
@rate_limiter.throttle { execute_http(method, path, body) }
|
|
155
160
|
rescue Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED => e
|
|
156
161
|
attempts += 1
|
|
157
|
-
|
|
162
|
+
if attempts >= MAX_RETRIES
|
|
163
|
+
raise Woods::Error,
|
|
164
|
+
"Network error after #{attempts} retries: #{redact_token(e.message)}"
|
|
165
|
+
end
|
|
158
166
|
|
|
159
167
|
sleep(2**attempts)
|
|
160
168
|
retry
|
|
@@ -162,6 +170,9 @@ module Woods
|
|
|
162
170
|
end
|
|
163
171
|
|
|
164
172
|
# Raise a descriptive error from a non-success Notion response.
|
|
173
|
+
# The response body is scrubbed before being formatted into the
|
|
174
|
+
# exception — if the Notion API ever echoes back a header (or a
|
|
175
|
+
# proxy does), the bearer token must not surface here.
|
|
165
176
|
#
|
|
166
177
|
# @raise [Woods::Error]
|
|
167
178
|
def raise_api_error(response)
|
|
@@ -171,7 +182,19 @@ module Woods
|
|
|
171
182
|
{ 'message' => "Unparseable response body: #{response.body&.slice(0, 200)}" }
|
|
172
183
|
end
|
|
173
184
|
message = parsed['message'] || 'Unknown error'
|
|
174
|
-
raise Woods::Error,
|
|
185
|
+
raise Woods::Error,
|
|
186
|
+
"Notion API error #{response.code}: #{redact_token(message)}"
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Replace every occurrence of the bearer token with `[REDACTED]`.
|
|
190
|
+
# Defense in depth — no exception message emitted by this client
|
|
191
|
+
# should carry the secret even if a future code path embeds the
|
|
192
|
+
# request headers verbatim.
|
|
193
|
+
# Replace every occurrence of the bearer token in +message+ with
# `[REDACTED]`.
#
# The message is coerced with #to_s before scrubbing, so a non-String
# payload (for example a numeric `message` field in a JSON error body)
# is scrubbed instead of raising NoMethodError from inside the
# error-reporting path.
#
# @param message [Object, nil] text (or any object) about to be embedded
#   in an exception message
# @return [Object, nil] +message+ itself when it is nil, empty, or when no
#   token is configured; otherwise a new String with the token removed
def redact_token(message)
  return message if message.nil?
  return message if message.respond_to?(:empty?) && message.empty?

  # Coerce the token as well: String#gsub raises TypeError for a pattern
  # that is neither a String nor a Regexp.
  token = @api_token.to_s
  return message if token.empty?

  # A String pattern is matched literally, so regex metacharacters inside
  # the token cannot widen or break the match.
  message.to_s.gsub(token, '[REDACTED]')
end
|
|
176
199
|
|
|
177
200
|
# Perform the raw HTTP request.
|
|
@@ -15,7 +15,7 @@ module Woods
|
|
|
15
15
|
# properties = mapper.map(unit_data)
|
|
16
16
|
# client.create_page(database_id: db_id, properties: properties)
|
|
17
17
|
#
|
|
18
|
-
class ModelMapper
|
|
18
|
+
class ModelMapper # rubocop:disable Metrics/ClassLength
|
|
19
19
|
include Shared
|
|
20
20
|
|
|
21
21
|
# Map a model unit to Notion Data Models page properties.
|
|
@@ -66,6 +66,16 @@ module Woods
|
|
|
66
66
|
metadata['column_count'] || (metadata['columns'] || []).size
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
+
# Extract the leading comment block from a model file, redacting
|
|
70
|
+
# any credential-shaped content before shipping it to Notion.
|
|
71
|
+
#
|
|
72
|
+
# Model header comments occasionally contain sample API keys,
|
|
73
|
+
# integration URLs with embedded passwords, or TODO references to
|
|
74
|
+
# internal secrets. Without redaction those land verbatim in a
|
|
75
|
+
# third-party SaaS database. This uses the same {CredentialScanner}
|
|
76
|
+
# that protects the Console MCP so Notion export inherits the same
|
|
77
|
+
# defenses.
|
|
78
|
+
#
|
|
69
79
|
# @return [String]
|
|
70
80
|
def extract_description(source_code)
|
|
71
81
|
return '' unless source_code
|
|
@@ -80,7 +90,31 @@ module Woods
|
|
|
80
90
|
end
|
|
81
91
|
end
|
|
82
92
|
|
|
83
|
-
|
|
93
|
+
return '' if comment_lines.empty?
|
|
94
|
+
|
|
95
|
+
raw = comment_lines.join(' ').strip
|
|
96
|
+
redact_credentials(raw)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def redact_credentials(text)
  if text.empty?
    text
  else
    # The scanner hands back a `[redacted_value, match_counts]` pair; only
    # the redacted text is wanted here, so the counts are discarded.
    clean, _match_counts = scanner.scan(text)
    clean
  end
rescue StandardError
  # Fail closed: if the scanner cannot be built or the scan blows up, an
  # empty description is returned instead of the unscanned text.
  ''
end
|
|
112
|
+
|
|
113
|
+
def scanner
  return @scanner if @scanner

  # Loaded lazily: the scanner file is only required the first time a
  # scanner is actually needed.
  require 'woods/console/credential_scanner'
  @scanner = Woods::Console::CredentialScanner.new
end
|
|
85
119
|
|
|
86
120
|
# @return [String]
|
data/lib/woods/railtie.rb
CHANGED
|
@@ -11,28 +11,68 @@ module Woods
|
|
|
11
11
|
|
|
12
12
|
initializer 'woods.session_tracer' do |app|
  settings = Woods.configuration
  next unless settings.session_tracer_enabled

  # In production the tracer stays off unless explicitly opted in.
  blocked_in_production =
    defined?(Rails) && Rails.env.production? && !settings.session_tracer_allow_production

  if blocked_in_production
    notice = '[Woods] session tracer disabled in production; ' \
             'set `session_tracer_allow_production = true` to opt in.'

    # Prefer the Rails logger when one is available; otherwise fall back
    # to Kernel#warn.
    rails_logger = defined?(Rails.logger) ? Rails.logger : nil
    if rails_logger
      rails_logger.warn(notice)
    else
      warn notice
    end
    next
  end

  require 'woods/session_tracer/middleware'

  tracer_options = {
    store: settings.session_store,
    session_id_proc: settings.session_id_proc,
    exclude_paths: settings.session_exclude_paths
  }
  app.middleware.use(Woods::SessionTracer::Middleware, **tracer_options)
end
|
|
25
36
|
|
|
26
37
|
initializer 'woods.console_mcp' do |app|
  config = Woods.configuration
  next unless config.console_mcp_enabled

  require 'woods/console/rack_middleware'
  require 'woods/mcp/bearer_auth'
  require 'woods/mcp/origin_guard'

  token = config.console_mcp_token
  production = defined?(Rails) && Rails.env.production?

  # A token is "missing" when it is nil, empty, or made up solely of
  # whitespace. A blank value (e.g. an env var set to " ") would otherwise
  # pass the emptiness check and be installed as the bearer secret.
  token_missing = token.to_s.strip.empty?

  if token_missing
    msg = '[Woods Console] console_mcp_token is not set — Console MCP is a high-privilege ' \
          'endpoint that runs SQL and model introspection against the live database. ' \
          'Set Woods.configuration.console_mcp_token (or WOODS_CONSOLE_MCP_TOKEN env var) ' \
          'to a 32+ character random string.'

    # Production: abort boot rather than run without authentication.
    raise Woods::ConfigurationError, msg if production

    # Non-production without a token: fail closed. Nothing is mounted, so
    # no unauthenticated SQL-bearing endpoint is ever exposed (a 0.0.0.0
    # bind is common in devcontainers / docker-compose).
    warn "#{msg} Refusing to mount the Console MCP middleware until a token is configured."
    next
  end

  # Registration order matters: OriginGuard is registered first, BearerAuth
  # second, and the Console middleware last, so both guards sit in front
  # of it.
  app.middleware.use(Woods::MCP::OriginGuard, allowed_origins: Array(config.console_mcp_allowed_origins))
  app.middleware.use(Woods::MCP::BearerAuth, token: token)

  app.middleware.use(
    Woods::Console::RackMiddleware,
    path: config.console_mcp_path,
    embedded_read_tools: config.console_embedded_read_tools
  )
end
|
|
37
77
|
end
|
|
38
78
|
end
|
|
@@ -56,7 +56,7 @@ module Woods
|
|
|
56
56
|
@mutex.synchronize do
|
|
57
57
|
case @state
|
|
58
58
|
when :open
|
|
59
|
-
unless
|
|
59
|
+
unless monotonic_now - @last_failure_time >= @reset_timeout
|
|
60
60
|
raise CircuitOpenError, "Circuit breaker is open (#{@failure_count} failures)"
|
|
61
61
|
end
|
|
62
62
|
|
|
@@ -81,10 +81,17 @@ module Woods
|
|
|
81
81
|
|
|
82
82
|
private
|
|
83
83
|
|
|
84
|
+
# Monotonic clock reading — immune to NTP slews and DST adjustments.
|
|
85
|
+
#
|
|
86
|
+
# @return [Float] seconds from an unspecified epoch.
|
|
87
|
+
def monotonic_now
  # CLOCK_MONOTONIC only moves forward, so differences between two readings
  # are safe to use as elapsed time.
  clock_id = Process::CLOCK_MONOTONIC
  Process.clock_gettime(clock_id)
end
|
|
90
|
+
|
|
84
91
|
# Record a failure and potentially open the circuit.
|
|
85
92
|
def record_failure
  @failure_count = @failure_count + 1
  # Timestamp the failure with the monotonic clock helper.
  @last_failure_time = monotonic_now

  # Trip the breaker once the failure count reaches the threshold.
  return unless @failure_count >= @threshold

  @state = :open
end
|
|
90
97
|
|
|
@@ -69,29 +69,66 @@ module Woods
|
|
|
69
69
|
@provider.model_name
|
|
70
70
|
end
|
|
71
71
|
|
|
72
|
+
# Delegate the per-provider input cap. The retry wrapper does not
|
|
73
|
+
# change the provider's budget, so just hand through whatever the
|
|
74
|
+
# inner provider reports. Without this, `respond_to?` returns true
|
|
75
|
+
# via Interface but the call raises NotImplementedError.
|
|
76
|
+
#
|
|
77
|
+
# @return [Integer, nil]
|
|
78
|
+
def max_input_tokens
  # Pure delegation: report whatever the wrapped provider reports, or nil
  # when the wrapped provider does not expose a cap at all.
  inner = @provider
  inner.respond_to?(:max_input_tokens) ? inner.max_input_tokens : nil
end
|
|
83
|
+
|
|
84
|
+
# Maximum backoff delay in seconds. Without a cap, attempts 8+ can sleep
|
|
85
|
+
# longer than most service-level timeouts (>25s) and compound retry
|
|
86
|
+
# storms across correlated workers.
|
|
87
|
+
MAX_BACKOFF_SECONDS = 30.0
|
|
88
|
+
|
|
89
|
+
# Base multiplier for exponential backoff. Delay is roughly
|
|
90
|
+
# `BACKOFF_BASE * 2**attempt` with full jitter applied on top.
|
|
91
|
+
BACKOFF_BASE = 0.1
|
|
92
|
+
|
|
72
93
|
private
|
|
73
94
|
|
|
74
|
-
# Execute a block with retry logic
|
|
95
|
+
# Execute a block with retry logic, exponential backoff, and jitter.
|
|
96
|
+
#
|
|
97
|
+
# Argument errors surface immediately (non-retryable — they indicate
|
|
98
|
+
# a programming mistake or invalid input, not a transient failure).
|
|
75
99
|
#
|
|
76
100
|
# @yield The block to execute
|
|
77
101
|
# @return [Object] The return value of the block
|
|
78
102
|
# @raise [CircuitOpenError] immediately without retrying
|
|
103
|
+
# @raise [ArgumentError] immediately without retrying
|
|
79
104
|
# @raise [StandardError] the last error if all retries are exhausted
|
|
80
105
|
def with_retries
  tries = 0
  begin
    tries += 1
    yield
  rescue CircuitOpenError, ArgumentError
    # Non-retryable: re-raise untouched, no sleep and no further attempts.
    raise
  rescue StandardError => failure
    # Give up once the number of attempts exceeds the retry budget.
    retries_exhausted = tries > @max_retries
    raise failure if retries_exhausted

    delay = backoff_seconds(tries)
    sleep(delay)
    retry
  end
end
|
|
94
119
|
|
|
120
|
+
# Full-jitter exponential backoff with a hard cap. See "Exponential
|
|
121
|
+
# Backoff and Jitter", AWS Architecture Blog (Marc Brooker, 2015):
|
|
122
|
+
# a uniformly random delay in [0, min(base*2**attempt, cap)) de-correlates
|
|
123
|
+
# competing retry waves.
|
|
124
|
+
#
|
|
125
|
+
# @param attempt [Integer] 1-based attempt counter
|
|
126
|
+
# @return [Float] seconds to sleep before the next retry
|
|
127
|
+
def backoff_seconds(attempt)
  # Exponential growth, clamped so the upper bound never exceeds the cap.
  uncapped = BACKOFF_BASE * (2**attempt)
  upper_bound = [uncapped, MAX_BACKOFF_SECONDS].min

  # Full jitter: scale the bound by a random factor in [0, 1).
  rand * upper_bound
end
|
|
131
|
+
|
|
95
132
|
# Route a call through the circuit breaker if one is configured.
|
|
96
133
|
#
|
|
97
134
|
# @yield The block to execute
|