woods 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +186 -0
  3. data/README.md +20 -8
  4. data/exe/woods-console +51 -6
  5. data/exe/woods-console-mcp +24 -4
  6. data/exe/woods-mcp +30 -7
  7. data/exe/woods-mcp-http +47 -6
  8. data/lib/generators/woods/install_generator.rb +13 -4
  9. data/lib/generators/woods/templates/woods.rb.tt +155 -0
  10. data/lib/tasks/woods.rake +69 -50
  11. data/lib/woods/builder.rb +174 -9
  12. data/lib/woods/cache/cache_middleware.rb +360 -31
  13. data/lib/woods/chunking/semantic_chunker.rb +334 -7
  14. data/lib/woods/console/adapters/job_adapter.rb +10 -4
  15. data/lib/woods/console/audit_logger.rb +76 -4
  16. data/lib/woods/console/bridge.rb +48 -15
  17. data/lib/woods/console/bridge_protocol.rb +44 -0
  18. data/lib/woods/console/confirmation.rb +3 -4
  19. data/lib/woods/console/console_response_renderer.rb +56 -18
  20. data/lib/woods/console/credential_index.rb +201 -0
  21. data/lib/woods/console/credential_scanner.rb +302 -0
  22. data/lib/woods/console/dispatch_pipeline.rb +138 -0
  23. data/lib/woods/console/embedded_executor.rb +682 -35
  24. data/lib/woods/console/eval_guard.rb +319 -0
  25. data/lib/woods/console/model_validator.rb +1 -3
  26. data/lib/woods/console/rack_middleware.rb +185 -29
  27. data/lib/woods/console/redactor.rb +161 -0
  28. data/lib/woods/console/response_context.rb +127 -0
  29. data/lib/woods/console/safe_context.rb +220 -23
  30. data/lib/woods/console/scope_predicate_parser.rb +131 -0
  31. data/lib/woods/console/server.rb +417 -486
  32. data/lib/woods/console/sql_noise_stripper.rb +87 -0
  33. data/lib/woods/console/sql_table_scanner.rb +213 -0
  34. data/lib/woods/console/sql_validator.rb +81 -31
  35. data/lib/woods/console/table_gate.rb +93 -0
  36. data/lib/woods/console/tool_specs.rb +552 -0
  37. data/lib/woods/console/tools/tier1.rb +3 -3
  38. data/lib/woods/console/tools/tier4.rb +7 -1
  39. data/lib/woods/dependency_graph.rb +66 -7
  40. data/lib/woods/embedding/indexer.rb +190 -6
  41. data/lib/woods/embedding/openai.rb +40 -4
  42. data/lib/woods/embedding/provider.rb +104 -8
  43. data/lib/woods/embedding/text_preparer.rb +23 -3
  44. data/lib/woods/embedding/token_counter.rb +133 -0
  45. data/lib/woods/evaluation/baseline_runner.rb +20 -2
  46. data/lib/woods/evaluation/metrics.rb +4 -1
  47. data/lib/woods/extracted_unit.rb +1 -0
  48. data/lib/woods/extractor.rb +7 -1
  49. data/lib/woods/extractors/controller_extractor.rb +6 -0
  50. data/lib/woods/extractors/mailer_extractor.rb +16 -2
  51. data/lib/woods/extractors/model_extractor.rb +6 -1
  52. data/lib/woods/extractors/phlex_extractor.rb +13 -4
  53. data/lib/woods/extractors/rails_source_extractor.rb +2 -0
  54. data/lib/woods/extractors/route_helper_resolver.rb +130 -0
  55. data/lib/woods/extractors/shared_dependency_scanner.rb +130 -2
  56. data/lib/woods/extractors/view_component_extractor.rb +12 -1
  57. data/lib/woods/extractors/view_engines/base.rb +141 -0
  58. data/lib/woods/extractors/view_engines/erb.rb +145 -0
  59. data/lib/woods/extractors/view_template_extractor.rb +92 -133
  60. data/lib/woods/flow_assembler.rb +23 -15
  61. data/lib/woods/flow_precomputer.rb +21 -2
  62. data/lib/woods/graph_analyzer.rb +210 -0
  63. data/lib/woods/index_artifact.rb +173 -0
  64. data/lib/woods/mcp/bearer_auth.rb +45 -0
  65. data/lib/woods/mcp/bootstrap_state.rb +94 -0
  66. data/lib/woods/mcp/bootstrapper.rb +337 -16
  67. data/lib/woods/mcp/config_resolver.rb +288 -0
  68. data/lib/woods/mcp/errors.rb +134 -0
  69. data/lib/woods/mcp/index_reader.rb +265 -30
  70. data/lib/woods/mcp/origin_guard.rb +132 -0
  71. data/lib/woods/mcp/provider_probe.rb +166 -0
  72. data/lib/woods/mcp/renderers/claude_renderer.rb +6 -0
  73. data/lib/woods/mcp/renderers/markdown_renderer.rb +100 -3
  74. data/lib/woods/mcp/renderers/plain_renderer.rb +16 -2
  75. data/lib/woods/mcp/server.rb +771 -137
  76. data/lib/woods/model_name_cache.rb +78 -2
  77. data/lib/woods/notion/client.rb +25 -2
  78. data/lib/woods/notion/mappers/model_mapper.rb +36 -2
  79. data/lib/woods/railtie.rb +55 -15
  80. data/lib/woods/resilience/circuit_breaker.rb +9 -2
  81. data/lib/woods/resilience/retryable_provider.rb +40 -3
  82. data/lib/woods/resolved_config.rb +299 -0
  83. data/lib/woods/retrieval/context_assembler.rb +112 -5
  84. data/lib/woods/retrieval/query_classifier.rb +1 -1
  85. data/lib/woods/retrieval/ranker.rb +55 -6
  86. data/lib/woods/retrieval/search_executor.rb +42 -13
  87. data/lib/woods/retriever.rb +330 -24
  88. data/lib/woods/session_tracer/middleware.rb +35 -1
  89. data/lib/woods/storage/graph_store.rb +39 -0
  90. data/lib/woods/storage/inapplicable_backend.rb +14 -0
  91. data/lib/woods/storage/metadata_store.rb +129 -1
  92. data/lib/woods/storage/pgvector.rb +70 -8
  93. data/lib/woods/storage/qdrant.rb +196 -5
  94. data/lib/woods/storage/snapshotter/metadata.rb +172 -0
  95. data/lib/woods/storage/snapshotter/vector.rb +238 -0
  96. data/lib/woods/storage/snapshotter.rb +24 -0
  97. data/lib/woods/storage/vector_store.rb +184 -35
  98. data/lib/woods/tasks.rb +85 -0
  99. data/lib/woods/temporal/snapshot_store.rb +49 -1
  100. data/lib/woods/token_utils.rb +44 -5
  101. data/lib/woods/unblocked/client.rb +163 -0
  102. data/lib/woods/unblocked/document_builder.rb +326 -0
  103. data/lib/woods/unblocked/exporter.rb +201 -0
  104. data/lib/woods/unblocked/rate_limiter.rb +94 -0
  105. data/lib/woods/util/host_guard.rb +61 -0
  106. data/lib/woods/version.rb +1 -1
  107. data/lib/woods.rb +130 -6
  108. metadata +73 -4
@@ -5,6 +5,89 @@ require_relative 'cache_store'
5
5
 
6
6
  module Woods
7
7
  module Cache
8
+ # Raised by {InflightEntry#await} when the owning thread aborted before
9
+ # either fulfilling or rejecting the entry — for example on `Interrupt`,
10
+ # `Thread#kill`, or a non-StandardError exception that bypasses the explicit
11
+ # `rescue`. Waiters receive this instead of blocking forever.
12
+ #
13
+ # @api private
14
+ class OwnerAbortedError < StandardError
15
+ def initialize(msg = 'embedding fetch owner aborted before fulfill')
16
+ super
17
+ end
18
+ end
19
+
20
+ # Per-text in-flight entry used for single-flight coordination in
21
+ # {CachedEmbeddingProvider#embed_batch}. When thread A is already fetching an
22
+ # embedding for text T, thread B's miss for T attaches to A's entry and waits
23
+ # on its condition variable rather than issuing a parallel provider call.
24
+ # See issue #88.
25
+ #
26
+ # @api private
27
+ class InflightEntry
28
+ def initialize
29
+ @mutex = Mutex.new
30
+ @cond = ConditionVariable.new
31
+ @done = false
32
+ @value = nil
33
+ @error = nil
34
+ @waiter_count = 0
35
+ end
36
+
37
+ # Publish the computed value and wake every waiter. Idempotent — a second
38
+ # call (e.g. from an `ensure` that rejects unfulfilled entries) is a no-op
39
+ # so the hardening in {CachedEmbeddingProvider#fetch_and_fulfill} is safe.
40
+ def fulfill(value)
41
+ @mutex.synchronize do
42
+ return if @done
43
+
44
+ @value = value
45
+ @done = true
46
+ @cond.broadcast
47
+ end
48
+ end
49
+
50
+ # Publish an exception so waiters fail fast instead of blocking forever.
51
+ # Idempotent — see {#fulfill}.
52
+ def reject(error)
53
+ @mutex.synchronize do
54
+ return if @done
55
+
56
+ @error = error
57
+ @done = true
58
+ @cond.broadcast
59
+ end
60
+ end
61
+
62
+ # Block until {#fulfill} or {#reject} is called, then return the value
63
+ # (or re-raise the error) to the waiting thread. `@waiter_count` is bumped
64
+ # under the mutex so tests can deterministically wait for "N threads have
65
+ # attached to this entry" instead of polling coarse Thread#status values.
66
+ def await
67
+ @mutex.synchronize do
68
+ @waiter_count += 1
69
+ begin
70
+ @cond.wait(@mutex) until @done
71
+ ensure
72
+ @waiter_count -= 1
73
+ end
74
+ end
75
+ raise @error if @error
76
+
77
+ @value
78
+ end
79
+
80
+ # Number of threads currently blocked in {#await}. Thread-safe observation
81
+ # used primarily by concurrent specs to synchronize without relying on
82
+ # `Thread#status` (which can transiently report 'sleep' on unrelated
83
+ # mutex contention — see issue #94 CI flake on MRI 3.1/3.2).
84
+ #
85
+ # @return [Integer]
86
+ def waiter_count
87
+ @mutex.synchronize { @waiter_count }
88
+ end
89
+ end
90
+
8
91
  # Decorator that wraps an embedding provider with cache-through logic.
9
92
  #
10
93
  # Implements the same {Embedding::Provider::Interface} so it can be
@@ -27,15 +110,23 @@ module Woods
27
110
  @provider = provider
28
111
  @cache_store = cache_store
29
112
  @ttl = ttl
113
+ @inflight = {}
114
+ @inflight_mutex = Mutex.new
30
115
  end
31
116
 
32
117
  # Embed a single text, returning a cached vector when available.
33
118
  #
119
+ # Shares the per-text single-flight map with {#embed_batch}, so concurrent
120
+ # `embed("x")` / `embed_batch(["x", ...])` misses for the same text all
121
+ # attach to the same in-flight entry and produce exactly one provider call.
122
+ #
34
123
  # @param text [String] Text to embed
35
124
  # @return [Array<Float>] Embedding vector
36
125
  def embed(text)
37
- key = embedding_key(text)
38
- @cache_store.fetch(key, ttl: @ttl) { @provider.embed(text) }
126
+ cached = @cache_store.read(embedding_key(text))
127
+ return cached unless cached.nil?
128
+
129
+ with_single_flight(text) { @provider.embed(text) }
39
130
  end
40
131
 
41
132
  # Embed a batch of texts, using cached vectors for any previously seen texts.
@@ -43,22 +134,20 @@ module Woods
43
134
  # Only texts that are not already cached are sent to the real provider.
44
135
  # Results are merged back in original order.
45
136
  #
137
+ # Uses per-text single-flight to prevent cache-miss stampedes: when N threads
138
+ # concurrently miss on the same text, exactly one calls the provider while
139
+ # the others attach to its {InflightEntry} and wait. See issue #88.
140
+ #
46
141
  # @param texts [Array<String>] Texts to embed
47
142
  # @return [Array<Array<Float>>] Embedding vectors (same order as input)
48
143
  def embed_batch(texts)
49
144
  results, misses, miss_indices = partition_cached(texts)
145
+ return results if misses.empty?
50
146
 
51
- if misses.any?
52
- fresh_vectors = @provider.embed_batch(misses)
53
- misses.each_with_index do |text, i|
54
- results[miss_indices[i]] = fresh_vectors[i]
55
- begin
56
- @cache_store.write(embedding_key(text), fresh_vectors[i], ttl: @ttl)
57
- rescue StandardError => e
58
- warn("[Woods] CachedEmbeddingProvider cache write failed: #{e.message}")
59
- end
60
- end
61
- end
147
+ to_fetch, to_fetch_positions, our_entries, awaiting = claim_inflight(misses)
148
+
149
+ fetch_and_fulfill(to_fetch, to_fetch_positions, our_entries, results, miss_indices)
150
+ await_others(awaiting, results, miss_indices)
62
151
 
63
152
  results
64
153
  end
@@ -77,8 +166,175 @@ module Woods
77
166
  @provider.model_name
78
167
  end
79
168
 
169
+ # Delegate the per-provider input cap so Builder's chunker / text
170
+ # preparer wiring keeps working when the cache wrapper is in front
171
+ # of the provider. Without this, `respond_to?(:max_input_tokens)`
172
+ # returns true (inherited from Interface) but the call raises
173
+ # NotImplementedError.
174
+ #
175
+ # @return [Integer, nil]
176
+ def max_input_tokens
177
+ return @provider.max_input_tokens if @provider.respond_to?(:max_input_tokens)
178
+
179
+ nil
180
+ end
181
+
80
182
  private
81
183
 
184
+ # Run a provider block for a single text under the shared single-flight map.
185
+ # The first thread to miss on `text` becomes the owner, runs the block, caches
186
+ # the result, and fulfills the entry. Concurrent callers for the same text
187
+ # wait on the same entry. Errors propagate to waiters via {InflightEntry#reject}.
188
+ #
189
+ # @param text [String]
190
+ # @yieldreturn [Array<Float>] the freshly computed embedding vector
191
+ # @return [Array<Float>]
192
+ def with_single_flight(text)
193
+ entry, owner = claim_single(text)
194
+ return entry.await unless owner
195
+
196
+ begin
197
+ vector = yield
198
+ write_cache(text, vector)
199
+ entry.fulfill(vector)
200
+ vector
201
+ rescue StandardError => e
202
+ entry.reject(e)
203
+ raise
204
+ ensure
205
+ entry.reject(OwnerAbortedError.new)
206
+ clear_inflight([text])
207
+ end
208
+ end
209
+
210
+ # Single-text counterpart of {#claim_inflight}. Returns the entry for `text`
211
+ # and a boolean indicating whether the current thread is the owner.
212
+ #
213
+ # @param text [String]
214
+ # @return [Array(InflightEntry, Boolean)]
215
+ def claim_single(text)
216
+ @inflight_mutex.synchronize do
217
+ existing = @inflight[text]
218
+ return [existing, false] if existing
219
+
220
+ entry = InflightEntry.new
221
+ @inflight[text] = entry
222
+ [entry, true]
223
+ end
224
+ end
225
+
226
+ # Claim ownership of miss texts that no other thread is currently fetching.
227
+ # Returns four arrays describing the split of `misses`:
228
+ #
229
+ # - `to_fetch` — texts this thread owns and will hand to the provider
230
+ # - `to_fetch_positions` — each owned text's index into `misses`
231
+ # - `our_entries` — {InflightEntry} instances this thread will fulfill/reject
232
+ # - `awaiting` — `[position, entry]` pairs for texts already being fetched
233
+ # by another thread; this thread will block on `entry.await` instead of
234
+ # calling the provider
235
+ #
236
+ # The inflight map is only held during this bookkeeping — not during the
237
+ # provider call or the subsequent waits.
238
+ #
239
+ # @param misses [Array<String>]
240
+ # @return [Array(Array<String>, Array<Integer>, Array<InflightEntry>, Array<Array>)]
241
+ def claim_inflight(misses)
242
+ to_fetch = []
243
+ to_fetch_positions = []
244
+ our_entries = []
245
+ awaiting = []
246
+
247
+ @inflight_mutex.synchronize do
248
+ misses.each_with_index do |text, pos|
249
+ existing = @inflight[text]
250
+ if existing
251
+ awaiting << [pos, existing]
252
+ else
253
+ entry = InflightEntry.new
254
+ @inflight[text] = entry
255
+ our_entries << entry
256
+ to_fetch << text
257
+ to_fetch_positions << pos
258
+ end
259
+ end
260
+ end
261
+
262
+ [to_fetch, to_fetch_positions, our_entries, awaiting]
263
+ end
264
+
265
+ # Call the provider for owned texts, write each vector to the cache, and
266
+ # fulfill the owned entries so waiters wake with the fresh vector.
267
+ #
268
+ # The `ensure` block guarantees every owned entry reaches a terminal state
269
+ # and leaves the inflight map, even under paths the `rescue` misses —
270
+ # non-StandardError exceptions, `Thread#kill`, or a future refactor that
271
+ # introduces a raise into the fulfill loop. {InflightEntry#fulfill} /
272
+ # {InflightEntry#reject} are idempotent, so the fallback reject on
273
+ # already-fulfilled entries is a no-op.
274
+ #
275
+ # @return [void]
276
+ def fetch_and_fulfill(to_fetch, to_fetch_positions, our_entries, results, miss_indices)
277
+ return if to_fetch.empty?
278
+
279
+ begin
280
+ fresh_vectors = @provider.embed_batch(to_fetch)
281
+ # Reject a malformed provider response up-front rather than silently
282
+ # fulfilling waiters with `nil` (or masking a missing tail vector by
283
+ # under-writing the cache).
284
+ if fresh_vectors.size != to_fetch.size
285
+ raise ArgumentError,
286
+ "provider returned #{fresh_vectors.size} vectors for #{to_fetch.size} texts"
287
+ end
288
+ rescue StandardError => e
289
+ our_entries.each { |entry| entry.reject(e) }
290
+ raise
291
+ end
292
+
293
+ to_fetch.each_with_index do |text, i|
294
+ vector = fresh_vectors[i]
295
+ results[miss_indices[to_fetch_positions[i]]] = vector
296
+ write_cache(text, vector)
297
+ our_entries[i].fulfill(vector)
298
+ end
299
+ ensure
300
+ our_entries.each { |entry| entry.reject(OwnerAbortedError.new) }
301
+ clear_inflight(to_fetch)
302
+ end
303
+
304
+ # Block on entries owned by other threads, then slot their fulfilled vectors
305
+ # into `results`. Any exception from a sibling thread's provider call is
306
+ # re-raised here via {InflightEntry#await}.
307
+ #
308
+ # @param awaiting [Array<Array>] pairs of `[position_in_misses, InflightEntry]`
309
+ # @param results [Array]
310
+ # @param miss_indices [Array<Integer>]
311
+ # @return [void]
312
+ def await_others(awaiting, results, miss_indices)
313
+ awaiting.each do |pos, entry|
314
+ results[miss_indices[pos]] = entry.await
315
+ end
316
+ end
317
+
318
+ # Remove the given texts from the inflight map.
319
+ #
320
+ # @param texts [Array<String>]
321
+ # @return [void]
322
+ def clear_inflight(texts)
323
+ @inflight_mutex.synchronize { texts.each { |t| @inflight.delete(t) } }
324
+ end
325
+
326
+ # Write one vector to the cache, warning on backend failure rather than
327
+ # propagating — a transient cache write error must not fail the embed call.
328
+ #
329
+ # @param text [String]
330
+ # @param vector [Array<Float>]
331
+ # @return [void]
332
+ def write_cache(text, vector)
333
+ @cache_store.write(embedding_key(text), vector, ttl: @ttl)
334
+ rescue StandardError => e
335
+ warn("[Woods] CachedEmbeddingProvider cache write failed: #{e.message}")
336
+ end
337
+
82
338
  # Split texts into cached hits and uncached misses.
83
339
  #
84
340
  # @param texts [Array<String>]
@@ -132,32 +388,51 @@ module Woods
132
388
  @context_ttl = context_ttl
133
389
  end
134
390
 
391
+ # Expose the wrapped stores so the MCP +reload+ tool and
392
+ # {Woods::MCP::Bootstrapper.reload_stores!} can re-hydrate caches in
393
+ # place regardless of whether caching is enabled. Without these
394
+ # delegations, reload is a silent no-op when +cache_enabled+ is true —
395
+ # the bootstrapper would see +nil+ stores on the wrapper and skip.
396
+ def vector_store = @retriever.vector_store
397
+ def metadata_store = @retriever.metadata_store
398
+ def graph_store = @retriever.graph_store
399
+
400
+ # Invalidate every cached context result. Called from the MCP +reload+
401
+ # tool after the retriever's stores have been re-hydrated from a fresh
402
+ # embed — otherwise cached results from the old embedding run would
403
+ # linger until their TTL expires and contradict the new stores.
404
+ #
405
+ # Embedding caches (query → vector) are NOT cleared: the query-vector
406
+ # mapping is deterministic for a given provider+model and survives any
407
+ # index reload. Only context results (query → ranked units) go stale.
408
+ #
409
+ # @return [void]
410
+ def invalidate_context_cache!
411
+ @cache_store.clear(namespace: :context)
412
+ rescue StandardError => e
413
+ warn("[Woods] CachedRetriever context-cache invalidation failed: #{e.message}")
414
+ end
415
+
135
416
  # Execute the retrieval pipeline with context-level caching.
136
417
  #
137
418
  # On cache hit, returns a RetrievalResult reconstructed from cached data
138
419
  # without running any pipeline stages. On miss, delegates to the real
139
420
  # retriever and caches the serializable parts of the result.
140
421
  #
422
+ # Cache key includes +types:+ / +exclude_types:+ so a run with a
423
+ # narrower type filter doesn't return a broader-filter cached result.
424
+ #
141
425
  # @param query [String] Natural language query
142
426
  # @param budget [Integer] Token budget
427
+ # @param types [Array<String, Symbol>, nil] Include-only filter
428
+ # @param exclude_types [Array<String, Symbol>, nil] Additional exclusions
143
429
  # @return [Retriever::RetrievalResult]
144
- def retrieve(query, budget: 8000)
145
- key = context_key(query, budget)
430
+ def retrieve(query, budget: 8000, types: nil, exclude_types: nil)
431
+ key = context_key(query, budget, types: types, exclude_types: exclude_types)
146
432
  cached = @cache_store.read(key)
433
+ return rehydrate_cached(cached, budget) if cached
147
434
 
148
- if cached
149
- return Retriever::RetrievalResult.new(
150
- context: cached['context'],
151
- sources: cached['sources'],
152
- classification: nil,
153
- strategy: cached['strategy']&.to_sym,
154
- tokens_used: cached['tokens_used'],
155
- budget: budget,
156
- trace: nil
157
- )
158
- end
159
-
160
- result = @retriever.retrieve(query, budget: budget)
435
+ result = @retriever.retrieve(query, budget: budget, types: types, exclude_types: exclude_types)
161
436
 
162
437
  begin
163
438
  @cache_store.write(key, serialize_result(result), ttl: @context_ttl)
@@ -172,11 +447,23 @@ module Woods
172
447
 
173
448
  # Build a cache key for a context result.
174
449
  #
450
+ # Includes the type filter kwargs so distinct filter combinations miss
451
+ # each other — a lookup with +types: ["service"]+ must not return a
452
+ # previously-cached broad result.
453
+ #
175
454
  # @param query [String]
176
455
  # @param budget [Integer]
456
+ # @param types [Array<String, Symbol>, nil]
457
+ # @param exclude_types [Array<String, Symbol>, nil]
177
458
  # @return [String]
178
- def context_key(query, budget)
179
- Cache.cache_key(:context, query, budget.to_s)
459
+ def context_key(query, budget, types: nil, exclude_types: nil)
460
+ Cache.cache_key(:context, query, budget.to_s, fingerprint(types), fingerprint(exclude_types))
461
+ end
462
+
463
+ def fingerprint(types)
464
+ return '' if types.nil? || types.empty?
465
+
466
+ types.map(&:to_s).sort.join(',')
180
467
  end
181
468
 
182
469
  # Serialize a RetrievalResult to a JSON-safe hash.
@@ -186,14 +473,56 @@ module Woods
186
473
  #
187
474
  # @param result [Retriever::RetrievalResult]
188
475
  # @return [Hash]
476
+ def rehydrate_cached(cached, budget)
477
+ Retriever::RetrievalResult.new(
478
+ context: cached['context'],
479
+ sources: cached['sources'],
480
+ classification: nil,
481
+ strategy: cached['strategy']&.to_sym,
482
+ tokens_used: cached['tokens_used'],
483
+ budget: budget,
484
+ trace: nil,
485
+ type_rank_context: rehydrate_type_rank_context(cached['type_rank_context'])
486
+ )
487
+ end
488
+
189
489
  def serialize_result(result)
190
490
  {
191
491
  'context' => result.context,
192
492
  'sources' => result.sources,
193
493
  'strategy' => result.strategy&.to_s,
194
- 'tokens_used' => result.tokens_used
494
+ 'tokens_used' => result.tokens_used,
495
+ 'type_rank_context' => serialize_type_rank_context(result.type_rank_context)
195
496
  }
196
497
  end
498
+
499
+ # type_rank_context is a Hash<String => Hash<Symbol, ...>> with
500
+ # :source carrying a Symbol value. JSON-backed caches (Redis,
501
+ # SolidCache) collapse both to strings on the round-trip, so we
502
+ # serialize explicitly and re-symbolize both the inner keys and
503
+ # the :source value on rehydrate. The programmatic contract is
504
+ # "symbol keys, symbol :source value" regardless of cache hit
505
+ # vs miss.
506
+ def serialize_type_rank_context(ctx)
507
+ return nil if ctx.nil?
508
+
509
+ ctx.each_with_object({}) do |(type, info), out|
510
+ out[type] = info.each_with_object({}) do |(k, v), h|
511
+ h[k.to_s] = k == :source ? v.to_s : v
512
+ end
513
+ end
514
+ end
515
+
516
+ def rehydrate_type_rank_context(raw)
517
+ return nil if raw.nil?
518
+
519
+ raw.each_with_object({}) do |(type, info), out|
520
+ out[type] = info.each_with_object({}) do |(k, v), h|
521
+ sym_k = k.to_sym
522
+ h[sym_k] = sym_k == :source ? v.to_sym : v
523
+ end
524
+ end
525
+ end
197
526
  end
198
527
  end
199
528
  end