woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'retriever'
4
+ require_relative 'storage/vector_store'
5
+ require_relative 'storage/pgvector'
6
+ require_relative 'storage/qdrant'
7
+ require_relative 'storage/metadata_store'
8
+ require_relative 'storage/graph_store'
9
+ require_relative 'embedding/provider'
10
+ require_relative 'embedding/openai'
11
+
12
+ module Woods
13
+ # Builder reads a {Configuration} and instantiates the appropriate adapters,
14
+ # returning a fully wired {Retriever} ready for use.
15
+ #
16
+ # Named presets are provided for common deployment scenarios. All presets can
17
+ # be further customized with a block passed to {Woods.configure_with_preset}.
18
+ #
19
+ # @example Using a preset
20
+ # Woods.configure_with_preset(:local)
21
+ # result = Woods.retrieve("How does the User model work?")
22
+ #
23
+ # @example Using a preset with block customization
24
+ # Woods.configure_with_preset(:production) do |config|
25
+ # config.embedding_options = { api_key: ENV['OPENAI_API_KEY'] }
26
+ # config.vector_store_options = { url: ENV['QDRANT_URL'], collection: 'myapp' }
27
+ # end
28
+ #
29
class Builder # rubocop:disable Metrics/ClassLength
  # Named presets mapping to default adapter types.
  #
  # :local — fully local, no external services required
  # :postgresql — pgvector for vectors, OpenAI for embeddings
  # :production — Qdrant for vectors, OpenAI for embeddings
  #
  # The table and every preset hash are frozen so the shared constant
  # cannot be mutated by accident.
  PRESETS = {
    local: {
      vector_store: :in_memory,
      metadata_store: :sqlite,
      graph_store: :in_memory,
      embedding_provider: :ollama
    }.freeze,
    postgresql: {
      vector_store: :pgvector,
      metadata_store: :sqlite,
      graph_store: :in_memory,
      embedding_provider: :openai
    }.freeze,
    production: {
      vector_store: :qdrant,
      metadata_store: :sqlite,
      graph_store: :in_memory,
      embedding_provider: :openai
    }.freeze
  }.freeze

  # Build a {Configuration} populated with the named preset's adapter types.
  #
  # The name is matched against the preset keys by its string form, so both
  # +:local+ and +'local'+ are accepted.
  #
  # @param name [Symbol, String] Preset name — one of :local, :postgresql, or :production
  # @return [Configuration] A new Configuration with preset values applied
  # @raise [ArgumentError] if the preset name is not recognized
  def self.preset_config(name)
    key = PRESETS.each_key.find { |candidate| candidate.to_s == name.to_s }
    raise ArgumentError, "Unknown preset: #{name}. Valid: #{PRESETS.keys.join(', ')}" unless key

    config = Configuration.new
    PRESETS.fetch(key).each { |attribute, value| config.public_send(:"#{attribute}=", value) }
    config
  end

  # @param config [Configuration] Configuration to read adapter types from
  def initialize(config = Woods.configuration)
    @config = config
  end

  # Build a {Retriever} wired with adapters from the configuration.
  #
  # When `cache_enabled` is true, the embedding provider is wrapped with
  # {Cache::CachedEmbeddingProvider} and the retriever is wrapped with
  # {Cache::CachedRetriever} for transparent caching of expensive operations.
  #
  # @return [Retriever, Cache::CachedRetriever] A fully wired retriever
  def build_retriever
    provider = build_embedding_provider
    cache = build_cache_store

    provider = wrap_with_embedding_cache(provider, cache) if cache

    retriever = Retriever.new(
      vector_store: build_vector_store,
      metadata_store: build_metadata_store,
      graph_store: build_graph_store,
      embedding_provider: provider
    )

    cache ? wrap_with_retriever_cache(retriever, cache) : retriever
  end

  # Instantiate the vector store adapter specified by the configuration.
  #
  # @return [Storage::VectorStore::Interface] Vector store adapter instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_vector_store
    case @config.vector_store
    when :in_memory then Storage::VectorStore::InMemory.new
    when :pgvector then Storage::VectorStore::Pgvector.new(**(@config.vector_store_options || {}))
    when :qdrant then Storage::VectorStore::Qdrant.new(**(@config.vector_store_options || {}))
    else raise ArgumentError, "Unknown vector_store: #{@config.vector_store}"
    end
  end

  # Instantiate the embedding provider specified by the configuration.
  #
  # @return [Embedding::Provider::Interface] Embedding provider instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_embedding_provider
    case @config.embedding_provider
    when :openai then Embedding::Provider::OpenAI.new(**(@config.embedding_options || {}))
    when :ollama then Embedding::Provider::Ollama.new(**(@config.embedding_options || {}))
    else raise ArgumentError, "Unknown embedding_provider: #{@config.embedding_provider}"
    end
  end

  private

  # Instantiate the metadata store adapter specified by the configuration.
  #
  # @return [Storage::MetadataStore::Interface] Metadata store adapter instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_metadata_store
    case @config.metadata_store
    when :in_memory then Storage::MetadataStore::InMemory.new
    when :sqlite then Storage::MetadataStore::SQLite.new(**(@config.metadata_store_options || {}))
    else raise ArgumentError, "Unknown metadata_store: #{@config.metadata_store}"
    end
  end

  # Instantiate the graph store adapter specified by the configuration.
  #
  # @return [Storage::GraphStore::Interface] Graph store adapter instance
  # @raise [ArgumentError] if the configured type is not recognized
  def build_graph_store
    case @config.graph_store
    when :in_memory then Storage::GraphStore::Memory.new
    else raise ArgumentError, "Unknown graph_store: #{@config.graph_store}"
    end
  end

  # Build a cache store from configuration, or nil if caching is disabled.
  #
  # `cache_store` may be one of the symbols :memory, :redis, :solid_cache,
  # or an already-built {Cache::CacheStore} instance, which is returned as-is.
  # The Redis and SolidCache backends are required lazily so their files are
  # only loaded when that backend is selected.
  #
  # @return [Cache::CacheStore, nil]
  # @raise [ArgumentError] if the configured cache store is not recognized
  # @raise [KeyError] if a required backend option (:redis / :cache) is missing
  def build_cache_store
    return nil unless @config.cache_enabled

    opts = @config.cache_options || {}

    case @config.cache_store
    when :memory
      Cache::InMemory.new(max_entries: opts.fetch(:max_entries, 500))
    when :redis
      require_relative 'cache/redis_cache_store'
      Cache::RedisCacheStore.new(redis: opts.fetch(:redis), default_ttl: opts[:default_ttl])
    when :solid_cache
      require_relative 'cache/solid_cache_store'
      Cache::SolidCacheStore.new(cache: opts.fetch(:cache), default_ttl: opts[:default_ttl])
    when Cache::CacheStore
      @config.cache_store
    else
      raise ArgumentError, "Unknown cache_store: #{@config.cache_store}"
    end
  end

  # Wrap an embedding provider with caching.
  #
  # @param provider [Embedding::Provider::Interface]
  # @param cache [Cache::CacheStore]
  # @return [Cache::CachedEmbeddingProvider]
  def wrap_with_embedding_cache(provider, cache)
    Cache::CachedEmbeddingProvider.new(
      provider: provider,
      cache_store: cache,
      ttl: cache_ttl(:embeddings)
    )
  end

  # Wrap a retriever with caching.
  #
  # @param retriever [Retriever]
  # @param cache [Cache::CacheStore]
  # @return [Cache::CachedRetriever]
  def wrap_with_retriever_cache(retriever, cache)
    Cache::CachedRetriever.new(
      retriever: retriever,
      cache_store: cache,
      context_ttl: cache_ttl(:context)
    )
  end

  # Look up the TTL for a cache domain from `cache_options[:ttl]`, falling
  # back to {Cache::DEFAULT_TTLS} when the domain has no override.
  #
  # Tolerates `cache_options` being nil and `cache_options[:ttl]` being nil
  # or absent, so a partially filled options hash never raises here.
  #
  # @param domain [Symbol] Cache domain (:embeddings, :context, ...)
  # @return [Integer, nil] TTL in seconds
  def cache_ttl(domain)
    overrides = (@config.cache_options || {})[:ttl] || {}
    overrides.fetch(domain) { Cache::DEFAULT_TTLS[domain] }
  end
end
200
+ end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require_relative 'cache_store'
5
+
6
+ module Woods
7
+ module Cache
8
+ # Decorator that wraps an embedding provider with cache-through logic.
9
+ #
10
+ # Implements the same {Embedding::Provider::Interface} so it can be
11
+ # injected transparently in place of the real provider. On cache hit,
12
+ # the expensive API call (OpenAI, Ollama) is skipped entirely.
13
+ #
14
+ # @example
15
+ # real_provider = Embedding::Provider::OpenAI.new(api_key: key)
16
+ # cached = CachedEmbeddingProvider.new(provider: real_provider, cache_store: store)
17
+ # cached.embed("How does User work?") # API call + cache write
18
+ # cached.embed("How does User work?") # cache hit, no API call
19
+ #
20
class CachedEmbeddingProvider
  include Embedding::Provider::Interface

  # @param provider [Embedding::Provider::Interface] The real embedding provider
  # @param cache_store [CacheStore] Cache backend instance
  # @param ttl [Integer] TTL for cached embeddings in seconds
  def initialize(provider:, cache_store:, ttl: DEFAULT_TTLS[:embeddings])
    @provider = provider
    @cache_store = cache_store
    @ttl = ttl
  end

  # Embed a single text, returning a cached vector when available.
  #
  # Delegates the read-through logic to the cache store's +fetch+.
  #
  # @param text [String] Text to embed
  # @return [Array<Float>] Embedding vector
  def embed(text)
    @cache_store.fetch(embedding_key(text), ttl: @ttl) { @provider.embed(text) }
  end

  # Embed a batch of texts, using cached vectors for any previously seen texts.
  #
  # Only texts that are not already cached are sent to the real provider,
  # and each distinct uncached text is sent once even if it appears several
  # times in the batch. Results are merged back in original order; repeated
  # texts share the same vector object.
  #
  # @param texts [Array<String>] Texts to embed
  # @return [Array<Array<Float>>] Embedding vectors (same order as input)
  def embed_batch(texts)
    results, misses, miss_indices = partition_cached(texts)
    return results if misses.empty?

    # De-duplicate so a repeated text costs one provider embedding, not many.
    unique_misses = misses.uniq
    fresh_by_text = unique_misses.zip(@provider.embed_batch(unique_misses)).to_h

    fresh_by_text.each { |text, vector| store_vector(text, vector) }
    misses.each_with_index { |text, i| results[miss_indices[i]] = fresh_by_text[text] }

    results
  end

  # Delegate dimensions to the underlying provider.
  #
  # @return [Integer]
  def dimensions
    @provider.dimensions
  end

  # Delegate model_name to the underlying provider.
  #
  # @return [String]
  def model_name
    @provider.model_name
  end

  private

  # Split texts into cached hits and uncached misses.
  #
  # A falsy cache read (nil or false) counts as a miss.
  #
  # @param texts [Array<String>]
  # @return [Array(Array, Array<String>, Array<Integer>)] results array with
  #   hits filled in, the missed texts, and the input index of each miss
  def partition_cached(texts)
    results = Array.new(texts.size)
    misses = []
    miss_indices = []

    texts.each_with_index do |text, idx|
      cached = @cache_store.read(embedding_key(text))
      if cached
        results[idx] = cached
      else
        misses << text
        miss_indices << idx
      end
    end

    [results, misses, miss_indices]
  end

  # Write one freshly computed vector to the cache.
  #
  # Cache writes are best-effort: a failure is reported with +warn+ and
  # otherwise ignored so the embedding result is still returned.
  #
  # @param text [String]
  # @param vector [Array<Float>]
  # @return [void]
  def store_vector(text, vector)
    @cache_store.write(embedding_key(text), vector, ttl: @ttl)
  rescue StandardError => e
    warn("[Woods] CachedEmbeddingProvider cache write failed: #{e.message}")
  end

  # Build a cache key for an embedding text.
  #
  # NOTE(review): the key depends on the text only, not on the provider's
  # model or dimensions — confirm a cache store is never shared between
  # different embedding models.
  #
  # @param text [String]
  # @return [String]
  def embedding_key(text)
    Cache.cache_key(:embeddings, Digest::SHA256.hexdigest(text))
  end
end
112
+
113
+ # Decorator that wraps a {Retriever} with result caching.
114
+ #
115
+ # Caches the full formatted context output (the most token-expensive artifact)
116
+ # keyed by query + budget. Also caches the structural context overview
117
+ # separately with a longer TTL.
118
+ #
119
+ # @example
120
+ # retriever = Woods::Retriever.new(...)
121
+ # cached = CachedRetriever.new(retriever: retriever, cache_store: store)
122
+ # cached.retrieve("How does User work?") # full pipeline + cache
123
+ # cached.retrieve("How does User work?") # instant cache hit
124
+ #
125
class CachedRetriever
  # @param retriever [Retriever] The real retriever instance
  # @param cache_store [CacheStore] Cache backend instance
  # @param context_ttl [Integer] TTL for formatted context results
  def initialize(retriever:, cache_store:, context_ttl: DEFAULT_TTLS[:context])
    @retriever = retriever
    @cache_store = cache_store
    @context_ttl = context_ttl
  end

  # Execute the retrieval pipeline with context-level caching.
  #
  # On cache hit, returns a RetrievalResult reconstructed from cached data
  # without running any pipeline stages (classification and trace are nil
  # on such results). On miss, delegates to the real retriever and caches
  # the serializable parts of the result.
  #
  # The cache is best-effort in both directions: a failing read is treated
  # as a miss and a failing write is ignored, each reported with +warn+.
  #
  # @param query [String] Natural language query
  # @param budget [Integer] Token budget
  # @return [Retriever::RetrievalResult]
  def retrieve(query, budget: 8000)
    key = context_key(query, budget)
    cached = read_cached(key)
    return rebuild_result(cached, budget) if cached

    result = @retriever.retrieve(query, budget: budget)
    write_cached(key, result)
    result
  end

  private

  # Read a cached payload, degrading to a miss on any cache failure.
  #
  # Anything that is not a Hash cannot have been produced by
  # {#serialize_result}, so it is treated as a miss too.
  #
  # @param key [String]
  # @return [Hash, nil]
  def read_cached(key)
    payload = @cache_store.read(key)
    payload.is_a?(Hash) ? payload : nil
  rescue StandardError => e
    warn("[Woods] CachedRetriever cache read failed: #{e.message}")
    nil
  end

  # Store the serializable parts of a result, ignoring cache failures.
  #
  # @param key [String]
  # @param result [Retriever::RetrievalResult]
  # @return [void]
  def write_cached(key, result)
    @cache_store.write(key, serialize_result(result), ttl: @context_ttl)
  rescue StandardError => e
    warn("[Woods] CachedRetriever cache write failed: #{e.message}")
  end

  # Rebuild a RetrievalResult from a payload written by {#serialize_result}.
  #
  # @param cached [Hash] String-keyed payload
  # @param budget [Integer] Budget of the current call
  # @return [Retriever::RetrievalResult]
  def rebuild_result(cached, budget)
    Retriever::RetrievalResult.new(
      context: cached['context'],
      sources: cached['sources'],
      classification: nil,
      strategy: cached['strategy']&.to_sym,
      tokens_used: cached['tokens_used'],
      budget: budget,
      trace: nil
    )
  end

  # Build a cache key for a context result.
  #
  # @param query [String]
  # @param budget [Integer]
  # @return [String]
  def context_key(query, budget)
    Cache.cache_key(:context, query, budget.to_s)
  end

  # Serialize a RetrievalResult to a JSON-safe hash.
  #
  # Only caches the fields needed to reconstruct a useful result:
  # context string, sources list, strategy, and token count.
  #
  # @param result [Retriever::RetrievalResult]
  # @return [Hash]
  def serialize_result(result)
    {
      'context' => result.context,
      'sources' => result.sources,
      'strategy' => result.strategy&.to_s,
      'tokens_used' => result.tokens_used
    }
  end
end
198
+ end
199
+ end
@@ -0,0 +1,264 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'json'
5
+ require 'logger'
6
+
7
module Woods
  module Cache
    # Default TTLs (in seconds) for each cache domain.
    #
    # Embedding vectors are stable (same text → same vector) so they get 24h.
    # Metadata and structural context refresh on re-extraction (1h).
    # Search results and formatted context are session-scoped (15min).
    DEFAULT_TTLS = {
      embeddings: 86_400,
      metadata: 3_600,
      structural: 3_600,
      search: 900,
      context: 900
    }.freeze

    # Build a namespaced cache key from a domain and raw parts.
    #
    # Parts are joined with ':'; when the joined string is longer than 64
    # characters it is replaced by its SHA256 hex digest to bound key length.
    #
    # @param domain [Symbol] Cache domain (:embeddings, :metadata, etc.)
    # @param parts [Array<String>] Key components (will be SHA256-hashed if long)
    # @return [String] Namespaced key
    def self.cache_key(domain, *parts)
      raw = parts.join(':')
      suffix = raw.length > 64 ? Digest::SHA256.hexdigest(raw) : raw
      "woods:cache:#{domain}:#{suffix}"
    end

    # Abstract cache store interface.
    #
    # All cache backends must implement these methods. The interface is modeled
    # after ActiveSupport::Cache::Store for familiarity but kept minimal.
    #
    # @abstract Subclass and override all public methods.
    class CacheStore
      # Read a value from the cache.
      #
      # @param key [String] Cache key
      # @return [Object, nil] Cached value or nil if missing/expired
      def read(key)
        raise NotImplementedError
      end

      # Write a value to the cache.
      #
      # @param key [String] Cache key
      # @param value [Object] Value to cache (must be JSON-serializable)
      # @param ttl [Integer, nil] Time-to-live in seconds (nil = use domain default)
      # @return [void]
      def write(key, value, ttl: nil)
        raise NotImplementedError
      end

      # Delete a key from the cache.
      #
      # @param key [String] Cache key
      # @return [void]
      def delete(key)
        raise NotImplementedError
      end

      # Check if a key exists and is not expired.
      #
      # @param key [String] Cache key
      # @return [Boolean]
      def exist?(key)
        raise NotImplementedError
      end

      # Clear cached entries. If namespace is given, only clear that domain.
      #
      # @param namespace [Symbol, nil] Cache domain to clear, or nil for all
      # @return [void]
      def clear(namespace: nil)
        raise NotImplementedError
      end

      # Read-through cache: return cached value or execute block and cache result.
      #
      # A failing write is logged and ignored, so the freshly computed value
      # is always returned.
      #
      # @note nil is treated as a cache miss. If the wrapped operation legitimately
      #   returns nil, every call will re-execute the block. Custom backend
      #   implementers should preserve this semantic — do not return nil for keys
      #   that were written with a non-nil value. This is acceptable for the
      #   built-in use cases (embeddings and formatted context are never nil).
      #
      # @param key [String] Cache key
      # @param ttl [Integer, nil] TTL in seconds
      # @yield Block that computes the value on cache miss
      # @return [Object] Cached or freshly computed value
      def fetch(key, ttl: nil)
        cached = read(key)
        return cached unless cached.nil?

        value = yield
        begin
          write(key, value, ttl: ttl)
        rescue StandardError => e
          logger.warn("[Woods] CacheStore#fetch write failed for #{key}: #{e.message}")
        end
        value
      end

      private

      # Return a logger instance (Rails.logger in Rails apps, stderr elsewhere).
      #
      # Falls back to a stderr logger when a +Rails+ constant exists but has
      # no usable logger, so logging from a rescue block can never raise on nil.
      #
      # @return [Logger]
      def logger
        @logger ||= rails_logger || Logger.new($stderr)
      end

      # Rails.logger when it is available and set, otherwise nil.
      #
      # @return [Logger, nil]
      def rails_logger
        return nil unless defined?(Rails) && Rails.respond_to?(:logger)

        Rails.logger
      end

      # Build a wildcard pattern for clearing cache entries.
      #
      # @param namespace [Symbol, nil] Cache domain, or nil for all entries
      # @return [String]
      def clear_pattern(namespace)
        namespace ? "woods:cache:#{namespace}:*" : 'woods:cache:*'
      end

      # Delete a key, silently swallowing any errors.
      #
      # Used for cleanup on corrupt/stale entries where failure is acceptable.
      #
      # @param key [String]
      # @return [nil]
      def delete_silently(key)
        delete(key)
      rescue StandardError
        nil
      end
    end

    # In-memory cache store with LRU eviction and TTL support.
    #
    # Zero external dependencies. Suitable for single-process use, development,
    # and as a fallback when Redis/SolidCache are not available. All public
    # methods run under a single Mutex.
    #
    # Recency is tracked through Hash insertion order: the first key in
    # +@entries+ is the least recently used, and touching a key re-inserts it
    # at the end. This keeps reads, writes and evictions constant-time.
    #
    # @example
    #   store = InMemory.new(max_entries: 200)
    #   store.write("woods:cache:embeddings:abc", [0.1, 0.2], ttl: 3600)
    #   store.read("woods:cache:embeddings:abc") # => [0.1, 0.2]
    #
    class InMemory < CacheStore
      # @param max_entries [Integer] Maximum cached entries before LRU eviction
      def initialize(max_entries: 500)
        super()
        @max_entries = max_entries
        @entries = {}
        @mutex = Mutex.new
      end

      # Read a value, returning nil if missing or expired.
      #
      # An expired entry is removed on read; a live entry becomes the most
      # recently used.
      #
      # @param key [String] Cache key
      # @return [Object, nil]
      def read(key)
        @mutex.synchronize do
          entry = @entries[key]
          if entry.nil?
            nil
          elsif expired?(entry)
            evict_key(key)
            nil
          else
            touch(key)
            entry[:value]
          end
        end
      end

      # Write a value with optional TTL.
      #
      # Overwriting a key refreshes its recency. When the store is full the
      # least recently used entry is evicted first.
      #
      # @param key [String] Cache key
      # @param value [Object] Value to cache
      # @param ttl [Integer, nil] TTL in seconds (nil = the entry never expires)
      # @return [void]
      def write(key, value, ttl: nil)
        @mutex.synchronize do
          evict_key(key)
          @entries.shift if @entries.size >= @max_entries

          expires_at = ttl ? Time.now + ttl : nil
          @entries[key] = { value: value, expires_at: expires_at }
        end
      end

      # Delete a key.
      #
      # @param key [String] Cache key
      # @return [void]
      def delete(key)
        @mutex.synchronize { evict_key(key) }
      end

      # Check if a key exists and is not expired.
      #
      # Does not change recency and does not remove an expired entry.
      #
      # @param key [String] Cache key
      # @return [Boolean]
      def exist?(key)
        @mutex.synchronize do
          entry = @entries[key]
          !entry.nil? && !expired?(entry)
        end
      end

      # Clear entries. If namespace is given, only clear keys matching that domain.
      #
      # @param namespace [Symbol, nil] Domain to clear (:embeddings, :metadata, etc.)
      # @return [void]
      def clear(namespace: nil)
        @mutex.synchronize do
          if namespace
            prefix = "woods:cache:#{namespace}:"
            @entries.delete_if { |key, _entry| key.start_with?(prefix) }
          else
            @entries.clear
          end
        end
      end

      # Number of entries currently in the cache (for testing/diagnostics).
      #
      # Counts expired entries that have not been read or evicted yet.
      #
      # @return [Integer]
      def size
        @mutex.synchronize { @entries.size }
      end

      private

      # Whether an entry carries an expiry time that has already passed.
      #
      # @param entry [Hash] Stored entry with :value and :expires_at
      # @return [Boolean]
      def expired?(entry)
        deadline = entry[:expires_at]
        !deadline.nil? && Time.now > deadline
      end

      # Remove a key from the store.
      #
      # @param key [String]
      # @return [void]
      def evict_key(key)
        @entries.delete(key)
      end

      # Mark a key as most recently used by re-inserting it at the end of
      # the hash's insertion order.
      #
      # @param key [String]
      # @return [void]
      def touch(key)
        entry = @entries.delete(key)
        @entries[key] = entry if entry
      end
    end
  end
end