woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'time'
5
+
6
+ module Woods
7
+ module Operator
8
# Reports pipeline status by reading extraction output metadata.
#
# The report is derived from `manifest.json` inside the output directory.
# A manifest that is missing, is not valid JSON, or does not contain a JSON
# object is reported as `:not_extracted` instead of raising.
#
# @example
#   reporter = StatusReporter.new(output_dir: 'tmp/woods')
#   status = reporter.report
#   status[:status]            # => :ok
#   status[:staleness_seconds] # => 3600
#
class StatusReporter
  # Age in seconds at which an extraction is reported as stale (24 hours).
  STALE_THRESHOLD = 86_400

  # @param output_dir [String] Path to extraction output directory
  def initialize(output_dir:)
    @output_dir = output_dir
  end

  # Generate a pipeline status report.
  #
  # `:status` is `:not_extracted` when no usable manifest exists, `:ok` when
  # the manifest is younger than {STALE_THRESHOLD}, and `:stale` otherwise
  # (including when `extracted_at` is absent or cannot be parsed, in which
  # case `:staleness_seconds` is `Float::INFINITY`).
  #
  # @return [Hash] Status report with :status, :extracted_at, :total_units,
  #   :counts, :git_sha, :git_branch, :staleness_seconds
  def report
    manifest = read_manifest
    return not_extracted_report if manifest.nil?

    staleness = compute_staleness(manifest['extracted_at'])

    {
      status: staleness < STALE_THRESHOLD ? :ok : :stale,
      extracted_at: manifest['extracted_at'],
      total_units: manifest['total_units'] || 0,
      counts: manifest['counts'] || {},
      git_sha: manifest['git_sha'],
      git_branch: manifest['git_branch'],
      staleness_seconds: staleness
    }
  end

  private

  # Load and parse the manifest file.
  #
  # @return [Hash, nil] the parsed manifest, or nil when it is missing,
  #   malformed, or not a JSON object
  def read_manifest
    path = File.join(@output_dir, 'manifest.json')
    return nil unless File.exist?(path)

    manifest = JSON.parse(File.read(path))
    # Valid JSON that is not an object (e.g. `[]`) cannot be indexed by the
    # string keys #report reads, so treat it like a missing manifest.
    manifest.is_a?(Hash) ? manifest : nil
  rescue JSON::ParserError, Errno::ENOENT
    # Errno::ENOENT: the file was removed between the exist? check and the read.
    nil
  end

  # @return [Hash] the report used when no usable manifest is available
  def not_extracted_report
    {
      status: :not_extracted,
      extracted_at: nil,
      total_units: 0,
      counts: {},
      git_sha: nil,
      git_branch: nil,
      staleness_seconds: nil
    }
  end

  # Compute the age of the extraction in seconds.
  #
  # @param extracted_at [String, nil] ISO8601 timestamp
  # @return [Numeric] seconds since extraction, or Float::INFINITY when the
  #   timestamp is absent, not a String, or unparseable
  def compute_staleness(extracted_at)
    # Time.parse raises TypeError for non-String input (e.g. a numeric
    # timestamp), so filter those out rather than rescuing after the fact.
    return Float::INFINITY unless extracted_at.is_a?(String)

    Time.now - Time.parse(extracted_at)
  rescue ArgumentError
    Float::INFINITY
  end
end
79
+ end
80
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
# Hooks Woods into a host Rails application.
#
# Registers the gem's rake task file and, depending on the flags read from
# Woods.configuration, appends the session tracer middleware and/or the
# console MCP Rack middleware to the application's middleware stack.
class Railtie < Rails::Railtie
  rake_tasks { load File.expand_path('../tasks/woods.rake', __dir__) }

  # Appends the session tracer middleware when session tracing is enabled.
  initializer 'woods.session_tracer' do |app|
    settings = Woods.configuration
    next unless settings.session_tracer_enabled

    # Required lazily so the middleware is only loaded when it is used.
    require 'woods/session_tracer/middleware'

    tracer_options = {
      store: settings.session_store,
      session_id_proc: settings.session_id_proc,
      exclude_paths: settings.session_exclude_paths
    }
    app.middleware.use(Woods::SessionTracer::Middleware, **tracer_options)
  end

  # Appends the console MCP Rack middleware when the console MCP is enabled.
  initializer 'woods.console_mcp' do |app|
    settings = Woods.configuration
    next unless settings.console_mcp_enabled

    # Required lazily so the middleware is only loaded when it is used.
    require 'woods/console/rack_middleware'

    app.middleware.use(Woods::Console::RackMiddleware, path: settings.console_mcp_path)
  end
end
38
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Resilience
5
# Error signalling that a call was refused because the circuit breaker is open.
#
# @example Falling back when the circuit is open
#   begin
#     breaker.call { provider.embed(text) }
#   rescue CircuitOpenError => e
#     use_cached_result(text)
#   end
class CircuitOpenError < Woods::Error
end
14
+
15
# Circuit breaker pattern for protecting external service calls.
#
# Tracks failures and transitions between three states:
# - **:closed** — normal operation, calls pass through
# - **:open** — too many failures, calls are rejected immediately
# - **:half_open** — testing recovery; exactly one probe call is allowed
#   through and every other call is rejected until the probe finishes
#
# A successful call closes the circuit and clears the failure count. A failed
# call increments the count and opens the circuit once the threshold is
# reached. The user block always runs outside the internal mutex.
#
# @example Basic usage
#   breaker = CircuitBreaker.new(threshold: 5, reset_timeout: 60)
#   result = breaker.call { external_service.request }
#
# @example With retry logic
#   breaker = CircuitBreaker.new(threshold: 3, reset_timeout: 30)
#   begin
#     breaker.call { api.embed(text) }
#   rescue CircuitOpenError
#     # Service is down, use fallback
#   end
class CircuitBreaker
  # @return [Symbol] Current state — :closed, :open, or :half_open
  attr_reader :state

  # @param threshold [Integer] Number of consecutive failures before opening the circuit
  # @param reset_timeout [Numeric] Seconds to wait before transitioning from open to half_open
  def initialize(threshold: 5, reset_timeout: 60)
    @threshold = threshold
    @reset_timeout = reset_timeout
    @state = :closed
    @failure_count = 0
    @last_failure_time = nil
    @mutex = Mutex.new
  end

  # Execute a block through the circuit breaker.
  #
  # @yield The block to execute
  # @return [Object] The return value of the block
  # @raise [ArgumentError] if no block is given
  # @raise [CircuitOpenError] if the circuit is open and the timeout has not
  #   elapsed, or if a half-open recovery probe is already in flight
  # @raise [StandardError] re-raises any error from the block
  def call(&block)
    # A missing block is a caller bug, not a service failure; reject it before
    # it can be counted against the circuit.
    raise ArgumentError, 'CircuitBreaker#call requires a block' unless block

    # Phase 1: decide under the mutex whether this call may proceed.
    probe = @mutex.synchronize { admit_call }

    begin
      # Phase 2: run the block outside the mutex.
      result = block.call

      # Phase 3: record success under the mutex.
      @mutex.synchronize { reset! }
      result
    rescue CircuitOpenError
      # Raised by the block itself (e.g. a nested breaker): not a failure of
      # the protected service, so it is passed through uncounted.
      raise
    rescue StandardError
      # Phase 4: record failure under the mutex.
      @mutex.synchronize { record_failure }
      raise
    ensure
      # A probe that ended without a recorded success or failure (non-standard
      # exception, non-local exit, pass-through CircuitOpenError) must not
      # leave the circuit stuck in :half_open; reopen it so a later call can
      # probe again.
      @mutex.synchronize { @state = :open if @state == :half_open } if probe
    end
  end

  private

  # Decide whether the current call may run. Must be called with the mutex held.
  #
  # @return [Boolean] true when this call is the half-open recovery probe
  # @raise [CircuitOpenError] when the call must be rejected
  def admit_call
    case @state
    when :open
      unless monotonic_now - @last_failure_time >= @reset_timeout
        raise CircuitOpenError, "Circuit breaker is open (#{@failure_count} failures)"
      end

      @state = :half_open
      true
    when :half_open
      raise CircuitOpenError, 'Circuit breaker is half-open (recovery probe in progress)'
    else
      false
    end
  end

  # Record a failure and potentially open the circuit.
  def record_failure
    @failure_count += 1
    @last_failure_time = monotonic_now
    @state = :open if @failure_count >= @threshold
  end

  # Reset the circuit breaker to closed state with zero failures.
  def reset!
    @state = :closed
    @failure_count = 0
    @last_failure_time = nil
  end

  # Monotonic timestamp in seconds, so the reset timeout is unaffected by
  # wall-clock adjustments.
  #
  # @return [Float]
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
98
+ end
99
+ end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
require 'digest'
require 'json'
require 'set'
require_relative '../filename_utils'
5
+
6
+ module Woods
7
+ module Resilience
8
# Validates the integrity of a codebase index output directory.
#
# Checks that:
# - Each type directory has a valid `_index.json`
# - All files referenced in the index exist on disk
# - Content hashes (source_hash) match the actual source_code
# - No stale unit files exist that aren't listed in the index
#
# Malformed or unreadable JSON files are reported as errors in the returned
# report; they do not raise out of {#validate}.
#
# @example
#   validator = IndexValidator.new(index_dir: "tmp/woods")
#   report = validator.validate
#   puts report.errors if !report.valid?
class IndexValidator
  include Woods::FilenameUtils

  # Report produced by {#validate}.
  #
  # @!attribute [r] valid?
  #   @return [Boolean] true if no errors were found
  # @!attribute [r] warnings
  #   @return [Array<String>] non-fatal issues (e.g., stale files)
  # @!attribute [r] errors
  #   @return [Array<String>] fatal integrity issues
  ValidationReport = Struct.new(:valid?, :warnings, :errors, keyword_init: true)

  # @param index_dir [String] Path to the codebase index output directory
  def initialize(index_dir:)
    @index_dir = index_dir
  end

  # Validate the index directory and return a report.
  #
  # Every immediate subdirectory of the index directory is treated as a type
  # directory and validated independently.
  #
  # @return [ValidationReport] the validation results
  def validate
    warnings = []
    errors = []

    unless Dir.exist?(@index_dir)
      errors << "Index directory does not exist: #{@index_dir}"
      return ValidationReport.new(valid?: false, warnings: warnings, errors: errors)
    end

    type_dirs = Dir.children(@index_dir).filter_map do |name|
      full_path = File.join(@index_dir, name)
      full_path if File.directory?(full_path)
    end

    type_dirs.each do |type_dir|
      validate_type_directory(type_dir, warnings, errors)
    end

    ValidationReport.new(valid?: errors.empty?, warnings: warnings, errors: errors)
  end

  private

  # Validate a single type directory (e.g., models/, controllers/).
  #
  # @param type_dir [String] Absolute path to the type directory
  # @param warnings [Array<String>] Accumulated warnings
  # @param errors [Array<String>] Accumulated errors
  def validate_type_directory(type_dir, warnings, errors)
    type_name = File.basename(type_dir)
    index_path = File.join(type_dir, '_index.json')

    unless File.exist?(index_path)
      errors << "Missing _index.json in #{type_name}/"
      return
    end

    index_entries = load_index_entries(index_path, type_name, errors)
    # Without a usable index there is nothing to compare files against, so
    # the stale-file check is skipped as well.
    return if index_entries.nil?

    indexed_identifiers = Set.new

    index_entries.each do |entry|
      identifier = entry.is_a?(Hash) ? entry['identifier'] : nil
      unless identifier.is_a?(String) && !identifier.empty?
        errors << "Index entry without a valid identifier in #{type_name}/"
        next
      end

      indexed_identifiers << identifier
      validate_index_entry(type_dir, type_name, identifier, errors)
    end

    check_stale_files(type_dir, type_name, indexed_identifiers, warnings)
  end

  # Read and parse a type directory's `_index.json`.
  #
  # @param index_path [String] Path to the `_index.json` file
  # @param type_name [String] Name of the type (for error messages)
  # @param errors [Array<String>] Accumulated errors
  # @return [Array, nil] the index entries, or nil (with an error recorded)
  #   when the file is unreadable, malformed, or not a JSON array
  def load_index_entries(index_path, type_name, errors)
    entries = JSON.parse(File.read(index_path))
    return entries if entries.is_a?(Array)

    errors << "Invalid _index.json in #{type_name}/: expected an array of entries"
    nil
  rescue JSON::ParserError, SystemCallError => e
    errors << "Invalid _index.json in #{type_name}/: #{e.message}"
    nil
  end

  # Validate that a single index entry has a corresponding unit file with correct hash.
  #
  # @param type_dir [String] Path to the type directory
  # @param type_name [String] Name of the type (for error messages)
  # @param identifier [String] The unit identifier from the index
  # @param errors [Array<String>] Accumulated errors
  def validate_index_entry(type_dir, type_name, identifier, errors)
    unit_file = find_unit_file(type_dir, identifier)

    unless unit_file
      errors << "Missing unit file for #{identifier} in #{type_name}/"
      return
    end

    validate_content_hash(unit_file, identifier, errors)
  end

  # Find the JSON file for a given identifier in a type directory.
  #
  # @param type_dir [String] Path to the type directory
  # @param identifier [String] The unit identifier
  # @return [String, nil] Path to the unit file, or nil if not found
  def find_unit_file(type_dir, identifier)
    # Try collision-safe first (current format), then legacy safe_filename, then exact match
    candidates = [
      File.join(type_dir, collision_safe_filename(identifier)),
      File.join(type_dir, safe_filename(identifier)),
      File.join(type_dir, "#{identifier}.json")
    ]

    candidates.find { |path| File.exist?(path) }
  end

  # Validate that the source_hash in a unit file matches the actual source_code.
  #
  # Units that lack either field are skipped. Unreadable or malformed unit
  # files, and fields that are present but not strings, are recorded as errors.
  #
  # @param unit_file [String] Path to the unit JSON file
  # @param identifier [String] The unit identifier (for error messages)
  # @param errors [Array<String>] Accumulated errors
  def validate_content_hash(unit_file, identifier, errors)
    data = JSON.parse(File.read(unit_file))
    unless data.is_a?(Hash)
      errors << "Invalid unit file for #{identifier}: expected a JSON object"
      return
    end

    source_code = data['source_code']
    stored_hash = data['source_hash']

    return unless source_code && stored_hash

    # Digest and the slice in the message below both require strings.
    unless source_code.is_a?(String) && stored_hash.is_a?(String)
      errors << "Invalid source_code/source_hash for #{identifier}: expected strings"
      return
    end

    expected_hash = Digest::SHA256.hexdigest(source_code)
    return if stored_hash == expected_hash

    errors << "Content hash mismatch for #{identifier}: expected #{expected_hash[0..7]}..., " \
              "got #{stored_hash[0..7]}..."
  rescue JSON::ParserError, SystemCallError => e
    errors << "Unreadable unit file for #{identifier}: #{e.message}"
  end

  # Check for unit files that exist on disk but aren't referenced in the index.
  #
  # @param type_dir [String] Path to the type directory
  # @param type_name [String] Name of the type (for warning messages)
  # @param indexed_identifiers [Set<String>] Identifiers listed in the index
  # @param warnings [Array<String>] Accumulated warnings
  def check_stale_files(type_dir, type_name, indexed_identifiers, warnings)
    # Build a set of expected filenames from indexed identifiers (both current and legacy formats)
    expected_filenames = Set.new
    indexed_identifiers.each do |id|
      expected_filenames << collision_safe_filename(id)
      expected_filenames << safe_filename(id)
      expected_filenames << "#{id}.json"
    end

    # `base:` keeps glob metacharacters in the directory path from being
    # interpreted as part of the pattern; results are names relative to it.
    Dir.glob('*.json', base: type_dir).each do |basename|
      next if basename == '_index.json'
      next if expected_filenames.include?(basename)

      warnings << "Stale file not in index: #{type_name}/#{basename}"
    end
  end
end
166
+ end
167
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../embedding/provider'
4
+ require_relative 'circuit_breaker'
5
+
6
+ module Woods
7
+ module Resilience
8
# Wraps an embedding provider with retry logic and optional circuit breaker.
#
# Retries failed embedding calls with exponential backoff. Every
# StandardError raised by the call is retried; the wrapper does not
# distinguish transient from permanent errors. When a circuit breaker is
# provided, all embedding calls are routed through it, and
# {CircuitOpenError} is never retried.
#
# @example Without circuit breaker
#   retryable = RetryableProvider.new(provider: ollama_provider, max_retries: 3)
#   vector = retryable.embed("some text")
#
# @example With circuit breaker
#   breaker = CircuitBreaker.new(threshold: 5, reset_timeout: 60)
#   retryable = RetryableProvider.new(
#     provider: ollama_provider,
#     max_retries: 3,
#     circuit_breaker: breaker
#   )
#   vector = retryable.embed("some text")
class RetryableProvider
  include Woods::Embedding::Provider::Interface

  # @param provider [#embed, #embed_batch, #dimensions, #model_name] The underlying embedding provider
  # @param max_retries [Integer] Maximum number of retry attempts (in addition to the initial attempt)
  # @param circuit_breaker [CircuitBreaker, nil] Optional circuit breaker instance
  # @param base_delay [Numeric] Backoff unit in seconds; the wait before retry
  #   number k is `(2**k) * base_delay`. Must not be negative.
  def initialize(provider:, max_retries: 3, circuit_breaker: nil, base_delay: 0.1)
    @provider = provider
    @max_retries = max_retries
    @circuit_breaker = circuit_breaker
    @base_delay = base_delay
  end

  # Embed a single text string with retry logic.
  #
  # @param text [String] the text to embed
  # @return [Array<Float>] the embedding vector
  # @raise [CircuitOpenError] if the circuit breaker is open
  # @raise [StandardError] if all retries are exhausted
  def embed(text)
    with_retries { call_provider { @provider.embed(text) } }
  end

  # Embed multiple texts with retry logic.
  #
  # @param texts [Array<String>] the texts to embed
  # @return [Array<Array<Float>>] array of embedding vectors
  # @raise [CircuitOpenError] if the circuit breaker is open
  # @raise [StandardError] if all retries are exhausted
  def embed_batch(texts)
    with_retries { call_provider { @provider.embed_batch(texts) } }
  end

  # Return the dimensionality of the embedding vectors.
  #
  # Delegates directly to the provider, without retries or circuit breaker.
  #
  # @return [Integer] number of dimensions
  def dimensions
    @provider.dimensions
  end

  # Return the name of the embedding model.
  #
  # Delegates directly to the provider, without retries or circuit breaker.
  #
  # @return [String] model name
  def model_name
    @provider.model_name
  end

  private

  # Execute a block with retry logic and exponential backoff.
  #
  # The block runs at most `max_retries + 1` times. With the default
  # `base_delay` the waits between attempts are 0.2s, 0.4s, 0.8s, ...
  #
  # @yield The block to execute
  # @return [Object] The return value of the block
  # @raise [CircuitOpenError] immediately without retrying
  # @raise [StandardError] the last error if all retries are exhausted
  def with_retries
    attempt = 0
    begin
      attempt += 1
      yield
    rescue CircuitOpenError
      # An open circuit means the service is known to be down; retrying
      # would only add delay.
      raise
    rescue StandardError
      raise if attempt > @max_retries

      sleep((2**attempt) * @base_delay)
      retry
    end
  end

  # Route a call through the circuit breaker if one is configured.
  #
  # @yield The block to execute
  # @return [Object] The return value of the block
  def call_provider(&block)
    if @circuit_breaker
      @circuit_breaker.call(&block)
    else
      block.call
    end
  end
end
107
+ end
108
+ end