woods 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +89 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +406 -0
- data/exe/woods-console +59 -0
- data/exe/woods-console-mcp +22 -0
- data/exe/woods-mcp +34 -0
- data/exe/woods-mcp-http +37 -0
- data/exe/woods-mcp-start +58 -0
- data/lib/generators/woods/install_generator.rb +32 -0
- data/lib/generators/woods/pgvector_generator.rb +37 -0
- data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
- data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
- data/lib/tasks/woods.rake +621 -0
- data/lib/tasks/woods_evaluation.rake +115 -0
- data/lib/woods/ast/call_site_extractor.rb +106 -0
- data/lib/woods/ast/method_extractor.rb +71 -0
- data/lib/woods/ast/node.rb +116 -0
- data/lib/woods/ast/parser.rb +614 -0
- data/lib/woods/ast.rb +6 -0
- data/lib/woods/builder.rb +200 -0
- data/lib/woods/cache/cache_middleware.rb +199 -0
- data/lib/woods/cache/cache_store.rb +264 -0
- data/lib/woods/cache/redis_cache_store.rb +116 -0
- data/lib/woods/cache/solid_cache_store.rb +111 -0
- data/lib/woods/chunking/chunk.rb +84 -0
- data/lib/woods/chunking/semantic_chunker.rb +295 -0
- data/lib/woods/console/adapters/cache_adapter.rb +58 -0
- data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
- data/lib/woods/console/adapters/job_adapter.rb +68 -0
- data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
- data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
- data/lib/woods/console/audit_logger.rb +75 -0
- data/lib/woods/console/bridge.rb +177 -0
- data/lib/woods/console/confirmation.rb +90 -0
- data/lib/woods/console/connection_manager.rb +173 -0
- data/lib/woods/console/console_response_renderer.rb +74 -0
- data/lib/woods/console/embedded_executor.rb +373 -0
- data/lib/woods/console/model_validator.rb +81 -0
- data/lib/woods/console/rack_middleware.rb +87 -0
- data/lib/woods/console/safe_context.rb +82 -0
- data/lib/woods/console/server.rb +612 -0
- data/lib/woods/console/sql_validator.rb +172 -0
- data/lib/woods/console/tools/tier1.rb +118 -0
- data/lib/woods/console/tools/tier2.rb +117 -0
- data/lib/woods/console/tools/tier3.rb +110 -0
- data/lib/woods/console/tools/tier4.rb +79 -0
- data/lib/woods/coordination/pipeline_lock.rb +109 -0
- data/lib/woods/cost_model/embedding_cost.rb +88 -0
- data/lib/woods/cost_model/estimator.rb +128 -0
- data/lib/woods/cost_model/provider_pricing.rb +67 -0
- data/lib/woods/cost_model/storage_cost.rb +52 -0
- data/lib/woods/cost_model.rb +22 -0
- data/lib/woods/db/migrations/001_create_units.rb +38 -0
- data/lib/woods/db/migrations/002_create_edges.rb +35 -0
- data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
- data/lib/woods/db/migrator.rb +73 -0
- data/lib/woods/db/schema_version.rb +73 -0
- data/lib/woods/dependency_graph.rb +236 -0
- data/lib/woods/embedding/indexer.rb +140 -0
- data/lib/woods/embedding/openai.rb +126 -0
- data/lib/woods/embedding/provider.rb +162 -0
- data/lib/woods/embedding/text_preparer.rb +112 -0
- data/lib/woods/evaluation/baseline_runner.rb +115 -0
- data/lib/woods/evaluation/evaluator.rb +139 -0
- data/lib/woods/evaluation/metrics.rb +79 -0
- data/lib/woods/evaluation/query_set.rb +148 -0
- data/lib/woods/evaluation/report_generator.rb +90 -0
- data/lib/woods/extracted_unit.rb +145 -0
- data/lib/woods/extractor.rb +1028 -0
- data/lib/woods/extractors/action_cable_extractor.rb +201 -0
- data/lib/woods/extractors/ast_source_extraction.rb +46 -0
- data/lib/woods/extractors/behavioral_profile.rb +309 -0
- data/lib/woods/extractors/caching_extractor.rb +261 -0
- data/lib/woods/extractors/callback_analyzer.rb +246 -0
- data/lib/woods/extractors/concern_extractor.rb +292 -0
- data/lib/woods/extractors/configuration_extractor.rb +219 -0
- data/lib/woods/extractors/controller_extractor.rb +404 -0
- data/lib/woods/extractors/database_view_extractor.rb +278 -0
- data/lib/woods/extractors/decorator_extractor.rb +253 -0
- data/lib/woods/extractors/engine_extractor.rb +223 -0
- data/lib/woods/extractors/event_extractor.rb +211 -0
- data/lib/woods/extractors/factory_extractor.rb +289 -0
- data/lib/woods/extractors/graphql_extractor.rb +892 -0
- data/lib/woods/extractors/i18n_extractor.rb +117 -0
- data/lib/woods/extractors/job_extractor.rb +374 -0
- data/lib/woods/extractors/lib_extractor.rb +218 -0
- data/lib/woods/extractors/mailer_extractor.rb +269 -0
- data/lib/woods/extractors/manager_extractor.rb +188 -0
- data/lib/woods/extractors/middleware_extractor.rb +133 -0
- data/lib/woods/extractors/migration_extractor.rb +469 -0
- data/lib/woods/extractors/model_extractor.rb +988 -0
- data/lib/woods/extractors/phlex_extractor.rb +252 -0
- data/lib/woods/extractors/policy_extractor.rb +191 -0
- data/lib/woods/extractors/poro_extractor.rb +229 -0
- data/lib/woods/extractors/pundit_extractor.rb +223 -0
- data/lib/woods/extractors/rails_source_extractor.rb +473 -0
- data/lib/woods/extractors/rake_task_extractor.rb +343 -0
- data/lib/woods/extractors/route_extractor.rb +181 -0
- data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/woods/extractors/serializer_extractor.rb +339 -0
- data/lib/woods/extractors/service_extractor.rb +217 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/woods/extractors/shared_utility_methods.rb +281 -0
- data/lib/woods/extractors/state_machine_extractor.rb +398 -0
- data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
- data/lib/woods/extractors/validator_extractor.rb +211 -0
- data/lib/woods/extractors/view_component_extractor.rb +311 -0
- data/lib/woods/extractors/view_template_extractor.rb +261 -0
- data/lib/woods/feedback/gap_detector.rb +89 -0
- data/lib/woods/feedback/store.rb +119 -0
- data/lib/woods/filename_utils.rb +32 -0
- data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
- data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/woods/flow_assembler.rb +290 -0
- data/lib/woods/flow_document.rb +191 -0
- data/lib/woods/flow_precomputer.rb +102 -0
- data/lib/woods/formatting/base.rb +30 -0
- data/lib/woods/formatting/claude_adapter.rb +98 -0
- data/lib/woods/formatting/generic_adapter.rb +56 -0
- data/lib/woods/formatting/gpt_adapter.rb +64 -0
- data/lib/woods/formatting/human_adapter.rb +78 -0
- data/lib/woods/graph_analyzer.rb +374 -0
- data/lib/woods/mcp/bootstrapper.rb +96 -0
- data/lib/woods/mcp/index_reader.rb +394 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
- data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/woods/mcp/server.rb +962 -0
- data/lib/woods/mcp/tool_response_renderer.rb +85 -0
- data/lib/woods/model_name_cache.rb +51 -0
- data/lib/woods/notion/client.rb +217 -0
- data/lib/woods/notion/exporter.rb +219 -0
- data/lib/woods/notion/mapper.rb +40 -0
- data/lib/woods/notion/mappers/column_mapper.rb +57 -0
- data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
- data/lib/woods/notion/mappers/model_mapper.rb +161 -0
- data/lib/woods/notion/mappers/shared.rb +22 -0
- data/lib/woods/notion/rate_limiter.rb +68 -0
- data/lib/woods/observability/health_check.rb +79 -0
- data/lib/woods/observability/instrumentation.rb +34 -0
- data/lib/woods/observability/structured_logger.rb +57 -0
- data/lib/woods/operator/error_escalator.rb +81 -0
- data/lib/woods/operator/pipeline_guard.rb +92 -0
- data/lib/woods/operator/status_reporter.rb +80 -0
- data/lib/woods/railtie.rb +38 -0
- data/lib/woods/resilience/circuit_breaker.rb +99 -0
- data/lib/woods/resilience/index_validator.rb +167 -0
- data/lib/woods/resilience/retryable_provider.rb +108 -0
- data/lib/woods/retrieval/context_assembler.rb +261 -0
- data/lib/woods/retrieval/query_classifier.rb +133 -0
- data/lib/woods/retrieval/ranker.rb +277 -0
- data/lib/woods/retrieval/search_executor.rb +316 -0
- data/lib/woods/retriever.rb +152 -0
- data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
- data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
- data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
- data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
- data/lib/woods/ruby_analyzer.rb +87 -0
- data/lib/woods/session_tracer/file_store.rb +104 -0
- data/lib/woods/session_tracer/middleware.rb +143 -0
- data/lib/woods/session_tracer/redis_store.rb +106 -0
- data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
- data/lib/woods/session_tracer/session_flow_document.rb +223 -0
- data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
- data/lib/woods/session_tracer/store.rb +81 -0
- data/lib/woods/storage/graph_store.rb +120 -0
- data/lib/woods/storage/metadata_store.rb +196 -0
- data/lib/woods/storage/pgvector.rb +195 -0
- data/lib/woods/storage/qdrant.rb +205 -0
- data/lib/woods/storage/vector_store.rb +167 -0
- data/lib/woods/temporal/json_snapshot_store.rb +245 -0
- data/lib/woods/temporal/snapshot_store.rb +345 -0
- data/lib/woods/token_utils.rb +19 -0
- data/lib/woods/version.rb +5 -0
- data/lib/woods.rb +246 -0
- metadata +270 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module Woods
|
|
7
|
+
module Operator
|
|
8
|
+
# Reports pipeline status by reading extraction output metadata.
#
# The report is built from `manifest.json` inside the output directory.
# A manifest that is missing, is not parseable JSON, or does not contain a
# JSON object is reported as `:not_extracted`. A manifest whose
# `extracted_at` is missing or cannot be parsed as a time is reported as
# `:stale` with an infinite staleness.
#
# @example
#   reporter = StatusReporter.new(output_dir: 'tmp/woods')
#   status = reporter.report
#   status[:status]            # => :ok
#   status[:staleness_seconds] # => 3600.0
#
class StatusReporter
  # Age in seconds at or beyond which an extraction is reported as :stale.
  STALE_THRESHOLD = 86_400 # 24 hours

  # @param output_dir [String] Path to extraction output directory
  def initialize(output_dir:)
    @output_dir = output_dir
  end

  # Generate a pipeline status report.
  #
  # @return [Hash] Status report with :status, :extracted_at, :total_units,
  #   :counts, :git_sha, :git_branch, :staleness_seconds
  def report
    manifest = read_manifest
    return not_extracted_report if manifest.nil?

    staleness = compute_staleness(manifest['extracted_at'])

    {
      status: staleness < STALE_THRESHOLD ? :ok : :stale,
      extracted_at: manifest['extracted_at'],
      total_units: manifest['total_units'] || 0,
      counts: manifest['counts'] || {},
      git_sha: manifest['git_sha'],
      git_branch: manifest['git_branch'],
      staleness_seconds: staleness
    }
  end

  private

  # Load the manifest as a Hash.
  #
  # @return [Hash, nil] nil when the file is absent, is not valid JSON, or
  #   holds something other than a JSON object
  def read_manifest
    path = File.join(@output_dir, 'manifest.json')
    return nil unless File.exist?(path)

    parsed = JSON.parse(File.read(path))
    # report reads string keys from the manifest, so only a JSON object is usable.
    parsed.is_a?(Hash) ? parsed : nil
  rescue JSON::ParserError, Errno::ENOENT
    # ENOENT: the file vanished between the exist? check and the read.
    nil
  end

  # @return [Hash] Report used when no usable manifest is available
  def not_extracted_report
    {
      status: :not_extracted,
      extracted_at: nil,
      total_units: 0,
      counts: {},
      git_sha: nil,
      git_branch: nil,
      staleness_seconds: nil
    }
  end

  # Seconds elapsed since the extraction timestamp.
  #
  # @param extracted_at [String, nil] ISO8601 timestamp
  # @return [Numeric] Float::INFINITY when the timestamp is missing, is not
  #   a String, or cannot be parsed
  def compute_staleness(extracted_at)
    # Time.parse raises TypeError (not ArgumentError) for non-String input.
    return Float::INFINITY unless extracted_at.is_a?(String)

    Time.now - Time.parse(extracted_at)
  rescue ArgumentError
    Float::INFINITY
  end
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
|
|
4
|
+
# Rails integration point for Woods.
#
# Registers the gem's rake tasks and, depending on Woods.configuration,
# appends the session tracer and console MCP Rack middleware to the
# application's middleware stack.
#
# NOTE(review): both initializers read Woods.configuration at the moment they
# run. Confirm that settings made in the host app's config/initializers are
# already applied by then; if not, these may need `after: :load_config_initializers`.
class Railtie < Rails::Railtie
  rake_tasks { load File.expand_path('../tasks/woods.rake', __dir__) }

  # Appends the session tracer middleware when session tracing is enabled.
  initializer 'woods.session_tracer' do |app|
    settings = Woods.configuration
    next unless settings.session_tracer_enabled

    # Loaded lazily so the middleware file is only required when tracing is on.
    require 'woods/session_tracer/middleware'

    app.middleware.use(
      Woods::SessionTracer::Middleware,
      store: settings.session_store,
      session_id_proc: settings.session_id_proc,
      exclude_paths: settings.session_exclude_paths
    )
  end

  # Appends the console MCP Rack middleware when the console endpoint is enabled.
  initializer 'woods.console_mcp' do |app|
    settings = Woods.configuration
    next unless settings.console_mcp_enabled

    require 'woods/console/rack_middleware'

    app.middleware.use(Woods::Console::RackMiddleware, path: settings.console_mcp_path)
  end
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Woods
|
|
4
|
+
module Resilience
|
|
5
|
+
# Raised when the circuit breaker is open and calls are being rejected.
|
|
6
|
+
#
|
|
7
|
+
# @example Handling a circuit open condition
|
|
8
|
+
# begin
|
|
9
|
+
# breaker.call { provider.embed(text) }
|
|
10
|
+
# rescue CircuitOpenError => e
|
|
11
|
+
# use_cached_result(text)
|
|
12
|
+
# end
|
|
13
|
+
# Adds no behavior of its own. Woods::Error is not defined in this file, so it
# must already be loaded when this file is required.
class CircuitOpenError < Woods::Error; end
|
|
14
|
+
|
|
15
|
+
# Circuit breaker pattern for protecting external service calls.
#
# Tracks failures and transitions between three states:
# - **:closed** — normal operation, calls pass through
# - **:open** — too many failures, calls are rejected immediately
# - **:half_open** — the reset timeout has elapsed; calls pass through again
#   and the next completed call decides: success closes the circuit, failure
#   re-opens it
#
# @example Basic usage
#   breaker = CircuitBreaker.new(threshold: 5, reset_timeout: 60)
#   result = breaker.call { external_service.request }
#
# @example With a fallback
#   breaker = CircuitBreaker.new(threshold: 3, reset_timeout: 30)
#   begin
#     breaker.call { api.embed(text) }
#   rescue CircuitOpenError
#     # Service is down, use fallback
#   end
class CircuitBreaker
  # @return [Symbol] Current state — :closed, :open, or :half_open
  attr_reader :state

  # @param threshold [Integer] Number of consecutive failures before opening the circuit
  # @param reset_timeout [Numeric] Seconds to wait before transitioning from open to half_open
  def initialize(threshold: 5, reset_timeout: 60)
    @threshold = threshold
    @reset_timeout = reset_timeout
    @state = :closed
    @failure_count = 0
    @last_failure_time = nil # monotonic clock reading, in seconds
    @mutex = Mutex.new
  end

  # Execute a block through the circuit breaker.
  #
  # The block itself runs outside the mutex; only the state checks and
  # updates before and after it are synchronized.
  #
  # @yield The block to execute
  # @return [Object] The return value of the block
  # @raise [ArgumentError] if no block is given (not counted as a failure)
  # @raise [CircuitOpenError] if the circuit is open and the timeout has not elapsed
  # @raise [StandardError] re-raises any error from the block
  def call(&block)
    # Checked before the rescued region so a caller mistake is never recorded
    # as a failure of the protected service.
    raise ArgumentError, 'CircuitBreaker#call requires a block' unless block

    begin
      @mutex.synchronize { admit_or_reject }
      result = block.call
      @mutex.synchronize { reset! }
      result
    rescue CircuitOpenError
      # Rejections (from this breaker or one nested in the block) are not failures.
      raise
    rescue StandardError
      @mutex.synchronize { record_failure }
      raise
    end
  end

  private

  # Reject the call while the circuit is open and the reset timeout has not
  # elapsed; otherwise move an open circuit to :half_open. Must be called
  # with the mutex held.
  #
  # @raise [CircuitOpenError] if the circuit is still open
  def admit_or_reject
    return unless @state == :open

    unless monotonic_now - @last_failure_time >= @reset_timeout
      raise CircuitOpenError, "Circuit breaker is open (#{@failure_count} failures)"
    end

    @state = :half_open
  end

  # Record a failure and potentially open the circuit.
  def record_failure
    @failure_count += 1
    @last_failure_time = monotonic_now
    @state = :open if @failure_count >= @threshold
  end

  # Reset the circuit breaker to closed state with zero failures.
  def reset!
    @state = :closed
    @failure_count = 0
    @last_failure_time = nil
  end

  # Monotonic clock reading, so the reset timeout is unaffected by
  # adjustments to the system wall clock.
  #
  # @return [Float] seconds
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
require 'json'
require 'set'
|
|
4
|
+
require_relative '../filename_utils'
|
|
5
|
+
|
|
6
|
+
module Woods
|
|
7
|
+
module Resilience
|
|
8
|
+
# Validates the integrity of a codebase index output directory.
#
# Checks that:
# - Each type directory has a valid `_index.json` (a JSON array of entries,
#   each carrying an `identifier`)
# - All files referenced in the index exist on disk
# - Content hashes (source_hash) match the actual source_code
# - No stale unit files exist that aren't listed in the index
#
# Malformed JSON is reported as an error in the report rather than raised.
#
# @example
#   validator = IndexValidator.new(index_dir: "tmp/woods")
#   report = validator.validate
#   puts report.errors unless report.valid?
class IndexValidator
  include Woods::FilenameUtils

  # Report produced by {#validate}.
  #
  # @!attribute [r] valid?
  #   @return [Boolean] true if no errors were found
  # @!attribute [r] warnings
  #   @return [Array<String>] non-fatal issues (e.g., stale files)
  # @!attribute [r] errors
  #   @return [Array<String>] fatal integrity issues
  ValidationReport = Struct.new(:valid?, :warnings, :errors, keyword_init: true)

  # @param index_dir [String] Path to the codebase index output directory
  def initialize(index_dir:)
    @index_dir = index_dir
  end

  # Validate the index directory and return a report.
  #
  # Every immediate subdirectory of the index directory is treated as a
  # type directory and validated.
  #
  # @return [ValidationReport] the validation results
  def validate
    warnings = []
    errors = []

    unless Dir.exist?(@index_dir)
      errors << "Index directory does not exist: #{@index_dir}"
      return ValidationReport.new(valid?: false, warnings: warnings, errors: errors)
    end

    type_dirs = Dir.children(@index_dir).filter_map do |name|
      full_path = File.join(@index_dir, name)
      full_path if File.directory?(full_path)
    end

    type_dirs.each do |type_dir|
      validate_type_directory(type_dir, warnings, errors)
    end

    ValidationReport.new(valid?: errors.empty?, warnings: warnings, errors: errors)
  end

  private

  # Validate a single type directory (e.g., models/, controllers/).
  #
  # @param type_dir [String] Absolute path to the type directory
  # @param warnings [Array<String>] Accumulated warnings
  # @param errors [Array<String>] Accumulated errors
  def validate_type_directory(type_dir, warnings, errors)
    type_name = File.basename(type_dir)
    index_path = File.join(type_dir, '_index.json')

    unless File.exist?(index_path)
      errors << "Missing _index.json in #{type_name}/"
      return
    end

    index_entries = load_index_entries(index_path, type_name, errors)
    # Without a usable index every unit file would look stale, so stop here.
    return if index_entries.nil?

    indexed_identifiers = Set.new

    index_entries.each do |entry|
      identifier = entry['identifier'] if entry.is_a?(Hash)

      unless identifier.is_a?(String) && !identifier.empty?
        errors << "Index entry without an identifier in #{type_name}/_index.json"
        next
      end

      indexed_identifiers << identifier
      validate_index_entry(type_dir, type_name, identifier, errors)
    end

    check_stale_files(type_dir, type_name, indexed_identifiers, warnings)
  end

  # Read and parse `_index.json`, recording an error instead of raising when
  # it is not valid JSON or is not an array.
  #
  # @param index_path [String] Path to the `_index.json` file
  # @param type_name [String] Name of the type (for error messages)
  # @param errors [Array<String>] Accumulated errors
  # @return [Array, nil] The index entries, or nil if the index is unusable
  def load_index_entries(index_path, type_name, errors)
    entries = JSON.parse(File.read(index_path))
    return entries if entries.is_a?(Array)

    errors << "Invalid _index.json in #{type_name}/: expected an array of entries"
    nil
  rescue JSON::ParserError => e
    errors << "Invalid _index.json in #{type_name}/: #{e.message}"
    nil
  end

  # Validate that a single index entry has a corresponding unit file with correct hash.
  #
  # @param type_dir [String] Path to the type directory
  # @param type_name [String] Name of the type (for error messages)
  # @param identifier [String] The unit identifier from the index
  # @param errors [Array<String>] Accumulated errors
  def validate_index_entry(type_dir, type_name, identifier, errors)
    unit_file = find_unit_file(type_dir, identifier)

    unless unit_file
      errors << "Missing unit file for #{identifier} in #{type_name}/"
      return
    end

    validate_content_hash(unit_file, identifier, errors)
  end

  # Find the JSON file for a given identifier in a type directory.
  #
  # @param type_dir [String] Path to the type directory
  # @param identifier [String] The unit identifier
  # @return [String, nil] Path to the unit file, or nil if not found
  def find_unit_file(type_dir, identifier)
    # Try collision-safe first (current format), then legacy safe_filename, then exact match
    candidates = [
      File.join(type_dir, collision_safe_filename(identifier)),
      File.join(type_dir, safe_filename(identifier)),
      File.join(type_dir, "#{identifier}.json")
    ]

    candidates.find { |path| File.exist?(path) }
  end

  # Validate that the source_hash in a unit file matches the actual source_code.
  #
  # The check is skipped when either field is absent. A unit file that is not
  # valid JSON, or is not a JSON object, is recorded as an error.
  #
  # @param unit_file [String] Path to the unit JSON file
  # @param identifier [String] The unit identifier (for error messages)
  # @param errors [Array<String>] Accumulated errors
  def validate_content_hash(unit_file, identifier, errors)
    data = JSON.parse(File.read(unit_file))

    unless data.is_a?(Hash)
      errors << "Invalid unit file for #{identifier}: expected a JSON object"
      return
    end

    source_code = data['source_code']
    stored_hash = data['source_hash']

    return unless source_code && stored_hash

    expected_hash = Digest::SHA256.hexdigest(source_code)
    return if stored_hash == expected_hash

    errors << "Content hash mismatch for #{identifier}: expected #{expected_hash[0..7]}..., " \
              "got #{stored_hash[0..7]}..."
  rescue JSON::ParserError => e
    errors << "Invalid unit file for #{identifier}: #{e.message}"
  end

  # Check for unit files that exist on disk but aren't referenced in the index.
  #
  # @param type_dir [String] Path to the type directory
  # @param type_name [String] Name of the type (for warning messages)
  # @param indexed_identifiers [Set<String>] Identifiers listed in the index
  # @param warnings [Array<String>] Accumulated warnings
  def check_stale_files(type_dir, type_name, indexed_identifiers, warnings)
    # Build a set of expected filenames from indexed identifiers (both current and legacy formats)
    expected_filenames = Set.new
    indexed_identifiers.each do |id|
      expected_filenames << collision_safe_filename(id)
      expected_filenames << safe_filename(id)
      expected_filenames << "#{id}.json"
    end

    Dir[File.join(type_dir, '*.json')].each do |file|
      basename = File.basename(file)
      next if basename == '_index.json'
      next if expected_filenames.include?(basename)

      warnings << "Stale file not in index: #{type_name}/#{basename}"
    end
  end
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../embedding/provider'
|
|
4
|
+
require_relative 'circuit_breaker'
|
|
5
|
+
|
|
6
|
+
module Woods
|
|
7
|
+
module Resilience
|
|
8
|
+
# Decorator that adds retries (and, optionally, a circuit breaker) around an
# embedding provider.
#
# Failed embed calls are retried with exponential backoff. If a circuit
# breaker was supplied, every embed call goes through it, and a
# {CircuitOpenError} is propagated immediately instead of being retried.
#
# @example Without circuit breaker
#   retryable = RetryableProvider.new(provider: ollama_provider, max_retries: 3)
#   vector = retryable.embed("some text")
#
# @example With circuit breaker
#   breaker = CircuitBreaker.new(threshold: 5, reset_timeout: 60)
#   retryable = RetryableProvider.new(
#     provider: ollama_provider,
#     max_retries: 3,
#     circuit_breaker: breaker
#   )
#   vector = retryable.embed("some text")
class RetryableProvider
  include Woods::Embedding::Provider::Interface

  # @param provider [#embed, #embed_batch, #dimensions, #model_name] The wrapped embedding provider
  # @param max_retries [Integer] How many times a failed call is retried after the first attempt
  # @param circuit_breaker [CircuitBreaker, nil] Breaker to route calls through, if any
  def initialize(provider:, max_retries: 3, circuit_breaker: nil)
    @provider = provider
    @max_retries = max_retries
    @circuit_breaker = circuit_breaker
  end

  # Embed one text, retrying on failure.
  #
  # @param text [String] the text to embed
  # @return [Array<Float>] the embedding vector
  # @raise [CircuitOpenError] if the circuit breaker is open
  # @raise [StandardError] if all retries are exhausted
  def embed(text)
    with_retries do
      call_provider { @provider.embed(text) }
    end
  end

  # Embed several texts in one provider call, retrying on failure.
  #
  # @param texts [Array<String>] the texts to embed
  # @return [Array<Array<Float>>] array of embedding vectors
  # @raise [CircuitOpenError] if the circuit breaker is open
  # @raise [StandardError] if all retries are exhausted
  def embed_batch(texts)
    with_retries do
      call_provider { @provider.embed_batch(texts) }
    end
  end

  # Dimensionality reported by the wrapped provider.
  #
  # @return [Integer] number of dimensions
  def dimensions
    @provider.dimensions
  end

  # Model name reported by the wrapped provider.
  #
  # @return [String] model name
  def model_name
    @provider.model_name
  end

  private

  # Run the block, retrying up to @max_retries times after the first attempt.
  # The pause before retry k is (2**k) * 0.1 seconds.
  #
  # @yield The block to execute
  # @return [Object] The return value of the block
  # @raise [CircuitOpenError] immediately without retrying
  # @raise [StandardError] the last error if all retries are exhausted
  def with_retries
    tries = 0
    begin
      tries += 1
      yield
    rescue CircuitOpenError
      # An open circuit means "stop calling"; retrying would defeat it.
      raise
    rescue StandardError => error
      raise error unless tries <= @max_retries

      pause = (2**tries) * 0.1
      sleep(pause)
      retry
    end
  end

  # Invoke the block directly, or through the circuit breaker when one is set.
  #
  # @yield The block to execute
  # @return [Object] The return value of the block
  def call_provider(&block)
    return block.call unless @circuit_breaker

    @circuit_breaker.call(&block)
  end
end
|
|
107
|
+
end
|
|
108
|
+
end
|