woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Notion
5
+ module Mappers
6
# Derives, for every table touched by a migration, the most recent
# `extracted_at` timestamp found in a set of migration ExtractedUnits.
#
# The result is used to stamp Data Models pages with the date of the latest
# schema change.
#
# @example
#   mapper = MigrationMapper.new
#   changes = mapper.latest_changes(migration_units)
#   # => { "users" => "2026-02-20T10:00:00Z", "posts" => "2026-01-15T09:00:00Z" }
#
class MigrationMapper
  # Build a table => newest timestamp lookup.
  #
  # Units without an 'extracted_at' value are skipped; units without
  # 'metadata' or 'tables_affected' contribute nothing.
  #
  # @param migration_units [Array<Hash>] Parsed migration ExtractedUnit JSONs
  # @return [Hash<String, String>] Table name to latest extracted_at timestamp
  def latest_changes(migration_units)
    newest = {}

    migration_units.each do |unit|
      stamp = unit['extracted_at']
      next unless stamp

      metadata = unit['metadata'] || {}
      affected = metadata['tables_affected'] || []
      affected.each { |table| update_latest(newest, table, stamp) }
    end

    newest
  end

  private

  # Store the timestamp for a table when none is recorded yet or when it
  # compares greater than the recorded one (values are compared with `>`).
  #
  # @return [void]
  def update_latest(changes, table, extracted_at)
    recorded = changes[table]
    return unless recorded.nil? || extracted_at > recorded

    changes[table] = extracted_at
  end
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared'
4
+
5
+ module Woods
6
+ module Notion
7
+ module Mappers
8
# Maps a model ExtractedUnit to Notion page properties for the Data Models database.
#
# Transforms model metadata (associations, validations, callbacks, scopes, git data)
# into Notion API property format for the Data Models database.
#
# @example
#   mapper = ModelMapper.new
#   properties = mapper.map(unit_data)
#   client.create_page(database_id: db_id, properties: properties)
#
class ModelMapper
  include Shared

  # Leading comments that Ruby or the shell interpret (shebang, Emacs-style
  # `-*- ... -*-` lines, and magic comments such as `frozen_string_literal`).
  # They are never a human-written model description.
  MAGIC_COMMENT_PATTERN = /
    \A\#(?:
      !                                   # shebang
      |\s*-\*-                            # emacs-style mode line
      |\s*(?:frozen[_-]string[_-]literal|(?:file)?encoding|coding|
             warn[_-]indent|shareable[_-]constant[_-]value)\s*[:=]
    )
  /xi
  private_constant :MAGIC_COMMENT_PATTERN

  # Map a model unit to Notion Data Models page properties.
  #
  # @param unit_data [Hash] Parsed model ExtractedUnit JSON
  # @return [Hash] Notion page properties hash
  def map(unit_data)
    metadata = unit_data['metadata'] || {}
    properties = build_text_properties(unit_data, metadata)
    properties['Column Count'] = { number: column_count(metadata) }
    add_git_properties(properties, metadata['git'] || {})
    properties
  end

  private

  # Build every title / rich_text property of the page.
  #
  # @param unit_data [Hash] Parsed model ExtractedUnit JSON
  # @param metadata [Hash] The unit's 'metadata' hash (may be empty)
  # @return [Hash] Text-based Notion properties
  def build_text_properties(unit_data, metadata)
    {
      'Table Name' => title_property(table_name(unit_data, metadata)),
      'Model Name' => rich_text_property(unit_data['identifier']),
      'Description' => rich_text_property(extract_description(unit_data['source_code'])),
      'Associations' => rich_text_property(format_associations(metadata['associations'])),
      'Validations' => rich_text_property(format_validations(metadata['validations'])),
      'Callbacks' => rich_text_property(format_callbacks(metadata['callbacks'])),
      'Scopes' => rich_text_property(format_scopes(metadata['scopes'])),
      'File Path' => rich_text_property(unit_data['file_path'] || ''),
      'Dependencies' => rich_text_property(format_dependencies(unit_data['dependencies']))
    }
  end

  # Add the optional git-derived properties; each is set only when present.
  #
  # @param properties [Hash] Properties hash, mutated in place
  # @param git [Hash] The 'git' metadata hash (may be empty)
  # @return [void]
  def add_git_properties(properties, git)
    properties['Last Modified'] = { date: { start: git['last_modified'] } } if git['last_modified']
    properties['Change Frequency'] = { select: { name: git['change_frequency'] } } if git['change_frequency']
  end

  # Resolve the table name: the extracted 'table_name' when available,
  # otherwise a naive guess from the identifier (last namespace segment,
  # snake_cased, with an "s" appended).
  #
  # Returns an empty string when neither a table name nor an identifier is
  # available, instead of the meaningless "s" the naive pluralization would give.
  #
  # @return [String]
  def table_name(unit_data, metadata)
    return metadata['table_name'] if metadata['table_name']

    class_name = unit_data['identifier'].to_s.split('::').last.to_s
    return '' if class_name.empty?

    "#{class_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase}s"
  end

  # Prefer the precomputed 'column_count'; fall back to counting 'columns'.
  #
  # @return [Integer]
  def column_count(metadata)
    metadata['column_count'] || (metadata['columns'] || []).size
  end

  # Extract the first contiguous block of `#` comment lines from the source
  # and join it into a single line.
  #
  # Shebang and magic comments that precede the block are skipped so that a
  # file starting with `# frozen_string_literal: true` does not get that
  # pragma reported as its description.
  #
  # @param source_code [String, nil]
  # @return [String] The description, or '' when there is none
  def extract_description(source_code)
    return '' unless source_code

    comment_lines = []
    source_code.each_line do |line|
      stripped = line.strip
      # Pragmas are only ignored ahead of the description, never inside it.
      next if comment_lines.empty? && stripped.match?(MAGIC_COMMENT_PATTERN)

      if stripped.start_with?('#')
        comment_lines << stripped.sub(/^#\s?/, '')
      elsif comment_lines.any?
        break
      end
    end

    comment_lines.join(' ').strip
  end

  # @param associations [Array<Hash>, nil]
  # @return [String] One association per line, or 'None'
  def format_associations(associations)
    format_list(associations) { |items| items.map { |a| format_single_association(a) }.join("\n") }
  end

  # Render one association in a macro-like form, e.g.
  # "has_many :posts, through: :memberships".
  #
  # @return [String]
  def format_single_association(assoc)
    parts = ["#{assoc['type']} :#{assoc['name']}"]
    parts << "through: :#{assoc['through']}" if assoc['through']
    parts << "class_name: '#{assoc['class_name']}'" if assoc['class_name']
    parts << "foreign_key: :#{assoc['foreign_key']}" if assoc['foreign_key']
    parts.join(', ')
  end

  # Group validations by attribute: one "attribute: type, type" line each.
  #
  # @param validations [Array<Hash>, nil]
  # @return [String]
  def format_validations(validations)
    format_list(validations) do |items|
      items.group_by { |v| v['attribute'] }.map do |attr, vals|
        "#{attr}: #{vals.map { |v| v['type'] }.join(', ')}"
      end.join("\n")
    end
  end

  # @param callbacks [Array<Hash>, nil]
  # @return [String] One callback per line, or 'None'
  def format_callbacks(callbacks)
    format_list(callbacks) { |items| items.map { |callback| format_single_callback(callback) }.join("\n") }
  end

  # Render "type: filter", followed by the side effects in parentheses when
  # any are recorded.
  #
  # @return [String]
  def format_single_callback(callback)
    parts = ["#{callback['type']}: #{callback['filter']}"]
    effects = callback_side_effects(callback['side_effects'])
    parts << "(#{effects.join('; ')})" if effects.any?
    parts.join(' ')
  end

  # Describe enqueued jobs and called services of a callback.
  #
  # @param side_effects [Hash, nil]
  # @return [Array<String>]
  def callback_side_effects(side_effects)
    return [] unless side_effects

    effects = []
    jobs = side_effects['jobs_enqueued']
    effects << "enqueues #{jobs.join(', ')}" if jobs&.any?
    services = side_effects['services_called']
    effects << "calls #{services.join(', ')}" if services&.any?
    effects
  end

  # @param scopes [Array<Hash>, nil]
  # @return [String] Comma-separated scope names, or 'None'
  def format_scopes(scopes)
    format_list(scopes) { |items| items.map { |s| s['name'] }.join(', ') }
  end

  # @param dependencies [Array<Hash>, nil]
  # @return [String] Comma-separated "target (via kind)" entries, or 'None'
  def format_dependencies(dependencies)
    format_list(dependencies) { |items| items.map { |dep| "#{dep['target']} (via #{dep['via']})" }.join(', ') }
  end

  # Build a Notion title property.
  #
  # @param text [String]
  # @return [Hash]
  def title_property(text)
    { title: [{ text: { content: text } }] }
  end

  # Return 'None' for nil/empty lists; otherwise yield items to a formatting block.
  #
  # @param items [Array, nil]
  # @return [String]
  def format_list(items)
    return 'None' if items.nil? || items.empty?

    yield items
  end
end
159
+ end
160
+ end
161
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Notion
5
+ module Mappers
6
# Shared helpers for Notion mapper classes.
module Shared
  # Longest content string this module will put into a rich_text item.
  MAX_RICH_TEXT_LENGTH = 2000

  # Marker appended to content that had to be cut.
  TRUNCATION_SUFFIX = '...'

  # Build a Notion rich_text property, truncating to API limits.
  #
  # Content longer than MAX_RICH_TEXT_LENGTH is cut so that, including the
  # truncation suffix, it is exactly MAX_RICH_TEXT_LENGTH characters long.
  # The cut-off is derived from the constants rather than hard-coded, so the
  # two cannot drift apart.
  #
  # @param text [String, nil, #to_s] Text to wrap; converted with #to_s
  # @return [Hash]
  def rich_text_property(text)
    content = text.to_s
    if content.length > MAX_RICH_TEXT_LENGTH
      kept = MAX_RICH_TEXT_LENGTH - TRUNCATION_SUFFIX.length
      content = "#{content[0, kept]}#{TRUNCATION_SUFFIX}"
    end
    { rich_text: [{ text: { content: content } }] }
  end
end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Notion
5
# Rate limiter for Notion API calls (3 requests/second by default).
#
# Guarantees a minimum interval between the starts of consecutive calls by
# sleeping when a call arrives too early. The interval bookkeeping is guarded
# by a Mutex so it can be shared between threads; the throttled block itself
# runs after the lock has been released.
#
# @example
#   limiter = RateLimiter.new(requests_per_second: 3)
#   limiter.throttle { client.create_page(...) }
#   limiter.throttle { client.update_page(...) }
#
class RateLimiter
  # @param requests_per_second [Numeric] Maximum requests per second (default: 3)
  # @raise [ArgumentError] if requests_per_second is not a positive Numeric
  def initialize(requests_per_second: 3)
    acceptable = requests_per_second.is_a?(Numeric) && requests_per_second.positive?
    raise ArgumentError, "requests_per_second must be positive, got #{requests_per_second.inspect}" unless acceptable

    @mutex = Mutex.new
    @last_request_at = nil
    @min_interval = 1.0 / requests_per_second
  end

  # Wait until the minimum interval has passed, then run the block.
  #
  # The wait and the timestamp update happen while holding the mutex, so
  # concurrent callers are spaced out one after another. The block is
  # yielded to outside the lock.
  #
  # @yield The block to execute after rate limiting
  # @return [Object] The block's return value
  # @raise [ArgumentError] if no block is given
  def throttle
    raise ArgumentError, 'block required' unless block_given?

    @mutex.synchronize do
      wait_for_interval
      @last_request_at = monotonic_now
    end

    yield
  end

  private

  # Sleep for whatever part of the minimum interval is still outstanding.
  # Does nothing before the first request.
  #
  # @return [void]
  def wait_for_interval
    previous = @last_request_at
    return if previous.nil?

    outstanding = @min_interval - (monotonic_now - previous)
    sleep(outstanding) if outstanding.positive?
  end

  # Read the monotonic clock, which is unaffected by wall-clock adjustments.
  #
  # @return [Float] Current monotonic time in seconds
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
67
+ end
68
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Observability
5
# Reports the health of the configured Woods components.
#
# The vector store and metadata store are probed by calling #count on them;
# the embedding provider is only inspected for the methods it responds to.
# A component passed as nil is reported as :not_configured, which does not
# make the overall result unhealthy.
#
# @example
#   check = HealthCheck.new(
#     vector_store: vector_store,
#     metadata_store: metadata_store,
#     embedding_provider: provider
#   )
#   status = check.run
#   status.healthy?    # => true
#   status.components  # => { vector_store: :ok, metadata_store: :ok, embedding_provider: :ok }
#
class HealthCheck
  # Result of a health check: overall flag plus per-component statuses.
  HealthStatus = Struct.new(:healthy?, :components, keyword_init: true)

  # @param vector_store [Object, nil] Vector store adapter (must respond to #count)
  # @param metadata_store [Object, nil] Metadata store adapter (must respond to #count)
  # @param embedding_provider [Object, nil] Embedding provider (checked for #embed and #dimensions)
  def initialize(vector_store: nil, metadata_store: nil, embedding_provider: nil)
    @vector_store = vector_store
    @metadata_store = metadata_store
    @embedding_provider = embedding_provider
  end

  # Probe every component and summarize the outcome.
  #
  # The result is healthy when each component is either :ok or :not_configured.
  #
  # @return [HealthStatus] Result with healthy? flag and per-component status
  def run
    statuses = {}
    statuses[:vector_store] = probe_store(@vector_store)
    statuses[:metadata_store] = probe_store(@metadata_store)
    statuses[:embedding_provider] = probe_provider(@embedding_provider)

    failing = statuses.values - %i[ok not_configured]
    HealthStatus.new(healthy?: failing.empty?, components: statuses)
  end

  private

  # Probe a store by calling #count; any StandardError counts as a failure.
  #
  # @param store [Object, nil] Store adapter
  # @return [Symbol] :ok, :error, or :not_configured
  def probe_store(store)
    if store.nil?
      :not_configured
    else
      store.count
      :ok
    end
  rescue StandardError
    :error
  end

  # Check that the provider exposes #embed and #dimensions. Nothing is
  # called on the provider, so no request is issued by this probe.
  #
  # @param provider [Object, nil] Embedding provider
  # @return [Symbol] :ok, :error, or :not_configured
  def probe_provider(provider)
    return :not_configured if provider.nil?

    %i[embed dimensions].all? { |name| provider.respond_to?(name) } ? :ok : :error
  end
end
78
+ end
79
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Observability
5
# Thin instrumentation facade: events go through
# ActiveSupport::Notifications when that constant is defined, and the block
# is simply run otherwise.
#
# @example
#   Instrumentation.instrument('woods.extraction', unit: 'User') do
#     extract_unit(user_model)
#   end
#
module Instrumentation
  module_function

  # Run a block as a named, instrumented event.
  #
  # With ActiveSupport::Notifications defined, the call is forwarded to
  # ActiveSupport::Notifications.instrument unchanged. Without it, the block
  # (if any) is yielded the payload and its value is returned; with no block
  # the result is nil.
  #
  # @param event [String] Event name (e.g., 'woods.extraction')
  # @param payload [Hash] Additional data to include with the event
  # @yield [payload] The block to instrument
  # @return [Object] The return value of the block
  def instrument(event, payload = {}, &block)
    unless defined?(ActiveSupport::Notifications)
      return block ? yield(payload) : nil
    end

    ActiveSupport::Notifications.instrument(event, payload, &block)
  end
end
33
+ end
34
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'time'
5
+
6
+ module Woods
7
+ module Observability
8
# Logger that emits one JSON object per line.
#
# Every entry carries a UTC ISO 8601 timestamp, the level, the event name
# and whatever extra data was passed as keyword arguments. Extra data is
# merged last, so a keyword named like a built-in field replaces it.
#
# @example
#   logger = StructuredLogger.new(output: $stderr)
#   logger.info('extraction.complete', units: 42, duration_ms: 1200)
#   # => {"timestamp":"2026-02-15T12:00:00Z","level":"info",
#   #     "event":"extraction.complete","units":42,"duration_ms":1200}
#
class StructuredLogger
  # @param output [IO] Output stream (default: $stderr)
  def initialize(output: $stderr)
    @output = output
  end

  # Log at info level.
  #
  # @param event [String] Event name
  # @param data [Hash] Additional structured data
  def info(event, **data)
    write_entry('info', event, data)
  end

  # Log at warn level.
  #
  # @param event [String] Event name
  # @param data [Hash] Additional structured data
  def warn(event, **data)
    write_entry('warn', event, data)
  end

  # Log at error level.
  #
  # @param event [String] Event name
  # @param data [Hash] Additional structured data
  def error(event, **data)
    write_entry('error', event, data)
  end

  # Log at debug level.
  #
  # @param event [String] Event name
  # @param data [Hash] Additional structured data
  def debug(event, **data)
    write_entry('debug', event, data)
  end

  private

  # Serialize one entry and write it as a single line to the output.
  #
  # @param level [String] Log level
  # @param event [String] Event name
  # @param data [Hash] Additional data
  def write_entry(level, event, data)
    base = { timestamp: Time.now.utc.iso8601, level: level, event: event }
    @output.puts(JSON.generate(base.merge(data)))
  end
end
56
+ end
57
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module Operator
5
# Sorts pipeline errors into severities and proposes a remediation.
#
# Classification is pattern based: each pattern is matched against the
# string "<error class> <error message>". Transient patterns are consulted
# before permanent ones, and the first match wins.
#
# @example
#   escalator = ErrorEscalator.new
#   result = escalator.classify(Timeout::Error.new("connection timed out"))
#   result[:severity]     # => :transient
#   result[:remediation]  # => "Retry after a short delay"
#
class ErrorEscalator
  # Errors that are expected to go away on retry.
  TRANSIENT_PATTERNS = [
    { class_pattern: /Timeout|ETIMEDOUT/,
      category: 'timeout',
      remediation: 'Retry after a short delay' },
    { class_pattern: /Net::/,
      category: 'network',
      remediation: 'Check network connectivity and retry' },
    { class_pattern: /RateLimited|429/,
      category: 'rate_limit',
      remediation: 'Back off and retry with exponential delay' },
    { class_pattern: /CircuitOpenError/,
      category: 'circuit_open',
      remediation: 'Wait for circuit breaker reset timeout' },
    { class_pattern: /ConnectionPool|Busy/,
      category: 'resource_contention',
      remediation: 'Wait for resources to free up' }
  ].freeze

  # Errors that need a change before a retry can succeed.
  PERMANENT_PATTERNS = [
    { class_pattern: /NameError|NoMethodError/,
      category: 'code_error',
      remediation: 'Fix the code error and re-extract' },
    { class_pattern: /Errno::ENOENT|FileNotFoundError/,
      category: 'missing_file',
      remediation: 'Verify file paths and re-run extraction' },
    { class_pattern: /JSON::ParserError/,
      category: 'corrupt_data',
      remediation: 'Clean index and re-extract' },
    { class_pattern: /ConfigurationError/,
      category: 'configuration',
      remediation: 'Review Woods configuration' },
    { class_pattern: /ExtractionError/,
      category: 'extraction_failure',
      remediation: 'Check extraction logs for specific failure details' }
  ].freeze

  # Classify an error by severity and suggest remediation.
  #
  # Errors matching no pattern get severity :unknown and category
  # 'unclassified'.
  #
  # @param error [StandardError] The error to classify
  # @return [Hash] :severity (:transient, :permanent or :unknown), :category,
  #   :remediation, :error_class, :message
  def classify(error)
    haystack = "#{error.class} #{error.message}"

    verdict = find_match(haystack, TRANSIENT_PATTERNS, :transient) ||
              find_match(haystack, PERMANENT_PATTERNS, :permanent) ||
              {
                severity: :unknown,
                category: 'unclassified',
                remediation: 'Investigate error details and check logs'
              }

    verdict.merge(error_class: error.class.name, message: error.message)
  end

  private

  # Return the classification for the first pattern that matches, if any.
  #
  # @param error_string [String]
  # @param patterns [Array<Hash>]
  # @param severity [Symbol]
  # @return [Hash, nil]
  def find_match(error_string, patterns, severity)
    hit = patterns.find { |candidate| error_string.match?(candidate[:class_pattern]) }
    return nil unless hit

    { severity: severity, category: hit[:category], remediation: hit[:remediation] }
  end
end
80
+ end
81
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'fileutils'
5
+ require 'time'
6
+
7
+ module Woods
8
+ module Operator
9
# Rate limiter for pipeline operations using file-based state.
#
# Enforces a cooldown between consecutive runs of the same operation
# to prevent accidental repeated extraction or embedding.
#
# State lives in `pipeline_guard.json` inside the state directory as a JSON
# object mapping operation names to ISO 8601 timestamps. Writers hold an
# exclusive file lock and readers a shared one, so a reader never observes a
# half-rewritten file. Unreadable or malformed state is treated as empty.
#
# @example
#   guard = PipelineGuard.new(state_dir: '/tmp', cooldown: 300)
#   if guard.allow?(:extraction)
#     run_extraction
#     guard.record!(:extraction)
#   end
#
class PipelineGuard
  # @param state_dir [String] Directory for persisting state
  # @param cooldown [Integer] Minimum seconds between runs
  def initialize(state_dir:, cooldown: 300)
    @state_dir = state_dir
    @cooldown = cooldown
    @state_path = File.join(state_dir, 'pipeline_guard.json')
  end

  # Check if an operation is allowed (cooldown elapsed).
  #
  # An operation with no usable recorded run is always allowed.
  #
  # @param operation [Symbol, String] Operation name
  # @return [Boolean]
  def allow?(operation)
    last = last_run(operation)
    return true if last.nil?

    (Time.now - last) >= @cooldown
  end

  # Record that an operation has just run.
  #
  # Creates the state directory and file when missing. The read-modify-write
  # cycle happens under an exclusive lock on the state file.
  #
  # @param operation [Symbol, String] Operation name
  # @return [void]
  def record!(operation)
    FileUtils.mkdir_p(@state_dir)
    File.open(@state_path, File::RDWR | File::CREAT) do |f|
      f.flock(File::LOCK_EX)
      state = parse_state(f.read)
      state[operation.to_s] = Time.now.iso8601
      f.rewind
      f.write(JSON.generate(state))
      # Drop leftovers of a previous, longer document.
      f.truncate(f.pos)
    end
  end

  # Get the last run time for an operation.
  #
  # @param operation [Symbol, String] Operation name
  # @return [Time, nil] nil when nothing is recorded or the stored value is
  #   not a parseable timestamp string
  def last_run(operation)
    timestamp = read_state[operation.to_s]
    # Time.parse raises TypeError for non-strings; such values are unusable.
    return nil unless timestamp.is_a?(String)

    Time.parse(timestamp)
  rescue ArgumentError
    nil
  end

  private

  # Load the persisted state under a shared lock, so a concurrent record!
  # (which holds the exclusive lock while rewriting) cannot be observed
  # midway. A missing file yields empty state.
  #
  # @return [Hash]
  def read_state
    File.open(@state_path, File::RDONLY) do |f|
      f.flock(File::LOCK_SH)
      parse_state(f.read)
    end
  rescue Errno::ENOENT
    {}
  end

  # Decode state file content. Empty, unparseable, or non-object content is
  # treated as "no state" so one bad file cannot wedge the guard.
  #
  # @param content [String] Raw file content
  # @return [Hash]
  def parse_state(content)
    return {} if content.empty?

    parsed = JSON.parse(content)
    parsed.is_a?(Hash) ? parsed : {}
  rescue StandardError
    {}
  end
end
91
+ end
92
+ end