codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,164 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Notion
    module Mappers
      # Maps a model ExtractedUnit to Notion page properties for the Data Models database.
      #
      # Transforms model metadata (associations, validations, callbacks, scopes, git data)
      # into the nested hash format used for Notion page properties.
      #
      # @example
      #   mapper = ModelMapper.new
      #   properties = mapper.map(unit_data)
      #   client.create_page(database_id: db_id, properties: properties)
      #
      class ModelMapper
        # Longest content string this mapper places in a single rich_text property.
        MAX_RICH_TEXT_LENGTH = 2000

        # Marker appended to content that had to be shortened.
        TRUNCATION_MARKER = '...'

        # Ruby magic comments (and Sorbet's `# typed:` sigil) that may precede the
        # real documentation comment and must not be mistaken for a description.
        MAGIC_COMMENT =
          /\A#\s*(?:-\*-|(?:frozen_string_literal|encoding|coding|warn_indent|shareable_constant_value|typed)\s*:)/i

        private_constant :TRUNCATION_MARKER, :MAGIC_COMMENT

        # Map a model unit to Notion Data Models page properties.
        #
        # @param unit_data [Hash] Parsed model ExtractedUnit JSON (string keys)
        # @return [Hash] Notion page properties hash
        def map(unit_data)
          metadata = unit_data['metadata'] || {}
          properties = build_text_properties(unit_data, metadata)
          properties['Column Count'] = { number: column_count(metadata) }
          add_git_properties(properties, metadata['git'] || {})
          properties
        end

        private

        # Build the title and rich_text properties.
        #
        # @param unit_data [Hash] Parsed unit
        # @param metadata [Hash] The unit's 'metadata' hash (possibly empty)
        # @return [Hash] Text-based Notion properties
        def build_text_properties(unit_data, metadata)
          {
            'Table Name' => title_property(table_name(unit_data, metadata)),
            'Model Name' => rich_text_property(unit_data['identifier']),
            'Description' => rich_text_property(extract_description(unit_data['source_code'])),
            'Associations' => rich_text_property(format_associations(metadata['associations'])),
            'Validations' => rich_text_property(format_validations(metadata['validations'])),
            'Callbacks' => rich_text_property(format_callbacks(metadata['callbacks'])),
            'Scopes' => rich_text_property(format_scopes(metadata['scopes'])),
            'File Path' => rich_text_property(unit_data['file_path'] || ''),
            'Dependencies' => rich_text_property(format_dependencies(unit_data['dependencies']))
          }
        end

        # Add date/select properties for whichever git fields are present.
        #
        # @param properties [Hash] Properties hash, mutated in place
        # @param git [Hash] Git metadata (possibly empty)
        # @return [void]
        def add_git_properties(properties, git)
          properties['Last Modified'] = { date: { start: git['last_modified'] } } if git['last_modified']
          properties['Change Frequency'] = { select: { name: git['change_frequency'] } } if git['change_frequency']
        end

        # Table name from metadata, or a best-effort guess derived from the identifier.
        #
        # The guess underscores the last namespace segment and appends "s"; it does
        # not handle irregular plurals. A missing/blank identifier yields an empty
        # string rather than a bare "s".
        #
        # @return [String]
        def table_name(unit_data, metadata)
          return metadata['table_name'] if metadata['table_name']

          base = unit_data['identifier'].to_s.split('::').last.to_s
          return '' if base.empty?

          "#{underscore(base)}s"
        end

        # Convert a CamelCase constant name to snake_case, splitting acronym and
        # digit boundaries too (e.g. "HTTPRequest" => "http_request").
        #
        # @param name [String]
        # @return [String]
        def underscore(name)
          name.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
              .gsub(/([a-z\d])([A-Z])/, '\1_\2')
              .downcase
        end

        # @return [Integer] Explicit 'column_count' or the size of 'columns'
        def column_count(metadata)
          metadata['column_count'] || (metadata['columns'] || []).size
        end

        # Extract the first contiguous block of comment lines as a description.
        #
        # Magic comments that appear before any real comment text (for example
        # `# frozen_string_literal: true`) are skipped so they are not reported
        # as the model's description.
        #
        # @param source_code [String, nil]
        # @return [String] Comment text joined with spaces, or '' when none found
        def extract_description(source_code)
          return '' unless source_code

          comment_lines = []
          source_code.each_line do |line|
            stripped = line.strip
            if stripped.start_with?('#')
              next if comment_lines.empty? && stripped.match?(MAGIC_COMMENT)

              comment_lines << stripped.sub(/\A#\s?/, '')
            elsif comment_lines.any?
              # First non-comment line after the block ends the description.
              break
            end
          end

          comment_lines.join(' ').strip
        end

        # @return [String] One association per line, or 'None'
        def format_associations(associations)
          return 'None' if associations.nil? || associations.empty?

          associations.map { |a| format_single_association(a) }.join("\n")
        end

        # @return [String] e.g. "has_many :posts, class_name: 'Post'"
        def format_single_association(assoc)
          parts = ["#{assoc['type']} :#{assoc['name']}"]
          parts << "through: :#{assoc['through']}" if assoc['through']
          parts << "class_name: '#{assoc['class_name']}'" if assoc['class_name']
          parts << "foreign_key: :#{assoc['foreign_key']}" if assoc['foreign_key']
          parts.join(', ')
        end

        # @return [String] One "attribute: type, type" line per attribute, or 'None'
        def format_validations(validations)
          return 'None' if validations.nil? || validations.empty?

          validations.group_by { |v| v['attribute'] }.map do |attr, vals|
            "#{attr}: #{vals.map { |v| v['type'] }.join(', ')}"
          end.join("\n")
        end

        # @return [String] One callback per line, or 'None'
        def format_callbacks(callbacks)
          return 'None' if callbacks.nil? || callbacks.empty?

          callbacks.map { |callback| format_single_callback(callback) }.join("\n")
        end

        # @return [String] "type: filter" plus a parenthesised side-effect summary when present
        def format_single_callback(callback)
          parts = ["#{callback['type']}: #{callback['filter']}"]
          effects = callback_side_effects(callback['side_effects'])
          parts << "(#{effects.join('; ')})" if effects.any?
          parts.join(' ')
        end

        # @param side_effects [Hash, nil] May contain 'jobs_enqueued' and 'services_called' arrays
        # @return [Array<String>]
        def callback_side_effects(side_effects)
          return [] unless side_effects

          effects = []
          jobs = side_effects['jobs_enqueued']
          effects << "enqueues #{jobs.join(', ')}" if jobs&.any?
          services = side_effects['services_called']
          effects << "calls #{services.join(', ')}" if services&.any?
          effects
        end

        # @return [String] Comma-separated scope names, or 'None'
        def format_scopes(scopes)
          return 'None' if scopes.nil? || scopes.empty?

          scopes.map { |s| s['name'] }.join(', ')
        end

        # @return [String] Comma-separated "target (via kind)" entries, or 'None'
        def format_dependencies(dependencies)
          return 'None' if dependencies.nil? || dependencies.empty?

          dependencies.map { |dep| "#{dep['target']} (via #{dep['via']})" }.join(', ')
        end

        # @param text [String]
        # @return [Hash] Notion title property
        def title_property(text)
          { title: [{ text: { content: text } }] }
        end

        # Wrap text as a rich_text property, shortening it to MAX_RICH_TEXT_LENGTH
        # characters (marker included) when it is too long.
        #
        # @param text [#to_s]
        # @return [Hash] Notion rich_text property
        def rich_text_property(text)
          content = text.to_s
          if content.length > MAX_RICH_TEXT_LENGTH
            keep = MAX_RICH_TEXT_LENGTH - TRUNCATION_MARKER.length
            content = "#{content[0, keep]}#{TRUNCATION_MARKER}"
          end
          { rich_text: [{ text: { content: content } }] }
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Notion
    # Thread-safe rate limiter for Notion API (3 requests/second default).
    #
    # Enforces a minimum interval between the start of consecutive calls by
    # sleeping when necessary. A Mutex serialises the wait, so concurrent
    # callers queue up and are released one interval apart.
    #
    # @example
    #   limiter = RateLimiter.new(requests_per_second: 3)
    #   limiter.throttle { client.create_page(...) }
    #   limiter.throttle { client.update_page(...) }
    #
    class RateLimiter
      # @param requests_per_second [Numeric] Maximum requests per second (default: 3)
      # @raise [ArgumentError] if requests_per_second is not a positive real number
      def initialize(requests_per_second: 3)
        unless valid_rate?(requests_per_second)
          raise ArgumentError, "requests_per_second must be positive, got #{requests_per_second.inspect}"
        end

        @min_interval = 1.0 / requests_per_second
        @last_request_at = nil
        @mutex = Mutex.new
      end

      # Execute a block after enforcing the rate limit.
      #
      # Sleeps (while holding the mutex) if the minimum interval since the last
      # request hasn't elapsed. The block itself runs after the mutex has been
      # released, so only the start of each request is spaced out; blocks from
      # different threads may overlap.
      #
      # @yield The block to execute after rate limiting
      # @return [Object] The block's return value
      # @raise [ArgumentError] if no block is given
      def throttle
        raise ArgumentError, 'block required' unless block_given?

        @mutex.synchronize do
          wait_for_interval
          @last_request_at = monotonic_now
        end

        yield
      end

      private

      # Whether the given rate can be turned into a positive interval.
      #
      # `real?` is checked before `positive?` because non-real Numerics
      # (Complex) do not implement `positive?`.
      #
      # @param rate [Object]
      # @return [Boolean]
      def valid_rate?(rate)
        rate.is_a?(Numeric) && rate.real? && rate.positive?
      end

      # Sleep if minimum interval hasn't elapsed since last request.
      #
      # @return [void]
      def wait_for_interval
        return unless @last_request_at

        elapsed = monotonic_now - @last_request_at
        remaining = @min_interval - elapsed
        sleep(remaining) if remaining.positive?
      end

      # Monotonic clock for accurate interval measurement.
      #
      # @return [Float] Current monotonic time in seconds
      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Observability
    # Runs lightweight probes against the configured components and summarises
    # the outcome.
    #
    # The vector store and metadata store are probed by calling #count; the
    # embedding provider is only inspected for the methods it responds to.
    # A component passed as nil is reported as :not_configured and never makes
    # the overall result unhealthy.
    #
    # @example
    #   check = HealthCheck.new(
    #     vector_store: vector_store,
    #     metadata_store: metadata_store,
    #     embedding_provider: provider
    #   )
    #   status = check.run
    #   status.healthy?    # => true
    #   status.components  # => { vector_store: :ok, metadata_store: :ok, embedding_provider: :ok }
    #
    class HealthCheck
      # Result value: overall flag plus the per-component status map.
      HealthStatus = Struct.new(:healthy?, :components, keyword_init: true)

      # @param vector_store [Object, nil] Vector store adapter (probed via #count)
      # @param metadata_store [Object, nil] Metadata store adapter (probed via #count)
      # @param embedding_provider [Object, nil] Embedding provider (must respond to #embed and #dimensions)
      def initialize(vector_store: nil, metadata_store: nil, embedding_provider: nil)
        @vector_store = vector_store
        @metadata_store = metadata_store
        @embedding_provider = embedding_provider
      end

      # Probe every component and build the combined status.
      #
      # @return [HealthStatus] healthy? is true when no component reported :error
      def run
        components = {
          vector_store: probe_store(@vector_store),
          metadata_store: probe_store(@metadata_store),
          embedding_provider: probe_provider(@embedding_provider)
        }

        # Anything left after removing the acceptable statuses is a failure.
        failures = components.values - %i[ok not_configured]

        HealthStatus.new(healthy?: failures.empty?, components: components)
      end

      private

      # Check a store by invoking #count; any StandardError counts as a failure.
      #
      # @param store [Object, nil] Store adapter
      # @return [Symbol] :ok, :error, or :not_configured
      def probe_store(store)
        if store.nil?
          :not_configured
        else
          store.count
          :ok
        end
      rescue StandardError
        :error
      end

      # Check an embedding provider by interface only; #embed is never called.
      #
      # @param provider [Object, nil] Embedding provider
      # @return [Symbol] :ok, :error, or :not_configured
      def probe_provider(provider)
        return :not_configured if provider.nil?

        capable = %i[embed dimensions].all? { |name| provider.respond_to?(name) }
        capable ? :ok : :error
      rescue StandardError
        :error
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Observability
    # Thin instrumentation facade: forwards to ActiveSupport::Notifications
    # when that constant is defined, otherwise just runs the block.
    #
    # @example
    #   Instrumentation.instrument('codebase_index.extraction', unit: 'User') do
    #     extract_unit(user_model)
    #   end
    #
    module Instrumentation
      module_function

      # Run a block under an event name with an attached payload.
      #
      # With ActiveSupport::Notifications loaded, the call is delegated to its
      # #instrument method. Without it, the block (if any) is yielded the
      # payload directly; when no block is given the result is nil.
      #
      # @param event [String] Event name (e.g., 'codebase_index.extraction')
      # @param payload [Hash] Additional data to include with the event
      # @yield [payload] The block to instrument
      # @return [Object] The return value of the block
      def instrument(event, payload = {}, &block)
        unless defined?(ActiveSupport::Notifications)
          return block ? yield(payload) : nil
        end

        ActiveSupport::Notifications.instrument(event, payload, &block)
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'time'
5
+
6
module CodebaseIndex
  module Observability
    # Line-oriented JSON logger: every call emits exactly one JSON object
    # followed by a newline.
    #
    # An entry carries a UTC ISO 8601 timestamp, the level, the event name,
    # and whatever keyword arguments the caller supplied.
    #
    # @example
    #   logger = StructuredLogger.new(output: $stderr)
    #   logger.info('extraction.complete', units: 42, duration_ms: 1200)
    #   # => {"timestamp":"2026-02-15T12:00:00Z","level":"info",
    #   #     "event":"extraction.complete","units":42,"duration_ms":1200}
    #
    class StructuredLogger
      # @param output [IO] Stream that receives the log lines (default: $stderr)
      def initialize(output: $stderr)
        @output = output
      end

      # Emit an entry with level "info".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def info(event, **data)
        write_entry('info', event, data)
      end

      # Emit an entry with level "warn".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def warn(event, **data)
        write_entry('warn', event, data)
      end

      # Emit an entry with level "error".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def error(event, **data)
        write_entry('error', event, data)
      end

      # Emit an entry with level "debug".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def debug(event, **data)
        write_entry('debug', event, data)
      end

      private

      # Serialise one entry and write it as a single line.
      #
      # Caller data is merged last, so a key named like a standard field
      # replaces that field's value.
      #
      # @param level [String] Log level
      # @param event [String] Event name
      # @param data [Hash] Additional data
      def write_entry(level, event, data)
        record = { timestamp: Time.now.utc.iso8601, level: level, event: event }
        record.update(data)
        line = JSON.generate(record)
        @output.puts(line)
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module CodebaseIndex
  module Operator
    # Classifies pipeline errors by severity and suggests remediation.
    #
    # Classification is driven by the error's class name first. The message is
    # only consulted when the class name alone matches no pattern, so incidental
    # words in a message (a file path containing "Busy", a NoMethodError that
    # mentions "Net::HTTP") cannot override what the class already says.
    #
    # @example
    #   escalator = ErrorEscalator.new
    #   result = escalator.classify(Timeout::Error.new("connection timed out"))
    #   result[:severity]     # => :transient
    #   result[:remediation]  # => "Retry after a short delay"
    #
    class ErrorEscalator
      # Errors that are expected to succeed on retry. Checked before PERMANENT_PATTERNS.
      TRANSIENT_PATTERNS = [
        { class_pattern: /Timeout|ETIMEDOUT/, category: 'timeout', remediation: 'Retry after a short delay' },
        { class_pattern: /Net::/, category: 'network', remediation: 'Check network connectivity and retry' },
        { class_pattern: /RateLimited|429/, category: 'rate_limit',
          remediation: 'Back off and retry with exponential delay' },
        { class_pattern: /CircuitOpenError/, category: 'circuit_open',
          remediation: 'Wait for circuit breaker reset timeout' },
        { class_pattern: /ConnectionPool|Busy/, category: 'resource_contention',
          remediation: 'Wait for resources to free up' }
      ].freeze

      # Errors that need a code, data, or configuration change before retrying.
      PERMANENT_PATTERNS = [
        { class_pattern: /NameError|NoMethodError/, category: 'code_error',
          remediation: 'Fix the code error and re-extract' },
        { class_pattern: /Errno::ENOENT|FileNotFoundError/, category: 'missing_file',
          remediation: 'Verify file paths and re-run extraction' },
        { class_pattern: /JSON::ParserError/, category: 'corrupt_data',
          remediation: 'Clean index and re-extract' },
        { class_pattern: /ConfigurationError/, category: 'configuration',
          remediation: 'Review CodebaseIndex configuration' },
        { class_pattern: /ExtractionError/, category: 'extraction_failure',
          remediation: 'Check extraction logs for specific failure details' }
      ].freeze

      # Classify an error by severity and suggest remediation.
      #
      # @param error [StandardError] The error to classify
      # @return [Hash] :severity (:transient, :permanent, or :unknown), :category,
      #   :remediation, :error_class, :message
      def classify(error)
        class_name = error.class.to_s

        # Class name first; fall back to "Class message" for signals that only
        # appear in the text (e.g. "429", "ETIMEDOUT").
        match = match_any(class_name) || match_any("#{class_name} #{error.message}")
        match ||= {
          severity: :unknown,
          category: 'unclassified',
          remediation: 'Investigate error details and check logs'
        }

        match.merge(error_class: error.class.name, message: error.message)
      end

      private

      # Try the transient patterns, then the permanent ones.
      #
      # @param text [String]
      # @return [Hash, nil]
      def match_any(text)
        find_match(text, TRANSIENT_PATTERNS, :transient) ||
          find_match(text, PERMANENT_PATTERNS, :permanent)
      end

      # Return the classification for the first pattern matching the string.
      #
      # @param error_string [String]
      # @param patterns [Array<Hash>]
      # @param severity [Symbol]
      # @return [Hash, nil]
      def find_match(error_string, patterns, severity)
        pattern = patterns.find { |candidate| error_string.match?(candidate[:class_pattern]) }
        return nil unless pattern

        {
          severity: severity,
          category: pattern[:category],
          remediation: pattern[:remediation]
        }
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'fileutils'
5
+ require 'time'
6
+
7
module CodebaseIndex
  module Operator
    # Rate limiter for pipeline operations using file-based state.
    #
    # Enforces a cooldown between consecutive runs of the same operation
    # to prevent accidental repeated extraction or embedding. State is a JSON
    # object (operation name => ISO 8601 timestamp) stored in
    # `pipeline_guard.json` inside the state directory. Unreadable or malformed
    # state is treated as "never run" so a corrupt file cannot block the pipeline.
    #
    # @example
    #   guard = PipelineGuard.new(state_dir: '/tmp', cooldown: 300)
    #   if guard.allow?(:extraction)
    #     run_extraction
    #     guard.record!(:extraction)
    #   end
    #
    class PipelineGuard
      # @param state_dir [String] Directory for persisting state
      # @param cooldown [Numeric] Minimum seconds between runs
      def initialize(state_dir:, cooldown: 300)
        @state_dir = state_dir
        @cooldown = cooldown
        @state_path = File.join(state_dir, 'pipeline_guard.json')
      end

      # Check if an operation is allowed (cooldown elapsed or never recorded).
      #
      # @param operation [Symbol, String] Operation name
      # @return [Boolean]
      def allow?(operation)
        last = last_run(operation)
        return true if last.nil?

        (Time.now - last) >= @cooldown
      end

      # Record that an operation has just run.
      #
      # The read-modify-write cycle happens under an exclusive file lock so
      # concurrent recorders do not lose each other's entries.
      #
      # @param operation [Symbol, String] Operation name
      # @return [void]
      def record!(operation)
        FileUtils.mkdir_p(@state_dir)
        File.open(@state_path, File::RDWR | File::CREAT) do |file|
          file.flock(File::LOCK_EX)
          state = parse_state(file.read)
          state[operation.to_s] = Time.now.iso8601
          # Rewrite in place, then cut off any leftover bytes from a longer previous state.
          file.rewind
          file.write(JSON.generate(state))
          file.truncate(file.pos)
        end
      end

      # Get the last run time for an operation.
      #
      # @param operation [Symbol, String] Operation name
      # @return [Time, nil] nil when never recorded or the stored value is not a parseable timestamp
      def last_run(operation)
        timestamp = read_state[operation.to_s]
        return nil unless timestamp.is_a?(String)

        Time.parse(timestamp)
      rescue ArgumentError
        nil
      end

      private

      # Load the persisted state under a shared lock, so a concurrent record!
      # (which holds an exclusive lock while rewriting) is never observed half-written.
      #
      # @return [Hash] Empty when the state file does not exist
      def read_state
        File.open(@state_path, File::RDONLY) do |file|
          file.flock(File::LOCK_SH)
          parse_state(file.read)
        end
      rescue Errno::ENOENT
        {}
      end

      # Decode raw file content into a state hash.
      #
      # Deliberately lenient: empty content, invalid JSON, or JSON whose top
      # level is not an object all yield an empty hash.
      #
      # @param content [String]
      # @return [Hash]
      def parse_state(content)
        return {} if content.empty?

        parsed = JSON.parse(content)
        parsed.is_a?(Hash) ? parsed : {}
      rescue StandardError
        {}
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'time'
5
+
6
module CodebaseIndex
  module Operator
    # Reports pipeline status by reading extraction output metadata.
    #
    # The report is derived from `manifest.json` in the output directory. A
    # missing, unparseable, or non-object manifest is reported as
    # :not_extracted rather than raising.
    #
    # @example
    #   reporter = StatusReporter.new(output_dir: 'tmp/codebase_index')
    #   status = reporter.report
    #   status[:status]            # => :ok
    #   status[:staleness_seconds] # => 3600
    #
    class StatusReporter
      STALE_THRESHOLD = 86_400 # 24 hours

      # @param output_dir [String] Path to extraction output directory
      def initialize(output_dir:)
        @output_dir = output_dir
      end

      # Generate a pipeline status report.
      #
      # @return [Hash] Status report with :status (:ok, :stale, or :not_extracted),
      #   :extracted_at, :total_units, :counts, :git_sha, :git_branch, and
      #   :staleness_seconds (nil when not extracted, Float::INFINITY when the
      #   manifest has no usable timestamp)
      def report
        manifest = read_manifest
        return not_extracted_report if manifest.nil?

        staleness = compute_staleness(manifest['extracted_at'])

        {
          status: staleness < STALE_THRESHOLD ? :ok : :stale,
          extracted_at: manifest['extracted_at'],
          total_units: manifest['total_units'] || 0,
          counts: manifest['counts'] || {},
          git_sha: manifest['git_sha'],
          git_branch: manifest['git_branch'],
          staleness_seconds: staleness
        }
      end

      private

      # Read and decode the manifest.
      #
      # The file is read directly and a missing file is handled via the rescue,
      # which avoids a window between checking for existence and reading.
      #
      # @return [Hash, nil] nil when the manifest is absent, invalid JSON, or not a JSON object
      def read_manifest
        parsed = JSON.parse(File.read(File.join(@output_dir, 'manifest.json')))
        parsed.is_a?(Hash) ? parsed : nil
      rescue Errno::ENOENT, JSON::ParserError
        nil
      end

      # @return [Hash] Report used when no usable manifest exists
      def not_extracted_report
        {
          status: :not_extracted,
          extracted_at: nil,
          total_units: 0,
          counts: {},
          git_sha: nil,
          git_branch: nil,
          staleness_seconds: nil
        }
      end

      # Seconds elapsed since the extraction timestamp.
      #
      # @param extracted_at [String, nil] ISO8601 timestamp
      # @return [Numeric] Float::INFINITY when the value is missing, not a string, or unparseable
      def compute_staleness(extracted_at)
        return Float::INFINITY unless extracted_at.is_a?(String)

        Time.now - Time.parse(extracted_at)
      rescue ArgumentError
        Float::INFINITY
      end
    end
  end
end