codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Notion
5
- module Mappers
6
- # Extracts latest migration dates per table from migration ExtractedUnits.
7
- #
8
- # Used to update Data Models pages with the most recent schema change date.
9
- #
10
- # @example
11
- # mapper = MigrationMapper.new
12
- # changes = mapper.latest_changes(migration_units)
13
- # # => { "users" => "2026-02-20T10:00:00Z", "posts" => "2026-01-15T09:00:00Z" }
14
- #
15
- class MigrationMapper
16
- # Compute the latest migration date for each affected table.
17
- #
18
- # @param migration_units [Array<Hash>] Parsed migration ExtractedUnit JSONs
19
- # @return [Hash<String, String>] Table name to latest extracted_at timestamp
20
- def latest_changes(migration_units)
21
- migration_units.each_with_object({}) do |unit, changes|
22
- extracted_at = unit['extracted_at']
23
- next unless extracted_at
24
-
25
- tables = (unit['metadata'] || {})['tables_affected'] || []
26
- tables.each { |table| update_latest(changes, table, extracted_at) }
27
- end
28
- end
29
-
30
- private
31
-
32
- # @return [void]
33
- def update_latest(changes, table, extracted_at)
34
- changes[table] = extracted_at if changes[table].nil? || extracted_at > changes[table]
35
- end
36
- end
37
- end
38
- end
39
- end
@@ -1,161 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'shared'
4
-
5
- module CodebaseIndex
6
- module Notion
7
- module Mappers
8
- # Maps a model ExtractedUnit to Notion page properties for the Data Models database.
9
- #
10
- # Transforms model metadata (associations, validations, callbacks, scopes, git data)
11
- # into Notion API property format for the Data Models database.
12
- #
13
- # @example
14
- # mapper = ModelMapper.new
15
- # properties = mapper.map(unit_data)
16
- # client.create_page(database_id: db_id, properties: properties)
17
- #
18
- class ModelMapper
19
- include Shared
20
-
21
- # Map a model unit to Notion Data Models page properties.
22
- #
23
- # @param unit_data [Hash] Parsed model ExtractedUnit JSON
24
- # @return [Hash] Notion page properties hash
25
- def map(unit_data)
26
- metadata = unit_data['metadata'] || {}
27
- properties = build_text_properties(unit_data, metadata)
28
- properties['Column Count'] = { number: column_count(metadata) }
29
- add_git_properties(properties, metadata['git'] || {})
30
- properties
31
- end
32
-
33
- private
34
-
35
- # @return [Hash] Text-based Notion properties
36
- def build_text_properties(unit_data, metadata)
37
- {
38
- 'Table Name' => title_property(table_name(unit_data, metadata)),
39
- 'Model Name' => rich_text_property(unit_data['identifier']),
40
- 'Description' => rich_text_property(extract_description(unit_data['source_code'])),
41
- 'Associations' => rich_text_property(format_associations(metadata['associations'])),
42
- 'Validations' => rich_text_property(format_validations(metadata['validations'])),
43
- 'Callbacks' => rich_text_property(format_callbacks(metadata['callbacks'])),
44
- 'Scopes' => rich_text_property(format_scopes(metadata['scopes'])),
45
- 'File Path' => rich_text_property(unit_data['file_path'] || ''),
46
- 'Dependencies' => rich_text_property(format_dependencies(unit_data['dependencies']))
47
- }
48
- end
49
-
50
- # @return [void]
51
- def add_git_properties(properties, git)
52
- properties['Last Modified'] = { date: { start: git['last_modified'] } } if git['last_modified']
53
- properties['Change Frequency'] = { select: { name: git['change_frequency'] } } if git['change_frequency']
54
- end
55
-
56
- # @return [String]
57
- def table_name(unit_data, metadata)
58
- return metadata['table_name'] if metadata['table_name']
59
-
60
- identifier = unit_data['identifier'] || ''
61
- "#{identifier.split('::').last.to_s.gsub(/([a-z])([A-Z])/, '\1_\2').downcase}s"
62
- end
63
-
64
- # @return [Integer]
65
- def column_count(metadata)
66
- metadata['column_count'] || (metadata['columns'] || []).size
67
- end
68
-
69
- # @return [String]
70
- def extract_description(source_code)
71
- return '' unless source_code
72
-
73
- comment_lines = []
74
- source_code.lines.each do |line|
75
- stripped = line.strip
76
- if stripped.start_with?('#')
77
- comment_lines << stripped.sub(/^#\s?/, '')
78
- elsif comment_lines.any?
79
- break
80
- end
81
- end
82
-
83
- comment_lines.any? ? comment_lines.join(' ').strip : ''
84
- end
85
-
86
- # @return [String]
87
- def format_associations(associations)
88
- format_list(associations) { |items| items.map { |a| format_single_association(a) }.join("\n") }
89
- end
90
-
91
- # @return [String]
92
- def format_single_association(assoc)
93
- parts = ["#{assoc['type']} :#{assoc['name']}"]
94
- parts << "through: :#{assoc['through']}" if assoc['through']
95
- parts << "class_name: '#{assoc['class_name']}'" if assoc['class_name']
96
- parts << "foreign_key: :#{assoc['foreign_key']}" if assoc['foreign_key']
97
- parts.join(', ')
98
- end
99
-
100
- # @return [String]
101
- def format_validations(validations)
102
- format_list(validations) do |items|
103
- items.group_by { |v| v['attribute'] }.map do |attr, vals|
104
- "#{attr}: #{vals.map { |v| v['type'] }.join(', ')}"
105
- end.join("\n")
106
- end
107
- end
108
-
109
- # @return [String]
110
- def format_callbacks(callbacks)
111
- format_list(callbacks) { |items| items.map { |callback| format_single_callback(callback) }.join("\n") }
112
- end
113
-
114
- # @return [String]
115
- def format_single_callback(callback)
116
- parts = ["#{callback['type']}: #{callback['filter']}"]
117
- effects = callback_side_effects(callback['side_effects'])
118
- parts << "(#{effects.join('; ')})" if effects.any?
119
- parts.join(' ')
120
- end
121
-
122
- # @return [Array<String>]
123
- def callback_side_effects(side_effects)
124
- return [] unless side_effects
125
-
126
- effects = []
127
- jobs = side_effects['jobs_enqueued']
128
- effects << "enqueues #{jobs.join(', ')}" if jobs&.any?
129
- services = side_effects['services_called']
130
- effects << "calls #{services.join(', ')}" if services&.any?
131
- effects
132
- end
133
-
134
- # @return [String]
135
- def format_scopes(scopes)
136
- format_list(scopes) { |items| items.map { |s| s['name'] }.join(', ') }
137
- end
138
-
139
- # @return [String]
140
- def format_dependencies(dependencies)
141
- format_list(dependencies) { |items| items.map { |dep| "#{dep['target']} (via #{dep['via']})" }.join(', ') }
142
- end
143
-
144
- # @return [Hash]
145
- def title_property(text)
146
- { title: [{ text: { content: text } }] }
147
- end
148
-
149
- # Return 'None' for nil/empty lists; otherwise yield items to a formatting block.
150
- #
151
- # @param items [Array, nil]
152
- # @return [String]
153
- def format_list(items)
154
- return 'None' if items.nil? || items.empty?
155
-
156
- yield items
157
- end
158
- end
159
- end
160
- end
161
- end
@@ -1,22 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Notion
5
- module Mappers
6
- # Shared helpers for Notion mapper classes.
7
- module Shared
8
- MAX_RICH_TEXT_LENGTH = 2000
9
-
10
- # Build a Notion rich_text property, truncating to API limits.
11
- #
12
- # @param text [String]
13
- # @return [Hash]
14
- def rich_text_property(text)
15
- content = text.to_s
16
- content = "#{content[0...1997]}..." if content.length > MAX_RICH_TEXT_LENGTH
17
- { rich_text: [{ text: { content: content } }] }
18
- end
19
- end
20
- end
21
- end
22
- end
@@ -1,68 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Notion
5
- # Thread-safe rate limiter for Notion API (3 requests/second default).
6
- #
7
- # Enforces a minimum interval between API calls by sleeping when necessary.
8
- # Uses a Mutex to ensure thread safety when called from concurrent contexts.
9
- #
10
- # @example
11
- # limiter = RateLimiter.new(requests_per_second: 3)
12
- # limiter.throttle { client.create_page(...) }
13
- # limiter.throttle { client.update_page(...) }
14
- #
15
- class RateLimiter
16
- # @param requests_per_second [Numeric] Maximum requests per second (default: 3)
17
- # @raise [ArgumentError] if requests_per_second is not positive
18
- def initialize(requests_per_second: 3)
19
- unless requests_per_second.is_a?(Numeric) && requests_per_second.positive?
20
- raise ArgumentError, "requests_per_second must be positive, got #{requests_per_second.inspect}"
21
- end
22
-
23
- @min_interval = 1.0 / requests_per_second
24
- @last_request_at = nil
25
- @mutex = Mutex.new
26
- end
27
-
28
- # Execute a block after enforcing the rate limit.
29
- #
30
- # Sleeps if the minimum interval since the last request hasn't elapsed.
31
- # Thread-safe — only one request proceeds at a time.
32
- #
33
- # @yield The block to execute after rate limiting
34
- # @return [Object] The block's return value
35
- # @raise [ArgumentError] if no block is given
36
- def throttle
37
- raise ArgumentError, 'block required' unless block_given?
38
-
39
- @mutex.synchronize do
40
- wait_for_interval
41
- @last_request_at = monotonic_now
42
- end
43
-
44
- yield
45
- end
46
-
47
- private
48
-
49
- # Sleep if minimum interval hasn't elapsed since last request.
50
- #
51
- # @return [void]
52
- def wait_for_interval
53
- return unless @last_request_at
54
-
55
- elapsed = monotonic_now - @last_request_at
56
- remaining = @min_interval - elapsed
57
- sleep(remaining) if remaining.positive?
58
- end
59
-
60
- # Monotonic clock for accurate interval measurement.
61
- #
62
- # @return [Float] Current monotonic time in seconds
63
- def monotonic_now
64
- Process.clock_gettime(Process::CLOCK_MONOTONIC)
65
- end
66
- end
67
- end
68
- end
@@ -1,79 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Observability
5
- # Probes configured components and reports overall system health.
6
- #
7
- # Checks vector store, metadata store, and embedding provider by calling
8
- # lightweight operations on each. Components that are nil are reported
9
- # as :not_configured and do not affect the overall healthy? status.
10
- #
11
- # @example
12
- # check = HealthCheck.new(
13
- # vector_store: vector_store,
14
- # metadata_store: metadata_store,
15
- # embedding_provider: provider
16
- # )
17
- # status = check.run
18
- # status.healthy? # => true
19
- # status.components # => { vector_store: :ok, metadata_store: :ok, embedding_provider: :ok }
20
- #
21
- class HealthCheck
22
- # Value object representing the result of a health check.
23
- HealthStatus = Struct.new(:healthy?, :components, keyword_init: true)
24
-
25
- # @param vector_store [Object, nil] Vector store adapter (must respond to #count)
26
- # @param metadata_store [Object, nil] Metadata store adapter (must respond to #count)
27
- # @param embedding_provider [Object, nil] Embedding provider (must respond to #embed)
28
- def initialize(vector_store: nil, metadata_store: nil, embedding_provider: nil)
29
- @vector_store = vector_store
30
- @metadata_store = metadata_store
31
- @embedding_provider = embedding_provider
32
- end
33
-
34
- # Run health probes on all configured components.
35
- #
36
- # @return [HealthStatus] Result with healthy? flag and per-component status
37
- def run
38
- components = {
39
- vector_store: probe_store(@vector_store),
40
- metadata_store: probe_store(@metadata_store),
41
- embedding_provider: probe_provider(@embedding_provider)
42
- }
43
-
44
- all_healthy = components.values.all? { |status| %i[ok not_configured].include?(status) }
45
-
46
- HealthStatus.new(healthy?: all_healthy, components: components)
47
- end
48
-
49
- private
50
-
51
- # Probe a store component by calling #count.
52
- #
53
- # @param store [Object, nil] Store adapter
54
- # @return [Symbol] :ok, :error, or :not_configured
55
- def probe_store(store)
56
- return :not_configured if store.nil?
57
-
58
- store.count
59
- :ok
60
- rescue StandardError
61
- :error
62
- end
63
-
64
- # Probe an embedding provider by checking its capabilities without making network calls.
65
- #
66
- # @param provider [Object, nil] Embedding provider
67
- # @return [Symbol] :ok, :error, or :not_configured
68
- def probe_provider(provider)
69
- return :not_configured if provider.nil?
70
-
71
- if provider.respond_to?(:embed) && provider.respond_to?(:dimensions)
72
- :ok
73
- else
74
- :error
75
- end
76
- end
77
- end
78
- end
79
- end
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Observability
5
- # Lightweight instrumentation wrapper that delegates to ActiveSupport::Notifications
6
- # when available, and falls back to a simple yield otherwise.
7
- #
8
- # @example
9
- # Instrumentation.instrument('codebase_index.extraction', unit: 'User') do
10
- # extract_unit(user_model)
11
- # end
12
- #
13
- module Instrumentation
14
- module_function
15
-
16
- # Instrument a block of code with an event name and payload.
17
- #
18
- # Delegates to ActiveSupport::Notifications.instrument when available.
19
- # Otherwise, yields the block directly.
20
- #
21
- # @param event [String] Event name (e.g., 'codebase_index.extraction')
22
- # @param payload [Hash] Additional data to include with the event
23
- # @yield [payload] The block to instrument
24
- # @return [Object] The return value of the block
25
- def instrument(event, payload = {}, &block)
26
- if defined?(ActiveSupport::Notifications)
27
- ActiveSupport::Notifications.instrument(event, payload, &block)
28
- elsif block
29
- yield payload
30
- end
31
- end
32
- end
33
- end
34
- end
@@ -1,57 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'json'
4
- require 'time'
5
-
6
- module CodebaseIndex
7
- module Observability
8
- # Structured JSON logger that writes one JSON object per line.
9
- #
10
- # Each log entry includes a timestamp, level, event name, and any
11
- # additional data passed as keyword arguments.
12
- #
13
- # @example
14
- # logger = StructuredLogger.new(output: $stderr)
15
- # logger.info('extraction.complete', units: 42, duration_ms: 1200)
16
- # # => {"timestamp":"2026-02-15T12:00:00Z","level":"info",
17
- # # "event":"extraction.complete","units":42,"duration_ms":1200}
18
- #
19
- class StructuredLogger
20
- # @param output [IO] Output stream (default: $stderr)
21
- def initialize(output: $stderr)
22
- @output = output
23
- end
24
-
25
- # @!method info(event, **data)
26
- # Log at info level.
27
- # @param event [String] Event name
28
- # @param data [Hash] Additional structured data
29
- # @!method warn(event, **data)
30
- # Log at warn level.
31
- # @!method error(event, **data)
32
- # Log at error level.
33
- # @!method debug(event, **data)
34
- # Log at debug level.
35
- %w[info warn error debug].each do |level|
36
- define_method(level) { |event, **data| write_entry(level, event, data) }
37
- end
38
-
39
- private
40
-
41
- # Write a single JSON log line.
42
- #
43
- # @param level [String] Log level
44
- # @param event [String] Event name
45
- # @param data [Hash] Additional data
46
- def write_entry(level, event, data)
47
- entry = {
48
- timestamp: Time.now.utc.iso8601,
49
- level: level,
50
- event: event
51
- }.merge(data)
52
-
53
- @output.puts(JSON.generate(entry))
54
- end
55
- end
56
- end
57
- end
@@ -1,81 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module CodebaseIndex
4
- module Operator
5
- # Classifies pipeline errors by severity and suggests remediation.
6
- #
7
- # @example
8
- # escalator = ErrorEscalator.new
9
- # result = escalator.classify(Timeout::Error.new("connection timed out"))
10
- # result[:severity] # => :transient
11
- # result[:remediation] # => "Retry after a short delay"
12
- #
13
- class ErrorEscalator
14
- TRANSIENT_PATTERNS = [
15
- { class_pattern: /Timeout|ETIMEDOUT/, category: 'timeout', remediation: 'Retry after a short delay' },
16
- { class_pattern: /Net::/, category: 'network', remediation: 'Check network connectivity and retry' },
17
- { class_pattern: /RateLimited|429/, category: 'rate_limit',
18
- remediation: 'Back off and retry with exponential delay' },
19
- { class_pattern: /CircuitOpenError/, category: 'circuit_open',
20
- remediation: 'Wait for circuit breaker reset timeout' },
21
- { class_pattern: /ConnectionPool|Busy/, category: 'resource_contention',
22
- remediation: 'Wait for resources to free up' }
23
- ].freeze
24
-
25
- PERMANENT_PATTERNS = [
26
- { class_pattern: /NameError|NoMethodError/, category: 'code_error',
27
- remediation: 'Fix the code error and re-extract' },
28
- { class_pattern: /Errno::ENOENT|FileNotFoundError/, category: 'missing_file',
29
- remediation: 'Verify file paths and re-run extraction' },
30
- { class_pattern: /JSON::ParserError/, category: 'corrupt_data',
31
- remediation: 'Clean index and re-extract' },
32
- { class_pattern: /ConfigurationError/, category: 'configuration',
33
- remediation: 'Review CodebaseIndex configuration' },
34
- { class_pattern: /ExtractionError/, category: 'extraction_failure',
35
- remediation: 'Check extraction logs for specific failure details' }
36
- ].freeze
37
-
38
- # Classify an error by severity and suggest remediation.
39
- #
40
- # @param error [StandardError] The error to classify
41
- # @return [Hash] :severity (:transient or :permanent), :category, :remediation, :error_class, :message
42
- def classify(error)
43
- error_string = "#{error.class} #{error.message}"
44
-
45
- match = find_match(error_string, TRANSIENT_PATTERNS, :transient) ||
46
- find_match(error_string, PERMANENT_PATTERNS, :permanent)
47
-
48
- if match
49
- match.merge(error_class: error.class.name, message: error.message)
50
- else
51
- {
52
- severity: :unknown,
53
- category: 'unclassified',
54
- remediation: 'Investigate error details and check logs',
55
- error_class: error.class.name,
56
- message: error.message
57
- }
58
- end
59
- end
60
-
61
- private
62
-
63
- # @param error_string [String]
64
- # @param patterns [Array<Hash>]
65
- # @param severity [Symbol]
66
- # @return [Hash, nil]
67
- def find_match(error_string, patterns, severity)
68
- patterns.each do |pattern|
69
- next unless error_string.match?(pattern[:class_pattern])
70
-
71
- return {
72
- severity: severity,
73
- category: pattern[:category],
74
- remediation: pattern[:remediation]
75
- }
76
- end
77
- nil
78
- end
79
- end
80
- end
81
- end
@@ -1,92 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'json'
4
- require 'fileutils'
5
- require 'time'
6
-
7
- module CodebaseIndex
8
- module Operator
9
- # Rate limiter for pipeline operations using file-based state.
10
- #
11
- # Enforces a cooldown between consecutive runs of the same operation
12
- # to prevent accidental repeated extraction or embedding.
13
- #
14
- # @example
15
- # guard = PipelineGuard.new(state_dir: '/tmp', cooldown: 300)
16
- # if guard.allow?(:extraction)
17
- # run_extraction
18
- # guard.record!(:extraction)
19
- # end
20
- #
21
- class PipelineGuard
22
- # @param state_dir [String] Directory for persisting state
23
- # @param cooldown [Integer] Minimum seconds between runs
24
- def initialize(state_dir:, cooldown: 300)
25
- @state_dir = state_dir
26
- @cooldown = cooldown
27
- @state_path = File.join(state_dir, 'pipeline_guard.json')
28
- end
29
-
30
- # Check if an operation is allowed (cooldown elapsed).
31
- #
32
- # @param operation [Symbol, String] Operation name
33
- # @return [Boolean]
34
- def allow?(operation)
35
- last = last_run(operation)
36
- return true if last.nil?
37
-
38
- (Time.now - last) >= @cooldown
39
- end
40
-
41
- # Record that an operation has just run.
42
- #
43
- # @param operation [Symbol, String] Operation name
44
- # @return [void]
45
- def record!(operation)
46
- FileUtils.mkdir_p(@state_dir)
47
- File.open(@state_path, File::RDWR | File::CREAT) do |f|
48
- f.flock(File::LOCK_EX)
49
- content = f.read
50
- state = if content.empty?
51
- {}
52
- else
53
- begin
54
- JSON.parse(content)
55
- rescue StandardError
56
- {}
57
- end
58
- end
59
- state[operation.to_s] = Time.now.iso8601
60
- f.rewind
61
- f.write(JSON.generate(state))
62
- f.truncate(f.pos)
63
- end
64
- end
65
-
66
- # Get the last run time for an operation.
67
- #
68
- # @param operation [Symbol, String] Operation name
69
- # @return [Time, nil]
70
- def last_run(operation)
71
- state = read_state
72
- timestamp = state[operation.to_s]
73
- return nil if timestamp.nil?
74
-
75
- Time.parse(timestamp)
76
- rescue ArgumentError
77
- nil
78
- end
79
-
80
- private
81
-
82
- # @return [Hash]
83
- def read_state
84
- return {} unless File.exist?(@state_path)
85
-
86
- JSON.parse(File.read(@state_path))
87
- rescue JSON::ParserError
88
- {}
89
- end
90
- end
91
- end
92
- end