codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Notion
    module Mappers
      # Maps a model ExtractedUnit to Notion page properties for the Data Models database.
      #
      # Transforms model metadata (associations, validations, callbacks, scopes, git data)
      # into the nested hash structure used for Notion page properties. All input is read
      # with string keys, i.e. in the shape produced by parsing the unit's JSON.
      #
      # @example
      #   mapper = ModelMapper.new
      #   properties = mapper.map(unit_data)
      #   client.create_page(database_id: db_id, properties: properties)
      #
      class ModelMapper
        # Maximum number of characters sent in a single text object.
        # NOTE(review): presumably mirrors Notion's per-text-object content limit — confirm
        # against the Notion API request-limit documentation.
        MAX_RICH_TEXT_LENGTH = 2000

        # Marker appended to content that had to be shortened to fit MAX_RICH_TEXT_LENGTH.
        TRUNCATION_SUFFIX = '...'

        # Ruby/Sorbet magic comments (e.g. "# frozen_string_literal: true"). They carry a
        # single-token value and must not be mistaken for a model description.
        MAGIC_COMMENT = /
          \A\#\s*(?:-\*-\s*)?
          (?:frozen_string_literal|encoding|coding|warn_indent|shareable_constant_value|typed)
          \s*:\s*[\w.-]+\s*(?:-\*-)?\z
        /xi

        # Map a model unit to Notion Data Models page properties.
        #
        # @param unit_data [Hash] Parsed model ExtractedUnit JSON (string keys)
        # @return [Hash] Notion page properties hash keyed by property name
        def map(unit_data)
          metadata = unit_data['metadata'] || {}
          properties = build_text_properties(unit_data, metadata)
          properties['Column Count'] = { number: column_count(metadata) }
          add_git_properties(properties, metadata['git'] || {})
          properties
        end

        private

        # Build the title and rich-text properties.
        #
        # @param unit_data [Hash] Parsed unit
        # @param metadata [Hash] The unit's 'metadata' hash (may be empty)
        # @return [Hash] Text-based Notion properties
        def build_text_properties(unit_data, metadata)
          {
            'Table Name' => title_property(table_name(unit_data, metadata)),
            'Model Name' => rich_text_property(unit_data['identifier']),
            'Description' => rich_text_property(extract_description(unit_data['source_code'])),
            'Associations' => rich_text_property(format_associations(metadata['associations'])),
            'Validations' => rich_text_property(format_validations(metadata['validations'])),
            'Callbacks' => rich_text_property(format_callbacks(metadata['callbacks'])),
            'Scopes' => rich_text_property(format_scopes(metadata['scopes'])),
            'File Path' => rich_text_property(unit_data['file_path'] || ''),
            'Dependencies' => rich_text_property(format_dependencies(unit_data['dependencies']))
          }
        end

        # Add 'Last Modified' and 'Change Frequency' when the git data provides them.
        # Mutates +properties+ in place.
        #
        # @param properties [Hash] Properties hash being built
        # @param git [Hash] The 'git' metadata hash (may be empty)
        # @return [void]
        def add_git_properties(properties, git)
          properties['Last Modified'] = { date: { start: git['last_modified'] } } if git['last_modified']
          properties['Change Frequency'] = { select: { name: git['change_frequency'] } } if git['change_frequency']
        end

        # Resolve the table name: the explicit metadata value when present, otherwise a
        # naive guess derived from the last segment of the identifier (snake_case + "s").
        #
        # @param unit_data [Hash]
        # @param metadata [Hash]
        # @return [String] Table name; empty when neither source is available
        def table_name(unit_data, metadata)
          return metadata['table_name'] if metadata['table_name']

          class_name = unit_data['identifier'].to_s.split('::').last.to_s
          # Without an identifier there is nothing to pluralize; avoid emitting a bare "s".
          return '' if class_name.empty?

          "#{class_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase}s"
        end

        # @param metadata [Hash]
        # @return [Integer] Explicit 'column_count', else the size of 'columns' (0 if absent)
        def column_count(metadata)
          metadata['column_count'] || (metadata['columns'] || []).size
        end

        # Extract the first contiguous block of comment lines from the source and join
        # it into a single line. Leading magic comments are skipped so that
        # "# frozen_string_literal: true" never becomes the description.
        #
        # @param source_code [String, nil]
        # @return [String] Description text, or '' when no comment block is found
        def extract_description(source_code)
          return '' unless source_code

          comment_lines = []
          source_code.each_line do |line|
            stripped = line.strip
            # Magic comments only matter before the first real comment line is collected.
            next if comment_lines.empty? && stripped.match?(MAGIC_COMMENT)

            if stripped.start_with?('#')
              comment_lines << stripped.sub(/\A#\s?/, '')
            elsif comment_lines.any?
              break # first non-comment line after the block ends it
            end
          end

          comment_lines.join(' ').strip
        end

        # @param associations [Array<Hash>, nil]
        # @return [String] One association per line, or 'None'
        def format_associations(associations)
          return 'None' if associations.nil? || associations.empty?

          associations.map { |a| format_single_association(a) }.join("\n")
        end

        # @param assoc [Hash] Reads 'type', 'name', 'through', 'class_name', 'foreign_key'
        # @return [String] e.g. "has_many :posts, through: :memberships"
        def format_single_association(assoc)
          parts = ["#{assoc['type']} :#{assoc['name']}"]
          parts << "through: :#{assoc['through']}" if assoc['through']
          parts << "class_name: '#{assoc['class_name']}'" if assoc['class_name']
          parts << "foreign_key: :#{assoc['foreign_key']}" if assoc['foreign_key']
          parts.join(', ')
        end

        # Group validations by attribute: one "attribute: type, type" line each.
        #
        # @param validations [Array<Hash>, nil]
        # @return [String]
        def format_validations(validations)
          return 'None' if validations.nil? || validations.empty?

          validations.group_by { |v| v['attribute'] }.map do |attr, vals|
            "#{attr}: #{vals.map { |v| v['type'] }.join(', ')}"
          end.join("\n")
        end

        # @param callbacks [Array<Hash>, nil]
        # @return [String] One callback per line, or 'None'
        def format_callbacks(callbacks)
          return 'None' if callbacks.nil? || callbacks.empty?

          callbacks.map { |callback| format_single_callback(callback) }.join("\n")
        end

        # @param callback [Hash] Reads 'type', 'filter' and optional 'side_effects'
        # @return [String] e.g. "after_save: sync (enqueues SyncJob)"
        def format_single_callback(callback)
          parts = ["#{callback['type']}: #{callback['filter']}"]
          effects = callback_side_effects(callback['side_effects'])
          parts << "(#{effects.join('; ')})" if effects.any?
          parts.join(' ')
        end

        # @param side_effects [Hash, nil] Reads 'jobs_enqueued' and 'services_called'
        # @return [Array<String>] Human-readable side-effect phrases (possibly empty)
        def callback_side_effects(side_effects)
          return [] unless side_effects

          effects = []
          jobs = side_effects['jobs_enqueued']
          effects << "enqueues #{jobs.join(', ')}" if jobs&.any?
          services = side_effects['services_called']
          effects << "calls #{services.join(', ')}" if services&.any?
          effects
        end

        # @param scopes [Array<Hash>, nil]
        # @return [String] Comma-separated scope names, or 'None'
        def format_scopes(scopes)
          return 'None' if scopes.nil? || scopes.empty?

          scopes.map { |s| s['name'] }.join(', ')
        end

        # @param dependencies [Array<Hash>, nil]
        # @return [String] Comma-separated "target (via kind)" entries, or 'None'
        def format_dependencies(dependencies)
          return 'None' if dependencies.nil? || dependencies.empty?

          dependencies.map { |dep| "#{dep['target']} (via #{dep['via']})" }.join(', ')
        end

        # @param text [#to_s]
        # @return [Hash] Notion title property
        def title_property(text)
          { title: [{ text: { content: truncate(text) } }] }
        end

        # @param text [#to_s]
        # @return [Hash] Notion rich_text property
        def rich_text_property(text)
          { rich_text: [{ text: { content: truncate(text) } }] }
        end

        # Coerce to String and cap at MAX_RICH_TEXT_LENGTH characters; shortened content
        # ends with TRUNCATION_SUFFIX and is exactly MAX_RICH_TEXT_LENGTH long.
        #
        # @param text [#to_s]
        # @return [String]
        def truncate(text)
          content = text.to_s
          return content if content.length <= MAX_RICH_TEXT_LENGTH

          "#{content[0, MAX_RICH_TEXT_LENGTH - TRUNCATION_SUFFIX.length]}#{TRUNCATION_SUFFIX}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Notion
    # Paces calls to the Notion API so that consecutive calls start at least
    # 1 / requests_per_second seconds apart (3 requests/second by default).
    #
    # The pacing step (wait, then stamp the start time) runs under a Mutex, so
    # concurrent callers are admitted one after another. The caller's block itself
    # runs after the lock has been released.
    #
    # @example
    #   limiter = RateLimiter.new(requests_per_second: 3)
    #   limiter.throttle { client.create_page(...) }
    #   limiter.throttle { client.update_page(...) }
    #
    class RateLimiter
      # @param requests_per_second [Numeric] Maximum requests per second (default: 3)
      # @raise [ArgumentError] if requests_per_second is not a positive Numeric
      def initialize(requests_per_second: 3)
        acceptable = requests_per_second.is_a?(Numeric) && requests_per_second.positive?
        unless acceptable
          message = "requests_per_second must be positive, got #{requests_per_second.inspect}"
          raise ArgumentError, message
        end

        @mutex = Mutex.new
        @last_request_at = nil
        @min_interval = 1.0 / requests_per_second
      end

      # Wait until the minimum interval since the previous call has passed, then
      # run the given block.
      #
      # @yield The work to perform once the rate limit allows it
      # @return [Object] Whatever the block returns
      # @raise [ArgumentError] if no block is given
      def throttle
        raise ArgumentError, 'block required' unless block_given?

        @mutex.synchronize do
          wait_for_interval
          @last_request_at = monotonic_now
        end

        yield
      end

      private

      # Sleep for whatever part of the minimum interval is still outstanding.
      # Does nothing on the very first call.
      #
      # @return [void]
      def wait_for_interval
        previous = @last_request_at
        return unless previous

        shortfall = @min_interval - (monotonic_now - previous)
        sleep(shortfall) if shortfall.positive?
      end

      # Reading of the monotonic clock, used for interval measurement.
      #
      # @return [Float] Current monotonic time in seconds
      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Observability
    # Probes the configured components and reports overall system health.
    #
    # The vector store and metadata store are probed by calling #count on them.
    # The embedding provider is only inspected for the methods it responds to.
    # A component that is nil is reported as :not_configured and does not make
    # the overall result unhealthy.
    #
    # @example
    #   check = HealthCheck.new(
    #     vector_store: vector_store,
    #     metadata_store: metadata_store,
    #     embedding_provider: provider
    #   )
    #   status = check.run
    #   status.healthy?    # => true
    #   status.components  # => { vector_store: :ok, metadata_store: :ok, embedding_provider: :ok }
    #
    class HealthCheck
      # Result of a health check: an overall flag plus the per-component statuses.
      HealthStatus = Struct.new(:healthy?, :components, keyword_init: true)

      # @param vector_store [Object, nil] Vector store adapter (probed via #count)
      # @param metadata_store [Object, nil] Metadata store adapter (probed via #count)
      # @param embedding_provider [Object, nil] Embedding provider (must respond to
      #   #embed and #dimensions to be reported as :ok)
      def initialize(vector_store: nil, metadata_store: nil, embedding_provider: nil)
        @vector_store = vector_store
        @metadata_store = metadata_store
        @embedding_provider = embedding_provider
      end

      # Probe every component and summarize the outcome.
      #
      # @return [HealthStatus] healthy? is true when no component reported :error
      def run
        components = {
          vector_store: probe_store(@vector_store),
          metadata_store: probe_store(@metadata_store),
          embedding_provider: probe_provider(@embedding_provider)
        }

        # Healthy means: nothing reported a status other than :ok / :not_configured.
        failing = components.each_value.any? { |status| status != :ok && status != :not_configured }

        HealthStatus.new(healthy?: !failing, components: components)
      end

      private

      # Probe a store by calling #count; any StandardError counts as a failure.
      #
      # @param store [Object, nil] Store adapter
      # @return [Symbol] :ok, :error, or :not_configured
      def probe_store(store)
        if store.nil?
          :not_configured
        else
          store.count
          :ok
        end
      rescue StandardError
        :error
      end

      # Probe an embedding provider by checking that it responds to #embed and
      # #dimensions. Neither method is invoked.
      #
      # @param provider [Object, nil] Embedding provider
      # @return [Symbol] :ok, :error, or :not_configured
      def probe_provider(provider)
        return :not_configured if provider.nil?

        capable = %i[embed dimensions].all? { |capability| provider.respond_to?(capability) }
        capable ? :ok : :error
      rescue StandardError
        :error
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Observability
    # Thin instrumentation facade: events go through ActiveSupport::Notifications
    # when that constant is defined; otherwise the block is simply run.
    #
    # @example
    #   Instrumentation.instrument('codebase_index.extraction', unit: 'User') do
    #     extract_unit(user_model)
    #   end
    #
    module Instrumentation
      module_function

      # Run a block as an instrumented event.
      #
      # With ActiveSupport::Notifications defined, the call (including the block) is
      # handed to ActiveSupport::Notifications.instrument and its result is returned.
      # Without it, the block is yielded the payload directly; when there is no
      # block either, nil is returned.
      #
      # @param event [String] Event name (e.g., 'codebase_index.extraction')
      # @param payload [Hash] Additional data to include with the event
      # @yield [payload] The block to instrument
      # @return [Object] The return value of the block
      def instrument(event, payload = {}, &block)
        notifier = ActiveSupport::Notifications if defined?(ActiveSupport::Notifications)
        return notifier.instrument(event, payload, &block) if notifier

        yield payload if block
      end
    end
  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
  module Observability
    # Logger that emits one JSON object per line.
    #
    # Every entry starts with a UTC ISO 8601 timestamp, the level and the event
    # name, followed by whatever keyword arguments the caller supplied. Caller
    # keys are merged last, so a keyword named :timestamp, :level or :event
    # replaces the generated value.
    #
    # @example
    #   logger = StructuredLogger.new(output: $stderr)
    #   logger.info('extraction.complete', units: 42, duration_ms: 1200)
    #   # => {"timestamp":"2026-02-15T12:00:00Z","level":"info",
    #   #     "event":"extraction.complete","units":42,"duration_ms":1200}
    #
    class StructuredLogger
      # @param output [IO] Stream that receives the log lines via #puts (default: $stderr)
      def initialize(output: $stderr)
        @output = output
      end

      # Emit an entry with level "info".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def info(event, **data)
        write_entry('info', event, data)
      end

      # Emit an entry with level "warn".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def warn(event, **data)
        write_entry('warn', event, data)
      end

      # Emit an entry with level "error".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def error(event, **data)
        write_entry('error', event, data)
      end

      # Emit an entry with level "debug".
      #
      # @param event [String] Event name
      # @param data [Hash] Additional structured data
      def debug(event, **data)
        write_entry('debug', event, data)
      end

      private

      # Serialize one entry to JSON and write it as a single line.
      #
      # @param level [String] Log level
      # @param event [String] Event name
      # @param data [Hash] Additional data, merged over the generated fields
      def write_entry(level, event, data)
        generated = { timestamp: Time.now.utc.iso8601, level: level, event: event }
        line = JSON.generate(generated.merge(data))
        @output.puts(line)
      end
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
  module Operator
    # Classifies pipeline errors by severity and suggests remediation.
    #
    # Classification is pattern based: the error's class name and message are joined
    # into one string and tested against the transient patterns first, then the
    # permanent ones. The first pattern that matches decides the result.
    #
    # @example
    #   escalator = ErrorEscalator.new
    #   result = escalator.classify(Timeout::Error.new("connection timed out"))
    #   result[:severity]    # => :transient
    #   result[:remediation] # => "Retry after a short delay"
    #
    class ErrorEscalator
      # Patterns for errors that are classified as :transient.
      TRANSIENT_PATTERNS = [
        { class_pattern: /Timeout|ETIMEDOUT/, category: 'timeout', remediation: 'Retry after a short delay' },
        { class_pattern: /Net::/, category: 'network', remediation: 'Check network connectivity and retry' },
        { class_pattern: /RateLimited|429/, category: 'rate_limit',
          remediation: 'Back off and retry with exponential delay' },
        { class_pattern: /CircuitOpenError/, category: 'circuit_open',
          remediation: 'Wait for circuit breaker reset timeout' },
        { class_pattern: /ConnectionPool|Busy/, category: 'resource_contention',
          remediation: 'Wait for resources to free up' }
      ].freeze

      # Patterns for errors that are classified as :permanent.
      PERMANENT_PATTERNS = [
        { class_pattern: /NameError|NoMethodError/, category: 'code_error',
          remediation: 'Fix the code error and re-extract' },
        { class_pattern: /Errno::ENOENT|FileNotFoundError/, category: 'missing_file',
          remediation: 'Verify file paths and re-run extraction' },
        { class_pattern: /JSON::ParserError/, category: 'corrupt_data',
          remediation: 'Clean index and re-extract' },
        { class_pattern: /ConfigurationError/, category: 'configuration',
          remediation: 'Review CodebaseIndex configuration' },
        { class_pattern: /ExtractionError/, category: 'extraction_failure',
          remediation: 'Check extraction logs for specific failure details' }
      ].freeze

      # Classify an error by severity and suggest remediation.
      #
      # @param error [StandardError] The error to classify
      # @return [Hash] :severity (:transient, :permanent or :unknown), :category,
      #   :remediation, :error_class, :message
      def classify(error)
        error_string = "#{error.class} #{error.message}"

        verdict = find_match(error_string, TRANSIENT_PATTERNS, :transient)
        verdict ||= find_match(error_string, PERMANENT_PATTERNS, :permanent)
        # Nothing matched: report the error as unclassified.
        verdict ||= {
          severity: :unknown,
          category: 'unclassified',
          remediation: 'Investigate error details and check logs'
        }

        verdict.merge(error_class: error.class.name, message: error.message)
      end

      private

      # Look up the first pattern whose regexp matches the error string.
      #
      # @param error_string [String] "<class> <message>" of the error
      # @param patterns [Array<Hash>] Pattern table to search, in order
      # @param severity [Symbol] Severity to report for a hit in this table
      # @return [Hash, nil] :severity, :category, :remediation — or nil without a hit
      def find_match(error_string, patterns, severity)
        hit = patterns.find { |candidate| error_string.match?(candidate[:class_pattern]) }
        return nil unless hit

        { severity: severity, category: hit[:category], remediation: hit[:remediation] }
      end
    end
  end
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'fileutils'
|
|
5
|
+
require 'time'
|
|
6
|
+
|
|
7
|
+
module CodebaseIndex
  module Operator
    # Rate limiter for pipeline operations using file-based state.
    #
    # Enforces a cooldown between consecutive runs of the same operation
    # to prevent accidental repeated extraction or embedding. State lives in
    # `pipeline_guard.json` inside the state directory as a JSON object mapping
    # operation names to ISO 8601 timestamps. State that is missing, unparseable,
    # or not a JSON object is treated as "nothing has run yet".
    #
    # @example
    #   guard = PipelineGuard.new(state_dir: '/tmp', cooldown: 300)
    #   if guard.allow?(:extraction)
    #     run_extraction
    #     guard.record!(:extraction)
    #   end
    #
    class PipelineGuard
      # @param state_dir [String] Directory for persisting state
      # @param cooldown [Integer] Minimum seconds between runs
      def initialize(state_dir:, cooldown: 300)
        @state_dir = state_dir
        @cooldown = cooldown
        @state_path = File.join(state_dir, 'pipeline_guard.json')
      end

      # Check if an operation is allowed (cooldown elapsed).
      #
      # @param operation [Symbol, String] Operation name
      # @return [Boolean] true when there is no usable record of a previous run or
      #   at least `cooldown` seconds have passed since it
      def allow?(operation)
        last = last_run(operation)
        return true if last.nil?

        (Time.now - last) >= @cooldown
      end

      # Record that an operation has just run.
      #
      # Creates the state directory if needed and performs the read-modify-write
      # of the state file under an exclusive flock, so concurrent recorders do
      # not lose each other's entries.
      #
      # @param operation [Symbol, String] Operation name
      # @return [void]
      def record!(operation)
        FileUtils.mkdir_p(@state_dir)
        File.open(@state_path, File::RDWR | File::CREAT) do |f|
          f.flock(File::LOCK_EX)
          state = parse_state(f.read)
          state[operation.to_s] = Time.now.iso8601
          f.rewind
          f.write(JSON.generate(state))
          # Drop leftover bytes when the new document is shorter than the old one.
          f.truncate(f.pos)
        end
      end

      # Get the last run time for an operation.
      #
      # @param operation [Symbol, String] Operation name
      # @return [Time, nil] nil when the operation has no entry or the stored value
      #   is not a parseable timestamp string
      def last_run(operation)
        timestamp = read_state[operation.to_s]
        # A hand-edited or corrupted file may hold a number/array here; Time.parse
        # would raise TypeError for those, so only strings are parsed.
        return nil unless timestamp.is_a?(String)

        Time.parse(timestamp)
      rescue ArgumentError
        nil
      end

      private

      # Load the persisted state under a shared flock, so a concurrent record!
      # (which rewrites the file in place under an exclusive lock) is never
      # observed half-written.
      #
      # @return [Hash] Operation name => timestamp string; {} when unavailable
      def read_state
        File.open(@state_path, File::RDONLY) do |f|
          f.flock(File::LOCK_SH)
          parse_state(f.read)
        end
      rescue Errno::ENOENT
        # No state file yet (or it vanished between calls): nothing has run.
        {}
      end

      # Decode the state document, tolerating anything that is not a JSON object.
      #
      # @param content [String] Raw file content (may be empty)
      # @return [Hash]
      def parse_state(content)
        parsed = JSON.parse(content)
        parsed.is_a?(Hash) ? parsed : {}
      rescue StandardError
        # Best effort by design: unreadable state must not block recording a run.
        {}
      end

      # Overwrite the state file with the given state, without locking.
      # Not called from within this class; record! does its own locked write.
      #
      # @param state [Hash]
      # @return [void]
      def write_state(state)
        FileUtils.mkdir_p(@state_dir)
        File.write(@state_path, JSON.generate(state))
      end
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Operator
|
|
8
|
+
# Reports pipeline status by reading extraction output metadata.
|
|
9
|
+
#
|
|
10
|
+
# @example
|
|
11
|
+
# reporter = StatusReporter.new(output_dir: 'tmp/codebase_index')
|
|
12
|
+
# status = reporter.report
|
|
13
|
+
# status[:status] # => :ok
|
|
14
|
+
# status[:staleness_seconds] # => 3600
|
|
15
|
+
#
|
|
16
|
+
class StatusReporter
  STALE_THRESHOLD = 86_400 # 24 hours

  # @param output_dir [String] Path to extraction output directory
  def initialize(output_dir:)
    @output_dir = output_dir
  end

  # Generate a pipeline status report.
  #
  # Status is :not_extracted when no usable manifest exists, :ok when the
  # manifest's extracted_at is younger than STALE_THRESHOLD, and :stale otherwise
  # (including when extracted_at is missing or unparseable).
  #
  # @return [Hash] Status report with :status, :extracted_at, :total_units, :counts,
  #   :git_sha, :git_branch, :staleness_seconds
  def report
    manifest = read_manifest
    return not_extracted_report if manifest.nil?

    staleness = compute_staleness(manifest['extracted_at'])

    {
      status: staleness < STALE_THRESHOLD ? :ok : :stale,
      extracted_at: manifest['extracted_at'],
      total_units: manifest['total_units'] || 0,
      counts: manifest['counts'] || {},
      git_sha: manifest['git_sha'],
      git_branch: manifest['git_branch'],
      # NOTE: may be Float::INFINITY, which JSON.generate rejects by default.
      staleness_seconds: staleness
    }
  end

  private

  # Load manifest.json from the output directory.
  #
  # @return [Hash, nil] nil when the file is missing, malformed, or not a JSON object
  def read_manifest
    path = File.join(@output_dir, 'manifest.json')
    return nil unless File.exist?(path)

    manifest = JSON.parse(File.read(path))
    # Only a JSON object can be indexed by the string keys used in #report.
    manifest.is_a?(Hash) ? manifest : nil
  rescue JSON::ParserError, Errno::ENOENT
    # ENOENT: the file vanished between the existence check and the read.
    nil
  end

  # @return [Hash]
  def not_extracted_report
    {
      status: :not_extracted,
      extracted_at: nil,
      total_units: 0,
      counts: {},
      git_sha: nil,
      git_branch: nil,
      staleness_seconds: nil
    }
  end

  # Seconds elapsed since the extraction timestamp.
  #
  # @param extracted_at [String, nil] ISO8601 timestamp
  # @return [Numeric] Float::INFINITY when the timestamp is absent, not a String, or unparseable
  def compute_staleness(extracted_at)
    # Time.parse raises TypeError (not ArgumentError) for non-String input.
    return Float::INFINITY unless extracted_at.is_a?(String)

    Time.now - Time.parse(extracted_at)
  rescue ArgumentError
    Float::INFINITY
  end
end
|
|
79
|
+
end
|
|
80
|
+
end
|