codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# CodebaseIndex - Rails Codebase Indexing and Retrieval
|
|
4
|
+
#
|
|
5
|
+
# A system for extracting, indexing, and retrieving context from Rails codebases
|
|
6
|
+
# to enable AI-assisted development, debugging, and analytics.
|
|
7
|
+
#
|
|
8
|
+
# ## Quick Start
|
|
9
|
+
#
|
|
10
|
+
# # Extract codebase
|
|
11
|
+
# CodebaseIndex.extract!
|
|
12
|
+
#
|
|
13
|
+
# # Or via rake
|
|
14
|
+
# bundle exec rake codebase_index:extract
|
|
15
|
+
#
|
|
16
|
+
# ## Configuration
|
|
17
|
+
#
|
|
18
|
+
# CodebaseIndex.configure do |config|
|
|
19
|
+
# config.output_dir = Rails.root.join("tmp/codebase_index")
|
|
20
|
+
# config.max_context_tokens = 8000
|
|
21
|
+
# config.include_framework_sources = true
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
require_relative 'codebase_index/version'
|
|
25
|
+
|
|
26
|
+
module CodebaseIndex
|
|
27
|
+
# Root of the CodebaseIndex exception hierarchy. Every error class declared
# here inherits from it, so rescuing Error catches all of them.
class Error < StandardError
end

# Raised by the Configuration setters when a setting receives an invalid value.
class ConfigurationError < Error
end

# Extraction failure.
# NOTE(review): not raised in this file — presumably used by the extractor; confirm.
class ExtractionError < Error
end

# Session tracer failure.
# NOTE(review): not raised in this file — presumably used by the session tracer; confirm.
class SessionTracerError < Error
end
|
|
31
|
+
|
|
32
|
+
CONFIG_MUTEX = Mutex.new
|
|
33
|
+
|
|
34
|
+
# ════════════════════════════════════════════════════════════════════════
|
|
35
|
+
# Configuration
|
|
36
|
+
# ════════════════════════════════════════════════════════════════════════
|
|
37
|
+
|
|
38
|
+
class Configuration
|
|
39
|
+
attr_accessor :embedding_model, :include_framework_sources, :gem_configs,
|
|
40
|
+
:vector_store, :metadata_store, :graph_store, :embedding_provider, :log_level,
|
|
41
|
+
:vector_store_options, :metadata_store_options, :embedding_options,
|
|
42
|
+
:concurrent_extraction, :precompute_flows, :enable_snapshots,
|
|
43
|
+
:session_tracer_enabled, :session_store, :session_id_proc, :session_exclude_paths,
|
|
44
|
+
:notion_api_token, :notion_database_ids
|
|
45
|
+
attr_reader :max_context_tokens, :similarity_threshold, :extractors, :pretty_json, :context_format
|
|
46
|
+
|
|
47
|
+
def initialize
|
|
48
|
+
@output_dir = nil # Resolved lazily; Rails.root is nil at require time
|
|
49
|
+
@embedding_model = 'text-embedding-3-small'
|
|
50
|
+
@max_context_tokens = 8000
|
|
51
|
+
@similarity_threshold = 0.7
|
|
52
|
+
@include_framework_sources = true
|
|
53
|
+
@gem_configs = {}
|
|
54
|
+
@extractors = %i[models controllers services components view_components jobs mailers graphql serializers
|
|
55
|
+
managers policies validators rails_source]
|
|
56
|
+
@pretty_json = true
|
|
57
|
+
@concurrent_extraction = false
|
|
58
|
+
@precompute_flows = false
|
|
59
|
+
@enable_snapshots = false
|
|
60
|
+
@context_format = :markdown
|
|
61
|
+
@session_tracer_enabled = false
|
|
62
|
+
@session_store = nil
|
|
63
|
+
@session_id_proc = nil
|
|
64
|
+
@session_exclude_paths = []
|
|
65
|
+
@notion_api_token = nil
|
|
66
|
+
@notion_database_ids = {}
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# @return [Pathname, String] Output directory, defaulting to Rails.root/tmp/codebase_index
|
|
70
|
+
def output_dir
|
|
71
|
+
@output_dir ||= defined?(Rails) && Rails.root ? Rails.root.join('tmp/codebase_index') : 'tmp/codebase_index'
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# @param value [Object] Must respond to #to_s
|
|
75
|
+
# @raise [ConfigurationError] if value is nil
|
|
76
|
+
def output_dir=(value)
|
|
77
|
+
raise ConfigurationError, 'output_dir cannot be nil' if value.nil?
|
|
78
|
+
|
|
79
|
+
@output_dir = value
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# @param value [Integer] Must be a positive Integer
|
|
83
|
+
# @raise [ConfigurationError] if value is not a positive Integer
|
|
84
|
+
def max_context_tokens=(value)
|
|
85
|
+
unless value.is_a?(Integer) && value.positive?
|
|
86
|
+
raise ConfigurationError, "max_context_tokens must be a positive Integer, got #{value.inspect}"
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
@max_context_tokens = value
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# @param value [Numeric] Must be between 0.0 and 1.0 inclusive
|
|
93
|
+
# @raise [ConfigurationError] if value is out of range or not numeric
|
|
94
|
+
def similarity_threshold=(value)
|
|
95
|
+
raise ConfigurationError, "similarity_threshold must be Numeric, got #{value.inspect}" unless value.is_a?(Numeric)
|
|
96
|
+
|
|
97
|
+
float_val = value.to_f
|
|
98
|
+
unless float_val.between?(0.0, 1.0)
|
|
99
|
+
raise ConfigurationError, "similarity_threshold must be between 0.0 and 1.0, got #{value.inspect}"
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
@similarity_threshold = float_val
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# @param value [Array<Symbol>] List of extractor names
|
|
106
|
+
# @raise [ConfigurationError] if value is not an Array of Symbols
|
|
107
|
+
def extractors=(value)
|
|
108
|
+
unless value.is_a?(Array) && value.all?(Symbol)
|
|
109
|
+
raise ConfigurationError, "extractors must be an Array of Symbols, got #{value.inspect}"
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
@extractors = value
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# @param value [Boolean] Must be true or false
|
|
116
|
+
# @raise [ConfigurationError] if value is not a boolean
|
|
117
|
+
def pretty_json=(value)
|
|
118
|
+
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
|
119
|
+
raise ConfigurationError, "pretty_json must be true or false, got #{value.inspect}"
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
@pretty_json = value
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# @param value [Symbol] Must be one of :claude, :markdown, :plain, :json
|
|
126
|
+
# @raise [ConfigurationError] if value is not a valid format
|
|
127
|
+
def context_format=(value)
|
|
128
|
+
valid = %i[claude markdown plain json]
|
|
129
|
+
unless valid.include?(value)
|
|
130
|
+
raise ConfigurationError, "context_format must be one of #{valid.inspect}, got #{value.inspect}"
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
@context_format = value
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
# Add a gem to be indexed
|
|
137
|
+
#
|
|
138
|
+
# @param gem_name [String] Name of the gem
|
|
139
|
+
# @param paths [Array<String>] Relative paths within the gem to index
|
|
140
|
+
# @param priority [Symbol] :high, :medium, or :low
|
|
141
|
+
def add_gem(gem_name, paths:, priority: :medium)
|
|
142
|
+
@gem_configs[gem_name] = { paths: paths, priority: priority }
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# ════════════════════════════════════════════════════════════════════════
|
|
147
|
+
# Module Interface
|
|
148
|
+
# ════════════════════════════════════════════════════════════════════════
|
|
149
|
+
|
|
150
|
+
class << self
  # The active Configuration object; assigned by #configure and
  # #configure_with_preset.
  attr_accessor :configuration

  # Yield the shared configuration for customization, creating it first if
  # none exists yet. The whole call runs while holding CONFIG_MUTEX.
  #
  # @yield [config] Optional block that receives the configuration
  # @return [Configuration] The current configuration
  def configure
    CONFIG_MUTEX.synchronize do
      self.configuration = Configuration.new unless configuration
      yield configuration if block_given?
      configuration
    end
  end

  # Replace the configuration with the one built from a named preset, then
  # optionally let a block adjust it. Runs while holding CONFIG_MUTEX.
  #
  # Valid preset names: :local, :postgresql, :production
  #
  # @param name [Symbol] Preset name, handed to Builder.preset_config
  # @yield [config] Optional block for further customization after preset is applied
  # @yieldparam config [Configuration] The configuration object
  # @return [Configuration] The applied configuration
  def configure_with_preset(name)
    CONFIG_MUTEX.synchronize do
      self.configuration = Builder.preset_config(name)
      yield(configuration) if block_given?
      configuration
    end
  end

  # Construct a Retriever from the current configuration via Builder.
  #
  # @return [Retriever] A fully wired retriever instance
  def build_retriever
    builder = Builder.new(configuration)
    builder.build_retriever
  end

  # Answer a natural language query. A new retriever is built from the
  # current configuration on every call.
  #
  # @param query [String] Natural language query
  # @param opts [Hash] Options passed through to the retriever (e.g., budget:)
  # @return [Retriever::RetrievalResult] Retrieval result
  def retrieve(query, **opts)
    retriever = build_retriever
    retriever.retrieve(query, **opts)
  end

  # Run a full extraction. The extractor code is loaded on first use.
  #
  # @param output_dir [String] Override output directory; falls back to the
  #   configured output_dir when nil
  # @return [Hash] Extraction results
  def extract!(output_dir: nil)
    require_relative 'codebase_index/extractor'

    target_dir = output_dir || configuration.output_dir
    Extractor.new(output_dir: target_dir).extract_all
  end

  # Run an incremental extraction into the configured output directory. The
  # extractor code is loaded on first use.
  #
  # @param changed_files [Array<String>] List of changed files
  # @return [Array<String>] Re-extracted unit identifiers
  def extract_changed!(changed_files)
    require_relative 'codebase_index/extractor'

    Extractor.new(output_dir: configuration.output_dir).extract_changed(changed_files)
  end
end
|
|
216
|
+
|
|
217
|
+
# Initialize with defaults
|
|
218
|
+
configure
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
require_relative 'codebase_index/builder'
|
|
222
|
+
require_relative 'codebase_index/cost_model'
|
|
223
|
+
require_relative 'codebase_index/railtie' if defined?(Rails::Railtie)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rails/generators'
|
|
4
|
+
require 'rails/generators/active_record'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Generators
|
|
8
|
+
# Generator behind `rails generate codebase_index:install`.
#
# Renders the create_codebase_index_tables.rb.erb template from the
# templates directory next to this file into db/migrate, producing a
# migration with the codebase_units, codebase_edges, and
# codebase_embeddings tables. Works with PostgreSQL, MySQL, and SQLite.
#
# Usage:
#   rails generate codebase_index:install
#
class InstallGenerator < Rails::Generators::Base
  include ActiveRecord::Generators::Migration

  desc 'Creates a migration for CodebaseIndex tables (units, edges, embeddings)'

  source_root File.expand_path('templates', __dir__)

  # Render the table-creation template as a migration file.
  #
  # @return [void]
  def create_migration_file
    template_name = 'create_codebase_index_tables.rb.erb'
    destination = 'db/migrate/create_codebase_index_tables.rb'

    migration_template(template_name, destination)
  end
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rails/generators'
|
|
4
|
+
require 'rails/generators/active_record'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Generators
|
|
8
|
+
# Rails generator that adds pgvector support to CodebaseIndex.
#
# Requires the pgvector PostgreSQL extension. Renders the
# add_pgvector_to_codebase_index.rb.erb template, which adds a native vector
# column (and an HNSW index) to the codebase_embeddings table.
#
# Usage:
#   rails generate codebase_index:pgvector
#   rails generate codebase_index:pgvector --dimensions 3072
#
class PgvectorGenerator < Rails::Generators::Base
  include ActiveRecord::Generators::Migration

  source_root File.expand_path('templates', __dir__)

  desc 'Adds pgvector native vector column and HNSW index to codebase_embeddings'

  class_option :dimensions, type: :numeric, default: 1536,
                            desc: 'Vector dimensions (1536 for text-embedding-3-small, 3072 for large)'

  # Validate --dimensions and render the pgvector migration template.
  #
  # The option is declared :numeric, so fractional, zero, and negative values
  # can arrive here; they are rejected up front rather than being written
  # into a migration that cannot run.
  #
  # @return [void]
  # @raise [Thor::Error] if --dimensions is not a positive Integer
  def create_migration_file
    dimensions = options[:dimensions]
    unless dimensions.is_a?(Integer) && dimensions.positive?
      raise Thor::Error, "--dimensions must be a positive integer, got #{dimensions.inspect}"
    end

    # Read by the ERB template to size the vector column.
    @dimensions = dimensions
    migration_template(
      'add_pgvector_to_codebase_index.rb.erb',
      'db/migrate/add_pgvector_to_codebase_index.rb'
    )
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Adds a native pgvector column to codebase_embeddings and, when the column is
# narrow enough for pgvector to index it, an HNSW index for similarity search.
class AddPgvectorToCodebaseIndex < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
  def change
    # Skip enabling when the extension is already present.
    enable_extension 'vector' unless extension_enabled?('vector')

    add_column :codebase_embeddings, :embedding_vector, :vector,
               limit: <%= @dimensions || 1536 %>, null: true
<% if (@dimensions || 1536).to_i <= 2000 -%>

    # HNSW index for fast approximate nearest neighbor search
    # Using cosine distance operator (vector_cosine_ops)
    add_index :codebase_embeddings, :embedding_vector,
              using: :hnsw,
              opclass: :vector_cosine_ops,
              name: 'idx_codebase_embeddings_vector_hnsw'
<% else -%>

    # No HNSW index is created here: pgvector only indexes `vector` columns of
    # up to 2,000 dimensions, and this column has <%= @dimensions %>. Queries will
    # use exact (sequential) search until a supported index is added, e.g. an
    # expression index on a halfvec cast of this column.
<% end -%>
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Creates the CodebaseIndex storage tables: codebase_units, codebase_edges
# (unit-to-unit relationships), and codebase_embeddings (per-unit embeddings
# stored as text).
class CreateCodebaseIndexTables < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
  def change
    create_table :codebase_units do |t|
      t.string :unit_type, null: false
      t.string :identifier, null: false
      t.string :namespace
      t.string :file_path, null: false
      t.text :source_code
      t.string :source_hash
      t.json :metadata

      t.timestamps
    end

    add_index :codebase_units, :unit_type
    # Identifiers are unique across all unit types.
    add_index :codebase_units, :identifier, unique: true
    add_index :codebase_units, :file_path

    create_table :codebase_edges do |t|
      # Both ends of an edge reference rows in codebase_units.
      t.references :source, null: false, foreign_key: { to_table: :codebase_units }
      t.references :target, null: false, foreign_key: { to_table: :codebase_units }
      t.string :relationship, null: false
      t.string :via

      t.datetime :created_at, null: false
    end

    # At most one edge per (source, target, relationship) triple.
    add_index :codebase_edges, [:source_id, :target_id, :relationship], unique: true,
              name: 'idx_codebase_edges_unique'

    create_table :codebase_embeddings do |t|
      t.references :unit, null: false, foreign_key: { to_table: :codebase_units }
      t.string :chunk_type
      t.text :embedding, null: false
      t.string :content_hash, null: false
      t.integer :dimensions, null: false

      t.datetime :created_at, null: false
    end

    add_index :codebase_embeddings, :content_hash
  end
end
|