codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CodebaseIndex - Rails Codebase Indexing and Retrieval
4
+ #
5
+ # A system for extracting, indexing, and retrieving context from Rails codebases
6
+ # to enable AI-assisted development, debugging, and analytics.
7
+ #
8
+ # ## Quick Start
9
+ #
10
+ # # Extract codebase
11
+ # CodebaseIndex.extract!
12
+ #
13
+ # # Or via rake
14
+ # bundle exec rake codebase_index:extract
15
+ #
16
+ # ## Configuration
17
+ #
18
+ # CodebaseIndex.configure do |config|
19
+ # config.output_dir = Rails.root.join("tmp/codebase_index")
20
+ # config.max_context_tokens = 8000
21
+ # config.include_framework_sources = true
22
+ # end
23
+ #
24
+ require_relative 'codebase_index/version'
25
+
26
+ module CodebaseIndex
27
# Root of the CodebaseIndex exception hierarchy. Rescuing this class catches
# every error class declared below.
class Error < StandardError
end

# Raised by Configuration writers when a setting fails validation.
class ConfigurationError < Error
end

# Error category for the extraction pipeline.
class ExtractionError < Error
end

# Error category for the session tracer.
class SessionTracerError < Error
end

# Serializes configure / configure_with_preset so the shared configuration
# object is created or replaced by one thread at a time.
CONFIG_MUTEX = Mutex.new
33
+
34
+ # ════════════════════════════════════════════════════════════════════════
35
+ # Configuration
36
+ # ════════════════════════════════════════════════════════════════════════
37
+
38
# User-tunable settings for CodebaseIndex.
#
# Unconstrained settings are plain attr_accessors. Settings with constraints
# expose a reader plus a validating writer that raises ConfigurationError
# instead of storing an invalid value.
class Configuration
  attr_accessor :embedding_model, :include_framework_sources, :gem_configs,
                :vector_store, :metadata_store, :graph_store, :embedding_provider, :log_level,
                :vector_store_options, :metadata_store_options, :embedding_options,
                :concurrent_extraction, :precompute_flows, :enable_snapshots,
                :session_tracer_enabled, :session_store, :session_id_proc, :session_exclude_paths,
                :notion_api_token, :notion_database_ids
  attr_reader :max_context_tokens, :similarity_threshold, :extractors, :pretty_json, :context_format

  # Populate defaults. Accessors not assigned here (stores, providers, option
  # hashes, log level) read as nil until the caller sets them.
  def initialize
    @output_dir = nil # Resolved lazily; Rails.root is nil at require time
    @embedding_model = 'text-embedding-3-small'
    @max_context_tokens = 8000
    @similarity_threshold = 0.7
    @include_framework_sources = true
    @gem_configs = {}
    @extractors = %i[models controllers services components view_components jobs mailers graphql serializers
                     managers policies validators rails_source]
    @pretty_json = true
    @concurrent_extraction = false
    @precompute_flows = false
    @enable_snapshots = false
    @context_format = :markdown
    @session_tracer_enabled = false
    @session_store = nil
    @session_id_proc = nil
    @session_exclude_paths = []
    @notion_api_token = nil
    @notion_database_ids = {}
  end

  # The first computed value is memoized, including the relative fallback.
  #
  # @return [Pathname, String] Output directory; defaults to
  #   Rails.root/tmp/codebase_index, or the relative path 'tmp/codebase_index'
  #   when no Rails root is available
  def output_dir
    @output_dir ||= begin
      # A bare `Rails` constant does not guarantee `Rails.root` exists: the
      # namespace can be defined without the framework being loaded. Check
      # before calling so the fallback is used instead of a NoMethodError.
      root = Rails.root if defined?(Rails) && Rails.respond_to?(:root)
      root ? root.join('tmp/codebase_index') : 'tmp/codebase_index'
    end
  end

  # @param value [Object] Must respond to #to_s
  # @raise [ConfigurationError] if value is nil
  def output_dir=(value)
    raise ConfigurationError, 'output_dir cannot be nil' if value.nil?

    @output_dir = value
  end

  # @param value [Integer] Must be a positive Integer
  # @raise [ConfigurationError] if value is not a positive Integer
  def max_context_tokens=(value)
    unless value.is_a?(Integer) && value.positive?
      raise ConfigurationError, "max_context_tokens must be a positive Integer, got #{value.inspect}"
    end

    @max_context_tokens = value
  end

  # The accepted value is stored as a Float.
  #
  # @param value [Numeric] Must be between 0.0 and 1.0 inclusive
  # @raise [ConfigurationError] if value is out of range or not numeric
  def similarity_threshold=(value)
    unless value.is_a?(Numeric)
      raise ConfigurationError, "similarity_threshold must be Numeric, got #{value.inspect}"
    end

    float_val = value.to_f
    unless float_val.between?(0.0, 1.0)
      raise ConfigurationError, "similarity_threshold must be between 0.0 and 1.0, got #{value.inspect}"
    end

    @similarity_threshold = float_val
  end

  # @param value [Array<Symbol>] List of extractor names
  # @raise [ConfigurationError] if value is not an Array of Symbols
  def extractors=(value)
    unless value.is_a?(Array) && value.all?(Symbol)
      raise ConfigurationError, "extractors must be an Array of Symbols, got #{value.inspect}"
    end

    @extractors = value
  end

  # @param value [Boolean] Must be true or false
  # @raise [ConfigurationError] if value is not a boolean
  def pretty_json=(value)
    unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
      raise ConfigurationError, "pretty_json must be true or false, got #{value.inspect}"
    end

    @pretty_json = value
  end

  # @param value [Symbol] Must be one of :claude, :markdown, :plain, :json
  # @raise [ConfigurationError] if value is not a valid format
  def context_format=(value)
    valid = %i[claude markdown plain json]
    unless valid.include?(value)
      raise ConfigurationError, "context_format must be one of #{valid.inspect}, got #{value.inspect}"
    end

    @context_format = value
  end

  # Add a gem to be indexed. Registering the same gem name again replaces
  # its previous entry.
  #
  # @param gem_name [String] Name of the gem
  # @param paths [Array<String>] Relative paths within the gem to index
  # @param priority [Symbol] :high, :medium, or :low (stored as given, not validated)
  # @return [Hash] The stored entry ({ paths:, priority: })
  def add_gem(gem_name, paths:, priority: :medium)
    @gem_configs[gem_name] = { paths: paths, priority: priority }
  end
end
145
+
146
+ # ════════════════════════════════════════════════════════════════════════
147
+ # Module Interface
148
+ # ════════════════════════════════════════════════════════════════════════
149
+
150
class << self
  # @return [Configuration, nil] The module-wide configuration object
  attr_accessor :configuration

  # Yield the shared configuration for customization, creating it first when
  # none exists yet. Runs under CONFIG_MUTEX.
  #
  # @yieldparam config [Configuration] The configuration object
  # @return [Configuration] The current configuration
  def configure
    CONFIG_MUTEX.synchronize do
      self.configuration = Configuration.new unless configuration
      yield configuration if block_given?
      configuration
    end
  end

  # Replace the configuration with a named preset, then optionally customize it.
  #
  # Valid preset names: :local, :postgresql, :production
  #
  # @param name [Symbol] Preset name
  # @yield [config] Optional block for further customization after preset is applied
  # @yieldparam config [Configuration] The configuration object
  # @return [Configuration] The applied configuration
  def configure_with_preset(name)
    CONFIG_MUTEX.synchronize do
      preset = Builder.preset_config(name)
      self.configuration = preset
      yield(configuration) if block_given?
      configuration
    end
  end

  # Build a Retriever wired with adapters from the current configuration.
  #
  # @return [Retriever] A fully wired retriever instance
  def build_retriever
    builder = Builder.new(configuration)
    builder.build_retriever
  end

  # Retrieve context for a natural language query. A new retriever is built
  # from the current configuration on every call.
  #
  # @param query [String] Natural language query
  # @param opts [Hash] Options passed through to the retriever (e.g., budget:)
  # @return [Retriever::RetrievalResult] Retrieval result
  def retrieve(query, **opts)
    retriever = build_retriever
    retriever.retrieve(query, **opts)
  end

  # Perform full extraction.
  #
  # @param output_dir [String] Override output directory; the configured
  #   directory is used when omitted
  # @return [Hash] Extraction results
  def extract!(output_dir: nil)
    # Loaded on first use rather than at require time.
    require_relative 'codebase_index/extractor'

    target_dir = output_dir || configuration.output_dir
    Extractor.new(output_dir: target_dir).extract_all
  end

  # Perform incremental extraction into the configured output directory.
  #
  # @param changed_files [Array<String>] List of changed files
  # @return [Array<String>] Re-extracted unit identifiers
  def extract_changed!(changed_files)
    # Loaded on first use rather than at require time.
    require_relative 'codebase_index/extractor'

    Extractor.new(output_dir: configuration.output_dir).extract_changed(changed_files)
  end
end
216
+
217
+ # Initialize with defaults
218
+ configure
219
+ end
220
+
221
+ require_relative 'codebase_index/builder'
222
+ require_relative 'codebase_index/cost_model'
223
+ require_relative 'codebase_index/railtie' if defined?(Rails::Railtie)
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+ require 'rails/generators/active_record'
5
+
6
+ module CodebaseIndex
7
+ module Generators
8
# Rails generator that creates a migration for CodebaseIndex tables.
#
# Usage:
#   rails generate codebase_index:install
#
# The generated migration creates the codebase_units, codebase_edges, and
# codebase_embeddings tables. Works with PostgreSQL, MySQL, and SQLite.
class InstallGenerator < Rails::Generators::Base
  include ActiveRecord::Generators::Migration

  source_root File.expand_path('templates', __dir__)

  desc 'Creates a migration for CodebaseIndex tables (units, edges, embeddings)'

  # Render the table-creation template into the host app's db/migrate.
  #
  # @return [void]
  def create_migration_file
    template_name = 'create_codebase_index_tables.rb.erb'
    destination = 'db/migrate/create_codebase_index_tables.rb'
    migration_template(template_name, destination)
  end
end
31
+ end
32
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
+ require 'rails/generators/active_record'
5
+
6
+ module CodebaseIndex
7
+ module Generators
8
# Rails generator that adds pgvector support to CodebaseIndex.
#
# Requires the pgvector PostgreSQL extension. Adds a native vector column
# and HNSW index to the codebase_embeddings table.
#
# Usage:
#   rails generate codebase_index:pgvector
#   rails generate codebase_index:pgvector --dimensions 3072
#
# NOTE(review): pgvector documents a 2,000-dimension ceiling for HNSW indexes
# on `vector` columns, so the 3072 example may need the generated index
# adjusted — confirm against the installed pgvector version.
class PgvectorGenerator < Rails::Generators::Base
  include ActiveRecord::Generators::Migration

  source_root File.expand_path('templates', __dir__)

  desc 'Adds pgvector native vector column and HNSW index to codebase_embeddings'

  class_option :dimensions, type: :numeric, default: 1536,
                            desc: 'Vector dimensions (1536 for text-embedding-3-small, 3072 for large)'

  # Render the pgvector migration template into the host app's db/migrate.
  # The validated dimension count is exposed to the template as @dimensions.
  #
  # @return [void]
  # @raise [Thor::Error] if --dimensions is not a positive whole number
  def create_migration_file
    @dimensions = validated_dimensions
    migration_template(
      'add_pgvector_to_codebase_index.rb.erb',
      'db/migrate/add_pgvector_to_codebase_index.rb'
    )
  end

  private

  # Thor's :numeric type yields a Float for input such as "1536.5" and accepts
  # zero or negative numbers; none of those is a usable column dimension, so
  # reject them here instead of emitting a migration that cannot run.
  #
  # @return [Integer] The requested dimension count
  def validated_dimensions
    requested = options[:dimensions]
    valid = requested.is_a?(Numeric) && requested.positive? && requested == requested.to_i
    raise Thor::Error, "--dimensions must be a positive integer, got #{requested.inspect}" unless valid

    requested.to_i
  end
end
36
+ end
37
+ end
@@ -0,0 +1,15 @@
1
+ class AddPgvectorToCodebaseIndex < ActiveRecord::Migration[7.0]
2
+ def change
3
+ enable_extension 'vector' unless extension_enabled?('vector')
4
+
5
+ add_column :codebase_embeddings, :embedding_vector, :vector,
6
+ limit: <%= @dimensions || 1536 %>, null: true
7
+
8
+ # HNSW index for fast approximate nearest neighbor search
9
+ # Using cosine distance operator (vector_cosine_ops)
10
+ add_index :codebase_embeddings, :embedding_vector,
11
+ using: :hnsw,
12
+ opclass: :vector_cosine_ops,
13
+ name: 'idx_codebase_embeddings_vector_hnsw'
14
+ end
15
+ end
@@ -0,0 +1,43 @@
1
+ class CreateCodebaseIndexTables < ActiveRecord::Migration[7.0]
2
+ def change
3
+ create_table :codebase_units do |t|
4
+ t.string :unit_type, null: false
5
+ t.string :identifier, null: false
6
+ t.string :namespace
7
+ t.string :file_path, null: false
8
+ t.text :source_code
9
+ t.string :source_hash
10
+ t.json :metadata
11
+
12
+ t.timestamps
13
+ end
14
+
15
+ add_index :codebase_units, :unit_type
16
+ add_index :codebase_units, :identifier, unique: true
17
+ add_index :codebase_units, :file_path
18
+
19
+ create_table :codebase_edges do |t|
20
+ t.references :source, null: false, foreign_key: { to_table: :codebase_units }
21
+ t.references :target, null: false, foreign_key: { to_table: :codebase_units }
22
+ t.string :relationship, null: false
23
+ t.string :via
24
+
25
+ t.datetime :created_at, null: false
26
+ end
27
+
28
+ add_index :codebase_edges, [:source_id, :target_id, :relationship], unique: true,
29
+ name: 'idx_codebase_edges_unique'
30
+
31
+ create_table :codebase_embeddings do |t|
32
+ t.references :unit, null: false, foreign_key: { to_table: :codebase_units }
33
+ t.string :chunk_type
34
+ t.text :embedding, null: false
35
+ t.string :content_hash, null: false
36
+ t.integer :dimensions, null: false
37
+
38
+ t.datetime :created_at, null: false
39
+ end
40
+
41
+ add_index :codebase_embeddings, :content_hash
42
+ end
43
+ end