codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../dependency_graph'
4
+
5
+ module CodebaseIndex
6
+ module Storage
7
# GraphStore defines the contract for querying relationships between code units.
#
# Every graph store adapter includes the {Interface} module and overrides its
# methods. The {Memory} adapter wraps the existing {DependencyGraph}.
#
# @example Using the memory adapter
#   store = CodebaseIndex::Storage::GraphStore::Memory.new
#   store.register(unit)
#   store.dependencies_of("User")
#
module GraphStore
  # Contract that all graph store adapters must implement.
  #
  # Until an adapter overrides a method, calling it raises NotImplementedError
  # with a message naming the adapter class and the missing method, so a
  # half-finished adapter is easy to diagnose.
  module Interface
    # Get direct dependencies of a unit.
    #
    # @param identifier [String] Unit identifier
    # @return [Array<String>] List of dependency identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def dependencies_of(identifier)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Get direct dependents of a unit (reverse dependencies).
    #
    # @param identifier [String] Unit identifier
    # @return [Array<String>] List of dependent identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def dependents_of(identifier)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Find all units transitively affected by changes to the given files.
    #
    # @param changed_files [Array<String>] List of changed file paths
    # @param max_depth [Integer, nil] Maximum traversal depth (nil for unlimited)
    # @return [Array<String>] List of affected unit identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def affected_by(changed_files, max_depth: nil)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Get all units of a specific type.
    #
    # @param type [Symbol] Unit type (:model, :controller, etc.)
    # @return [Array<String>] List of unit identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def by_type(type)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Compute PageRank importance scores for all units.
    #
    # @param damping [Float] Damping factor (default: 0.85)
    # @param iterations [Integer] Number of iterations (default: 20)
    # @return [Hash<String, Float>] Identifier => PageRank score
    # @raise [NotImplementedError] if not implemented by adapter
    def pagerank(damping: 0.85, iterations: 20)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end
  end

  # In-memory graph store wrapping the existing DependencyGraph.
  #
  # Every query is forwarded unchanged to the wrapped graph object; this class
  # only adapts the graph to the {Interface} method names.
  #
  # @example
  #   store = Memory.new
  #   store.register(user_unit)
  #   store.dependencies_of("User") # => ["Organization"]
  #
  class Memory
    include Interface

    # @param graph [DependencyGraph, nil] Existing graph to wrap, or nil to create a new one
    def initialize(graph = nil)
      @graph = graph || DependencyGraph.new
    end

    # Register a unit in the graph.
    #
    # @param unit [ExtractedUnit] The unit to register
    # @return [Object] whatever the wrapped graph's #register returns
    def register(unit)
      @graph.register(unit)
    end

    # @see Interface#dependencies_of
    def dependencies_of(identifier)
      @graph.dependencies_of(identifier)
    end

    # @see Interface#dependents_of
    def dependents_of(identifier)
      @graph.dependents_of(identifier)
    end

    # @see Interface#affected_by
    def affected_by(changed_files, max_depth: nil)
      @graph.affected_by(changed_files, max_depth: max_depth)
    end

    # The wrapped graph exposes this query as #units_of_type.
    #
    # @see Interface#by_type
    def by_type(type)
      @graph.units_of_type(type)
    end

    # @see Interface#pagerank
    def pagerank(damping: 0.85, iterations: 20)
      @graph.pagerank(damping: damping, iterations: iterations)
    end
  end
end
119
+ end
120
+ end
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module CodebaseIndex
6
+ module Storage
7
# MetadataStore defines the contract for storing and querying unit metadata.
#
# Every metadata store adapter includes the {Interface} module and overrides
# its methods. The {SQLite} adapter is provided for local persistence.
#
# @example Using the SQLite adapter
#   store = CodebaseIndex::Storage::MetadataStore::SQLite.new(":memory:")
#   store.store("User", { type: "model", file_path: "app/models/user.rb" })
#   store.find("User")
#
module MetadataStore
  # Contract that all metadata store adapters must implement.
  #
  # Until an adapter overrides a method, calling it raises NotImplementedError
  # with a message naming the adapter class and the missing method.
  module Interface
    # Store or update metadata for a unit.
    #
    # @param id [String] Unique identifier for the unit
    # @param metadata [Hash] Metadata to store
    # @raise [NotImplementedError] if not implemented by adapter
    def store(id, metadata)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Find a unit by ID.
    #
    # @param id [String] The identifier to look up
    # @return [Hash, nil] The stored metadata, or nil if not found
    # @raise [NotImplementedError] if not implemented by adapter
    def find(id)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Find all units of a given type.
    #
    # @param type [String] The unit type to filter by
    # @return [Array<Hash>] Matching metadata records
    # @raise [NotImplementedError] if not implemented by adapter
    def find_by_type(type)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Search metadata by text query across specified fields.
    #
    # @param query [String] Text to search for
    # @param fields [Array<String>, nil] Specific fields to search (nil = all)
    # @return [Array<Hash>] Matching metadata records
    # @raise [NotImplementedError] if not implemented by adapter
    def search(query, fields: nil)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Delete a unit by ID.
    #
    # @param id [String] The identifier to delete
    # @raise [NotImplementedError] if not implemented by adapter
    def delete(id)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Return the total number of stored units.
    #
    # @return [Integer] Total count
    # @raise [NotImplementedError] if not implemented by adapter
    def count
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end
  end

  # SQLite-backed metadata store using the JSON1 extension.
  #
  # Stores each unit's metadata as a JSON document in a single `units` table,
  # with the unit type copied into its own indexed column for filtering.
  # {#store} has upsert semantics.
  #
  # @example
  #   store = SQLite.new(":memory:")
  #   store.store("User", { type: "model", namespace: "Admin" })
  #   store.find("User") # => { "type" => "model", "namespace" => "Admin" }
  #
  class SQLite
    include Interface

    # @param db_path [String] Path to the SQLite database file, or ":memory:" for in-memory
    def initialize(db_path = ':memory:')
      # Loaded lazily so the gem does not hard-depend on sqlite3.
      require 'sqlite3'
      # Time#iso8601 (used by #store) lives in the `time` stdlib before Ruby 3.4.
      require 'time'
      @db = ::SQLite3::Database.new(db_path)
      @db.results_as_hash = true
      create_table
    end

    # Insert the metadata for +id+, or replace it if the id already exists.
    #
    # The type is read from the :type or "type" key and stored as a string
    # (an absent type is stored as an empty string).
    #
    # @param id [String] Unique identifier for the unit
    # @param metadata [Hash] Metadata to serialize as JSON
    # @see Interface#store
    def store(id, metadata)
      type = metadata[:type] || metadata['type']
      data = JSON.generate(metadata)

      @db.execute(<<~SQL, [id, type.to_s, data, Time.now.iso8601])
        INSERT INTO units (id, type, data, updated_at) VALUES (?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
          type = excluded.type, data = excluded.data, updated_at = excluded.updated_at
      SQL
    end

    # @param id [String] The identifier to look up
    # @return [Hash, nil] Parsed metadata (string keys), or nil if not found
    # @see Interface#find
    def find(id)
      row = @db.get_first_row('SELECT data FROM units WHERE id = ?', [id])
      return nil unless row

      JSON.parse(row['data'])
    end

    # @param type [String, Symbol] Unit type to filter by (compared as a string)
    # @return [Array<Hash>] Parsed metadata, each with the unit id under "id"
    # @see Interface#find_by_type
    def find_by_type(type)
      rows_to_records(@db.execute('SELECT id, data FROM units WHERE type = ?', [type.to_s]))
    end

    # Substring search over the stored JSON.
    #
    # With +fields+, each named field is extracted with json_extract and
    # matched with LIKE; a record matches if any field matches. Field names
    # are passed as bound JSON-path parameters, never interpolated into the
    # SQL text. With nil or an empty list, the raw JSON document is searched.
    #
    # @param query [String] Text to search for
    # @param fields [Array<String>, nil] Specific fields to search (nil or empty = all)
    # @return [Array<Hash>] Parsed metadata, each with the unit id under "id"
    # @see Interface#search
    def search(query, fields: nil)
      pattern = "%#{query}%"
      field_list = Array(fields)

      rows =
        if field_list.empty?
          @db.execute('SELECT id, data FROM units WHERE data LIKE ?', [pattern])
        else
          conditions = field_list.map { 'json_extract(data, ?) LIKE ?' }.join(' OR ')
          # Parameters alternate: JSON path, LIKE pattern, JSON path, ...
          params = field_list.flat_map { |field| ["$.#{field}", pattern] }
          @db.execute("SELECT id, data FROM units WHERE #{conditions}", params)
        end

      rows_to_records(rows)
    end

    # @see Interface#delete
    def delete(id)
      @db.execute('DELETE FROM units WHERE id = ?', [id])
    end

    # @see Interface#count
    def count
      @db.get_first_value('SELECT COUNT(*) FROM units')
    end

    private

    # Parse result rows into metadata hashes, adding each row's id under "id".
    #
    # @param rows [Array<Hash>] Rows with "id" and "data" columns
    # @return [Array<Hash>]
    def rows_to_records(rows)
      rows.map do |row|
        JSON.parse(row['data']).tap { |record| record['id'] = row['id'] }
      end
    end

    # Create the units table and its type index if they don't exist.
    def create_table
      @db.execute(<<~SQL)
        CREATE TABLE IF NOT EXISTS units (
          id TEXT PRIMARY KEY,
          type TEXT,
          data JSON,
          updated_at TEXT
        )
      SQL
      @db.execute('CREATE INDEX IF NOT EXISTS idx_units_type ON units(type)')
    end
  end
end
168
+ end
169
+ end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require_relative 'vector_store'
5
+
6
+ module CodebaseIndex
7
+ module Storage
8
+ module VectorStore
9
# PostgreSQL + pgvector adapter for vector storage and similarity search.
#
# Vectors live in the {TABLE} table with an HNSW index using cosine
# operators; metadata is stored as JSONB and filtered by top-level key.
# SQL is built as strings: ids, metadata and filter values go through the
# connection's #quote, filter keys are restricted to identifier characters,
# and vectors are validated as finite numbers before being rendered.
#
# @example
#   store = Pgvector.new(connection: ActiveRecord::Base.connection, dimensions: 768)
#   store.ensure_schema!
#   store.store("User", [0.1, 0.2, ...], { type: "model" })
#   results = store.search([0.1, 0.2, ...], limit: 5, filters: { type: "model" })
#
class Pgvector
  include Interface

  TABLE = 'codebase_index_vectors'

  # @param connection [Object] ActiveRecord database connection
  #   (must respond to #execute and #quote)
  # @param dimensions [Integer] Size of the embedding vectors
  def initialize(connection:, dimensions:)
    @connection = connection
    @dimensions = dimensions
  end

  # Create the pgvector extension, vectors table, and HNSW index.
  #
  # Safe to call multiple times (uses IF NOT EXISTS).
  #
  # @raise [ArgumentError, TypeError] if the configured dimensions cannot be
  #   converted to an Integer
  def ensure_schema!
    # Coerce before interpolating so only an integer can reach the DDL.
    dimensions = Integer(@dimensions)

    @connection.execute('CREATE EXTENSION IF NOT EXISTS vector')
    @connection.execute(<<~SQL)
      CREATE TABLE IF NOT EXISTS #{TABLE} (
        id TEXT PRIMARY KEY,
        embedding vector(#{dimensions}),
        metadata JSONB DEFAULT '{}',
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      )
    SQL
    @connection.execute(<<~SQL)
      CREATE INDEX IF NOT EXISTS idx_#{TABLE}_embedding_hnsw
      ON #{TABLE} USING hnsw (embedding vector_cosine_ops)
    SQL
  end

  # Store or update a vector with metadata (upsert on id).
  #
  # On conflict the embedding and metadata are replaced and created_at is
  # reset to the current timestamp.
  #
  # @param id [String] Unique identifier
  # @param vector [Array<Float>] The embedding vector
  # @param metadata [Hash] Optional metadata
  # @raise [ArgumentError] if the vector contains a non-numeric or non-finite element
  # @see Interface#store
  def store(id, vector, metadata = {})
    validate_vector!(vector)
    quoted_id = @connection.quote(id)
    quoted_metadata = @connection.quote(JSON.generate(metadata))

    @connection.execute(<<~SQL)
      INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
      VALUES (#{quoted_id}, '#{vector_literal(vector)}', #{quoted_metadata}::jsonb, CURRENT_TIMESTAMP)
      ON CONFLICT (id) DO UPDATE SET
        embedding = EXCLUDED.embedding,
        metadata = EXCLUDED.metadata,
        created_at = CURRENT_TIMESTAMP
    SQL
  end

  # Search for similar vectors using cosine distance (the `<=>` operator).
  #
  # @param query_vector [Array<Float>] The query embedding
  # @param limit [Integer] Maximum results to return
  # @param filters [Hash] Metadata key-value filters
  # @return [Array<SearchResult>] Results sorted by descending similarity
  # @raise [ArgumentError] if the vector or a filter key is invalid
  # @see Interface#search
  def search(query_vector, limit: 10, filters: {})
    validate_vector!(query_vector)

    sql = <<~SQL
      SELECT id, embedding <=> '#{vector_literal(query_vector)}' AS distance, metadata
      FROM #{TABLE}
      #{build_where(filters)}
      ORDER BY distance ASC
      LIMIT #{limit.to_i}
    SQL

    @connection.execute(sql).map { |row| row_to_result(row) }
  end

  # @see Interface#delete
  def delete(id)
    @connection.execute("DELETE FROM #{TABLE} WHERE id = #{@connection.quote(id)}")
  end

  # Delete every row whose metadata matches all of the given filters.
  #
  # NOTE: an empty filter hash yields no WHERE clause, so the statement
  # deletes every row in the table.
  #
  # @param filters [Hash] Metadata key-value filters
  # @raise [ArgumentError] if a filter key is not a plain identifier
  # @see Interface#delete_by_filter
  def delete_by_filter(filters)
    @connection.execute("DELETE FROM #{TABLE} #{build_where(filters)}")
  end

  # @return [Integer] Number of stored vectors
  # @see Interface#count
  def count
    result = @connection.execute("SELECT COUNT(*) AS count FROM #{TABLE}")
    result.first['count'].to_i
  end

  private

  # Render a validated vector in pgvector's text form, e.g. "[0.1,0.2]".
  #
  # @param vector [Array<Numeric>] Already validated by {#validate_vector!}
  # @return [String]
  def vector_literal(vector)
    "[#{vector.join(',')}]"
  end

  # Convert a database row to a SearchResult.
  #
  # The score is 1.0 minus the cosine distance. Metadata that comes back as
  # a String is parsed as JSON; anything else is passed through unchanged.
  #
  # @param row [Hash] Database row with id, distance, metadata
  # @return [SearchResult]
  def row_to_result(row)
    metadata = row['metadata']
    parsed_metadata = metadata.is_a?(String) ? JSON.parse(metadata) : metadata
    SearchResult.new(
      id: row['id'],
      score: 1.0 - row['distance'].to_f,
      metadata: parsed_metadata
    )
  end

  # Build a WHERE clause from metadata filters.
  #
  # Keys are interpolated into the SQL, so they must be plain identifiers;
  # values are compared as text (`metadata->>'key'`) and quoted by the
  # connection. Conditions are combined with AND.
  #
  # @param filters [Hash] Metadata key-value pairs
  # @return [String] SQL WHERE clause, or empty string if no filters
  # @raise [ArgumentError] if a key is not a plain identifier
  def build_where(filters)
    return '' if filters.empty?

    conditions = filters.map do |key, value|
      key_s = key.to_s
      unless key_s.match?(/\A[a-zA-Z_][a-zA-Z0-9_]*\z/)
        raise ArgumentError, "Invalid filter key: #{key_s.inspect}"
      end

      "metadata->>'#{key_s}' = #{@connection.quote(value.to_s)}"
    end
    "WHERE #{conditions.join(' AND ')}"
  end

  # Validate that all vector elements are finite numbers.
  #
  # Elements are interpolated into SQL by {#vector_literal}, so anything
  # that is not a number must be rejected; NaN and Infinity are rejected
  # too because they would render as "NaN"/"Infinity" in the literal.
  #
  # @param vector [Array] The vector to validate
  # @raise [ArgumentError] if any element is not numeric or not finite
  def validate_vector!(vector)
    vector.each_with_index do |element, i|
      unless element.is_a?(Numeric)
        raise ArgumentError, "Vector element at index #{i} is not numeric: #{element.inspect}"
      end
      next if element.finite?

      raise ArgumentError, "Vector element at index #{i} is not finite: #{element.inspect}"
    end
  end
end
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'json'
5
+ require 'uri'
6
+ require_relative 'vector_store'
7
+
8
+ module CodebaseIndex
9
+ module Storage
10
+ module VectorStore
11
# Qdrant adapter for vector storage and similarity search via HTTP API.
#
# Communicates with a Qdrant instance over HTTP using Net::HTTP. Supports
# optional API key authentication (sent as the `api-key` header) for
# managed/cloud deployments. A path prefix in the configured URL
# (e.g. "https://host/qdrant") is preserved on every request.
#
# NOTE(review): Qdrant only accepts unsigned integers or UUIDs as point ids,
# so identifiers such as "User" in the example below would be rejected by the
# API unless callers map them first — confirm how ids are produced upstream.
#
# @example
#   store = Qdrant.new(url: "http://localhost:6333", collection: "codebase")
#   store.ensure_collection!(dimensions: 768)
#   store.store("User", [0.1, 0.2, ...], { type: "model" })
#   results = store.search([0.1, 0.2, ...], limit: 5)
#
class Qdrant
  include Interface

  # Maps the HTTP method symbols accepted by {#request} to Net::HTTP classes.
  REQUEST_CLASSES = {
    get: Net::HTTP::Get,
    post: Net::HTTP::Post,
    put: Net::HTTP::Put,
    delete: Net::HTTP::Delete
  }.freeze
  private_constant :REQUEST_CLASSES

  # @param url [String] Qdrant server URL
  # @param collection [String] Collection name
  # @param api_key [String, nil] Optional API key for authentication
  def initialize(url:, collection:, api_key: nil)
    @url = url
    @collection = collection
    @api_key = api_key
    @uri = URI(url)
  end

  # Create the collection (cosine distance) if it doesn't exist.
  #
  # Looks the collection up first and does nothing when it is already
  # present, so the method is safe to call repeatedly.
  #
  # @param dimensions [Integer] Vector dimensionality
  # @return [Hash, nil] Parsed creation response, or nil if the collection existed
  # @raise [CodebaseIndex::Error] if the API returns an unexpected status
  def ensure_collection!(dimensions:)
    return if collection_exists?

    body = {
      vectors: {
        size: dimensions,
        distance: 'Cosine'
      }
    }
    request(:put, "/collections/#{@collection}", body)
  end

  # Store or update a vector with metadata payload.
  #
  # @param id [String] Unique identifier
  # @param vector [Array<Float>] The embedding vector
  # @param metadata [Hash] Optional payload metadata
  # @see Interface#store
  def store(id, vector, metadata = {})
    point = { id: id, vector: vector, payload: metadata }
    request(:put, "/collections/#{@collection}/points", { points: [point] })
  end

  # Search for similar vectors.
  #
  # The filter is only sent when at least one filter is given.
  #
  # @param query_vector [Array<Float>] The query embedding
  # @param limit [Integer] Maximum results to return
  # @param filters [Hash] Metadata key-value filters
  # @return [Array<SearchResult>] Results sorted by descending similarity
  # @see Interface#search
  def search(query_vector, limit: 10, filters: {})
    body = {
      vector: query_vector,
      limit: limit,
      with_payload: true
    }
    body[:filter] = build_filter(filters) unless filters.empty?

    response = request(:post, "/collections/#{@collection}/points/search", body)
    hits = response['result'] || []

    hits.map do |hit|
      SearchResult.new(id: hit['id'], score: hit['score'], metadata: hit['payload'])
    end
  end

  # @see Interface#delete
  def delete(id)
    request(:post, "/collections/#{@collection}/points/delete", { points: [id] })
  end

  # @see Interface#delete_by_filter
  def delete_by_filter(filters)
    request(:post, "/collections/#{@collection}/points/delete", { filter: build_filter(filters) })
  end

  # @return [Integer] Exact number of points in the collection
  # @see Interface#count
  def count
    response = request(:post, "/collections/#{@collection}/points/count", { exact: true })
    response['result']['count']
  end

  private

  # Check whether the configured collection is present on the server.
  #
  # @return [Boolean] true on a success response, false on 404
  # @raise [CodebaseIndex::Error] on any other status
  def collection_exists?
    response = perform(:get, "/collections/#{@collection}")
    return true if response.is_a?(Net::HTTPSuccess)
    return false if response.is_a?(Net::HTTPNotFound)

    raise_api_error(response)
  end

  # Build a Qdrant filter from metadata key-value pairs.
  #
  # Each pair becomes an exact-match condition; all of them go under `must`.
  #
  # @param filters [Hash] Metadata filters
  # @return [Hash] Qdrant-compatible filter with must conditions
  def build_filter(filters)
    conditions = filters.map do |key, value|
      { key: key.to_s, match: { value: value } }
    end
    { must: conditions }
  end

  # Send an HTTP request to the Qdrant API and parse the JSON response.
  #
  # @param method [Symbol] HTTP method (:get, :post, :put, :delete)
  # @param path [String] API path
  # @param body [Hash, nil] Request body
  # @return [Hash] Parsed JSON response
  # @raise [CodebaseIndex::Error] if the API returns a non-success status
  def request(method, path, body = nil)
    response = perform(method, path, body)
    raise_api_error(response) unless response.is_a?(Net::HTTPSuccess)

    JSON.parse(response.body)
  end

  # Send an HTTP request and return the raw response without status checks.
  #
  # @param method [Symbol] HTTP method
  # @param path [String] API path
  # @param body [Hash, nil] Request body
  # @return [Net::HTTPResponse]
  def perform(method, path, body = nil)
    build_http.request(build_request(method, path, body))
  end

  # @param response [Net::HTTPResponse] The failed response
  # @raise [CodebaseIndex::Error] always
  def raise_api_error(response)
    raise CodebaseIndex::Error, "Qdrant API error: #{response.code} #{response.body}"
  end

  # Build an HTTP client for the Qdrant server.
  #
  # TLS is enabled when the URL scheme is https; the connect timeout is
  # 10 seconds and the read timeout 30 seconds.
  #
  # @return [Net::HTTP]
  def build_http
    http = Net::HTTP.new(@uri.host, @uri.port)
    http.use_ssl = @uri.scheme == 'https'
    http.open_timeout = 10
    http.read_timeout = 30
    http
  end

  # Build an HTTP request with headers and body.
  #
  # The API path is appended to any path prefix of the configured URL, so a
  # server mounted under a sub-path is addressed correctly.
  #
  # @param method [Symbol] HTTP method
  # @param path [String] API path
  # @param body [Hash, nil] Request body
  # @return [Net::HTTPRequest]
  # @raise [KeyError] if the method is not one of :get, :post, :put, :delete
  def build_request(method, path, body)
    request_class = REQUEST_CLASSES.fetch(method)
    base_path = @uri.path.to_s.chomp('/')

    req = request_class.new("#{base_path}#{path}", 'Content-Type' => 'application/json')
    req['api-key'] = @api_key if @api_key
    req.body = body.to_json if body
    req
  end
end
170
+ end
171
+ end
172
+ end