codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
@@ -1,196 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'json'
4
-
5
- module CodebaseIndex
6
- module Storage
7
- # MetadataStore provides an interface for storing and querying unit metadata.
8
- #
9
- # All metadata store adapters must include the {Interface} module and implement
10
- # its methods. The {SQLite} adapter is provided for local persistence.
11
- #
12
- # @example Using the SQLite adapter
13
- # store = CodebaseIndex::Storage::MetadataStore::SQLite.new(":memory:")
14
- # store.store("User", { type: "model", file_path: "app/models/user.rb" })
15
- # store.find("User")
16
- #
17
module MetadataStore
  # Interface that all metadata store adapters must implement.
  #
  # Every method except {#find_batch} raises NotImplementedError until the
  # including adapter overrides it; {#find_batch} has a generic fallback
  # built on {#find}.
  module Interface
    # Store or update metadata for a unit.
    #
    # @param id [String] Unique identifier for the unit
    # @param metadata [Hash] Metadata to store
    # @raise [NotImplementedError] if not implemented by adapter
    def store(id, metadata)
      raise NotImplementedError, "#{self.class} must implement #store"
    end

    # Find a unit by ID.
    #
    # @param id [String] The identifier to look up
    # @return [Hash, nil] The stored metadata, or nil if not found
    # @raise [NotImplementedError] if not implemented by adapter
    def find(id)
      raise NotImplementedError, "#{self.class} must implement #find"
    end

    # Find multiple units by IDs.
    #
    # Default implementation issues one {#find} call per id and keeps only
    # the ids that resolved. Adapters should override it with a batched query.
    #
    # @param ids [Array<String>] The identifiers to look up
    # @return [Hash<String, Hash>] Map of id => metadata for found units
    def find_batch(ids)
      ids.each_with_object({}) do |id, result|
        data = find(id)
        result[id] = data if data
      end
    end

    # Find all units of a given type.
    #
    # @param type [String] The unit type to filter by
    # @return [Array<Hash>] Matching metadata records
    # @raise [NotImplementedError] if not implemented by adapter
    def find_by_type(type)
      raise NotImplementedError, "#{self.class} must implement #find_by_type"
    end

    # Search metadata by text query across specified fields.
    #
    # @param query [String] Text to search for
    # @param fields [Array<String>, nil] Specific fields to search (nil = all)
    # @return [Array<Hash>] Matching metadata records
    # @raise [NotImplementedError] if not implemented by adapter
    def search(query, fields: nil)
      raise NotImplementedError, "#{self.class} must implement #search"
    end

    # Delete a unit by ID.
    #
    # @param id [String] The identifier to delete
    # @raise [NotImplementedError] if not implemented by adapter
    def delete(id)
      raise NotImplementedError, "#{self.class} must implement #delete"
    end

    # Return the total number of stored units.
    #
    # @return [Integer] Total count
    # @raise [NotImplementedError] if not implemented by adapter
    def count
      raise NotImplementedError, "#{self.class} must implement #count"
    end
  end

  # SQLite-backed metadata store using the JSON1 extension.
  #
  # Each unit is one row of the +units+ table: the metadata hash is stored as
  # JSON text in +data+, and its type is duplicated into an indexed +type+
  # column. {#store} is an upsert keyed on +id+.
  #
  # @example
  #   store = SQLite.new(":memory:")
  #   store.store("User", { type: "model", namespace: "Admin" })
  #   store.find("User") # => { "type" => "model", "namespace" => "Admin" }
  #
  class SQLite
    include Interface

    # Maximum ids bound into a single IN (...) list. Kept well below SQLite's
    # per-statement bound-parameter limit so large batches cannot fail.
    MAX_BOUND_IDS = 500

    # LIKE predicate with an explicit escape character, so that "%" and "_"
    # inside a search term are matched literally.
    LIKE_PREDICATE = "LIKE ? ESCAPE '\\'"

    # @param db_path [String] Path to the SQLite database file, or ":memory:" for in-memory
    def initialize(db_path = ':memory:')
      require 'sqlite3'
      # Time#iso8601 (used by #store) comes from the 'time' stdlib extension
      # on Rubies where it is not built in.
      require 'time'
      @db = ::SQLite3::Database.new(db_path)
      @db.results_as_hash = true
      create_table
    end

    # Insert the unit, or overwrite type/data/updated_at when the id exists.
    #
    # The type is read from the +:type+ or +"type"+ key and stored via +to_s+,
    # so a missing type becomes an empty string.
    #
    # @see Interface#store
    def store(id, metadata)
      type = metadata[:type] || metadata['type']
      data = JSON.generate(metadata)

      @db.execute(<<~SQL, [id, type.to_s, data, Time.now.iso8601])
        INSERT INTO units (id, type, data, updated_at) VALUES (?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
          type = excluded.type, data = excluded.data, updated_at = excluded.updated_at
      SQL
    end

    # @see Interface#find
    def find(id)
      row = @db.get_first_row('SELECT data FROM units WHERE id = ?', [id])
      return nil unless row

      JSON.parse(row['data'])
    end

    # Look up many ids with one query per slice of {MAX_BOUND_IDS} ids.
    #
    # Unlike {#find_by_type} and {#search}, the returned hashes do not get an
    # 'id' key injected; the id is the key of the result map.
    #
    # @see Interface#find_batch
    def find_batch(ids)
      ids.uniq.each_slice(MAX_BOUND_IDS).each_with_object({}) do |slice, found|
        placeholders = Array.new(slice.size, '?').join(', ')
        rows = @db.execute("SELECT id, data FROM units WHERE id IN (#{placeholders})", slice)
        rows.each { |row| found[row['id']] = JSON.parse(row['data']) }
      end
    end

    # @see Interface#find_by_type
    def find_by_type(type)
      rows = @db.execute('SELECT id, data FROM units WHERE type = ?', [type.to_s])
      rows.map { |row| parse_row(row) }
    end

    # Substring search over the raw JSON text, or over selected top-level fields.
    #
    # Field names are sent as bound JSON paths, never spliced into the SQL
    # text, so a hostile field name cannot alter the statement. An empty
    # +fields+ array names nothing to search and therefore matches nothing.
    #
    # @param query [String] Text to search for (matched literally)
    # @param fields [Array<String>, nil] Fields to search (nil = whole JSON document)
    # @return [Array<Hash>] Matching metadata records, each with an 'id' key
    # @see Interface#search
    def search(query, fields: nil)
      pattern = "%#{escape_like(query)}%"

      rows =
        if fields.nil?
          @db.execute("SELECT id, data FROM units WHERE data #{LIKE_PREDICATE}", [pattern])
        elsif fields.empty?
          []
        else
          conditions = Array.new(fields.size, "json_extract(data, ?) #{LIKE_PREDICATE}").join(' OR ')
          params = fields.flat_map { |field| ["$.#{field}", pattern] }
          @db.execute("SELECT id, data FROM units WHERE #{conditions}", params)
        end

      rows.map { |row| parse_row(row) }
    end

    # @see Interface#delete
    def delete(id)
      @db.execute('DELETE FROM units WHERE id = ?', [id])
    end

    # @see Interface#count
    def count
      @db.get_first_value('SELECT COUNT(*) FROM units')
    end

    private

    # Escape LIKE wildcards (and the escape character itself) in a search term.
    #
    # @param text [#to_s] Raw search term
    # @return [String] Term safe to embed between "%" wildcards
    def escape_like(text)
      text.to_s.gsub(/[\\%_]/) { |char| "\\#{char}" }
    end

    # Parse a database row into a metadata hash with the id field injected.
    #
    # @param row [Hash] Database row with 'id' and 'data' keys
    # @return [Hash] Parsed metadata with 'id' key set
    def parse_row(row)
      parsed = JSON.parse(row['data'])
      parsed['id'] = row['id']
      parsed
    end

    # Create the units table and its type index if they don't exist.
    def create_table
      @db.execute(<<~SQL)
        CREATE TABLE IF NOT EXISTS units (
          id TEXT PRIMARY KEY,
          type TEXT,
          data JSON,
          updated_at TEXT
        )
      SQL
      @db.execute('CREATE INDEX IF NOT EXISTS idx_units_type ON units(type)')
    end
  end
end
195
- end
196
- end
@@ -1,195 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'json'
4
- require_relative 'vector_store'
5
-
6
- module CodebaseIndex
7
- module Storage
8
- module VectorStore
9
- # PostgreSQL + pgvector adapter for vector storage and similarity search.
10
- #
11
- # Uses the pgvector extension for efficient approximate nearest neighbor
12
- # search with HNSW indexing. Stores metadata as JSONB for flexible filtering.
13
- #
14
- # @example
15
- # store = Pgvector.new(connection: ActiveRecord::Base.connection, dimensions: 768)
16
- # store.ensure_schema!
17
- # store.store("User", [0.1, 0.2, ...], { type: "model" })
18
- # results = store.search([0.1, 0.2, ...], limit: 5, filters: { type: "model" })
19
- #
20
class Pgvector # rubocop:disable Metrics/ClassLength
  include Interface

  TABLE = 'codebase_index_vectors'

  # @param connection [Object] Database connection responding to +execute+ and +quote+
  #   (e.g. an ActiveRecord connection)
  # @param dimensions [Integer] Size of the embedding vectors
  def initialize(connection:, dimensions:)
    @connection = connection
    @dimensions = dimensions
  end

  # Create the pgvector extension, vectors table, and HNSW index.
  #
  # Safe to call multiple times (uses IF NOT EXISTS). The configured
  # dimension count is coerced to a positive Integer before it is written
  # into the DDL, because it cannot be passed as a quoted value.
  #
  # @raise [ArgumentError, TypeError] if the configured dimensions are not a positive whole number
  def ensure_schema!
    dimensions = Integer(@dimensions)
    raise ArgumentError, "dimensions must be positive, got #{dimensions}" unless dimensions.positive?

    @connection.execute('CREATE EXTENSION IF NOT EXISTS vector')
    @connection.execute(<<~SQL)
      CREATE TABLE IF NOT EXISTS #{TABLE} (
        id TEXT PRIMARY KEY,
        embedding vector(#{dimensions}),
        metadata JSONB DEFAULT '{}',
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      )
    SQL
    @connection.execute(<<~SQL)
      CREATE INDEX IF NOT EXISTS idx_#{TABLE}_embedding_hnsw
      ON #{TABLE} USING hnsw (embedding vector_cosine_ops)
    SQL
  end

  # Store or update a vector with metadata.
  #
  # @param id [String] Unique identifier
  # @param vector [Array<Float>] The embedding vector
  # @param metadata [Hash] Optional metadata
  # @raise [ArgumentError] if the vector contains a non-numeric or non-finite element
  # @see Interface#store
  def store(id, vector, metadata = {})
    validate_vector!(vector)
    entry = format_entry(id, vector, metadata)

    @connection.execute(<<~SQL)
      INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
      VALUES #{entry}
      ON CONFLICT (id) DO UPDATE SET
        embedding = EXCLUDED.embedding,
        metadata = EXCLUDED.metadata,
        created_at = CURRENT_TIMESTAMP
    SQL
  end

  # Store multiple vectors in a single multi-row INSERT.
  #
  # When the same id occurs more than once, only its last entry is sent:
  # PostgreSQL rejects an ON CONFLICT DO UPDATE statement that would touch
  # one row twice, and "last one wins" is what sequential {#store} calls
  # would have produced.
  #
  # @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys
  # @raise [ArgumentError] if any vector contains a non-numeric or non-finite element
  def store_batch(entries)
    return if entries.empty?

    latest_by_id = entries.each_with_object({}) { |entry, seen| seen[entry[:id].to_s] = entry }

    values = latest_by_id.each_value.map do |entry|
      validate_vector!(entry[:vector])
      format_entry(entry[:id], entry[:vector], entry[:metadata] || {})
    end

    @connection.execute(<<~SQL)
      INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
      VALUES #{values.join(",\n")}
      ON CONFLICT (id) DO UPDATE SET
        embedding = EXCLUDED.embedding,
        metadata = EXCLUDED.metadata,
        created_at = CURRENT_TIMESTAMP
    SQL
  end

  # Search for similar vectors using cosine distance (the +<=>+ operator).
  #
  # @param query_vector [Array<Float>] The query embedding
  # @param limit [Integer] Maximum results to return
  # @param filters [Hash] Metadata key-value filters, compared as text
  # @return [Array<SearchResult>] Results ordered by ascending distance
  # @raise [ArgumentError] on an invalid vector element or filter key
  # @see Interface#search
  def search(query_vector, limit: 10, filters: {})
    validate_vector!(query_vector)
    vector_literal = "[#{query_vector.join(',')}]"
    where_clause = build_where(filters)

    sql = <<~SQL
      SELECT id, embedding <=> '#{vector_literal}' AS distance, metadata
      FROM #{TABLE}
      #{where_clause}
      ORDER BY distance ASC
      LIMIT #{limit.to_i}
    SQL

    rows = @connection.execute(sql)
    rows.map { |row| row_to_result(row) }
  end

  # @see Interface#delete
  def delete(id)
    quoted_id = @connection.quote(id)
    @connection.execute("DELETE FROM #{TABLE} WHERE id = #{quoted_id}")
  end

  # Delete every row whose metadata matches all of the given filters.
  #
  # NOTE(review): with an empty filter hash the WHERE clause is empty and
  # the statement deletes every row in the table. Confirm callers rely on
  # that before adding a guard.
  #
  # @see Interface#delete_by_filter
  def delete_by_filter(filters)
    where_clause = build_where(filters)
    @connection.execute("DELETE FROM #{TABLE} #{where_clause}")
  end

  # @see Interface#count
  def count
    result = @connection.execute("SELECT COUNT(*) AS count FROM #{TABLE}")
    result.first['count'].to_i
  end

  private

  # Format a single entry as a SQL VALUES tuple.
  #
  # The id and the JSON metadata go through the connection's +quote+. The
  # vector is written as a bracketed literal, which is only safe because
  # callers run {#validate_vector!} first.
  #
  # @param id [String] Unique identifier
  # @param vector [Array<Float>] Embedding vector
  # @param metadata [Hash] Entry metadata
  # @return [String] SQL values row literal
  def format_entry(id, vector, metadata)
    quoted_id = @connection.quote(id)
    quoted_metadata = @connection.quote(JSON.generate(metadata))
    vector_literal = "[#{vector.join(',')}]"
    "(#{quoted_id}, '#{vector_literal}', #{quoted_metadata}::jsonb, CURRENT_TIMESTAMP)"
  end

  # Convert a database row to a SearchResult.
  #
  # The score is 1.0 minus the reported distance. Metadata is JSON-parsed
  # only when the driver returned it as a String.
  #
  # @param row [Hash] Database row with id, distance, metadata
  # @return [SearchResult]
  def row_to_result(row)
    metadata = row['metadata']
    parsed_metadata = metadata.is_a?(String) ? JSON.parse(metadata) : metadata
    SearchResult.new(
      id: row['id'],
      score: 1.0 - row['distance'].to_f,
      metadata: parsed_metadata
    )
  end

  # Build a WHERE clause from metadata filters.
  #
  # Keys are restricted to plain identifiers because they are written into
  # the SQL text; values are stringified and quoted by the connection.
  #
  # @param filters [Hash] Metadata key-value pairs
  # @return [String] SQL WHERE clause, or empty string if no filters
  # @raise [ArgumentError] if a key is not a plain identifier
  def build_where(filters)
    return '' if filters.empty?

    conditions = filters.map do |key, value|
      key_s = key.to_s
      unless key_s.match?(/\A[a-zA-Z_][a-zA-Z0-9_]*\z/)
        raise ArgumentError, "Invalid filter key: #{key_s.inspect}"
      end

      "metadata->>'#{key_s}' = #{@connection.quote(value.to_s)}"
    end
    "WHERE #{conditions.join(' AND ')}"
  end

  # Validate that every vector element is a finite real number.
  #
  # Elements are later joined straight into a SQL literal, so anything that
  # is not a plain number is refused here. NaN, Infinity and Complex values
  # are Numeric but cannot be stored in a vector column, so they are
  # rejected up front instead of failing inside the database.
  #
  # @param vector [Array] The vector to validate
  # @raise [ArgumentError] if any element is not numeric or not finite
  def validate_vector!(vector)
    vector.each_with_index do |element, i|
      unless element.is_a?(Numeric)
        raise ArgumentError, "Vector element at index #{i} is not numeric: #{element.inspect}"
      end

      next if element.real? && element.finite?

      raise ArgumentError, "Vector element at index #{i} is not a finite real number: #{element.inspect}"
    end
  end
end
193
- end
194
- end
195
- end
@@ -1,205 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'net/http'
4
- require 'json'
5
- require 'uri'
6
- require_relative 'vector_store'
7
-
8
- module CodebaseIndex
9
- module Storage
10
- module VectorStore
11
- # Qdrant adapter for vector storage and similarity search via HTTP API.
12
- #
13
- # Communicates with a Qdrant instance over HTTP. Supports optional API key
14
- # authentication for managed/cloud deployments.
15
- #
16
- # @example
17
- # store = Qdrant.new(url: "http://localhost:6333", collection: "codebase")
18
- # store.ensure_collection!(dimensions: 768)
19
- # store.store("User", [0.1, 0.2, ...], { type: "model" })
20
- # results = store.search([0.1, 0.2, ...], limit: 5)
21
- #
22
class Qdrant # rubocop:disable Metrics/ClassLength
  include Interface

  # Transport-level failures that indicate a dropped or unusable connection
  # and are worth one retry on a fresh connection.
  RETRYABLE_ERRORS = [Errno::ECONNRESET, Net::OpenTimeout, IOError].freeze

  # @param url [String] Qdrant server URL; a path component is kept as a prefix
  #   for every API path (e.g. when Qdrant sits behind a reverse proxy)
  # @param collection [String] Collection name
  # @param api_key [String, nil] Optional API key for authentication
  def initialize(url:, collection:, api_key: nil)
    @url = url
    @collection = collection
    @api_key = api_key
    @uri = URI(url)
  end

  # Create the collection if it doesn't exist.
  #
  # Checks for the collection first, because asking Qdrant to create a
  # collection that already exists is answered with an error. Returns nil
  # without touching an existing collection.
  #
  # @param dimensions [Integer] Vector dimensionality
  # @return [Hash, nil] Parsed create response, or nil when the collection was already there
  # @raise [CodebaseIndex::Error] if the API returns an unexpected status
  def ensure_collection!(dimensions:)
    return if collection_exists?

    body = {
      vectors: {
        size: dimensions,
        distance: 'Cosine'
      }
    }
    request(:put, "/collections/#{@collection}", body)
  end

  # Store or update a vector with metadata payload.
  #
  # NOTE(review): Qdrant documents point ids as unsigned integers or UUIDs;
  # an arbitrary string id may be rejected by the server. Confirm what the
  # callers pass as +id+.
  #
  # @param id [String] Unique identifier
  # @param vector [Array<Float>] The embedding vector
  # @param metadata [Hash] Optional payload metadata
  # @see Interface#store
  def store(id, vector, metadata = {})
    body = {
      points: [
        {
          id: id,
          vector: vector,
          payload: metadata
        }
      ]
    }
    request(:put, "/collections/#{@collection}/points", body)
  end

  # Store multiple vectors in a single batch upsert request.
  #
  # Sends the entire entries array in one HTTP call; this method does no
  # chunking of its own, so callers should pass reasonably sized batches.
  #
  # @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys
  def store_batch(entries)
    return if entries.empty?

    body = {
      points: entries.map do |entry|
        { id: entry[:id], vector: entry[:vector], payload: entry[:metadata] || {} }
      end
    }
    request(:put, "/collections/#{@collection}/points", body)
  end

  # Search for similar vectors.
  #
  # @param query_vector [Array<Float>] The query embedding
  # @param limit [Integer] Maximum results to return
  # @param filters [Hash] Metadata key-value filters (all must match)
  # @return [Array<SearchResult>] Hits in the order returned by the server
  # @see Interface#search
  def search(query_vector, limit: 10, filters: {})
    body = {
      vector: query_vector,
      limit: limit,
      with_payload: true
    }
    body[:filter] = build_filter(filters) unless filters.empty?

    response = request(:post, "/collections/#{@collection}/points/search", body)
    results = response['result'] || []

    results.map do |hit|
      SearchResult.new(
        id: hit['id'],
        score: hit['score'],
        metadata: hit['payload']
      )
    end
  end

  # @see Interface#delete
  def delete(id)
    body = { points: [id] }
    request(:post, "/collections/#{@collection}/points/delete", body)
  end

  # Delete all points matching every given filter.
  #
  # NOTE(review): an empty filter hash is sent as an empty +must+ list;
  # confirm how the server treats that before relying on it.
  #
  # @see Interface#delete_by_filter
  def delete_by_filter(filters)
    body = { filter: build_filter(filters) }
    request(:post, "/collections/#{@collection}/points/delete", body)
  end

  # @see Interface#count
  def count
    response = request(:post, "/collections/#{@collection}/points/count", { exact: true })
    response['result']['count']
  end

  private

  # Ask the server whether the configured collection exists.
  #
  # @return [Boolean] true on a success status, false on 404
  # @raise [CodebaseIndex::Error] on any other status
  def collection_exists?
    response = perform(build_request(:get, "/collections/#{@collection}", nil))
    return true if response.is_a?(Net::HTTPSuccess)
    return false if response.is_a?(Net::HTTPNotFound)

    raise CodebaseIndex::Error, "Qdrant API error: #{response.code} #{response.body}"
  end

  # Build a Qdrant filter from metadata key-value pairs.
  #
  # @param filters [Hash] Metadata filters
  # @return [Hash] Qdrant-compatible filter with must conditions
  def build_filter(filters)
    conditions = filters.map do |key, value|
      { key: key.to_s, match: { value: value } }
    end
    { must: conditions }
  end

  # Send an HTTP request to the Qdrant API and parse the JSON response.
  #
  # @param method [Symbol] HTTP method (:get, :post, :put, :delete)
  # @param path [String] API path
  # @param body [Hash, nil] Request body
  # @return [Hash] Parsed JSON response
  # @raise [CodebaseIndex::Error] if the API returns a non-success status
  def request(method, path, body = nil)
    response = perform(build_request(method, path, body))

    unless response.is_a?(Net::HTTPSuccess)
      raise CodebaseIndex::Error, "Qdrant API error: #{response.code} #{response.body}"
    end

    JSON.parse(response.body)
  end

  # Execute a prepared request, retrying once on a fresh connection when the
  # current one turns out to be dropped or unusable.
  #
  # @param req [Net::HTTPRequest] Request to send
  # @return [Net::HTTPResponse]
  def perform(req)
    retried = false
    begin
      http_client.request(req)
    rescue *RETRYABLE_ERRORS
      reset_http_client
      raise if retried

      retried = true
      retry
    end
  end

  # Drop the cached client, closing its socket first so a broken
  # connection is not leaked.
  def reset_http_client
    stale = @http_client
    @http_client = nil
    stale.finish if stale&.started?
  rescue IOError
    # The socket was already closed underneath us; nothing left to release.
    nil
  end

  # Return a reusable, started HTTP client for the Qdrant server.
  # Calling http.start opens a persistent TCP connection so
  # keep_alive_timeout actually takes effect across requests.
  #
  # @return [Net::HTTP]
  def http_client
    return @http_client if @http_client&.started?

    http = Net::HTTP.new(@uri.host, @uri.port)
    http.use_ssl = @uri.scheme == 'https'
    http.open_timeout = 10
    http.read_timeout = 30
    http.keep_alive_timeout = 30
    http.start
    @http_client = http
  end

  # Build an HTTP request with headers and body.
  #
  # Any path component of the configured URL (minus a trailing slash) is
  # prepended to the API path.
  #
  # @param method [Symbol] HTTP method
  # @param path [String] API path
  # @param body [Hash, nil] Request body
  # @return [Net::HTTPRequest]
  # @raise [KeyError] if the method is not one of :get, :post, :put, :delete
  def build_request(method, path, body)
    request_class = { get: Net::HTTP::Get, post: Net::HTTP::Post,
                      put: Net::HTTP::Put, delete: Net::HTTP::Delete }.fetch(method)
    prefix = @uri.path.to_s.chomp('/')
    req = request_class.new("#{prefix}#{path}", 'Content-Type' => 'application/json')
    req['api-key'] = @api_key if @api_key
    req.body = body.to_json if body
    req
  end
end
203
- end
204
- end
205
- end