codebase_index 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/codebase_index.rb +3 -243
- metadata +28 -223
- data/CHANGELOG.md +0 -89
- data/CODE_OF_CONDUCT.md +0 -83
- data/CONTRIBUTING.md +0 -65
- data/LICENSE.txt +0 -21
- data/README.md +0 -325
- data/exe/codebase-console +0 -59
- data/exe/codebase-console-mcp +0 -22
- data/exe/codebase-index-mcp +0 -34
- data/exe/codebase-index-mcp-http +0 -37
- data/exe/codebase-index-mcp-start +0 -58
- data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
- data/lib/codebase_index/ast/method_extractor.rb +0 -71
- data/lib/codebase_index/ast/node.rb +0 -116
- data/lib/codebase_index/ast/parser.rb +0 -614
- data/lib/codebase_index/ast.rb +0 -6
- data/lib/codebase_index/builder.rb +0 -200
- data/lib/codebase_index/cache/cache_middleware.rb +0 -199
- data/lib/codebase_index/cache/cache_store.rb +0 -264
- data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
- data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
- data/lib/codebase_index/chunking/chunk.rb +0 -84
- data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
- data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
- data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
- data/lib/codebase_index/console/audit_logger.rb +0 -75
- data/lib/codebase_index/console/bridge.rb +0 -177
- data/lib/codebase_index/console/confirmation.rb +0 -90
- data/lib/codebase_index/console/connection_manager.rb +0 -173
- data/lib/codebase_index/console/console_response_renderer.rb +0 -74
- data/lib/codebase_index/console/embedded_executor.rb +0 -373
- data/lib/codebase_index/console/model_validator.rb +0 -81
- data/lib/codebase_index/console/rack_middleware.rb +0 -87
- data/lib/codebase_index/console/safe_context.rb +0 -82
- data/lib/codebase_index/console/server.rb +0 -612
- data/lib/codebase_index/console/sql_validator.rb +0 -172
- data/lib/codebase_index/console/tools/tier1.rb +0 -118
- data/lib/codebase_index/console/tools/tier2.rb +0 -117
- data/lib/codebase_index/console/tools/tier3.rb +0 -110
- data/lib/codebase_index/console/tools/tier4.rb +0 -79
- data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
- data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
- data/lib/codebase_index/cost_model/estimator.rb +0 -128
- data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
- data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
- data/lib/codebase_index/cost_model.rb +0 -22
- data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
- data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
- data/lib/codebase_index/db/migrator.rb +0 -71
- data/lib/codebase_index/db/schema_version.rb +0 -73
- data/lib/codebase_index/dependency_graph.rb +0 -236
- data/lib/codebase_index/embedding/indexer.rb +0 -140
- data/lib/codebase_index/embedding/openai.rb +0 -126
- data/lib/codebase_index/embedding/provider.rb +0 -162
- data/lib/codebase_index/embedding/text_preparer.rb +0 -112
- data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
- data/lib/codebase_index/evaluation/evaluator.rb +0 -139
- data/lib/codebase_index/evaluation/metrics.rb +0 -79
- data/lib/codebase_index/evaluation/query_set.rb +0 -148
- data/lib/codebase_index/evaluation/report_generator.rb +0 -90
- data/lib/codebase_index/extracted_unit.rb +0 -145
- data/lib/codebase_index/extractor.rb +0 -1028
- data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
- data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
- data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
- data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
- data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
- data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
- data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
- data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
- data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
- data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
- data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
- data/lib/codebase_index/extractors/event_extractor.rb +0 -211
- data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
- data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
- data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
- data/lib/codebase_index/extractors/job_extractor.rb +0 -374
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
- data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
- data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
- data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
- data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
- data/lib/codebase_index/extractors/model_extractor.rb +0 -988
- data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
- data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
- data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
- data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
- data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
- data/lib/codebase_index/extractors/route_extractor.rb +0 -181
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
- data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
- data/lib/codebase_index/extractors/service_extractor.rb +0 -217
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
- data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
- data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
- data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
- data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
- data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
- data/lib/codebase_index/feedback/gap_detector.rb +0 -89
- data/lib/codebase_index/feedback/store.rb +0 -119
- data/lib/codebase_index/filename_utils.rb +0 -32
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
- data/lib/codebase_index/flow_assembler.rb +0 -290
- data/lib/codebase_index/flow_document.rb +0 -191
- data/lib/codebase_index/flow_precomputer.rb +0 -102
- data/lib/codebase_index/formatting/base.rb +0 -30
- data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
- data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
- data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
- data/lib/codebase_index/formatting/human_adapter.rb +0 -78
- data/lib/codebase_index/graph_analyzer.rb +0 -374
- data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
- data/lib/codebase_index/mcp/index_reader.rb +0 -394
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
- data/lib/codebase_index/mcp/server.rb +0 -961
- data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
- data/lib/codebase_index/model_name_cache.rb +0 -51
- data/lib/codebase_index/notion/client.rb +0 -217
- data/lib/codebase_index/notion/exporter.rb +0 -219
- data/lib/codebase_index/notion/mapper.rb +0 -40
- data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
- data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
- data/lib/codebase_index/notion/mappers/shared.rb +0 -22
- data/lib/codebase_index/notion/rate_limiter.rb +0 -68
- data/lib/codebase_index/observability/health_check.rb +0 -79
- data/lib/codebase_index/observability/instrumentation.rb +0 -34
- data/lib/codebase_index/observability/structured_logger.rb +0 -57
- data/lib/codebase_index/operator/error_escalator.rb +0 -81
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
- data/lib/codebase_index/operator/status_reporter.rb +0 -80
- data/lib/codebase_index/railtie.rb +0 -38
- data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
- data/lib/codebase_index/resilience/index_validator.rb +0 -167
- data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
- data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
- data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
- data/lib/codebase_index/retrieval/ranker.rb +0 -277
- data/lib/codebase_index/retrieval/search_executor.rb +0 -316
- data/lib/codebase_index/retriever.rb +0 -152
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
- data/lib/codebase_index/ruby_analyzer.rb +0 -87
- data/lib/codebase_index/session_tracer/file_store.rb +0 -104
- data/lib/codebase_index/session_tracer/middleware.rb +0 -143
- data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
- data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
- data/lib/codebase_index/session_tracer/store.rb +0 -81
- data/lib/codebase_index/storage/graph_store.rb +0 -120
- data/lib/codebase_index/storage/metadata_store.rb +0 -196
- data/lib/codebase_index/storage/pgvector.rb +0 -195
- data/lib/codebase_index/storage/qdrant.rb +0 -205
- data/lib/codebase_index/storage/vector_store.rb +0 -167
- data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
- data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
- data/lib/codebase_index/token_utils.rb +0 -19
- data/lib/codebase_index/version.rb +0 -5
- data/lib/generators/codebase_index/install_generator.rb +0 -32
- data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
- data/lib/tasks/codebase_index.rake +0 -597
- data/lib/tasks/codebase_index_evaluation.rake +0 -115
|
@@ -1,196 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'json'
|
|
4
|
-
|
|
5
|
-
module CodebaseIndex
|
|
6
|
-
module Storage
|
|
7
|
-
# MetadataStore provides an interface for storing and querying unit metadata.
|
|
8
|
-
#
|
|
9
|
-
# All metadata store adapters must include the {Interface} module and implement
|
|
10
|
-
# its methods. The {SQLite} adapter is provided for local persistence.
|
|
11
|
-
#
|
|
12
|
-
# @example Using the SQLite adapter
|
|
13
|
-
# store = CodebaseIndex::Storage::MetadataStore::SQLite.new(":memory:")
|
|
14
|
-
# store.store("User", { type: "model", file_path: "app/models/user.rb" })
|
|
15
|
-
# store.find("User")
|
|
16
|
-
#
|
|
17
|
-
module MetadataStore
|
|
18
|
-
# Interface that all metadata store adapters must implement.
|
|
19
|
-
module Interface
  # Store or update metadata for a unit.
  #
  # @param id [String] Unique identifier for the unit
  # @param metadata [Hash] Metadata to store
  # @raise [NotImplementedError] if not implemented by adapter
  def store(id, metadata)
    raise NotImplementedError, "#{self.class} must implement #store"
  end

  # Find a unit by ID.
  #
  # @param id [String] The identifier to look up
  # @return [Hash, nil] The stored metadata, or nil if not found
  # @raise [NotImplementedError] if not implemented by adapter
  def find(id)
    raise NotImplementedError, "#{self.class} must implement #find"
  end

  # Find multiple units by IDs.
  #
  # Default implementation falls back to one #find call per id and keeps
  # only the ids for which #find returned a truthy value. Adapters should
  # override for batch-optimized behavior.
  #
  # @param ids [Array<String>] The identifiers to look up
  # @return [Hash<String, Hash>] Map of id => metadata for found units
  def find_batch(ids)
    ids.each_with_object({}) do |id, found|
      record = find(id)
      found[id] = record if record
    end
  end

  # Find all units of a given type.
  #
  # @param type [String] The unit type to filter by
  # @return [Array<Hash>] Matching metadata records
  # @raise [NotImplementedError] if not implemented by adapter
  def find_by_type(type)
    raise NotImplementedError, "#{self.class} must implement #find_by_type"
  end

  # Search metadata by text query across specified fields.
  #
  # @param query [String] Text to search for
  # @param fields [Array<String>, nil] Specific fields to search (nil = all)
  # @return [Array<Hash>] Matching metadata records
  # @raise [NotImplementedError] if not implemented by adapter
  def search(query, fields: nil)
    raise NotImplementedError, "#{self.class} must implement #search"
  end

  # Delete a unit by ID.
  #
  # @param id [String] The identifier to delete
  # @raise [NotImplementedError] if not implemented by adapter
  def delete(id)
    raise NotImplementedError, "#{self.class} must implement #delete"
  end

  # Return the total number of stored units.
  #
  # @return [Integer] Total count
  # @raise [NotImplementedError] if not implemented by adapter
  def count
    raise NotImplementedError, "#{self.class} must implement #count"
  end
end
|
|
87
|
-
|
|
88
|
-
# SQLite-backed metadata store using the JSON1 extension.
|
|
89
|
-
#
|
|
90
|
-
# Stores unit metadata as JSON in a single table with type indexing
|
|
91
|
-
# for efficient filtering. Uses upsert semantics for store operations.
|
|
92
|
-
#
|
|
93
|
-
# @example
|
|
94
|
-
# store = SQLite.new(":memory:")
|
|
95
|
-
# store.store("User", { type: "model", namespace: "Admin" })
|
|
96
|
-
# store.find("User") # => { "type" => "model", "namespace" => "Admin" }
|
|
97
|
-
#
|
|
98
|
-
class SQLite
|
|
99
|
-
include Interface
|
|
100
|
-
|
|
101
|
-
# @param db_path [String] Path to the SQLite database file, or ":memory:" for in-memory
|
|
102
|
-
# Open the database at +db_path+, switch result rows to hashes keyed by
# column name, and make sure the units table exists.
def initialize(db_path = ':memory:')
  # Required here rather than at file load.
  require 'sqlite3'
  @db = ::SQLite3::Database.new(db_path).tap { |db| db.results_as_hash = true }
  create_table
end
|
|
108
|
-
|
|
109
|
-
# @see Interface#store
|
|
110
|
-
# Upsert one unit: insert a row, or on an id conflict overwrite its type,
# data and updated_at columns.
#
# The type column comes from the :type (or 'type') metadata key and is
# stringified, so a missing type is stored as "".
#
# @param id [String] Unique identifier for the unit
# @param metadata [Hash] Metadata, serialized to JSON for the data column
def store(id, metadata)
  unit_type = metadata[:type] || metadata['type']
  # Same text Time#iso8601 yields for a local Time ("%FT%T" plus "%:z"), but
  # without depending on the stdlib 'time' extension, which this file never
  # requires (Time#iso8601 is only built in from Ruby 3.4).
  stamp = Time.now.strftime('%FT%T%:z')
  row = [id, unit_type.to_s, JSON.generate(metadata), stamp]

  @db.execute(<<~SQL, row)
    INSERT INTO units (id, type, data, updated_at) VALUES (?, ?, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
      type = excluded.type, data = excluded.data, updated_at = excluded.updated_at
  SQL
end
|
|
120
|
-
|
|
121
|
-
# @see Interface#find
|
|
122
|
-
# Look up one unit by primary key and return its parsed JSON document
# (string keys), or nil when no row matches.
def find(id)
  record = @db.get_first_row('SELECT data FROM units WHERE id = ?', [id])
  record ? JSON.parse(record['data']) : nil
end
|
|
128
|
-
|
|
129
|
-
# @see Interface#find_batch
|
|
130
|
-
# Fetch several units by id, returning a map of id => parsed metadata for
# the ids that exist.
#
# The ids are queried in slices rather than with one placeholder per id in
# a single statement: SQLite caps the number of bound variables per
# statement (999 in builds before 3.32), so a large id list would
# otherwise fail.
#
# @param ids [Array<String>] The identifiers to look up
# @return [Hash<String, Hash>] Map of id => metadata for found units
def find_batch(ids)
  return {} if ids.empty?

  slice_size = 900 # stays below the historical 999-variable default
  ids.each_slice(slice_size).each_with_object({}) do |slice, found|
    placeholders = Array.new(slice.size, '?').join(', ')
    rows = @db.execute("SELECT id, data FROM units WHERE id IN (#{placeholders})", slice)
    rows.each { |row| found[row['id']] = JSON.parse(row['data']) }
  end
end
|
|
139
|
-
|
|
140
|
-
# @see Interface#find_by_type
|
|
141
|
-
# Return every unit whose type column equals +type+ (stringified), each
# converted with parse_row.
def find_by_type(type)
  @db
    .execute('SELECT id, data FROM units WHERE type = ?', [type.to_s])
    .map { |record| parse_row(record) }
end
|
|
145
|
-
|
|
146
|
-
# @see Interface#search
|
|
147
|
-
# Substring search over stored metadata using SQL LIKE.
#
# With +fields+, each named JSON field is matched through json_extract and
# the conditions are OR-ed; without it the raw JSON text of each row is
# matched. The query text is always bound as a parameter. Field names are
# interpolated into the JSON path, so they are validated first (the same
# precaution the pgvector adapter applies to its filter keys).
#
# @param query [String] Text to search for
# @param fields [Array<String, Symbol>, nil] Fields to search (nil = whole document)
# @return [Array<Hash>] Matching records, each converted with parse_row
# @raise [ArgumentError] if a field name is not a dotted identifier path
def search(query, fields: nil)
  pattern = "%#{query}%"
  unless fields
    rows = @db.execute('SELECT id, data FROM units WHERE data LIKE ?', [pattern])
    return rows.map { |row| parse_row(row) }
  end

  # No fields to look in: nothing can match. Previously this built
  # "WHERE " with no condition, which is a SQL syntax error.
  return [] if fields.empty?

  field_name = /\A[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*\z/
  conditions = fields.map do |field|
    name = field.to_s
    raise ArgumentError, "Invalid search field: #{name.inspect}" unless name.match?(field_name)

    "json_extract(data, '$.#{name}') LIKE ?"
  end

  rows = @db.execute("SELECT id, data FROM units WHERE #{conditions.join(' OR ')}",
                     Array.new(conditions.size, pattern))
  rows.map { |row| parse_row(row) }
end
|
|
158
|
-
|
|
159
|
-
# @see Interface#delete
|
|
160
|
-
# Remove the row whose id equals +id+; the id is bound as a parameter.
def delete(id)
  statement = 'DELETE FROM units WHERE id = ?'
  @db.execute(statement, [id])
end
|
|
163
|
-
|
|
164
|
-
# @see Interface#count
|
|
165
|
-
# Number of rows in the units table, as returned by COUNT(*).
def count
  statement = 'SELECT COUNT(*) FROM units'
  @db.get_first_value(statement)
end
|
|
168
|
-
|
|
169
|
-
private
|
|
170
|
-
|
|
171
|
-
# Parse a database row into a metadata hash with the id field injected.
|
|
172
|
-
#
|
|
173
|
-
# @param row [Hash] Database row with 'id' and 'data' keys
|
|
174
|
-
# @return [Hash] Parsed metadata with 'id' key set
|
|
175
|
-
# Decode the row's JSON 'data' column and set its 'id' key from the row's
# id column, overwriting any 'id' already present in the document.
def parse_row(row)
  JSON.parse(row['data']).tap { |document| document['id'] = row['id'] }
end
|
|
180
|
-
|
|
181
|
-
# Create the units table if it doesn't exist.
|
|
182
|
-
# Issue the two idempotent DDL statements this adapter relies on: the units
# table (id primary key, type, JSON data, updated_at) and an index on type.
def create_table
  table_ddl = <<~SQL
    CREATE TABLE IF NOT EXISTS units (
      id TEXT PRIMARY KEY,
      type TEXT,
      data JSON,
      updated_at TEXT
    )
  SQL
  index_ddl = 'CREATE INDEX IF NOT EXISTS idx_units_type ON units(type)'

  @db.execute(table_ddl)
  @db.execute(index_ddl)
end
|
|
193
|
-
end
|
|
194
|
-
end
|
|
195
|
-
end
|
|
196
|
-
end
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'json'
|
|
4
|
-
require_relative 'vector_store'
|
|
5
|
-
|
|
6
|
-
module CodebaseIndex
|
|
7
|
-
module Storage
|
|
8
|
-
module VectorStore
|
|
9
|
-
# PostgreSQL + pgvector adapter for vector storage and similarity search.
|
|
10
|
-
#
|
|
11
|
-
# Uses the pgvector extension for efficient approximate nearest neighbor
|
|
12
|
-
# search with HNSW indexing. Stores metadata as JSONB for flexible filtering.
|
|
13
|
-
#
|
|
14
|
-
# @example
|
|
15
|
-
# store = Pgvector.new(connection: ActiveRecord::Base.connection, dimensions: 768)
|
|
16
|
-
# store.ensure_schema!
|
|
17
|
-
# store.store("User", [0.1, 0.2, ...], { type: "model" })
|
|
18
|
-
# results = store.search([0.1, 0.2, ...], limit: 5, filters: { type: "model" })
|
|
19
|
-
#
|
|
20
|
-
class Pgvector # rubocop:disable Metrics/ClassLength
|
|
21
|
-
include Interface
|
|
22
|
-
|
|
23
|
-
TABLE = 'codebase_index_vectors'
|
|
24
|
-
|
|
25
|
-
# @param connection [Object] ActiveRecord database connection
|
|
26
|
-
# @param dimensions [Integer] Size of the embedding vectors
|
|
27
|
-
# Keep the connection and vector size for later use; nothing is sent to the
# database here.
def initialize(connection:, dimensions:)
  @connection, @dimensions = connection, dimensions
end
|
|
31
|
-
|
|
32
|
-
# Create the pgvector extension, vectors table, and HNSW index.
|
|
33
|
-
#
|
|
34
|
-
# Safe to call multiple times (uses IF NOT EXISTS).
|
|
35
|
-
# Create the pgvector extension, the vectors table and its HNSW cosine
# index. Every statement uses IF NOT EXISTS, so repeated calls are safe.
#
# The configured dimension count is interpolated into the DDL, so it is
# coerced to an Integer and checked first instead of being trusted as-is.
#
# @raise [ArgumentError] if dimensions is not a positive integer
def ensure_schema!
  dims = Integer(@dimensions, exception: false)
  unless dims&.positive?
    raise ArgumentError, "dimensions must be a positive integer, got #{@dimensions.inspect}"
  end

  @connection.execute('CREATE EXTENSION IF NOT EXISTS vector')
  @connection.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS #{TABLE} (
      id TEXT PRIMARY KEY,
      embedding vector(#{dims}),
      metadata JSONB DEFAULT '{}',
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
  SQL
  @connection.execute(<<~SQL)
    CREATE INDEX IF NOT EXISTS idx_#{TABLE}_embedding_hnsw
    ON #{TABLE} USING hnsw (embedding vector_cosine_ops)
  SQL
end
|
|
50
|
-
|
|
51
|
-
# Store or update a vector with metadata.
|
|
52
|
-
#
|
|
53
|
-
# @param id [String] Unique identifier
|
|
54
|
-
# @param vector [Array<Float>] The embedding vector
|
|
55
|
-
# @param metadata [Hash] Optional metadata
|
|
56
|
-
# @see Interface#store
|
|
57
|
-
# Upsert a single vector row. The vector is validated, the row is rendered
# as a SQL VALUES tuple by format_entry, and an existing id has its
# embedding, metadata and created_at replaced.
def store(id, vector, metadata = {})
  validate_vector!(vector)
  tuple = format_entry(id, vector, metadata)
  upsert = <<~SQL
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES #{tuple}
    ON CONFLICT (id) DO UPDATE SET
      embedding = EXCLUDED.embedding,
      metadata = EXCLUDED.metadata,
      created_at = CURRENT_TIMESTAMP
  SQL

  @connection.execute(upsert)
end
|
|
70
|
-
|
|
71
|
-
# Store multiple vectors in a single multi-row INSERT.
|
|
72
|
-
#
|
|
73
|
-
# @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys
|
|
74
|
-
# Upsert many vectors with one multi-row INSERT. Returns nil without
# touching the database when +entries+ is empty. Every vector is validated
# while the tuples are built, before any SQL is executed; a missing
# :metadata is stored as {}.
def store_batch(entries)
  return if entries.empty?

  tuples = entries.map do |item|
    validate_vector!(item[:vector])
    format_entry(item[:id], item[:vector], item[:metadata] || {})
  end
  upsert = <<~SQL
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES #{tuples.join(",\n")}
    ON CONFLICT (id) DO UPDATE SET
      embedding = EXCLUDED.embedding,
      metadata = EXCLUDED.metadata,
      created_at = CURRENT_TIMESTAMP
  SQL

  @connection.execute(upsert)
end
|
|
91
|
-
|
|
92
|
-
# Search for similar vectors using cosine distance.
|
|
93
|
-
#
|
|
94
|
-
# @param query_vector [Array<Float>] The query embedding
|
|
95
|
-
# @param limit [Integer] Maximum results to return
|
|
96
|
-
# @param filters [Hash] Metadata key-value filters
|
|
97
|
-
# @return [Array<SearchResult>] Results sorted by descending similarity
|
|
98
|
-
# @see Interface#search
|
|
99
|
-
# Nearest-neighbour query ordered by the pgvector <=> distance, ascending.
# The query vector is validated before being rendered as a literal, filters
# become a WHERE clause via build_where, and +limit+ is coerced with to_i.
def search(query_vector, limit: 10, filters: {})
  validate_vector!(query_vector)
  literal = "[#{query_vector.join(',')}]"
  statement = <<~SQL
    SELECT id, embedding <=> '#{literal}' AS distance, metadata
    FROM #{TABLE}
    #{build_where(filters)}
    ORDER BY distance ASC
    LIMIT #{limit.to_i}
  SQL

  @connection.execute(statement).map { |row| row_to_result(row) }
end
|
|
115
|
-
|
|
116
|
-
# @see Interface#delete
|
|
117
|
-
# Delete the row with the given id; the id is escaped with the
# connection's #quote before being placed in the statement.
def delete(id)
  @connection.execute("DELETE FROM #{TABLE} WHERE id = #{@connection.quote(id)}")
end
|
|
121
|
-
|
|
122
|
-
# @see Interface#delete_by_filter
|
|
123
|
-
# Delete every row matching the metadata filters.
#
# NOTE: build_where returns an empty string for empty filters, so calling
# this with {} issues an unqualified DELETE that removes all rows.
def delete_by_filter(filters)
  condition = build_where(filters)
  @connection.execute("DELETE FROM #{TABLE} #{condition}")
end
|
|
127
|
-
|
|
128
|
-
# @see Interface#count
|
|
129
|
-
# Total number of stored vectors: reads the 'count' column of the first
# result row and converts it with to_i.
def count
  rows = @connection.execute("SELECT COUNT(*) AS count FROM #{TABLE}")
  rows.first['count'].to_i
end
|
|
133
|
-
|
|
134
|
-
private
|
|
135
|
-
|
|
136
|
-
# Format a single entry as a SQL VALUES tuple.
|
|
137
|
-
#
|
|
138
|
-
# @param id [String] Unique identifier
|
|
139
|
-
# @param vector [Array<Float>] Embedding vector
|
|
140
|
-
# @param metadata [Hash] Entry metadata
|
|
141
|
-
# @return [String] SQL values row literal
|
|
142
|
-
# Render one entry as a SQL VALUES tuple. The id and the JSON-encoded
# metadata go through the connection's #quote; the vector is written as a
# bracketed, comma-separated literal (callers validate it beforehand).
def format_entry(id, vector, metadata)
  id_sql = @connection.quote(id)
  metadata_sql = @connection.quote(JSON.generate(metadata))
  embedding_sql = "'[#{vector.join(',')}]'"

  "(#{id_sql}, #{embedding_sql}, #{metadata_sql}::jsonb, CURRENT_TIMESTAMP)"
end
|
|
148
|
-
|
|
149
|
-
# Convert a database row to a SearchResult.
|
|
150
|
-
#
|
|
151
|
-
# @param row [Hash] Database row with id, distance, metadata
|
|
152
|
-
# @return [SearchResult]
|
|
153
|
-
# Turn a result row into a SearchResult. Metadata delivered as a String is
# JSON-decoded, anything else is passed through; the score is
# 1.0 minus the row's distance.
def row_to_result(row)
  payload = row['metadata']
  payload = JSON.parse(payload) if payload.is_a?(String)

  SearchResult.new(id: row['id'], score: 1.0 - row['distance'].to_f, metadata: payload)
end
|
|
162
|
-
|
|
163
|
-
# Build a WHERE clause from metadata filters.
|
|
164
|
-
#
|
|
165
|
-
# @param filters [Hash] Metadata key-value pairs
|
|
166
|
-
# @return [String] SQL WHERE clause, or empty string if no filters
|
|
167
|
-
# Translate metadata filters into an AND-ed WHERE clause over
# metadata->>'key'. Keys must be plain identifiers because they are
# interpolated; values are stringified and quoted by the connection.
# Returns '' when there are no filters.
def build_where(filters)
  return '' if filters.empty?

  identifier = /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
  clauses = filters.map do |key, value|
    name = key.to_s
    raise ArgumentError, "Invalid filter key: #{name.inspect}" unless name.match?(identifier)

    "metadata->>'#{name}' = #{@connection.quote(value.to_s)}"
  end

  "WHERE #{clauses.join(' AND ')}"
end
|
|
180
|
-
|
|
181
|
-
# Validate that all vector elements are numeric.
|
|
182
|
-
#
|
|
183
|
-
# @param vector [Array] The vector to validate
|
|
184
|
-
# @raise [ArgumentError] if any element is not numeric
|
|
185
|
-
# Check that every element can be written into a vector literal.
#
# Besides non-numeric elements, NaN and +/-Infinity are rejected: they are
# Numeric, but stringify to "NaN" / "Infinity", which is not a usable
# component of the bracketed literal built from the vector.
#
# @param vector [Array] The vector to validate
# @return [Array] The vector itself when every element is acceptable
# @raise [ArgumentError] if an element is not numeric or not finite
def validate_vector!(vector)
  vector.each_with_index do |element, i|
    unless element.is_a?(Numeric)
      raise ArgumentError, "Vector element at index #{i} is not numeric: #{element.inspect}"
    end
    next if element.finite?

    raise ArgumentError, "Vector element at index #{i} is not finite: #{element.inspect}"
  end
end
|
|
192
|
-
end
|
|
193
|
-
end
|
|
194
|
-
end
|
|
195
|
-
end
|
|
@@ -1,205 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'net/http'
|
|
4
|
-
require 'json'
|
|
5
|
-
require 'uri'
|
|
6
|
-
require_relative 'vector_store'
|
|
7
|
-
|
|
8
|
-
module CodebaseIndex
|
|
9
|
-
module Storage
|
|
10
|
-
module VectorStore
|
|
11
|
-
# Qdrant adapter for vector storage and similarity search via HTTP API.
|
|
12
|
-
#
|
|
13
|
-
# Communicates with a Qdrant instance over HTTP. Supports optional API key
|
|
14
|
-
# authentication for managed/cloud deployments.
|
|
15
|
-
#
|
|
16
|
-
# @example
|
|
17
|
-
# store = Qdrant.new(url: "http://localhost:6333", collection: "codebase")
|
|
18
|
-
# store.ensure_collection!(dimensions: 768)
|
|
19
|
-
# store.store("User", [0.1, 0.2, ...], { type: "model" })
|
|
20
|
-
# results = store.search([0.1, 0.2, ...], limit: 5)
|
|
21
|
-
#
|
|
22
|
-
class Qdrant # rubocop:disable Metrics/ClassLength
|
|
23
|
-
include Interface
|
|
24
|
-
|
|
25
|
-
# @param url [String] Qdrant server URL
|
|
26
|
-
# @param collection [String] Collection name
|
|
27
|
-
# @param api_key [String, nil] Optional API key for authentication
|
|
28
|
-
# Remember the server URL, collection name and optional API key, and parse
# the URL once into a URI for later host/port/scheme lookups.
def initialize(url:, collection:, api_key: nil)
  @url, @collection, @api_key = url, collection, api_key
  @uri = URI(url)
end
|
|
34
|
-
|
|
35
|
-
# Create the collection if it doesn't exist.
|
|
36
|
-
#
|
|
37
|
-
# @param dimensions [Integer] Vector dimensionality
|
|
38
|
-
# PUT the collection definition: vectors of the given size compared with
# Cosine distance.
# NOTE(review): the PUT is sent unconditionally; confirm how the server
# answers when the collection already exists, since any non-success status
# is raised by #request.
def ensure_collection!(dimensions:)
  vectors_config = { size: dimensions, distance: 'Cosine' }
  request(:put, "/collections/#{@collection}", { vectors: vectors_config })
end
|
|
47
|
-
|
|
48
|
-
# Store or update a vector with metadata payload.
|
|
49
|
-
#
|
|
50
|
-
# @param id [String] Unique identifier
|
|
51
|
-
# @param vector [Array<Float>] The embedding vector
|
|
52
|
-
# @param metadata [Hash] Optional payload metadata
|
|
53
|
-
# @see Interface#store
|
|
54
|
-
# Upsert one point: the id, vector and metadata (as payload) are sent as a
# single-element points list.
# NOTE(review): the id is forwarded unchanged; confirm that the ids used by
# callers are in a form the Qdrant points API accepts.
def store(id, vector, metadata = {})
  point = { id: id, vector: vector, payload: metadata }
  request(:put, "/collections/#{@collection}/points", { points: [point] })
end
|
|
66
|
-
|
|
67
|
-
# Store multiple vectors in a single batch upsert request.
|
|
68
|
-
#
|
|
69
|
-
# Sends the entire entries array in one HTTP call. Callers are responsible
|
|
70
|
-
# for chunking into reasonable batch sizes (e.g., 100–500 points) before
|
|
71
|
-
# calling this method; the embedding Indexer's +batch_size+ config controls
|
|
72
|
-
# the upstream chunk size.
|
|
73
|
-
#
|
|
74
|
-
# @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys
|
|
75
|
-
# Upsert all entries in one request. Returns nil without sending anything
# for an empty list; an entry without :metadata gets an empty payload.
# Batches are not split here, so callers decide the batch size.
def store_batch(entries)
  return if entries.empty?

  points = entries.map do |item|
    { id: item[:id], vector: item[:vector], payload: item[:metadata] || {} }
  end
  request(:put, "/collections/#{@collection}/points", { points: points })
end
|
|
85
|
-
|
|
86
|
-
# Search for similar vectors.
|
|
87
|
-
#
|
|
88
|
-
# @param query_vector [Array<Float>] The query embedding
|
|
89
|
-
# @param limit [Integer] Maximum results to return
|
|
90
|
-
# @param filters [Hash] Metadata key-value filters
|
|
91
|
-
# @return [Array<SearchResult>] Results sorted by descending similarity
|
|
92
|
-
# @see Interface#search
|
|
93
|
-
# Run a similarity search and wrap each hit in a SearchResult. A filter is
# attached only when +filters+ is non-empty; a response without a 'result'
# key yields an empty list.
def search(query_vector, limit: 10, filters: {})
  payload = { vector: query_vector, limit: limit, with_payload: true }
  payload[:filter] = build_filter(filters) unless filters.empty?

  hits = request(:post, "/collections/#{@collection}/points/search", payload)['result'] || []
  hits.map do |hit|
    SearchResult.new(id: hit['id'], score: hit['score'], metadata: hit['payload'])
  end
end
|
|
112
|
-
|
|
113
|
-
# @see Interface#delete
|
|
114
|
-
# Delete a single point by id via the points/delete endpoint.
def delete(id)
  request(:post, "/collections/#{@collection}/points/delete", { points: [id] })
end
|
|
118
|
-
|
|
119
|
-
# @see Interface#delete_by_filter
|
|
120
|
-
# Delete every point matching the metadata filters, sent as the filter
# produced by build_filter to the points/delete endpoint.
def delete_by_filter(filters)
  selector = { filter: build_filter(filters) }
  request(:post, "/collections/#{@collection}/points/delete", selector)
end
|
|
124
|
-
|
|
125
|
-
# @see Interface#count
|
|
126
|
-
# Ask the server for an exact point count and return result.count from the
# parsed response.
def count
  reply = request(:post, "/collections/#{@collection}/points/count", { exact: true })
  reply['result']['count']
end
|
|
130
|
-
|
|
131
|
-
private
|
|
132
|
-
|
|
133
|
-
# Build a Qdrant filter from metadata key-value pairs.
|
|
134
|
-
#
|
|
135
|
-
# @param filters [Hash] Metadata filters
|
|
136
|
-
# @return [Hash] Qdrant-compatible filter with must conditions
|
|
137
|
-
# Convert key/value filters into a filter hash with one exact-match
# condition per pair under :must. Keys are stringified; values are passed
# through untouched.
def build_filter(filters)
  { must: filters.map { |key, value| { key: key.to_s, match: { value: value } } } }
end
|
|
143
|
-
|
|
144
|
-
# Send an HTTP request to the Qdrant API.
|
|
145
|
-
#
|
|
146
|
-
# @param method [Symbol] HTTP method (:get, :post, :put, :delete)
|
|
147
|
-
# @param path [String] API path
|
|
148
|
-
# @param body [Hash, nil] Request body
|
|
149
|
-
# @return [Hash] Parsed JSON response
|
|
150
|
-
# @raise [CodebaseIndex::Error] if the API returns a non-success status
|
|
151
|
-
# Send one API call and return the parsed JSON body.
#
# A connection-level failure (reset, open timeout, IOError) drops the
# cached client and the same request object is sent once more on a fresh
# one; a second such failure propagates to the caller. Any response that is
# not a Net::HTTPSuccess raises CodebaseIndex::Error with status and body.
def request(method, path, body = nil)
  req = build_request(method, path, body)
  retried = false

  begin
    response = http_client.request(req)
  rescue Errno::ECONNRESET, Net::OpenTimeout, IOError
    raise if retried

    # Connection dropped — reset and retry once
    retried = true
    @http_client = nil
    retry
  end

  unless response.is_a?(Net::HTTPSuccess)
    raise CodebaseIndex::Error, "Qdrant API error: #{response.code} #{response.body}"
  end

  JSON.parse(response.body)
end
|
|
170
|
-
|
|
171
|
-
# Return a reusable, started HTTP client for the Qdrant server.
|
|
172
|
-
# Calling http.start opens a persistent TCP connection so
|
|
173
|
-
# keep_alive_timeout actually takes effect across requests.
|
|
174
|
-
#
|
|
175
|
-
# @return [Net::HTTP]
|
|
176
|
-
# Return the cached client while it reports started?; otherwise build one
# for the configured host/port (TLS when the scheme is https), apply the
# 10s open / 30s read / 30s keep-alive timeouts, start it and cache it.
# The client is cached only after start has succeeded.
def http_client
  return @http_client if @http_client&.started?

  @http_client = Net::HTTP.new(@uri.host, @uri.port).tap do |client|
    client.use_ssl = @uri.scheme == 'https'
    client.open_timeout = 10
    client.read_timeout = 30
    client.keep_alive_timeout = 30
    client.start
  end
end
|
|
187
|
-
|
|
188
|
-
# Build an HTTP request with headers and body.
|
|
189
|
-
#
|
|
190
|
-
# @param method [Symbol] HTTP method
|
|
191
|
-
# @param path [String] API path
|
|
192
|
-
# @param body [Hash, nil] Request body
|
|
193
|
-
# @return [Net::HTTPRequest]
|
|
194
|
-
# Build the Net::HTTP request object for +method+ and +path+ with a JSON
# content type. The api-key header is added only when a key is configured
# and the body is JSON-encoded only when one is given. An unsupported
# method symbol raises KeyError from the lookup.
def build_request(method, path, body)
  verbs = {
    get: Net::HTTP::Get,
    post: Net::HTTP::Post,
    put: Net::HTTP::Put,
    delete: Net::HTTP::Delete
  }

  verbs.fetch(method).new(path, 'Content-Type' => 'application/json').tap do |req|
    req['api-key'] = @api_key if @api_key
    req.body = body.to_json if body
  end
end
|
|
202
|
-
end
|
|
203
|
-
end
|
|
204
|
-
end
|
|
205
|
-
end
|