codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../dependency_graph'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module Storage
|
|
7
|
+
# GraphStore provides an interface for querying code unit relationships.
|
|
8
|
+
#
|
|
9
|
+
# All graph store adapters must include the {Interface} module and implement
|
|
10
|
+
# its methods. The {Memory} adapter wraps the existing {DependencyGraph}.
|
|
11
|
+
#
|
|
12
|
+
# @example Using the memory adapter
|
|
13
|
+
# store = CodebaseIndex::Storage::GraphStore::Memory.new
|
|
14
|
+
# store.register(unit)
|
|
15
|
+
# store.dependencies_of("User")
|
|
16
|
+
#
|
|
17
|
+
module GraphStore
  # Contract that every graph store adapter must fulfil.
  #
  # Each method below is a placeholder that raises NotImplementedError with a
  # message naming the adapter class and the missing method; adapters include
  # this module and override all of them.
  module Interface
    # Get direct dependencies of a unit.
    #
    # @param identifier [String] Unit identifier
    # @return [Array<String>] List of dependency identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def dependencies_of(identifier)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Get direct dependents of a unit (reverse dependencies).
    #
    # @param identifier [String] Unit identifier
    # @return [Array<String>] List of dependent identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def dependents_of(identifier)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Find all units transitively affected by changes to the given files.
    #
    # @param changed_files [Array<String>] List of changed file paths
    # @param max_depth [Integer, nil] Maximum traversal depth (nil for unlimited)
    # @return [Array<String>] List of affected unit identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def affected_by(changed_files, max_depth: nil)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Get all units of a specific type.
    #
    # @param type [Symbol] Unit type (:model, :controller, etc.)
    # @return [Array<String>] List of unit identifiers
    # @raise [NotImplementedError] if not implemented by adapter
    def by_type(type)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Compute PageRank importance scores for all units.
    #
    # @param damping [Float] Damping factor (default: 0.85)
    # @param iterations [Integer] Number of iterations (default: 20)
    # @return [Hash<String, Float>] Identifier => PageRank score
    # @raise [NotImplementedError] if not implemented by adapter
    def pagerank(damping: 0.85, iterations: 20)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end
  end

  # In-memory graph store backed by a DependencyGraph.
  #
  # Every query is forwarded unchanged to the wrapped graph, so this class
  # only adapts the graph to the storage {Interface}.
  #
  # @example
  #   store = Memory.new
  #   store.register(user_unit)
  #   store.dependencies_of("User") # => ["Organization"]
  #
  class Memory
    include Interface

    # @param graph [DependencyGraph, nil] Existing graph to wrap, or nil to create a new one
    def initialize(graph = nil)
      @graph = graph || DependencyGraph.new
    end

    # Register a unit in the wrapped graph.
    #
    # @param unit [ExtractedUnit] The unit to register
    # @return [Object] whatever the wrapped graph's #register returns
    def register(unit)
      @graph.register(unit)
    end

    # @see Interface#dependencies_of
    def dependencies_of(identifier)
      @graph.dependencies_of(identifier)
    end

    # @see Interface#dependents_of
    def dependents_of(identifier)
      @graph.dependents_of(identifier)
    end

    # @see Interface#affected_by
    def affected_by(changed_files, max_depth: nil)
      @graph.affected_by(changed_files, max_depth: max_depth)
    end

    # Forwards to the graph's #units_of_type, which is the graph-side name
    # for this lookup.
    #
    # @see Interface#by_type
    def by_type(type)
      @graph.units_of_type(type)
    end

    # @see Interface#pagerank
    def pagerank(damping: 0.85, iterations: 20)
      @graph.pagerank(damping: damping, iterations: iterations)
    end
  end
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
require 'time'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module Storage
|
|
7
|
+
# MetadataStore provides an interface for storing and querying unit metadata.
|
|
8
|
+
#
|
|
9
|
+
# All metadata store adapters must include the {Interface} module and implement
|
|
10
|
+
# its methods. The {SQLite} adapter is provided for local persistence.
|
|
11
|
+
#
|
|
12
|
+
# @example Using the SQLite adapter
|
|
13
|
+
# store = CodebaseIndex::Storage::MetadataStore::SQLite.new(":memory:")
|
|
14
|
+
# store.store("User", { type: "model", file_path: "app/models/user.rb" })
|
|
15
|
+
# store.find("User")
|
|
16
|
+
#
|
|
17
|
+
module MetadataStore
  # Contract that every metadata store adapter must fulfil.
  #
  # Each method below is a placeholder that raises NotImplementedError with a
  # message naming the adapter class and the missing method; adapters include
  # this module and override all of them.
  module Interface
    # Store or update metadata for a unit.
    #
    # @param id [String] Unique identifier for the unit
    # @param metadata [Hash] Metadata to store
    # @raise [NotImplementedError] if not implemented by adapter
    def store(id, metadata)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Find a unit by ID.
    #
    # @param id [String] The identifier to look up
    # @return [Hash, nil] The stored metadata, or nil if not found
    # @raise [NotImplementedError] if not implemented by adapter
    def find(id)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Find all units of a given type.
    #
    # @param type [String] The unit type to filter by
    # @return [Array<Hash>] Matching metadata records
    # @raise [NotImplementedError] if not implemented by adapter
    def find_by_type(type)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Search metadata by text query across specified fields.
    #
    # @param query [String] Text to search for
    # @param fields [Array<String>, nil] Specific fields to search (nil = all)
    # @return [Array<Hash>] Matching metadata records
    # @raise [NotImplementedError] if not implemented by adapter
    def search(query, fields: nil)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Delete a unit by ID.
    #
    # @param id [String] The identifier to delete
    # @raise [NotImplementedError] if not implemented by adapter
    def delete(id)
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end

    # Return the total number of stored units.
    #
    # @return [Integer] Total count
    # @raise [NotImplementedError] if not implemented by adapter
    def count
      raise NotImplementedError, "#{self.class} must implement ##{__method__}"
    end
  end

  # SQLite-backed metadata store using the JSON1 extension.
  #
  # Keeps one row per unit in a `units` table: the metadata hash is serialized
  # to JSON in `data`, and its type is copied into an indexed `type` column.
  # Writes are upserts keyed on `id`.
  #
  # @example
  #   store = SQLite.new(":memory:")
  #   store.store("User", { type: "model", namespace: "Admin" })
  #   store.find("User") # => { "type" => "model", "namespace" => "Admin" }
  #
  class SQLite
    include Interface

    # A searchable field is one or more identifier segments joined by dots
    # (e.g. "type" or "meta.owner"). Anything else is refused because the
    # field name is spliced into the json_extract path inside the SQL text.
    FIELD_PATTERN = /\A[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*\z/.freeze
    private_constant :FIELD_PATTERN

    # @param db_path [String] Path to the SQLite database file, or ":memory:" for in-memory
    def initialize(db_path = ':memory:')
      # Loaded here rather than at file level so the gem is only needed when
      # this adapter is actually instantiated.
      require 'sqlite3'
      @db = ::SQLite3::Database.new(db_path)
      @db.results_as_hash = true
      create_table
    end

    # Insert the unit, or overwrite type, data and updated_at when the id
    # already exists. The type is read from the :type or "type" key.
    #
    # @see Interface#store
    def store(id, metadata)
      type = metadata[:type] || metadata['type']
      data = JSON.generate(metadata)

      @db.execute(<<~SQL, [id, type.to_s, data, Time.now.iso8601])
        INSERT INTO units (id, type, data, updated_at) VALUES (?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
        type = excluded.type, data = excluded.data, updated_at = excluded.updated_at
      SQL
    end

    # Returns the parsed metadata only; unlike #find_by_type and #search the
    # result does not get an "id" key added.
    #
    # @see Interface#find
    def find(id)
      row = @db.get_first_row('SELECT data FROM units WHERE id = ?', [id])
      return nil unless row

      JSON.parse(row['data'])
    end

    # @see Interface#find_by_type
    def find_by_type(type)
      rows = @db.execute('SELECT id, data FROM units WHERE type = ?', [type.to_s])
      rows_to_records(rows)
    end

    # Substring search (SQL LIKE with the query wrapped in %).
    #
    # Without fields the whole serialized JSON document is matched. With
    # fields, each named field is extracted with json_extract and a record
    # matches when any of them contains the query. An empty field list has
    # nothing to match against and yields no records.
    #
    # @param query [String] Text to search for
    # @param fields [Array<String, Symbol>, String, Symbol, nil] Fields to search (nil = all)
    # @return [Array<Hash>] Matching metadata records, each with an added "id" key
    # @raise [ArgumentError] if a field name is not a dotted identifier path
    # @see Interface#search
    def search(query, fields: nil)
      pattern = "%#{query}%"

      unless fields
        return rows_to_records(@db.execute('SELECT id, data FROM units WHERE data LIKE ?', [pattern]))
      end

      paths = Array(fields).map { |field| json_path(field) }
      return [] if paths.empty?

      conditions = paths.map { |path| "json_extract(data, '#{path}') LIKE ?" }.join(' OR ')
      rows = @db.execute("SELECT id, data FROM units WHERE #{conditions}", Array.new(paths.size, pattern))
      rows_to_records(rows)
    end

    # @see Interface#delete
    def delete(id)
      @db.execute('DELETE FROM units WHERE id = ?', [id])
    end

    # @see Interface#count
    def count
      @db.get_first_value('SELECT COUNT(*) FROM units')
    end

    private

    # Create the units table and its type index if they don't exist.
    def create_table
      @db.execute(<<~SQL)
        CREATE TABLE IF NOT EXISTS units (
          id TEXT PRIMARY KEY,
          type TEXT,
          data JSON,
          updated_at TEXT
        )
      SQL
      @db.execute('CREATE INDEX IF NOT EXISTS idx_units_type ON units(type)')
    end

    # Parse each row's JSON document and add the row id under "id".
    #
    # @param rows [Array<Hash>] Rows with "id" and "data" columns
    # @return [Array<Hash>]
    def rows_to_records(rows)
      rows.map do |row|
        JSON.parse(row['data']).tap { |record| record['id'] = row['id'] }
      end
    end

    # Turn a field name into a json_extract path after validating it.
    #
    # @param field [String, Symbol]
    # @return [String] Path of the form "$.field"
    # @raise [ArgumentError] if the name contains anything but dotted identifiers
    def json_path(field)
      name = field.to_s
      raise ArgumentError, "Invalid search field: #{name.inspect}" unless name.match?(FIELD_PATTERN)

      "$.#{name}"
    end
  end
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require_relative 'vector_store'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Storage
|
|
8
|
+
module VectorStore
|
|
9
|
+
# PostgreSQL + pgvector adapter for vector storage and similarity search.
|
|
10
|
+
#
|
|
11
|
+
# Uses the pgvector extension for efficient approximate nearest neighbor
|
|
12
|
+
# search with HNSW indexing. Stores metadata as JSONB for flexible filtering.
|
|
13
|
+
#
|
|
14
|
+
# @example
|
|
15
|
+
# store = Pgvector.new(connection: ActiveRecord::Base.connection, dimensions: 768)
|
|
16
|
+
# store.ensure_schema!
|
|
17
|
+
# store.store("User", [0.1, 0.2, ...], { type: "model" })
|
|
18
|
+
# results = store.search([0.1, 0.2, ...], limit: 5, filters: { type: "model" })
|
|
19
|
+
#
|
|
20
|
+
class Pgvector
|
|
21
|
+
include Interface
|
|
22
|
+
|
|
23
|
+
TABLE = 'codebase_index_vectors'
|
|
24
|
+
|
|
25
|
+
# Remember the connection and the vector size; no SQL is issued here.
#
# @param connection [Object] ActiveRecord database connection
# @param dimensions [Integer] Size of the embedding vectors
def initialize(connection:, dimensions:)
  @connection, @dimensions = connection, dimensions
end
|
|
31
|
+
|
|
32
|
+
# Create the pgvector extension, vectors table, and HNSW index.
#
# Safe to call multiple times (uses IF NOT EXISTS). The configured dimension
# count is converted with Integer() and checked before any statement runs,
# because it is written directly into the CREATE TABLE text.
#
# @raise [ArgumentError] if the dimensions are not a positive integer value
# @raise [TypeError] if the dimensions cannot be converted (e.g. nil)
def ensure_schema!
  dimensions = Integer(@dimensions)
  unless dimensions.positive?
    raise ArgumentError, "dimensions must be a positive integer, got #{@dimensions.inspect}"
  end

  @connection.execute('CREATE EXTENSION IF NOT EXISTS vector')
  @connection.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS #{TABLE} (
      id TEXT PRIMARY KEY,
      embedding vector(#{dimensions}),
      metadata JSONB DEFAULT '{}',
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
  SQL
  @connection.execute(<<~SQL)
    CREATE INDEX IF NOT EXISTS idx_#{TABLE}_embedding_hnsw
    ON #{TABLE} USING hnsw (embedding vector_cosine_ops)
  SQL
end
|
|
50
|
+
|
|
51
|
+
# Upsert one embedding and its metadata.
#
# The vector is validated first, then written as a pgvector text literal;
# id and the JSON-encoded metadata are quoted by the connection. An existing
# row with the same id has its embedding, metadata and created_at replaced.
#
# @param id [String] Unique identifier
# @param vector [Array<Float>] The embedding vector
# @param metadata [Hash] Optional metadata
# @raise [ArgumentError] if the vector fails validation
# @see Interface#store
def store(id, vector, metadata = {})
  validate_vector!(vector)

  id_sql = @connection.quote(id)
  metadata_sql = @connection.quote(JSON.generate(metadata))
  embedding_sql = "'[#{vector.join(',')}]'"

  @connection.execute(<<~SQL)
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES (#{id_sql}, #{embedding_sql}, #{metadata_sql}::jsonb, CURRENT_TIMESTAMP)
    ON CONFLICT (id) DO UPDATE SET
    embedding = EXCLUDED.embedding,
    metadata = EXCLUDED.metadata,
    created_at = CURRENT_TIMESTAMP
  SQL
end
|
|
72
|
+
|
|
73
|
+
# Nearest-neighbour lookup ordered by cosine distance (the <=> operator).
#
# Rows come back closest-first and are converted with row_to_result, so the
# returned scores are in descending order.
#
# @param query_vector [Array<Float>] The query embedding
# @param limit [Integer] Maximum results to return (coerced with to_i)
# @param filters [Hash] Metadata key-value filters
# @return [Array<SearchResult>] Results sorted by descending similarity
# @raise [ArgumentError] if the vector or a filter key is invalid
# @see Interface#search
def search(query_vector, limit: 10, filters: {})
  validate_vector!(query_vector)

  embedding_sql = "'[#{query_vector.join(',')}]'"
  statement = <<~SQL
    SELECT id, embedding <=> #{embedding_sql} AS distance, metadata
    FROM #{TABLE}
    #{build_where(filters)}
    ORDER BY distance ASC
    LIMIT #{limit.to_i}
  SQL

  @connection.execute(statement).map { |row| row_to_result(row) }
end
|
|
96
|
+
|
|
97
|
+
# Remove the row whose id matches; the id is quoted by the connection.
#
# @param id [String] Identifier of the vector to remove
# @see Interface#delete
def delete(id)
  @connection.execute("DELETE FROM #{TABLE} WHERE id = #{@connection.quote(id)}")
end
|
|
102
|
+
|
|
103
|
+
# Remove every row matching all of the given metadata filters.
#
# An empty filter hash makes build_where return an empty string, so the
# statement then has no WHERE clause and removes all rows.
#
# @param filters [Hash] Metadata key-value filters
# @raise [ArgumentError] if a filter key is invalid
# @see Interface#delete_by_filter
def delete_by_filter(filters)
  @connection.execute("DELETE FROM #{TABLE} #{build_where(filters)}")
end
|
|
108
|
+
|
|
109
|
+
# Number of rows in the vectors table.
#
# @return [Integer] The "count" column of the first result row, via to_i
# @see Interface#count
def count
  first_row = @connection.execute("SELECT COUNT(*) AS count FROM #{TABLE}").first
  first_row['count'].to_i
end
|
|
114
|
+
|
|
115
|
+
private
|
|
116
|
+
|
|
117
|
+
# Turn one result row into a SearchResult.
#
# Metadata delivered as a String is parsed as JSON; any other value is passed
# through as-is. The score is 1.0 minus the row's distance.
#
# @param row [Hash] Database row with id, distance, metadata
# @return [SearchResult]
def row_to_result(row)
  payload = row['metadata']
  payload = JSON.parse(payload) if payload.is_a?(String)

  SearchResult.new(id: row['id'], score: 1.0 - row['distance'].to_f, metadata: payload)
end
|
|
130
|
+
|
|
131
|
+
# Build a WHERE clause that ANDs one equality test per metadata filter.
#
# Keys must look like plain identifiers because they are written into the
# SQL text; values are stringified and quoted by the connection.
#
# @param filters [Hash] Metadata key-value pairs
# @return [String] SQL WHERE clause, or empty string if no filters
# @raise [ArgumentError] if a key is not a plain identifier
def build_where(filters)
  return '' if filters.empty?

  clauses = filters.map do |key, value|
    column = key.to_s
    raise ArgumentError, "Invalid filter key: #{column.inspect}" unless column.match?(/\A[a-zA-Z_][a-zA-Z0-9_]*\z/)

    "metadata->>'#{column}' = #{@connection.quote(value.to_s)}"
  end

  "WHERE #{clauses.join(' AND ')}"
end
|
|
148
|
+
|
|
149
|
+
# Validate that every vector element is a finite number.
#
# Elements are later joined into a vector text literal, so anything that is
# not Numeric is refused. NaN and infinities are Numeric but would be written
# as "NaN"/"Infinity"; they are refused here so the caller gets an
# ArgumentError instead of a database error.
#
# @param vector [Array] The vector to validate
# @return [Array] The vector that was passed in
# @raise [ArgumentError] if any element is not numeric or not finite
def validate_vector!(vector)
  vector.each_with_index do |element, i|
    unless element.is_a?(Numeric)
      raise ArgumentError, "Vector element at index #{i} is not numeric: #{element.inspect}"
    end
    next if element.finite?

    raise ArgumentError, "Vector element at index #{i} is not finite: #{element.inspect}"
  end
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'net/http'
|
|
4
|
+
require 'json'
|
|
5
|
+
require 'uri'
|
|
6
|
+
require_relative 'vector_store'
|
|
7
|
+
|
|
8
|
+
module CodebaseIndex
|
|
9
|
+
module Storage
|
|
10
|
+
module VectorStore
|
|
11
|
+
# Qdrant adapter for vector storage and similarity search via HTTP API.
|
|
12
|
+
#
|
|
13
|
+
# Communicates with a Qdrant instance over HTTP. Supports optional API key
|
|
14
|
+
# authentication for managed/cloud deployments.
|
|
15
|
+
#
|
|
16
|
+
# @example
|
|
17
|
+
# store = Qdrant.new(url: "http://localhost:6333", collection: "codebase")
|
|
18
|
+
# store.ensure_collection!(dimensions: 768)
|
|
19
|
+
# store.store("User", [0.1, 0.2, ...], { type: "model" })
|
|
20
|
+
# results = store.search([0.1, 0.2, ...], limit: 5)
|
|
21
|
+
#
|
|
22
|
+
class Qdrant
|
|
23
|
+
include Interface
|
|
24
|
+
|
|
25
|
+
# Record the connection settings and parse the URL once; no request is sent.
#
# @param url [String] Qdrant server URL
# @param collection [String] Collection name
# @param api_key [String, nil] Optional API key for authentication
def initialize(url:, collection:, api_key: nil)
  @url = url
  @uri = URI(url)
  @collection = collection
  @api_key = api_key
end
|
|
34
|
+
|
|
35
|
+
# Create the collection unless it already exists.
#
# The collection is looked up first and only created when the lookup answers
# 404, because Qdrant's create call is expected to fail for a name that is
# already taken. This makes repeated calls safe. An existing collection is
# left untouched; its vector size is not compared with +dimensions+.
#
# @param dimensions [Integer] Vector dimensionality used when creating
# @return [Hash] Parsed JSON response: the lookup's when the collection
#   exists, otherwise the create call's
# @raise [CodebaseIndex::Error] if the lookup fails with anything other than
#   404, or if the create call fails
def ensure_collection!(dimensions:)
  path = "/collections/#{@collection}"

  lookup = build_http.request(build_request(:get, path, nil))
  return JSON.parse(lookup.body) if lookup.is_a?(Net::HTTPSuccess)

  unless lookup.is_a?(Net::HTTPNotFound)
    raise CodebaseIndex::Error, "Qdrant API error: #{lookup.code} #{lookup.body}"
  end

  request(:put, path, { vectors: { size: dimensions, distance: 'Cosine' } })
end
|
|
47
|
+
|
|
48
|
+
# Upsert a single point (id, vector, payload) into the collection.
#
# NOTE(review): Qdrant documents point ids as unsigned integers or UUIDs;
# confirm callers do not pass arbitrary strings such as class names.
#
# @param id [String] Unique identifier
# @param vector [Array<Float>] The embedding vector
# @param metadata [Hash] Optional payload metadata
# @return [Hash] Parsed JSON response
# @see Interface#store
def store(id, vector, metadata = {})
  point = { id: id, vector: vector, payload: metadata }
  request(:put, "/collections/#{@collection}/points", { points: [point] })
end
|
|
66
|
+
|
|
67
|
+
# Query the collection's search endpoint and wrap each hit.
#
# A filter is only attached when at least one metadata filter is given. A
# response without a "result" key yields an empty list.
#
# @param query_vector [Array<Float>] The query embedding
# @param limit [Integer] Maximum results to return
# @param filters [Hash] Metadata key-value filters
# @return [Array<SearchResult>] Results sorted by descending similarity
# @see Interface#search
def search(query_vector, limit: 10, filters: {})
  payload = { vector: query_vector, limit: limit, with_payload: true }
  payload[:filter] = build_filter(filters) unless filters.empty?

  hits = request(:post, "/collections/#{@collection}/points/search", payload)['result'] || []
  hits.map do |hit|
    SearchResult.new(id: hit['id'], score: hit['score'], metadata: hit['payload'])
  end
end
|
|
93
|
+
|
|
94
|
+
# Delete one point by id through the points/delete endpoint.
#
# @param id [String] Identifier of the point to remove
# @return [Hash] Parsed JSON response
# @see Interface#delete
def delete(id)
  request(:post, "/collections/#{@collection}/points/delete", { points: [id] })
end
|
|
99
|
+
|
|
100
|
+
# Delete the points selected by a filter built from the given metadata pairs.
#
# The filter is always sent, even for an empty hash (build_filter then
# produces an empty condition list).
#
# @param filters [Hash] Metadata key-value filters
# @return [Hash] Parsed JSON response
# @see Interface#delete_by_filter
def delete_by_filter(filters)
  request(:post, "/collections/#{@collection}/points/delete", { filter: build_filter(filters) })
end
|
|
105
|
+
|
|
106
|
+
# Ask the collection for an exact point count.
#
# @return [Integer] The "count" value nested under "result" in the response
# @see Interface#count
def count
  request(:post, "/collections/#{@collection}/points/count", { exact: true })['result']['count']
end
|
|
111
|
+
|
|
112
|
+
private
|
|
113
|
+
|
|
114
|
+
# Translate metadata key-value pairs into a Qdrant filter.
#
# Each pair becomes one match condition (key stringified, value untouched),
# and all conditions are placed under :must.
#
# @param filters [Hash] Metadata filters
# @return [Hash] Qdrant-compatible filter with must conditions
def build_filter(filters)
  { must: filters.map { |key, value| { key: key.to_s, match: { value: value } } } }
end
|
|
124
|
+
|
|
125
|
+
# Perform one HTTP call against the Qdrant API and decode the JSON reply.
#
# A fresh client and request object are built for every call.
#
# @param method [Symbol] HTTP method (:get, :post, :put, :delete)
# @param path [String] API path
# @param body [Hash, nil] Request body
# @return [Hash] Parsed JSON response
# @raise [CodebaseIndex::Error] if the API returns a non-success status
def request(method, path, body = nil)
  response = build_http.request(build_request(method, path, body))
  return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

  raise CodebaseIndex::Error, "Qdrant API error: #{response.code} #{response.body}"
end
|
|
143
|
+
|
|
144
|
+
# Build an unopened HTTP client for the configured server.
#
# TLS is enabled when the URL scheme is https. The client waits up to 10
# seconds to connect and up to 30 seconds for a read.
#
# @return [Net::HTTP]
def build_http
  Net::HTTP.new(@uri.host, @uri.port).tap do |client|
    client.use_ssl = @uri.scheme == 'https'
    client.open_timeout = 10
    client.read_timeout = 30
  end
end
|
|
154
|
+
|
|
155
|
+
# Build the request object for one API call.
#
# Always sends a JSON content type; adds the api-key header when a key is
# configured and a JSON body when one is given.
#
# @param method [Symbol] HTTP method
# @param path [String] API path
# @param body [Hash, nil] Request body
# @return [Net::HTTPRequest]
# @raise [KeyError] if the method is not one of :get, :post, :put, :delete
def build_request(method, path, body)
  verbs = { get: Net::HTTP::Get, post: Net::HTTP::Post,
            put: Net::HTTP::Put, delete: Net::HTTP::Delete }

  verbs.fetch(method).new(path, 'Content-Type' => 'application/json').tap do |req|
    req['api-key'] = @api_key if @api_key
    req.body = body.to_json if body
  end
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
end
|