codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module CodebaseIndex
|
|
4
|
+
module Storage
|
|
5
|
+
# VectorStore provides an interface for storing and searching embedding vectors.
|
|
6
|
+
#
|
|
7
|
+
# All vector store adapters must include the {Interface} module and implement
|
|
8
|
+
# its methods. The {InMemory} adapter is provided for development and testing.
|
|
9
|
+
#
|
|
10
|
+
# @example Using the in-memory adapter
|
|
11
|
+
# store = CodebaseIndex::Storage::VectorStore::InMemory.new
|
|
12
|
+
# store.store("User", [0.1, 0.2, 0.3], { type: "model" })
|
|
13
|
+
# results = store.search([0.1, 0.2, 0.3], limit: 5)
|
|
14
|
+
#
|
|
15
|
+
module VectorStore
  # Interface that all vector store adapters must implement.
  #
  # Every method here raises NotImplementedError; an adapter that includes
  # this module is expected to override each one.
  module Interface
    # Store a vector with associated metadata.
    #
    # @param id [String] Unique identifier for the vector
    # @param vector [Array<Float>] The embedding vector
    # @param metadata [Hash] Optional metadata to store alongside the vector
    # @raise [NotImplementedError] if not implemented by adapter
    def store(id, vector, metadata = {})
      raise NotImplementedError
    end

    # Search for similar vectors using cosine similarity.
    #
    # @param query_vector [Array<Float>] The query embedding vector
    # @param limit [Integer] Maximum number of results to return
    # @param filters [Hash] Optional metadata filters to apply
    # @return [Array<SearchResult>] Results sorted by descending similarity
    # @raise [NotImplementedError] if not implemented by adapter
    def search(query_vector, limit: 10, filters: {})
      raise NotImplementedError
    end

    # Delete a vector by ID.
    #
    # @param id [String] The identifier to delete
    # @raise [NotImplementedError] if not implemented by adapter
    def delete(id)
      raise NotImplementedError
    end

    # Delete vectors matching metadata filters.
    #
    # @param filters [Hash] Metadata key-value pairs to match
    # @raise [NotImplementedError] if not implemented by adapter
    def delete_by_filter(filters)
      raise NotImplementedError
    end

    # Return the number of stored vectors.
    #
    # @return [Integer] Total count
    # @raise [NotImplementedError] if not implemented by adapter
    def count
      raise NotImplementedError
    end
  end

  # Value object representing a single search result.
  SearchResult = Struct.new(:id, :score, :metadata, keyword_init: true)

  # In-memory vector store using hash storage and cosine similarity.
  #
  # Suitable for development and testing. Not intended for production use
  # with large datasets: every search scores every candidate entry.
  #
  # @example
  #   store = InMemory.new
  #   store.store("doc1", [1.0, 0.0], { type: "model" })
  #   store.store("doc2", [0.0, 1.0], { type: "service" })
  #   store.search([1.0, 0.0], limit: 1)
  #   # => [#<SearchResult id="doc1", score=1.0, metadata={type: "model"}>]
  #
  class InMemory
    include Interface

    def initialize
      @entries = {} # id => { vector:, metadata: }
    end

    # Insert or overwrite the entry for +id+.
    #
    # The vector and metadata objects are stored as given (not copied).
    #
    # @see Interface#store
    # @return [Hash] The stored entry ({ vector:, metadata: })
    def store(id, vector, metadata = {})
      @entries[id] = { vector: vector, metadata: metadata }
    end

    # Score every entry that passes +filters+ against +query_vector+ and
    # return the best +limit+ results.
    #
    # @see Interface#search
    # @raise [ArgumentError] if a candidate vector's dimension differs from the query's
    def search(query_vector, limit: 10, filters: {})
      scored = filter_entries(filters).map do |id, entry|
        SearchResult.new(
          id: id,
          score: cosine_similarity(query_vector, entry[:vector]),
          metadata: entry[:metadata]
        )
      end

      scored.sort_by { |result| -result.score }.first(limit)
    end

    # @see Interface#delete
    # @return [Hash, nil] The removed entry, or nil when +id+ was not stored
    def delete(id)
      @entries.delete(id)
    end

    # Remove every entry whose metadata matches all of +filters+.
    #
    # An empty +filters+ hash matches every entry, so it clears the store.
    #
    # @see Interface#delete_by_filter
    # @return [Hash, nil] The entries hash, or nil when nothing was removed
    #   (Hash#reject! semantics)
    def delete_by_filter(filters)
      @entries.reject! { |_id, entry| matches_filters?(entry, filters) }
    end

    # @see Interface#count
    def count
      @entries.size
    end

    private

    # Filter entries by metadata key-value pairs.
    #
    # @param filters [Hash] Metadata filters
    # @return [Hash] Filtered entries (the internal hash itself when +filters+ is empty)
    def filter_entries(filters)
      return @entries if filters.empty?

      @entries.select { |_id, entry| matches_filters?(entry, filters) }
    end

    # Whether an entry's metadata equals +filters+ on every filter key.
    #
    # Entries stored with nil metadata are treated as having no metadata
    # rather than raising NoMethodError.
    #
    # @param entry [Hash] Stored entry ({ vector:, metadata: })
    # @param filters [Hash] Metadata filters
    # @return [Boolean]
    def matches_filters?(entry, filters)
      metadata = entry[:metadata] || {}
      filters.all? { |key, value| metadata[key] == value }
    end

    # Compute cosine similarity between two vectors.
    #
    # @param vec_a [Array<Float>] First vector
    # @param vec_b [Array<Float>] Second vector
    # @return [Float] Cosine similarity between -1.0 and 1.0
    #   (0.0 when either vector has zero magnitude)
    # @raise [ArgumentError] if vectors have different dimensions
    def cosine_similarity(vec_a, vec_b)
      unless vec_a.length == vec_b.length
        raise ArgumentError,
              "Vector dimension mismatch (#{vec_a.length} vs #{vec_b.length})"
      end

      dot = vec_a.zip(vec_b).sum { |x, y| x * y }
      mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
      mag_b = Math.sqrt(vec_b.sum { |x| x**2 })

      # A zero-magnitude vector has no direction; avoid dividing by zero.
      return 0.0 if mag_a.zero? || mag_b.zero?

      dot / (mag_a * mag_b)
    end
  end
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
require 'time'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Temporal
|
|
8
|
+
# SnapshotStore captures and queries temporal snapshots of extraction runs.
|
|
9
|
+
#
|
|
10
|
+
# Each snapshot is anchored to a git commit SHA and stores per-unit content
|
|
11
|
+
# hashes for efficient diff computation. Full source is not duplicated —
|
|
12
|
+
# only hashes of source, metadata, and dependencies are stored per snapshot.
|
|
13
|
+
#
|
|
14
|
+
# @example Capturing a snapshot
|
|
15
|
+
# store = SnapshotStore.new(connection: db)
|
|
16
|
+
# store.capture(manifest, unit_hashes)
|
|
17
|
+
#
|
|
18
|
+
# @example Comparing snapshots
|
|
19
|
+
# diff = store.diff("abc123", "def456")
|
|
20
|
+
# diff[:added] # => [{ identifier: "NewModel", ... }]
|
|
21
|
+
# diff[:modified] # => [{ identifier: "User", ... }]
|
|
22
|
+
# diff[:deleted] # => [{ identifier: "OldService", ... }]
|
|
23
|
+
#
|
|
24
|
+
class SnapshotStore # rubocop:disable Metrics/ClassLength
  # Per-unit hash columns compared when deciding whether a unit was modified.
  HASH_FIELDS = %i[source_hash metadata_hash dependencies_hash].freeze
  private_constant :HASH_FIELDS

  # @param connection [Object] Database connection supporting #execute,
  #   #get_first_row and #get_first_value
  def initialize(connection:)
    @db = connection
  end

  # Capture a snapshot after extraction completes.
  #
  # Stores the manifest metadata and per-unit content hashes, then computes
  # diff stats vs. the most recent *other* snapshot. Re-capturing a SHA
  # replaces its snapshot row and its unit rows.
  #
  # @param manifest [Hash] The manifest data (string or symbol keys)
  # @param unit_hashes [Array<Hash>] Per-unit content hashes
  # @return [Hash, nil] Snapshot record with diff stats, or nil when the
  #   manifest carries no git_sha
  def capture(manifest, unit_hashes)
    git_sha = mget(manifest, 'git_sha')
    return nil unless git_sha

    # Exclude the SHA being captured: if it is already the newest snapshot,
    # comparing against "latest" would diff the snapshot with itself.
    previous = find_latest(excluding: git_sha)

    # INSERT OR REPLACE may assign a new row id (SQLite deletes the
    # conflicting row and inserts a fresh one), so drop the old unit rows
    # while the old id can still be looked up.
    # NOTE(review): assumes git_sha is the conflicting unique column — schema not visible here.
    delete_units_for_sha(git_sha)
    upsert_snapshot(manifest, git_sha, unit_hashes.size)

    snapshot_id = fetch_snapshot_id(git_sha)
    @db.execute('DELETE FROM codebase_snapshot_units WHERE snapshot_id = ?', [snapshot_id])
    insert_unit_hashes(snapshot_id, unit_hashes)

    update_diff_stats(snapshot_id, previous)
    find(git_sha)
  end

  # List snapshots, optionally filtered by branch.
  #
  # @param limit [Integer] Max results (default 20)
  # @param branch [String, nil] Filter by branch name
  # @return [Array<Hash>] Snapshot summaries sorted by extracted_at descending
  def list(limit: 20, branch: nil)
    rows = if branch
             @db.execute(
               'SELECT * FROM codebase_snapshots WHERE git_branch = ? ORDER BY extracted_at DESC LIMIT ?',
               [branch, limit]
             )
           else
             @db.execute(
               'SELECT * FROM codebase_snapshots ORDER BY extracted_at DESC LIMIT ?',
               [limit]
             )
           end

    rows.map { |row| row_to_hash(row) }
  end

  # Find a specific snapshot by git SHA.
  #
  # @param git_sha [String]
  # @return [Hash, nil] Snapshot metadata or nil if not found
  def find(git_sha)
    row = @db.get_first_row('SELECT * FROM codebase_snapshots WHERE git_sha = ?', [git_sha])
    row && row_to_hash(row)
  end

  # Compute diff between two snapshots.
  #
  # Returns an empty diff when either SHA has no snapshot.
  #
  # @param sha_a [String] Before snapshot git SHA
  # @param sha_b [String] After snapshot git SHA
  # @return [Hash] {added: [...], modified: [...], deleted: [...]}
  def diff(sha_a, sha_b)
    id_a = fetch_snapshot_id(sha_a)
    id_b = fetch_snapshot_id(sha_b)
    return { added: [], modified: [], deleted: [] } unless id_a && id_b

    compute_diff(load_snapshot_units(id_a), load_snapshot_units(id_b))
  end

  # History of a single unit across snapshots.
  #
  # One row beyond +limit+ is fetched so the oldest returned entry is
  # compared with its real predecessor instead of being reported as a
  # first appearance just because the window ended there.
  #
  # @param identifier [String] Unit identifier
  # @param limit [Integer] Max snapshots to return (default 20)
  # @return [Array<Hash>] Entries with git_sha, extracted_at, source_hash, changed flag
  def unit_history(identifier, limit: 20)
    # A negative LIMIT is passed through untouched (SQLite treats it as "no limit").
    fetch_limit = limit.negative? ? limit : limit + 1

    rows = @db.execute(<<~SQL, [identifier, fetch_limit])
      SELECT su.source_hash, su.metadata_hash, su.dependencies_hash, su.unit_type,
             s.git_sha, s.extracted_at, s.git_branch
      FROM codebase_snapshot_units su
      JOIN codebase_snapshots s ON s.id = su.snapshot_id
      WHERE su.identifier = ?
      ORDER BY s.extracted_at DESC
      LIMIT ?
    SQL

    entries = mark_changed_entries(rows.map { |row| history_entry_from_row(row) })
    limit.negative? ? entries : entries.first(limit)
  end

  private

  # Build a history entry hash from a database row.
  #
  # @param row [Hash]
  # @return [Hash]
  def history_entry_from_row(row)
    {
      git_sha: row['git_sha'],
      extracted_at: row['extracted_at'],
      git_branch: row['git_branch'],
      unit_type: row['unit_type'],
      source_hash: row['source_hash'],
      metadata_hash: row['metadata_hash'],
      dependencies_hash: row['dependencies_hash']
    }
  end

  # Mark changed flag on history entries by comparing source hashes.
  #
  # Entries are expected newest-first; each one is compared with the next
  # (older) entry. The last entry has nothing to compare with and is
  # flagged as changed (first appearance).
  #
  # @param entries [Array<Hash>]
  # @return [Array<Hash>] The same array, mutated in place
  def mark_changed_entries(entries)
    last_index = entries.size - 1
    entries.each_with_index do |entry, index|
      entry[:changed] = index == last_index || entry[:source_hash] != entries[index + 1][:source_hash]
    end
  end

  # Get a value from a hash that may have string or symbol keys.
  #
  # The string key wins when both are present and truthy.
  #
  # @param hash [Hash]
  # @param key [String]
  # @return [Object, nil]
  def mget(hash, key)
    hash[key] || hash[key.to_sym]
  end

  # Insert or replace the snapshot row from manifest data.
  #
  # @param manifest [Hash]
  # @param git_sha [String]
  # @param default_total [Integer] Used when the manifest has no total_units
  # @return [void]
  def upsert_snapshot(manifest, git_sha, default_total)
    params = [
      git_sha,
      mget(manifest, 'git_branch'),
      mget(manifest, 'extracted_at') || Time.now.iso8601,
      mget(manifest, 'rails_version'),
      mget(manifest, 'ruby_version'),
      mget(manifest, 'total_units') || default_total,
      JSON.generate(mget(manifest, 'counts') || {}),
      mget(manifest, 'gemfile_lock_sha'),
      mget(manifest, 'schema_sha')
    ]
    @db.execute(<<~SQL, params)
      INSERT OR REPLACE INTO codebase_snapshots
        (git_sha, git_branch, extracted_at, rails_version, ruby_version,
         total_units, unit_counts, gemfile_lock_sha, schema_sha)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    SQL
  end

  # Update a snapshot's diff stats vs. a previous snapshot.
  #
  # @param snapshot_id [Integer]
  # @param previous [Hash, nil]
  # @return [void]
  def update_diff_stats(snapshot_id, previous)
    stats = compute_diff_stats(snapshot_id, previous)
    @db.execute(
      'UPDATE codebase_snapshots SET units_added = ?, units_modified = ?, units_deleted = ? WHERE id = ?',
      [stats[:added], stats[:modified], stats[:deleted], snapshot_id]
    )
  end

  # Find the most recent snapshot by extracted_at.
  #
  # @param excluding [String, nil] git SHA to leave out of the search
  # @return [Hash, nil]
  def find_latest(excluding: nil)
    row = if excluding
            @db.get_first_row(
              'SELECT * FROM codebase_snapshots WHERE git_sha != ? ORDER BY extracted_at DESC LIMIT 1',
              [excluding]
            )
          else
            @db.get_first_row('SELECT * FROM codebase_snapshots ORDER BY extracted_at DESC LIMIT 1')
          end

    row && row_to_hash(row)
  end

  # Fetch a snapshot's ID by git SHA.
  #
  # @param git_sha [String]
  # @return [Integer, nil]
  def fetch_snapshot_id(git_sha)
    @db.get_first_value('SELECT id FROM codebase_snapshots WHERE git_sha = ?', [git_sha])
  end

  # Delete the unit rows of the snapshot currently stored for +git_sha+, if any.
  #
  # @param git_sha [String]
  # @return [void]
  def delete_units_for_sha(git_sha)
    existing_id = fetch_snapshot_id(git_sha)
    return unless existing_id

    @db.execute('DELETE FROM codebase_snapshot_units WHERE snapshot_id = ?', [existing_id])
  end

  # Insert per-unit hash records for a snapshot.
  #
  # Each unit hash may use symbol or string keys; the symbol key is read first.
  #
  # @param snapshot_id [Integer]
  # @param unit_hashes [Array<Hash>]
  # @return [void]
  def insert_unit_hashes(snapshot_id, unit_hashes)
    sql = <<~SQL
      INSERT INTO codebase_snapshot_units
        (snapshot_id, identifier, unit_type, source_hash, metadata_hash, dependencies_hash)
      VALUES (?, ?, ?, ?, ?, ?)
    SQL

    unit_hashes.each do |uh|
      @db.execute(sql, [
                    snapshot_id,
                    uh[:identifier] || uh['identifier'],
                    (uh[:type] || uh['type']).to_s,
                    uh[:source_hash] || uh['source_hash'],
                    uh[:metadata_hash] || uh['metadata_hash'],
                    uh[:dependencies_hash] || uh['dependencies_hash']
                  ])
    end
  end

  # Load all unit records for a snapshot as a hash keyed by identifier.
  #
  # @param snapshot_id [Integer]
  # @return [Hash{String => Hash}]
  def load_snapshot_units(snapshot_id)
    sql = <<~SQL
      SELECT identifier, unit_type, source_hash, metadata_hash, dependencies_hash
      FROM codebase_snapshot_units WHERE snapshot_id = ?
    SQL

    @db.execute(sql, [snapshot_id]).to_h do |row|
      [row['identifier'], {
        unit_type: row['unit_type'],
        source_hash: row['source_hash'],
        metadata_hash: row['metadata_hash'],
        dependencies_hash: row['dependencies_hash']
      }]
    end
  end

  # Compute diff between two sets of unit hashes.
  #
  # A unit present in both sets counts as modified when any of its source,
  # metadata or dependencies hashes differ.
  #
  # @param units_a [Hash{String => Hash}] Before
  # @param units_b [Hash{String => Hash}] After
  # @return [Hash] {added: [...], modified: [...], deleted: [...]}
  def compute_diff(units_a, units_b)
    added = []
    modified = []

    units_b.each do |identifier, data_b|
      summary = { identifier: identifier, unit_type: data_b[:unit_type] }

      if !units_a.key?(identifier)
        added << summary
      elsif HASH_FIELDS.any? { |field| units_a[identifier][field] != data_b[field] }
        modified << summary
      end
    end

    # Units in A but not B are deleted.
    deleted = units_a.reject { |identifier, _| units_b.key?(identifier) }.map do |identifier, data_a|
      { identifier: identifier, unit_type: data_a[:unit_type] }
    end

    { added: added, modified: modified, deleted: deleted }
  end

  # Compute aggregate diff stats.
  #
  # @param current_snapshot_id [Integer]
  # @param previous_snapshot [Hash, nil]
  # @return [Hash] {added:, modified:, deleted:} counts; all zero when there
  #   is no previous snapshot to compare with
  def compute_diff_stats(current_snapshot_id, previous_snapshot)
    return { added: 0, modified: 0, deleted: 0 } unless previous_snapshot

    prev_id = fetch_snapshot_id(previous_snapshot[:git_sha])
    return { added: 0, modified: 0, deleted: 0 } unless prev_id

    result = compute_diff(load_snapshot_units(prev_id), load_snapshot_units(current_snapshot_id))
    { added: result[:added].size, modified: result[:modified].size, deleted: result[:deleted].size }
  end

  # Convert a database row to a normalized hash.
  #
  # @param row [Hash] Result row keyed by column name
  # @return [Hash] Symbol-keyed snapshot record; unit_counts is parsed from
  #   JSON (empty hash when the column is NULL)
  def row_to_hash(row)
    {
      id: row['id'],
      git_sha: row['git_sha'],
      git_branch: row['git_branch'],
      extracted_at: row['extracted_at'],
      rails_version: row['rails_version'],
      ruby_version: row['ruby_version'],
      total_units: row['total_units'],
      unit_counts: row['unit_counts'] ? JSON.parse(row['unit_counts']) : {},
      gemfile_lock_sha: row['gemfile_lock_sha'],
      schema_sha: row['schema_sha'],
      units_added: row['units_added'],
      units_modified: row['units_modified'],
      units_deleted: row['units_deleted']
    }
  end
end
|
|
340
|
+
end
|
|
341
|
+
end
|