codebase_index 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/codebase_index.rb +3 -243
- metadata +28 -223
- data/CHANGELOG.md +0 -89
- data/CODE_OF_CONDUCT.md +0 -83
- data/CONTRIBUTING.md +0 -65
- data/LICENSE.txt +0 -21
- data/README.md +0 -325
- data/exe/codebase-console +0 -59
- data/exe/codebase-console-mcp +0 -22
- data/exe/codebase-index-mcp +0 -34
- data/exe/codebase-index-mcp-http +0 -37
- data/exe/codebase-index-mcp-start +0 -58
- data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
- data/lib/codebase_index/ast/method_extractor.rb +0 -71
- data/lib/codebase_index/ast/node.rb +0 -116
- data/lib/codebase_index/ast/parser.rb +0 -614
- data/lib/codebase_index/ast.rb +0 -6
- data/lib/codebase_index/builder.rb +0 -200
- data/lib/codebase_index/cache/cache_middleware.rb +0 -199
- data/lib/codebase_index/cache/cache_store.rb +0 -264
- data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
- data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
- data/lib/codebase_index/chunking/chunk.rb +0 -84
- data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
- data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
- data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
- data/lib/codebase_index/console/audit_logger.rb +0 -75
- data/lib/codebase_index/console/bridge.rb +0 -177
- data/lib/codebase_index/console/confirmation.rb +0 -90
- data/lib/codebase_index/console/connection_manager.rb +0 -173
- data/lib/codebase_index/console/console_response_renderer.rb +0 -74
- data/lib/codebase_index/console/embedded_executor.rb +0 -373
- data/lib/codebase_index/console/model_validator.rb +0 -81
- data/lib/codebase_index/console/rack_middleware.rb +0 -87
- data/lib/codebase_index/console/safe_context.rb +0 -82
- data/lib/codebase_index/console/server.rb +0 -612
- data/lib/codebase_index/console/sql_validator.rb +0 -172
- data/lib/codebase_index/console/tools/tier1.rb +0 -118
- data/lib/codebase_index/console/tools/tier2.rb +0 -117
- data/lib/codebase_index/console/tools/tier3.rb +0 -110
- data/lib/codebase_index/console/tools/tier4.rb +0 -79
- data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
- data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
- data/lib/codebase_index/cost_model/estimator.rb +0 -128
- data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
- data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
- data/lib/codebase_index/cost_model.rb +0 -22
- data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
- data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
- data/lib/codebase_index/db/migrator.rb +0 -71
- data/lib/codebase_index/db/schema_version.rb +0 -73
- data/lib/codebase_index/dependency_graph.rb +0 -236
- data/lib/codebase_index/embedding/indexer.rb +0 -140
- data/lib/codebase_index/embedding/openai.rb +0 -126
- data/lib/codebase_index/embedding/provider.rb +0 -162
- data/lib/codebase_index/embedding/text_preparer.rb +0 -112
- data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
- data/lib/codebase_index/evaluation/evaluator.rb +0 -139
- data/lib/codebase_index/evaluation/metrics.rb +0 -79
- data/lib/codebase_index/evaluation/query_set.rb +0 -148
- data/lib/codebase_index/evaluation/report_generator.rb +0 -90
- data/lib/codebase_index/extracted_unit.rb +0 -145
- data/lib/codebase_index/extractor.rb +0 -1028
- data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
- data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
- data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
- data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
- data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
- data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
- data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
- data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
- data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
- data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
- data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
- data/lib/codebase_index/extractors/event_extractor.rb +0 -211
- data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
- data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
- data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
- data/lib/codebase_index/extractors/job_extractor.rb +0 -374
- data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
- data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
- data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
- data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
- data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
- data/lib/codebase_index/extractors/model_extractor.rb +0 -988
- data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
- data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
- data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
- data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
- data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
- data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
- data/lib/codebase_index/extractors/route_extractor.rb +0 -181
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
- data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
- data/lib/codebase_index/extractors/service_extractor.rb +0 -217
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
- data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
- data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
- data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
- data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
- data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
- data/lib/codebase_index/feedback/gap_detector.rb +0 -89
- data/lib/codebase_index/feedback/store.rb +0 -119
- data/lib/codebase_index/filename_utils.rb +0 -32
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
- data/lib/codebase_index/flow_assembler.rb +0 -290
- data/lib/codebase_index/flow_document.rb +0 -191
- data/lib/codebase_index/flow_precomputer.rb +0 -102
- data/lib/codebase_index/formatting/base.rb +0 -30
- data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
- data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
- data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
- data/lib/codebase_index/formatting/human_adapter.rb +0 -78
- data/lib/codebase_index/graph_analyzer.rb +0 -374
- data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
- data/lib/codebase_index/mcp/index_reader.rb +0 -394
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
- data/lib/codebase_index/mcp/server.rb +0 -961
- data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
- data/lib/codebase_index/model_name_cache.rb +0 -51
- data/lib/codebase_index/notion/client.rb +0 -217
- data/lib/codebase_index/notion/exporter.rb +0 -219
- data/lib/codebase_index/notion/mapper.rb +0 -40
- data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
- data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
- data/lib/codebase_index/notion/mappers/shared.rb +0 -22
- data/lib/codebase_index/notion/rate_limiter.rb +0 -68
- data/lib/codebase_index/observability/health_check.rb +0 -79
- data/lib/codebase_index/observability/instrumentation.rb +0 -34
- data/lib/codebase_index/observability/structured_logger.rb +0 -57
- data/lib/codebase_index/operator/error_escalator.rb +0 -81
- data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
- data/lib/codebase_index/operator/status_reporter.rb +0 -80
- data/lib/codebase_index/railtie.rb +0 -38
- data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
- data/lib/codebase_index/resilience/index_validator.rb +0 -167
- data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
- data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
- data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
- data/lib/codebase_index/retrieval/ranker.rb +0 -277
- data/lib/codebase_index/retrieval/search_executor.rb +0 -316
- data/lib/codebase_index/retriever.rb +0 -152
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
- data/lib/codebase_index/ruby_analyzer.rb +0 -87
- data/lib/codebase_index/session_tracer/file_store.rb +0 -104
- data/lib/codebase_index/session_tracer/middleware.rb +0 -143
- data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
- data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
- data/lib/codebase_index/session_tracer/store.rb +0 -81
- data/lib/codebase_index/storage/graph_store.rb +0 -120
- data/lib/codebase_index/storage/metadata_store.rb +0 -196
- data/lib/codebase_index/storage/pgvector.rb +0 -195
- data/lib/codebase_index/storage/qdrant.rb +0 -205
- data/lib/codebase_index/storage/vector_store.rb +0 -167
- data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
- data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
- data/lib/codebase_index/token_utils.rb +0 -19
- data/lib/codebase_index/version.rb +0 -5
- data/lib/generators/codebase_index/install_generator.rb +0 -32
- data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
- data/lib/tasks/codebase_index.rake +0 -597
- data/lib/tasks/codebase_index_evaluation.rake +0 -115
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module CodebaseIndex
  module Storage
    # VectorStore provides an interface for storing and searching embedding vectors.
    #
    # All vector store adapters must include the {Interface} module and implement
    # its methods. The {InMemory} adapter is provided for development and testing.
    #
    # @example Using the in-memory adapter
    #   store = CodebaseIndex::Storage::VectorStore::InMemory.new
    #   store.store("User", [0.1, 0.2, 0.3], { type: "model" })
    #   results = store.search([0.1, 0.2, 0.3], limit: 5)
    #
    module VectorStore
      # Interface that all vector store adapters must implement.
      #
      # Every method except {#store_batch} raises NotImplementedError until the
      # including adapter overrides it. Note that NotImplementedError descends
      # from ScriptError, so a bare `rescue` will not catch it.
      module Interface
        # Store a vector with associated metadata.
        #
        # @param id [String] Unique identifier for the vector
        # @param vector [Array<Float>] The embedding vector
        # @param metadata [Hash] Optional metadata to store alongside the vector
        # @raise [NotImplementedError] if not implemented by adapter
        def store(id, vector, metadata = {})
          raise NotImplementedError, "#{self.class} must implement ##{__method__}"
        end

        # Store multiple vectors in a single batch operation.
        #
        # Default implementation falls back to individual store calls.
        # Adapters should override for bulk-optimized behavior (e.g.,
        # multi-row INSERT for pgvector, batch upsert for Qdrant).
        #
        # @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys;
        #   a missing or nil :metadata is stored as an empty hash
        def store_batch(entries)
          entries.each { |entry| store(entry[:id], entry[:vector], entry[:metadata] || {}) }
        end

        # Search for similar vectors using cosine similarity.
        #
        # @param query_vector [Array<Float>] The query embedding vector
        # @param limit [Integer] Maximum number of results to return
        # @param filters [Hash] Optional metadata filters to apply
        # @return [Array<SearchResult>] Results sorted by descending similarity
        # @raise [NotImplementedError] if not implemented by adapter
        def search(query_vector, limit: 10, filters: {})
          raise NotImplementedError, "#{self.class} must implement ##{__method__}"
        end

        # Delete a vector by ID.
        #
        # @param id [String] The identifier to delete
        # @raise [NotImplementedError] if not implemented by adapter
        def delete(id)
          raise NotImplementedError, "#{self.class} must implement ##{__method__}"
        end

        # Delete vectors matching metadata filters.
        #
        # @param filters [Hash] Metadata key-value pairs to match
        # @raise [NotImplementedError] if not implemented by adapter
        def delete_by_filter(filters)
          raise NotImplementedError, "#{self.class} must implement ##{__method__}"
        end

        # Return the number of stored vectors.
        #
        # @return [Integer] Total count
        # @raise [NotImplementedError] if not implemented by adapter
        def count
          raise NotImplementedError, "#{self.class} must implement ##{__method__}"
        end
      end

      # Value object representing a single search result.
      SearchResult = Struct.new(:id, :score, :metadata, keyword_init: true)

      # In-memory vector store using hash storage and cosine similarity.
      #
      # Suitable for development and testing. Not intended for production use
      # with large datasets: every search scores every candidate entry.
      #
      # @example
      #   store = InMemory.new
      #   store.store("doc1", [1.0, 0.0], { type: "model" })
      #   store.store("doc2", [0.0, 1.0], { type: "service" })
      #   store.search([1.0, 0.0], limit: 1)
      #   # => [#<SearchResult id="doc1", score=1.0, metadata={type: "model"}>]
      #
      class InMemory
        include Interface

        def initialize
          @entries = {} # id => { vector:, metadata: }
        end

        # Store (or overwrite) the vector registered under +id+.
        #
        # A nil +metadata+ is normalized to an empty hash so that later
        # filtering never has to index into nil.
        #
        # @see Interface#store
        def store(id, vector, metadata = {})
          @entries[id] = { vector: vector, metadata: metadata || {} }
        end

        # Score every entry that passes +filters+ against +query_vector+ and
        # return the best +limit+ results.
        #
        # @raise [ArgumentError] if a stored vector's dimension differs from
        #   the query vector's
        # @see Interface#search
        def search(query_vector, limit: 10, filters: {})
          scored = filter_entries(filters).map do |id, entry|
            SearchResult.new(
              id: id,
              score: cosine_similarity(query_vector, entry[:vector]),
              metadata: entry[:metadata]
            )
          end

          scored.sort_by { |result| -result.score }.first(limit)
        end

        # @see Interface#delete
        def delete(id)
          @entries.delete(id)
        end

        # Remove every entry whose metadata matches all +filters+.
        #
        # An empty filter hash matches every entry and therefore empties the
        # store. The return value is that of Hash#reject! (nil when nothing
        # was removed).
        #
        # @see Interface#delete_by_filter
        def delete_by_filter(filters)
          @entries.reject! { |_id, entry| matches_filters?(entry, filters) }
        end

        # @see Interface#count
        def count
          @entries.size
        end

        private

        # Filter entries by metadata key-value pairs.
        #
        # @param filters [Hash] Metadata filters
        # @return [Hash] Filtered entries (the backing hash itself when no
        #   filters are given)
        def filter_entries(filters)
          return @entries if filters.empty?

          @entries.select { |_id, entry| matches_filters?(entry, filters) }
        end

        # Whether an entry's metadata equals the filter value for every key.
        #
        # @param entry [Hash] Stored entry with :vector and :metadata
        # @param filters [Hash] Metadata key-value pairs to match
        # @return [Boolean]
        def matches_filters?(entry, filters)
          metadata = entry[:metadata]
          filters.all? { |key, value| metadata[key] == value }
        end

        # Compute cosine similarity between two vectors.
        #
        # @param vec_a [Array<Float>] First vector
        # @param vec_b [Array<Float>] Second vector
        # @return [Float] Cosine similarity between -1.0 and 1.0
        #   (0.0 when either vector has zero magnitude)
        # @raise [ArgumentError] if vectors have different dimensions
        def cosine_similarity(vec_a, vec_b)
          unless vec_a.length == vec_b.length
            raise ArgumentError,
                  "Vector dimension mismatch (#{vec_a.length} vs #{vec_b.length})"
          end

          dot = vec_a.zip(vec_b).sum { |x, y| x * y }
          mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
          mag_b = Math.sqrt(vec_b.sum { |x| x**2 })

          return 0.0 if mag_a.zero? || mag_b.zero?

          similarity = dot / (mag_a * mag_b)
          # NaN cannot be compared, so clamp would raise; pass it through as before.
          return similarity if similarity.nan?

          # Rounding can push the ratio a hair outside the mathematical range.
          similarity.clamp(-1.0, 1.0)
        end
      end
    end
  end
end
|
|
@@ -1,245 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'digest'
require 'fileutils'
require 'json'
require 'time'
|
|
6
|
-
|
|
7
|
-
module CodebaseIndex
  module Temporal
    # JSON-file-based snapshot store for temporal tracking without SQLite.
    #
    # Stores snapshots as individual JSON files in a `snapshots/` subdirectory
    # of the index output directory. Each file is named by git SHA and contains
    # manifest metadata plus per-unit content hashes.
    #
    # Implements the same public interface as SnapshotStore so the MCP server
    # tools work identically.
    #
    # @example
    #   store = JsonSnapshotStore.new(dir: '/app/tmp/codebase_index')
    #   store.capture(manifest, unit_hashes)
    #   store.list  # => [{ git_sha: "abc123", ... }]
    #   store.diff("abc123", "def456")  # => { added: [...], modified: [...], deleted: [...] }
    #
    class JsonSnapshotStore # rubocop:disable Metrics/ClassLength
      # Top-level snapshot keys, in the order they appear in a summary hash.
      SUMMARY_FIELDS = %w[
        git_sha git_branch extracted_at rails_version ruby_version total_units
        unit_counts gemfile_lock_sha schema_sha units_added units_modified units_deleted
      ].freeze

      # Per-unit keys persisted for every unit in a snapshot.
      UNIT_FIELDS = %w[unit_type source_hash metadata_hash dependencies_hash].freeze

      # Unit keys whose change marks a unit as modified between snapshots.
      CONTENT_HASH_KEYS = %i[source_hash metadata_hash dependencies_hash].freeze

      private_constant :SUMMARY_FIELDS, :UNIT_FIELDS, :CONTENT_HASH_KEYS

      # @param dir [String] Index output directory; snapshots are written to
      #   its `snapshots/` subdirectory, which is created if missing
      def initialize(dir:)
        @dir = File.join(dir, 'snapshots')
        FileUtils.mkdir_p(@dir)
      end

      # Persist a snapshot for the manifest's git SHA.
      #
      # When an earlier snapshot exists, the added/modified/deleted counts are
      # computed against the one with the greatest `extracted_at`.
      #
      # @param manifest [Hash] Extraction manifest (string or symbol keys)
      # @param unit_hashes [Array<Hash>] Per-unit hashes with identifier, type,
      #   source_hash, metadata_hash and dependencies_hash (string or symbol keys)
      # @return [Hash, nil] Snapshot summary without :units, or nil when the
      #   manifest has no git SHA
      # @raise [ArgumentError] if the git SHA is not a hexadecimal string
      def capture(manifest, unit_hashes)
        git_sha = mget(manifest, 'git_sha')
        return nil unless git_sha

        previous = find_latest
        snapshot = build_snapshot(manifest, git_sha, unit_hashes)

        if previous
          changes = compute_diff(previous[:units], snapshot[:units])
          snapshot[:units_added] = changes[:added].size
          snapshot[:units_modified] = changes[:modified].size
          snapshot[:units_deleted] = changes[:deleted].size
        end

        write_snapshot(git_sha, snapshot)
        snapshot.except(:units)
      end

      # List snapshot summaries, newest first.
      #
      # @param limit [Integer] Maximum number of summaries to return
      # @param branch [String, nil] Only include snapshots from this branch
      # @return [Array<Hash>] Summaries without :units
      def list(limit: 20, branch: nil)
        snapshots = load_all_summaries
        snapshots.select! { |snap| snap[:git_branch] == branch } if branch
        snapshots.sort_by { |snap| snap[:extracted_at] || '' }.reverse.first(limit)
      end

      # Look up a single snapshot summary.
      #
      # @param git_sha [String] Hexadecimal git SHA
      # @return [Hash, nil] Summary without :units; nil when the snapshot file
      #   is missing or unreadable as JSON (a warning is printed in that case)
      # @raise [ArgumentError] if the git SHA is not a hexadecimal string
      def find(git_sha)
        load_snapshot_with_units(git_sha)&.except(:units)
      end

      # Compare the units of two snapshots.
      #
      # @param sha_a [String] Baseline git SHA
      # @param sha_b [String] Target git SHA
      # @return [Hash] `{ added:, modified:, deleted: }`, each an array of
      #   `{ identifier:, unit_type: }`; all empty when either snapshot
      #   cannot be loaded
      # @raise [ArgumentError] if either SHA is not a hexadecimal string
      def diff(sha_a, sha_b)
        snap_a = load_snapshot_with_units(sha_a)
        snap_b = load_snapshot_with_units(sha_b)

        return { added: [], modified: [], deleted: [] } unless snap_a && snap_b

        compute_diff(snap_a[:units], snap_b[:units])
      end

      # History of a single unit across snapshots, newest first.
      #
      # The limit applies to snapshots that actually contain the unit, so a
      # unit missing from recent snapshots still reports its earlier history.
      #
      # @param identifier [String] Unit identifier
      # @param limit [Integer] Maximum number of history entries
      # @return [Array<Hash>] Entries with snapshot and unit hash fields plus
      #   a :changed flag (see #mark_changed_entries)
      def unit_history(identifier, limit: 20)
        entries = load_all_with_units
                  .sort_by { |snap| snap[:extracted_at] || '' }
                  .reverse
                  .filter_map { |snap| history_entry(snap, identifier) }
                  .first(limit)

        mark_changed_entries(entries)
      end

      private

      # Fetch a value by string key, falling back to the symbol key.
      def mget(hash, key)
        hash[key] || hash[key.to_sym]
      end

      # Assemble the full snapshot hash (summary fields plus :units) with the
      # change counters initialised to zero.
      def build_snapshot(manifest, git_sha, unit_hashes)
        {
          git_sha: git_sha,
          git_branch: mget(manifest, 'git_branch'),
          extracted_at: mget(manifest, 'extracted_at') || Time.now.iso8601,
          rails_version: mget(manifest, 'rails_version'),
          ruby_version: mget(manifest, 'ruby_version'),
          total_units: mget(manifest, 'total_units') || unit_hashes.size,
          unit_counts: mget(manifest, 'counts') || {},
          gemfile_lock_sha: mget(manifest, 'gemfile_lock_sha'),
          schema_sha: mget(manifest, 'schema_sha'),
          units_added: 0,
          units_modified: 0,
          units_deleted: 0,
          units: index_units(unit_hashes)
        }
      end

      # Index unit hashes by identifier, dropping entries without one.
      #
      # @return [Hash{String => Hash}] identifier => unit fields
      def index_units(unit_hashes)
        unit_hashes.filter_map do |unit|
          identifier = mget(unit, 'identifier')
          next if identifier.nil?

          [identifier, {
            unit_type: mget(unit, 'type').to_s,
            source_hash: mget(unit, 'source_hash'),
            metadata_hash: mget(unit, 'metadata_hash'),
            dependencies_hash: mget(unit, 'dependencies_hash')
          }]
        end.to_h
      end

      # Classify units as added, modified or deleted going from A to B.
      def compute_diff(units_a, units_b)
        added = []
        modified = []

        units_b.each do |identifier, data_b|
          change = { identifier: identifier, unit_type: data_b[:unit_type] }

          if !units_a.key?(identifier)
            added << change
          elsif unit_changed?(units_a[identifier], data_b)
            modified << change
          end
        end

        deleted = units_a.filter_map do |identifier, data_a|
          { identifier: identifier, unit_type: data_a[:unit_type] } unless units_b.key?(identifier)
        end

        { added: added, modified: modified, deleted: deleted }
      end

      # Whether any of the three content hashes differs between two units.
      def unit_changed?(data_a, data_b)
        CONTENT_HASH_KEYS.any? { |key| data_a[key] != data_b[key] }
      end

      # Build one unit_history entry, or nil when the snapshot lacks the unit.
      def history_entry(snap, identifier)
        unit = snap[:units][identifier]
        return nil unless unit

        {
          git_sha: snap[:git_sha],
          extracted_at: snap[:extracted_at],
          git_branch: snap[:git_branch],
          unit_type: unit[:unit_type],
          source_hash: unit[:source_hash],
          metadata_hash: unit[:metadata_hash],
          dependencies_hash: unit[:dependencies_hash]
        }
      end

      # Flag each entry (ordered newest first) as changed when its source
      # hash differs from the next, older entry. The oldest entry is always
      # flagged. Only :source_hash is compared here.
      def mark_changed_entries(entries)
        entries.each_with_index do |entry, index|
          older = entries[index + 1]
          entry[:changed] = older.nil? || entry[:source_hash] != older[:source_hash]
        end
      end

      # Path of the snapshot file for a SHA. Restricting the SHA to hex
      # characters keeps it from escaping the snapshots directory.
      #
      # @raise [ArgumentError] if the SHA is nil, empty or not hexadecimal
      def snapshot_path(git_sha)
        raise ArgumentError, "Invalid git SHA: #{git_sha}" unless git_sha.to_s.match?(/\A[0-9a-f]+\z/i)

        File.join(@dir, "#{git_sha}.json")
      end

      def write_snapshot(git_sha, data)
        File.write(snapshot_path(git_sha), JSON.pretty_generate(data))
      end

      # Load one snapshot, including :units, by SHA.
      #
      # @return [Hash, nil] nil when the file is missing or corrupt
      def load_snapshot_with_units(git_sha)
        path = snapshot_path(git_sha)
        return nil unless File.exist?(path)

        read_snapshot(path)
      end

      def load_all_summaries
        load_all_with_units.map { |snap| snap.except(:units) }
      end

      def load_all_with_units
        Dir.glob(File.join(@dir, '*.json')).filter_map { |path| read_snapshot(path) }
      end

      # Snapshot (with units) having the greatest extracted_at, or nil when
      # no readable snapshot exists.
      def find_latest
        load_all_with_units.max_by { |snap| snap[:extracted_at] || '' }
      end

      # Parse one snapshot file. Corrupt files are reported on stderr and
      # skipped rather than aborting the whole operation.
      #
      # @return [Hash, nil] Symbolized snapshot, or nil when unparseable
      def read_snapshot(path)
        data = JSON.parse(File.read(path))
        raise JSON::ParserError, 'expected a JSON object' unless data.is_a?(Hash)

        symbolize_snapshot(data)
      rescue JSON::ParserError => e
        warn "[CodebaseIndex] Skipping corrupt snapshot #{File.basename(path)}: #{e.message}"
        nil
      end

      # Convert a parsed JSON snapshot into the symbol-keyed internal shape.
      def symbolize_snapshot(data)
        snapshot = SUMMARY_FIELDS.to_h { |field| [field.to_sym, data[field]] }
        snapshot[:unit_counts] ||= {}
        snapshot[:units] = symbolize_units(data['units'])
        snapshot
      end

      def symbolize_units(units)
        return {} unless units

        units.transform_values do |unit|
          UNIT_FIELDS.to_h { |field| [field.to_sym, unit[field]] }
        end
      end
    end
  end
end
|