codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
@@ -1,167 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module CodebaseIndex
  module Storage
    # VectorStore provides an interface for storing and searching embedding vectors.
    #
    # All vector store adapters must include the {Interface} module and implement
    # its methods. The {InMemory} adapter is provided for development and testing.
    #
    # @example Using the in-memory adapter
    #   store = CodebaseIndex::Storage::VectorStore::InMemory.new
    #   store.store("User", [0.1, 0.2, 0.3], { type: "model" })
    #   results = store.search([0.1, 0.2, 0.3], limit: 5)
    #
    module VectorStore
      # Interface that all vector store adapters must implement.
      #
      # Every method except {#store_batch} raises NotImplementedError until an
      # adapter overrides it. Note that NotImplementedError is a ScriptError,
      # so a bare `rescue` (StandardError) will not catch it.
      module Interface
        # Store a vector with associated metadata.
        #
        # @param id [String] Unique identifier for the vector
        # @param vector [Array<Float>] The embedding vector
        # @param metadata [Hash] Optional metadata to store alongside the vector
        # @raise [NotImplementedError] if not implemented by adapter
        def store(id, vector, metadata = {})
          raise NotImplementedError
        end

        # Store multiple vectors in a single batch operation.
        #
        # Default implementation falls back to individual store calls.
        # Adapters should override for bulk-optimized behavior (e.g.,
        # multi-row INSERT for pgvector, batch upsert for Qdrant).
        #
        # @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys;
        #   a missing or nil :metadata is stored as an empty hash
        def store_batch(entries)
          entries.each { |e| store(e[:id], e[:vector], e[:metadata] || {}) }
        end

        # Search for similar vectors using cosine similarity.
        #
        # @param query_vector [Array<Float>] The query embedding vector
        # @param limit [Integer] Maximum number of results to return
        # @param filters [Hash] Optional metadata filters to apply
        # @return [Array<SearchResult>] Results sorted by descending similarity
        # @raise [NotImplementedError] if not implemented by adapter
        def search(query_vector, limit: 10, filters: {})
          raise NotImplementedError
        end

        # Delete a vector by ID.
        #
        # @param id [String] The identifier to delete
        # @raise [NotImplementedError] if not implemented by adapter
        def delete(id)
          raise NotImplementedError
        end

        # Delete vectors matching metadata filters.
        #
        # @param filters [Hash] Metadata key-value pairs to match
        # @raise [NotImplementedError] if not implemented by adapter
        def delete_by_filter(filters)
          raise NotImplementedError
        end

        # Return the number of stored vectors.
        #
        # @return [Integer] Total count
        # @raise [NotImplementedError] if not implemented by adapter
        def count
          raise NotImplementedError
        end
      end

      # Value object representing a single search result.
      SearchResult = Struct.new(:id, :score, :metadata, keyword_init: true)

      # In-memory vector store using hash storage and cosine similarity.
      #
      # Suitable for development and testing. Not intended for production use
      # with large datasets.
      #
      # @example
      #   store = InMemory.new
      #   store.store("doc1", [1.0, 0.0], { type: "model" })
      #   store.store("doc2", [0.0, 1.0], { type: "service" })
      #   store.search([1.0, 0.0], limit: 1)
      #   # => [#<SearchResult id="doc1", score=1.0, metadata={type: "model"}>]
      #
      class InMemory
        include Interface

        def initialize
          @entries = {} # id => { vector:, metadata: }
        end

        # Insert or overwrite the entry stored under +id+.
        #
        # A nil +metadata+ is stored as an empty hash, matching what
        # {Interface#store_batch} does, so that later metadata filtering never
        # has to index into nil.
        #
        # @param id [String] Unique identifier for the vector
        # @param vector [Array<Float>] The embedding vector (stored by reference)
        # @param metadata [Hash, nil] Optional metadata to store alongside the vector
        # @return [Hash] The stored entry (+:vector+ and +:metadata+ keys)
        # @see Interface#store
        def store(id, vector, metadata = {})
          @entries[id] = { vector: vector, metadata: metadata || {} }
        end

        # Score every entry that passes +filters+ against +query_vector+ and
        # return the best matches first.
        #
        # @param query_vector [Array<Float>] The query embedding vector
        # @param limit [Integer] Maximum number of results to return
        # @param filters [Hash] Metadata key-value pairs every result must match;
        #   an empty hash matches all entries
        # @return [Array<SearchResult>] Results sorted by descending similarity
        # @raise [ArgumentError] if a candidate vector's dimension differs from
        #   the query vector's
        # @see Interface#search
        def search(query_vector, limit: 10, filters: {})
          candidates = filter_entries(filters)

          scored = candidates.map do |id, entry|
            score = cosine_similarity(query_vector, entry[:vector])
            SearchResult.new(id: id, score: score, metadata: entry[:metadata])
          end
          scored.sort_by { |r| -r.score }.first(limit)
        end

        # Remove the entry stored under +id+.
        #
        # @param id [String] The identifier to delete
        # @return [Hash, nil] The removed entry, or nil if +id+ was not stored
        # @see Interface#delete
        def delete(id)
          @entries.delete(id)
        end

        # Remove every entry whose metadata matches all of +filters+.
        #
        # An empty filter hash matches every entry, so it empties the store.
        #
        # @param filters [Hash] Metadata key-value pairs to match
        # @return [Hash, nil] The remaining entries, or nil when nothing was
        #   removed (the return value of Hash#reject!)
        # @see Interface#delete_by_filter
        def delete_by_filter(filters)
          @entries.reject! { |_id, entry| matches_filters?(entry, filters) }
        end

        # @return [Integer] Number of stored vectors
        # @see Interface#count
        def count
          @entries.size
        end

        private

        # Filter entries by metadata key-value pairs.
        #
        # @param filters [Hash] Metadata filters
        # @return [Hash] Filtered entries (the full entry hash when +filters+ is empty)
        def filter_entries(filters)
          return @entries if filters.empty?

          @entries.select { |_id, entry| matches_filters?(entry, filters) }
        end

        # Whether an entry's metadata equals +filters+ on every filter key.
        #
        # @param entry [Hash] Stored entry with a +:metadata+ hash
        # @param filters [Hash] Metadata key-value pairs to match
        # @return [Boolean]
        def matches_filters?(entry, filters)
          filters.all? { |key, value| entry[:metadata][key] == value }
        end

        # Compute cosine similarity between two vectors.
        #
        # @param vec_a [Array<Float>] First vector
        # @param vec_b [Array<Float>] Second vector
        # @return [Float] Cosine similarity between -1.0 and 1.0 (up to
        #   floating-point rounding); 0.0 when either vector has zero magnitude
        # @raise [ArgumentError] if vectors have different dimensions
        def cosine_similarity(vec_a, vec_b)
          unless vec_a.length == vec_b.length
            raise ArgumentError,
                  "Vector dimension mismatch (#{vec_a.length} vs #{vec_b.length})"
          end

          dot = vec_a.zip(vec_b).sum { |x, y| x * y }
          mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
          mag_b = Math.sqrt(vec_b.sum { |x| x**2 })

          # A zero vector has no direction; report "no similarity" instead of
          # dividing by zero.
          return 0.0 if mag_a.zero? || mag_b.zero?

          dot / (mag_a * mag_b)
        end
      end
    end
  end
end
@@ -1,245 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
require 'digest'
require 'fileutils'
require 'json'
require 'time'
6
-
7
module CodebaseIndex
  module Temporal
    # JSON-file-based snapshot store for temporal tracking without SQLite.
    #
    # Stores snapshots as individual JSON files in a `snapshots/` subdirectory
    # of the index output directory. Each file is named by git SHA and contains
    # manifest metadata plus per-unit content hashes.
    #
    # Implements the same public interface as SnapshotStore so the MCP server
    # tools work identically.
    #
    # @example
    #   store = JsonSnapshotStore.new(dir: '/app/tmp/codebase_index')
    #   store.capture(manifest, unit_hashes)
    #   store.list # => [{ git_sha: "abc123", ... }]
    #   store.diff("abc123", "def456") # => { added: [...], modified: [...], deleted: [...] }
    #
    class JsonSnapshotStore # rubocop:disable Metrics/ClassLength
      # Per-unit hash fields; a difference in any of them marks a unit as modified.
      UNIT_HASH_KEYS = %i[source_hash metadata_hash dependencies_hash].freeze
      private_constant :UNIT_HASH_KEYS

      # @param dir [String] Index output directory; snapshots are kept in its
      #   `snapshots/` subdirectory, which is created if missing
      def initialize(dir:)
        @dir = File.join(dir, 'snapshots')
        FileUtils.mkdir_p(@dir)
      end

      # Persist a snapshot for the manifest's git SHA and return its summary.
      #
      # When an earlier snapshot exists, the added/modified/deleted counters
      # are computed against the one with the greatest +extracted_at+.
      #
      # @param manifest [Hash] Extraction manifest (string or symbol keys)
      # @param unit_hashes [Array<Hash>] Per-unit hashes with identifier, type,
      #   source_hash, metadata_hash and dependencies_hash keys
      # @return [Hash, nil] Snapshot summary without +:units+, or nil when the
      #   manifest carries no git SHA
      # @raise [ArgumentError] if the git SHA is not a hexadecimal string
      def capture(manifest, unit_hashes)
        git_sha = mget(manifest, 'git_sha')
        return nil unless git_sha

        previous = find_latest
        snapshot = build_snapshot(manifest, git_sha, unit_hashes)

        if previous
          # Reuse the units already indexed by build_snapshot.
          changes = compute_diff(previous[:units], snapshot[:units])
          snapshot[:units_added] = changes[:added].size
          snapshot[:units_modified] = changes[:modified].size
          snapshot[:units_deleted] = changes[:deleted].size
        end

        write_snapshot(git_sha, snapshot)
        snapshot.except(:units)
      end

      # List snapshot summaries, newest first.
      #
      # @param limit [Integer] Maximum number of summaries to return
      # @param branch [String, nil] Restrict to this git branch when given
      # @return [Array<Hash>] Summaries ordered by descending +extracted_at+
      def list(limit: 20, branch: nil)
        snapshots = load_all_summaries
        snapshots.select! { |s| s[:git_branch] == branch } if branch
        snapshots.sort_by { |s| s[:extracted_at] || '' }.reverse.first(limit)
      end

      # Look up a single snapshot summary.
      #
      # @param git_sha [String] Git SHA naming the snapshot
      # @return [Hash, nil] Summary without +:units+, or nil if no such snapshot
      # @raise [ArgumentError] if +git_sha+ is not a hexadecimal string
      def find(git_sha)
        load_snapshot_with_units(git_sha)&.except(:units)
      end

      # Compare the units of two snapshots.
      #
      # @param sha_a [String] Git SHA of the baseline snapshot
      # @param sha_b [String] Git SHA of the snapshot to compare against it
      # @return [Hash] +:added+, +:modified+ and +:deleted+ arrays of
      #   +{ identifier:, unit_type: }+; all empty when either snapshot is missing
      # @raise [ArgumentError] if either SHA is not a hexadecimal string
      def diff(sha_a, sha_b)
        snap_a = load_snapshot_with_units(sha_a)
        snap_b = load_snapshot_with_units(sha_b)

        return { added: [], modified: [], deleted: [] } unless snap_a && snap_b

        compute_diff(snap_a[:units], snap_b[:units])
      end

      # History of one unit across snapshots, newest first.
      #
      # +limit+ caps the number of history entries returned, so snapshots that
      # do not contain the unit do not use up the budget.
      #
      # @param identifier [String] Unit identifier
      # @param limit [Integer] Maximum number of history entries
      # @return [Array<Hash>] Entries with snapshot info, the unit's hashes and
      #   a +:changed+ flag (source hash differs from the next-older entry; the
      #   oldest returned entry is always flagged)
      def unit_history(identifier, limit: 20)
        newest_first = load_all_with_units.sort_by { |s| s[:extracted_at] || '' }.reverse
        entries = newest_first.filter_map { |snap| history_entry(snap, identifier) }

        mark_changed_entries(entries.first(limit))
      end

      private

      # Read +key+ from a hash that may use string or symbol keys.
      def mget(hash, key)
        hash[key] || hash[key.to_sym]
      end

      # Assemble the full snapshot record (summary fields plus indexed units).
      def build_snapshot(manifest, git_sha, unit_hashes)
        {
          git_sha: git_sha,
          git_branch: mget(manifest, 'git_branch'),
          extracted_at: mget(manifest, 'extracted_at') || Time.now.iso8601,
          rails_version: mget(manifest, 'rails_version'),
          ruby_version: mget(manifest, 'ruby_version'),
          total_units: mget(manifest, 'total_units') || unit_hashes.size,
          unit_counts: mget(manifest, 'counts') || {},
          gemfile_lock_sha: mget(manifest, 'gemfile_lock_sha'),
          schema_sha: mget(manifest, 'schema_sha'),
          units_added: 0,
          units_modified: 0,
          units_deleted: 0,
          units: index_units(unit_hashes)
        }
      end

      # Index unit hashes by identifier, skipping entries without one.
      def index_units(unit_hashes)
        unit_hashes.each_with_object({}) do |uh, indexed|
          id = mget(uh, 'identifier')
          next if id.nil?

          indexed[id] = {
            unit_type: mget(uh, 'type').to_s,
            source_hash: mget(uh, 'source_hash'),
            metadata_hash: mget(uh, 'metadata_hash'),
            dependencies_hash: mget(uh, 'dependencies_hash')
          }
        end
      end

      # Classify units as added, modified or deleted going from +units_a+ to +units_b+.
      def compute_diff(units_a, units_b)
        added = []
        modified = []

        units_b.each do |identifier, data_b|
          change = { identifier: identifier, unit_type: data_b[:unit_type] }

          if !units_a.key?(identifier)
            added << change
          elsif UNIT_HASH_KEYS.any? { |key| units_a[identifier][key] != data_b[key] }
            modified << change
          end
        end

        deleted = units_a.filter_map do |identifier, data_a|
          { identifier: identifier, unit_type: data_a[:unit_type] } unless units_b.key?(identifier)
        end

        { added: added, modified: modified, deleted: deleted }
      end

      # Build the history row for +identifier+ in one snapshot, or nil if absent.
      def history_entry(snapshot, identifier)
        unit = snapshot[:units]&.[](identifier)
        return nil unless unit

        {
          git_sha: snapshot[:git_sha],
          extracted_at: snapshot[:extracted_at],
          git_branch: snapshot[:git_branch],
          unit_type: unit[:unit_type],
          source_hash: unit[:source_hash],
          metadata_hash: unit[:metadata_hash],
          dependencies_hash: unit[:dependencies_hash]
        }
      end

      # Flag each entry whose source hash differs from the next (older) entry.
      # The last entry has nothing older to compare with and is always flagged.
      def mark_changed_entries(entries)
        entries.each_with_index do |entry, i|
          older = entries[i + 1]
          entry[:changed] = older.nil? || entry[:source_hash] != older[:source_hash]
        end
      end

      # File path for a snapshot. Only hexadecimal SHAs are accepted, which
      # also keeps path separators out of the file name.
      #
      # @raise [ArgumentError] if +git_sha+ is nil or not a hexadecimal string
      def snapshot_path(git_sha)
        # to_s so nil/non-string input fails the pattern instead of raising NoMethodError.
        raise ArgumentError, "Invalid git SHA: #{git_sha}" unless git_sha.to_s.match?(/\A[0-9a-f]+\z/i)

        File.join(@dir, "#{git_sha}.json")
      end

      def write_snapshot(git_sha, data)
        File.write(snapshot_path(git_sha), JSON.pretty_generate(data))
      end

      # Load one snapshot including its units; nil when the file does not exist.
      # A corrupt file raises JSON::ParserError here (only bulk loads skip it).
      def load_snapshot_with_units(git_sha)
        path = snapshot_path(git_sha)
        return nil unless File.exist?(path)

        symbolize_snapshot(JSON.parse(File.read(path)))
      end

      # Summaries (no +:units+) of every readable snapshot file.
      def load_all_summaries
        load_all_with_units.map { |snapshot| snapshot.except(:units) }
      end

      # Every readable snapshot file, including units; corrupt files are
      # skipped with a warning.
      def load_all_with_units
        Dir.glob(File.join(@dir, '*.json')).filter_map do |path|
          symbolize_snapshot(JSON.parse(File.read(path)))
        rescue JSON::ParserError => e
          warn "[CodebaseIndex] Skipping corrupt snapshot #{File.basename(path)}: #{e.message}"
          nil
        end
      end

      # The snapshot with the greatest +extracted_at+, including units.
      def find_latest
        latest = load_all_summaries.max_by { |s| s[:extracted_at] || '' }
        latest && load_snapshot_with_units(latest[:git_sha])
      end

      # Convert a parsed JSON snapshot (string keys) to the symbol-keyed form.
      def symbolize_snapshot(data)
        {
          git_sha: data['git_sha'],
          git_branch: data['git_branch'],
          extracted_at: data['extracted_at'],
          rails_version: data['rails_version'],
          ruby_version: data['ruby_version'],
          total_units: data['total_units'],
          unit_counts: data['unit_counts'] || {},
          gemfile_lock_sha: data['gemfile_lock_sha'],
          schema_sha: data['schema_sha'],
          units_added: data['units_added'],
          units_modified: data['units_modified'],
          units_deleted: data['units_deleted'],
          units: symbolize_units(data['units'])
        }
      end

      # Convert parsed per-unit records to symbol keys; identifiers stay strings.
      def symbolize_units(units)
        return {} unless units

        units.transform_values do |v|
          {
            unit_type: v['unit_type'],
            source_hash: v['source_hash'],
            metadata_hash: v['metadata_hash'],
            dependencies_hash: v['dependencies_hash']
          }
        end
      end
    end
  end
end