woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,167 @@
1
# frozen_string_literal: true

module Woods
  module Storage
    # VectorStore provides an interface for storing and searching embedding vectors.
    #
    # All vector store adapters must include the {Interface} module and implement
    # its methods. The {InMemory} adapter is provided for development and testing.
    #
    # @example Using the in-memory adapter
    #   store = Woods::Storage::VectorStore::InMemory.new
    #   store.store("User", [0.1, 0.2, 0.3], { type: "model" })
    #   results = store.search([0.1, 0.2, 0.3], limit: 5)
    #
    module VectorStore
      # Interface that all vector store adapters must implement.
      module Interface
        # Store a vector with associated metadata.
        #
        # @param id [String] Unique identifier for the vector
        # @param vector [Array<Float>] The embedding vector
        # @param metadata [Hash] Optional metadata to store alongside the vector
        # @raise [NotImplementedError] if not implemented by adapter
        def store(id, vector, metadata = {})
          raise NotImplementedError
        end

        # Store multiple vectors in a single batch operation.
        #
        # Default implementation falls back to individual store calls.
        # Adapters should override for bulk-optimized behavior (e.g.,
        # multi-row INSERT for pgvector, batch upsert for Qdrant).
        #
        # @param entries [Array<Hash>] Each entry has :id, :vector, :metadata keys
        def store_batch(entries)
          entries.each { |e| store(e[:id], e[:vector], e[:metadata] || {}) }
        end

        # Search for similar vectors using cosine similarity.
        #
        # @param query_vector [Array<Float>] The query embedding vector
        # @param limit [Integer] Maximum number of results to return
        # @param filters [Hash] Optional metadata filters to apply
        # @return [Array<SearchResult>] Results sorted by descending similarity
        # @raise [NotImplementedError] if not implemented by adapter
        def search(query_vector, limit: 10, filters: {})
          raise NotImplementedError
        end

        # Delete a vector by ID.
        #
        # @param id [String] The identifier to delete
        # @raise [NotImplementedError] if not implemented by adapter
        def delete(id)
          raise NotImplementedError
        end

        # Delete vectors matching metadata filters.
        #
        # Note: an empty filter hash matches every entry (vacuous `all?`),
        # mirroring how {#search} treats empty filters as "no restriction".
        #
        # @param filters [Hash] Metadata key-value pairs to match
        # @raise [NotImplementedError] if not implemented by adapter
        def delete_by_filter(filters)
          raise NotImplementedError
        end

        # Return the number of stored vectors.
        #
        # @return [Integer] Total count
        # @raise [NotImplementedError] if not implemented by adapter
        def count
          raise NotImplementedError
        end
      end

      # Value object representing a single search result.
      SearchResult = Struct.new(:id, :score, :metadata, keyword_init: true)

      # In-memory vector store using hash storage and cosine similarity.
      #
      # Suitable for development and testing. Not intended for production use
      # with large datasets.
      #
      # @example
      #   store = InMemory.new
      #   store.store("doc1", [1.0, 0.0], { type: "model" })
      #   store.store("doc2", [0.0, 1.0], { type: "service" })
      #   store.search([1.0, 0.0], limit: 1)
      #   # => [#<SearchResult id="doc1", score=1.0, metadata={type: "model"}>]
      #
      class InMemory
        include Interface

        def initialize
          @entries = {} # id => { vector:, metadata: }
        end

        # @see Interface#store
        def store(id, vector, metadata = {})
          @entries[id] = { vector: vector, metadata: metadata }
        end

        # @see Interface#search
        def search(query_vector, limit: 10, filters: {})
          candidates = filter_entries(filters)

          scored = candidates.map do |id, entry|
            score = cosine_similarity(query_vector, entry[:vector])
            SearchResult.new(id: id, score: score, metadata: entry[:metadata])
          end
          scored.sort_by { |r| -r.score }.first(limit)
        end

        # @see Interface#delete
        def delete(id)
          @entries.delete(id)
        end

        # @see Interface#delete_by_filter
        def delete_by_filter(filters)
          # reject! returns nil when nothing was removed; callers do not rely
          # on the return value.
          @entries.reject! { |_id, entry| matches_filters?(entry, filters) }
        end

        # @see Interface#count
        def count
          @entries.size
        end

        private

        # Whether an entry's metadata satisfies every filter key-value pair.
        # Vacuously true for an empty filter hash.
        #
        # @param entry [Hash] Stored entry with :vector and :metadata keys
        # @param filters [Hash] Metadata filters
        # @return [Boolean]
        def matches_filters?(entry, filters)
          filters.all? { |key, value| entry[:metadata][key] == value }
        end

        # Filter entries by metadata key-value pairs.
        #
        # @param filters [Hash] Metadata filters
        # @return [Hash] Filtered entries
        def filter_entries(filters)
          return @entries if filters.empty?

          @entries.select { |_id, entry| matches_filters?(entry, filters) }
        end

        # Compute cosine similarity between two vectors.
        #
        # Returns 0.0 when either vector has zero magnitude (avoids
        # division by zero for degenerate embeddings).
        #
        # @param vec_a [Array<Float>] First vector
        # @param vec_b [Array<Float>] Second vector
        # @return [Float] Cosine similarity between -1.0 and 1.0
        # @raise [ArgumentError] if vectors have different dimensions
        def cosine_similarity(vec_a, vec_b)
          unless vec_a.length == vec_b.length
            raise ArgumentError,
                  "Vector dimension mismatch (#{vec_a.length} vs #{vec_b.length})"
          end

          dot = vec_a.zip(vec_b).sum { |x, y| x * y }
          mag_a = Math.sqrt(vec_a.sum { |x| x**2 })
          mag_b = Math.sqrt(vec_b.sum { |x| x**2 })

          return 0.0 if mag_a.zero? || mag_b.zero?

          dot / (mag_a * mag_b)
        end
      end
    end
  end
end
@@ -0,0 +1,245 @@
1
+ # frozen_string_literal: true
2
+
3
require 'digest'
require 'fileutils'
require 'json'
require 'time'
6
+
7
module Woods
  module Temporal
    # JSON-file-based snapshot store for temporal tracking without SQLite.
    #
    # Stores snapshots as individual JSON files in a `snapshots/` subdirectory
    # of the index output directory. Each file is named by git SHA and contains
    # manifest metadata plus per-unit content hashes.
    #
    # Implements the same public interface as SnapshotStore so the MCP server
    # tools work identically.
    #
    # @example
    #   store = JsonSnapshotStore.new(dir: '/app/tmp/woods')
    #   store.capture(manifest, unit_hashes)
    #   store.list # => [{ git_sha: "abc123", ... }]
    #   store.diff("abc123", "def456") # => { added: [...], modified: [...], deleted: [...] }
    #
    class JsonSnapshotStore # rubocop:disable Metrics/ClassLength
      # @param dir [String] Index output directory; snapshots live in "#{dir}/snapshots"
      def initialize(dir:)
        @dir = File.join(dir, 'snapshots')
        FileUtils.mkdir_p(@dir)
      end

      # Capture a new snapshot from a manifest and per-unit hashes.
      #
      # When a previous snapshot exists, the added/modified/deleted counts are
      # computed against it before the new snapshot is written.
      #
      # @param manifest [Hash] Extraction manifest (string or symbol keys)
      # @param unit_hashes [Array<Hash>] Per-unit hash entries
      # @return [Hash, nil] The captured snapshot summary (without :units),
      #   or nil when the manifest has no git SHA
      def capture(manifest, unit_hashes)
        git_sha = mget(manifest, 'git_sha')
        return nil unless git_sha

        previous = find_latest
        snapshot = build_snapshot(manifest, git_sha, unit_hashes)

        if previous
          diff_result = compute_diff(previous[:units], index_units(unit_hashes))
          snapshot[:units_added] = diff_result[:added].size
          snapshot[:units_modified] = diff_result[:modified].size
          snapshot[:units_deleted] = diff_result[:deleted].size
        end

        write_snapshot(git_sha, snapshot)
        snapshot.except(:units)
      end

      # List snapshot summaries, newest first.
      #
      # @param limit [Integer] Maximum number of summaries to return
      # @param branch [String, nil] Optional git branch filter
      # @return [Array<Hash>] Snapshot summaries without :units
      def list(limit: 20, branch: nil)
        snapshots = load_all_summaries
        snapshots.select! { |s| s[:git_branch] == branch } if branch
        snapshots.sort_by { |s| s[:extracted_at] || '' }.reverse.first(limit)
      end

      # Find a snapshot summary by git SHA.
      #
      # @param git_sha [String] Git SHA of the snapshot
      # @return [Hash, nil] Snapshot summary without :units, or nil if absent
      # @raise [ArgumentError] if git_sha is not a hex string
      def find(git_sha)
        path = snapshot_path(git_sha)
        return nil unless File.exist?(path)

        data = JSON.parse(File.read(path))
        symbolize_snapshot(data).except(:units)
      end

      # Diff two snapshots by git SHA.
      #
      # @param sha_a [String] Base snapshot SHA
      # @param sha_b [String] Target snapshot SHA
      # @return [Hash] { added:, modified:, deleted: } arrays of
      #   { identifier:, unit_type: } entries; all empty when either SHA is missing
      def diff(sha_a, sha_b)
        snap_a = load_snapshot_with_units(sha_a)
        snap_b = load_snapshot_with_units(sha_b)

        return { added: [], modified: [], deleted: [] } unless snap_a && snap_b

        compute_diff(snap_a[:units], snap_b[:units])
      end

      # History of a single unit across snapshots, newest first.
      #
      # @param identifier [String] Unit identifier
      # @param limit [Integer] Maximum number of snapshots to inspect
      # @return [Array<Hash>] Per-snapshot entries with a :changed flag
      def unit_history(identifier, limit: 20)
        snapshots = load_all_with_units
                    .sort_by { |s| s[:extracted_at] || '' }
                    .reverse
                    .first(limit)

        entries = snapshots.filter_map do |snap|
          unit = snap[:units]&.[](identifier)
          next unless unit

          {
            git_sha: snap[:git_sha],
            extracted_at: snap[:extracted_at],
            git_branch: snap[:git_branch],
            unit_type: unit[:unit_type],
            source_hash: unit[:source_hash],
            metadata_hash: unit[:metadata_hash],
            dependencies_hash: unit[:dependencies_hash]
          }
        end

        mark_changed_entries(entries)
      end

      private

      # Fetch a value by string key, falling back to the symbol key.
      # Manifests may arrive either freshly built (symbols) or round-tripped
      # through JSON (strings), so both spellings must be accepted.
      def mget(hash, key)
        hash[key] || hash[key.to_sym]
      end

      # Build the full snapshot hash (including :units) from the manifest.
      def build_snapshot(manifest, git_sha, unit_hashes)
        {
          git_sha: git_sha,
          git_branch: mget(manifest, 'git_branch'),
          extracted_at: mget(manifest, 'extracted_at') || Time.now.iso8601,
          rails_version: mget(manifest, 'rails_version'),
          ruby_version: mget(manifest, 'ruby_version'),
          total_units: mget(manifest, 'total_units') || unit_hashes.size,
          unit_counts: mget(manifest, 'counts') || {},
          gemfile_lock_sha: mget(manifest, 'gemfile_lock_sha'),
          schema_sha: mget(manifest, 'schema_sha'),
          units_added: 0,
          units_modified: 0,
          units_deleted: 0,
          units: index_units(unit_hashes)
        }
      end

      # Index unit hash entries by identifier; entries without one are skipped.
      def index_units(unit_hashes)
        unit_hashes.filter_map do |uh|
          id = mget(uh, 'identifier')
          next if id.nil?

          [id, {
            unit_type: mget(uh, 'type').to_s,
            source_hash: mget(uh, 'source_hash'),
            metadata_hash: mget(uh, 'metadata_hash'),
            dependencies_hash: mget(uh, 'dependencies_hash')
          }]
        end.to_h
      end

      # Compare two indexed unit maps; a unit counts as modified when any of
      # its three content hashes differs.
      def compute_diff(units_a, units_b) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
        added = []
        modified = []
        deleted = []

        units_b.each do |identifier, data_b|
          if units_a.key?(identifier)
            data_a = units_a[identifier]
            if data_a[:source_hash] != data_b[:source_hash] ||
               data_a[:metadata_hash] != data_b[:metadata_hash] ||
               data_a[:dependencies_hash] != data_b[:dependencies_hash]
              modified << { identifier: identifier, unit_type: data_b[:unit_type] }
            end
          else
            added << { identifier: identifier, unit_type: data_b[:unit_type] }
          end
        end

        units_a.each do |identifier, data_a|
          deleted << { identifier: identifier, unit_type: data_a[:unit_type] } unless units_b.key?(identifier)
        end

        { added: added, modified: modified, deleted: deleted }
      end

      # Flag each history entry whose source hash differs from the next-older
      # entry. The oldest visible entry is always flagged true — note this is
      # its first appearance only within the inspected window (limit).
      def mark_changed_entries(entries)
        entries.each_with_index do |entry, i|
          entry[:changed] = if i == entries.size - 1
                              true
                            else
                              entry[:source_hash] != entries[i + 1][:source_hash]
                            end
        end
        entries
      end

      # Resolve the on-disk path for a snapshot, validating the SHA first.
      # The type check guards against nil (e.g. a snapshot file missing its
      # git_sha) raising NoMethodError instead of the intended ArgumentError;
      # the hex-only pattern also prevents path traversal via the filename.
      def snapshot_path(git_sha)
        unless git_sha.is_a?(String) && git_sha.match?(/\A[0-9a-f]+\z/i)
          raise ArgumentError, "Invalid git SHA: #{git_sha.inspect}"
        end

        File.join(@dir, "#{git_sha}.json")
      end

      def write_snapshot(git_sha, data)
        File.write(snapshot_path(git_sha), JSON.pretty_generate(data))
      end

      def load_snapshot_with_units(git_sha)
        path = snapshot_path(git_sha)
        return nil unless File.exist?(path)

        symbolize_snapshot(JSON.parse(File.read(path)))
      end

      # Load every snapshot summary, skipping unreadable files with a warning.
      def load_all_summaries
        Dir.glob(File.join(@dir, '*.json')).filter_map do |path|
          data = JSON.parse(File.read(path))
          symbolize_snapshot(data).except(:units)
        rescue JSON::ParserError => e
          warn "[Woods] Skipping corrupt snapshot #{File.basename(path)}: #{e.message}"
          nil
        end
      end

      # Load every snapshot including :units, skipping unreadable files.
      def load_all_with_units
        Dir.glob(File.join(@dir, '*.json')).filter_map do |path|
          symbolize_snapshot(JSON.parse(File.read(path)))
        rescue JSON::ParserError => e
          warn "[Woods] Skipping corrupt snapshot #{File.basename(path)}: #{e.message}"
          nil
        end
      end

      # The most recent snapshot (by extracted_at), with units, or nil.
      def find_latest
        snapshots = load_all_summaries
        return nil if snapshots.empty?

        latest = snapshots.max_by { |s| s[:extracted_at] || '' }
        load_snapshot_with_units(latest[:git_sha])
      end

      # Convert a parsed JSON snapshot (string keys) into the symbol-keyed
      # shape used throughout the store.
      def symbolize_snapshot(data)
        {
          git_sha: data['git_sha'],
          git_branch: data['git_branch'],
          extracted_at: data['extracted_at'],
          rails_version: data['rails_version'],
          ruby_version: data['ruby_version'],
          total_units: data['total_units'],
          unit_counts: data['unit_counts'] || {},
          gemfile_lock_sha: data['gemfile_lock_sha'],
          schema_sha: data['schema_sha'],
          units_added: data['units_added'],
          units_modified: data['units_modified'],
          units_deleted: data['units_deleted'],
          units: symbolize_units(data['units'])
        }
      end

      def symbolize_units(units)
        return {} unless units

        units.transform_values do |v|
          {
            unit_type: v['unit_type'],
            source_hash: v['source_hash'],
            metadata_hash: v['metadata_hash'],
            dependencies_hash: v['dependencies_hash']
          }
        end
      end
    end
  end
end