rspec-tracer 1.2.2 → 2.0.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +197 -45
  3. data/README.md +439 -429
  4. data/bin/rspec-tracer +15 -0
  5. data/lib/rspec_tracer/cache/Rakefile +43 -0
  6. data/lib/rspec_tracer/cli/cache_clear.rb +98 -0
  7. data/lib/rspec_tracer/cli/cache_info.rb +103 -0
  8. data/lib/rspec_tracer/cli/doctor.rb +275 -0
  9. data/lib/rspec_tracer/cli/explain.rb +148 -0
  10. data/lib/rspec_tracer/cli/report_open.rb +82 -0
  11. data/lib/rspec_tracer/cli.rb +116 -0
  12. data/lib/rspec_tracer/configuration.rb +1100 -3
  13. data/lib/rspec_tracer/engine.rb +1076 -0
  14. data/lib/rspec_tracer/example.rb +21 -6
  15. data/lib/rspec_tracer/filter.rb +35 -0
  16. data/lib/rspec_tracer/line_stub.rb +61 -0
  17. data/lib/rspec_tracer/load_config.rb +2 -2
  18. data/lib/rspec_tracer/logger.rb +15 -0
  19. data/lib/rspec_tracer/rails/README.md +78 -0
  20. data/lib/rspec_tracer/rails/i18n_tracking.rb +137 -0
  21. data/lib/rspec_tracer/rails/notifications.rb +263 -0
  22. data/lib/rspec_tracer/rails/preset.rb +94 -0
  23. data/lib/rspec_tracer/rails/railtie.rb +22 -0
  24. data/lib/rspec_tracer/rails.rb +15 -0
  25. data/lib/rspec_tracer/remote_cache/README.md +140 -0
  26. data/lib/rspec_tracer/remote_cache/Rakefile +35 -11
  27. data/lib/rspec_tracer/remote_cache/archive.rb +137 -0
  28. data/lib/rspec_tracer/remote_cache/backend.rb +73 -0
  29. data/lib/rspec_tracer/remote_cache/git_ancestry.rb +241 -0
  30. data/lib/rspec_tracer/remote_cache/local_fs_backend.rb +439 -0
  31. data/lib/rspec_tracer/remote_cache/redis_backend.rb +554 -0
  32. data/lib/rspec_tracer/remote_cache/s3_backend.rb +712 -0
  33. data/lib/rspec_tracer/remote_cache/user_tasks.rb +397 -0
  34. data/lib/rspec_tracer/remote_cache/validator.rb +40 -62
  35. data/lib/rspec_tracer/remote_cache.rb +22 -0
  36. data/lib/rspec_tracer/reporters/README.md +103 -0
  37. data/lib/rspec_tracer/reporters/base.rb +87 -0
  38. data/lib/rspec_tracer/reporters/coverage_json_reporter.rb +338 -0
  39. data/lib/rspec_tracer/reporters/html/.gitignore +19 -0
  40. data/lib/rspec_tracer/reporters/html/.prettierignore +4 -0
  41. data/lib/rspec_tracer/reporters/html/.prettierrc.json +9 -0
  42. data/lib/rspec_tracer/reporters/html/README.md +80 -0
  43. data/lib/rspec_tracer/reporters/html/dist/assets/index.css +2 -0
  44. data/lib/rspec_tracer/reporters/html/dist/assets/index.js +1 -0
  45. data/lib/rspec_tracer/reporters/html/dist/index.html +24 -0
  46. data/lib/rspec_tracer/reporters/html/eslint.config.js +62 -0
  47. data/lib/rspec_tracer/reporters/html/package-lock.json +4941 -0
  48. data/lib/rspec_tracer/reporters/html/package.json +29 -0
  49. data/lib/rspec_tracer/reporters/html/src/app.jsx +130 -0
  50. data/lib/rspec_tracer/reporters/html/src/components/AllExamples.jsx +86 -0
  51. data/lib/rspec_tracer/reporters/html/src/components/DuplicateExamples.jsx +68 -0
  52. data/lib/rspec_tracer/reporters/html/src/components/ExamplesDependency.jsx +78 -0
  53. data/lib/rspec_tracer/reporters/html/src/components/FilesDependency.jsx +72 -0
  54. data/lib/rspec_tracer/reporters/html/src/components/FlakyExamples.jsx +42 -0
  55. data/lib/rspec_tracer/reporters/html/src/components/ReportTable.jsx +131 -0
  56. data/lib/rspec_tracer/reporters/html/src/components/SearchBar.jsx +19 -0
  57. data/lib/rspec_tracer/reporters/html/src/index.html +23 -0
  58. data/lib/rspec_tracer/reporters/html/src/main.jsx +37 -0
  59. data/lib/rspec_tracer/reporters/html/src/styles.css +434 -0
  60. data/lib/rspec_tracer/reporters/html/vite.config.js +42 -0
  61. data/lib/rspec_tracer/reporters/html_reporter.rb +266 -0
  62. data/lib/rspec_tracer/reporters/json_reporter.rb +88 -0
  63. data/lib/rspec_tracer/reporters/payload_builder.rb +235 -0
  64. data/lib/rspec_tracer/reporters/registry.rb +120 -0
  65. data/lib/rspec_tracer/reporters/terminal_reporter.rb +264 -0
  66. data/lib/rspec_tracer/rspec/README.md +73 -0
  67. data/lib/rspec_tracer/rspec/installation.rb +97 -0
  68. data/lib/rspec_tracer/rspec/metadata.rb +96 -0
  69. data/lib/rspec_tracer/rspec/parallel_tests.rb +459 -0
  70. data/lib/rspec_tracer/rspec/reporter_hook.rb +84 -0
  71. data/lib/rspec_tracer/rspec/runner_hook.rb +178 -0
  72. data/lib/rspec_tracer/source_file.rb +24 -7
  73. data/lib/rspec_tracer/storage/README.md +35 -0
  74. data/lib/rspec_tracer/storage/backend.rb +68 -0
  75. data/lib/rspec_tracer/storage/json_backend.rb +866 -0
  76. data/lib/rspec_tracer/storage/lazy_snapshot.rb +65 -0
  77. data/lib/rspec_tracer/storage/schema.rb +43 -0
  78. data/lib/rspec_tracer/storage/serializer/json.rb +41 -0
  79. data/lib/rspec_tracer/storage/serializer/msgpack.rb +90 -0
  80. data/lib/rspec_tracer/storage/snapshot.rb +127 -0
  81. data/lib/rspec_tracer/storage/sqlite_backend.rb +686 -0
  82. data/lib/rspec_tracer/time_formatter.rb +37 -18
  83. data/lib/rspec_tracer/tracker/README.md +36 -0
  84. data/lib/rspec_tracer/tracker/coverage_adapter.rb +174 -0
  85. data/lib/rspec_tracer/tracker/declared_globs.rb +100 -0
  86. data/lib/rspec_tracer/tracker/dependency_graph.rb +134 -0
  87. data/lib/rspec_tracer/tracker/env_matcher.rb +127 -0
  88. data/lib/rspec_tracer/tracker/env_snapshot.rb +77 -0
  89. data/lib/rspec_tracer/tracker/example_registry.rb +153 -0
  90. data/lib/rspec_tracer/tracker/file_digest.rb +61 -0
  91. data/lib/rspec_tracer/tracker/filter.rb +127 -0
  92. data/lib/rspec_tracer/tracker/input.rb +99 -0
  93. data/lib/rspec_tracer/tracker/io_hooks/file.rb +55 -0
  94. data/lib/rspec_tracer/tracker/io_hooks/io.rb +24 -0
  95. data/lib/rspec_tracer/tracker/io_hooks/json.rb +23 -0
  96. data/lib/rspec_tracer/tracker/io_hooks/kernel.rb +26 -0
  97. data/lib/rspec_tracer/tracker/io_hooks/yaml.rb +38 -0
  98. data/lib/rspec_tracer/tracker/io_hooks.rb +195 -0
  99. data/lib/rspec_tracer/tracker/loaded_files_tracker.rb +295 -0
  100. data/lib/rspec_tracer/tracker/new_file_detector.rb +62 -0
  101. data/lib/rspec_tracer/tracker/whole_suite_invalidators.rb +96 -0
  102. data/lib/rspec_tracer/version.rb +4 -1
  103. data/lib/rspec_tracer.rb +232 -381
  104. metadata +93 -43
  105. data/lib/rspec_tracer/cache.rb +0 -207
  106. data/lib/rspec_tracer/coverage_merger.rb +0 -42
  107. data/lib/rspec_tracer/coverage_reporter.rb +0 -187
  108. data/lib/rspec_tracer/coverage_writer.rb +0 -58
  109. data/lib/rspec_tracer/html_reporter/Rakefile +0 -18
  110. data/lib/rspec_tracer/html_reporter/assets/javascripts/application.js +0 -56
  111. data/lib/rspec_tracer/html_reporter/assets/javascripts/libraries/jquery.js +0 -10881
  112. data/lib/rspec_tracer/html_reporter/assets/javascripts/plugins/datatables.js +0 -15381
  113. data/lib/rspec_tracer/html_reporter/assets/stylesheets/application.css +0 -196
  114. data/lib/rspec_tracer/html_reporter/assets/stylesheets/plugins/datatables.css +0 -459
  115. data/lib/rspec_tracer/html_reporter/assets/stylesheets/plugins/jquery-ui.css +0 -436
  116. data/lib/rspec_tracer/html_reporter/assets/stylesheets/print.css +0 -92
  117. data/lib/rspec_tracer/html_reporter/assets/stylesheets/reset.css +0 -265
  118. data/lib/rspec_tracer/html_reporter/public/application.css +0 -5
  119. data/lib/rspec_tracer/html_reporter/public/application.js +0 -6
  120. data/lib/rspec_tracer/html_reporter/public/datatables/images/sort_asc.png +0 -0
  121. data/lib/rspec_tracer/html_reporter/public/datatables/images/sort_asc_disabled.png +0 -0
  122. data/lib/rspec_tracer/html_reporter/public/datatables/images/sort_both.png +0 -0
  123. data/lib/rspec_tracer/html_reporter/public/datatables/images/sort_desc.png +0 -0
  124. data/lib/rspec_tracer/html_reporter/public/datatables/images/sort_desc_disabled.png +0 -0
  125. data/lib/rspec_tracer/html_reporter/public/favicon.png +0 -0
  126. data/lib/rspec_tracer/html_reporter/public/loading.gif +0 -0
  127. data/lib/rspec_tracer/html_reporter/reporter.rb +0 -242
  128. data/lib/rspec_tracer/html_reporter/views/duplicate_examples.erb +0 -34
  129. data/lib/rspec_tracer/html_reporter/views/examples.erb +0 -58
  130. data/lib/rspec_tracer/html_reporter/views/examples_dependency.erb +0 -36
  131. data/lib/rspec_tracer/html_reporter/views/files_dependency.erb +0 -36
  132. data/lib/rspec_tracer/html_reporter/views/flaky_examples.erb +0 -38
  133. data/lib/rspec_tracer/html_reporter/views/layout.erb +0 -38
  134. data/lib/rspec_tracer/remote_cache/aws.rb +0 -176
  135. data/lib/rspec_tracer/remote_cache/cache.rb +0 -75
  136. data/lib/rspec_tracer/remote_cache/repo.rb +0 -210
  137. data/lib/rspec_tracer/report_generator.rb +0 -158
  138. data/lib/rspec_tracer/report_merger.rb +0 -68
  139. data/lib/rspec_tracer/report_writer.rb +0 -141
  140. data/lib/rspec_tracer/reporter.rb +0 -204
  141. data/lib/rspec_tracer/rspec_reporter.rb +0 -41
  142. data/lib/rspec_tracer/rspec_runner.rb +0 -56
  143. data/lib/rspec_tracer/ruby_coverage.rb +0 -9
  144. data/lib/rspec_tracer/runner.rb +0 -278
@@ -0,0 +1,866 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest/md5'
4
+ require 'fileutils'
5
+ require 'json'
6
+ require 'set'
7
+ require 'time' # Time#iso8601 for last_run.json timestamp
8
+
9
+ require_relative 'backend'
10
+ require_relative 'lazy_snapshot'
11
+ require_relative 'schema'
12
+ require_relative 'serializer/json'
13
+ require_relative 'serializer/msgpack'
14
+ require_relative 'snapshot'
15
+
16
+ module RSpecTracer
17
+ # Internal Storage — see {RSpecTracer} for the user-facing surface.
18
+ # @api private
19
+ module Storage
20
+ # JSON-on-disk storage backend. 1.x shipped this layout without a
21
+ # formal contract; 2.0 treats the FILENAMES list below as the
22
+ # authoritative user-facing surface.
23
+ #
24
+ # External tooling (CI cache keys, debug scripts, report
25
+ # renderers) may reference these exact filenames, so additions or
26
+ # removals are breaking changes. The shared-examples contract in
27
+ # `spec/contracts/storage_backend.rb` enforces the list.
28
+ #
29
+ # Commit point: `last_run.json` is written last via tmp + rename.
30
+ # If any of the per-run files fails to write, `last_run.json`
31
+ # stays pointed at the previous successful run and the partially-
32
+ # written run-id directory is orphaned (harmless; `clear!` reaps
33
+ # it). Readers that see `last_run.json` therefore see a complete
34
+ # snapshot.
35
+ #
36
+ # Concurrency: an exclusive flock on a sentinel file
37
+ # (`.rspec_tracer.lock` under cache_path) serializes writers.
38
+ # Readers do not take the lock - `last_run.json`'s atomic rename
39
+ # is their consistency model.
40
+ #
41
+ # Corruption policy: `load_graph` never raises. Missing files,
42
+ # malformed JSON, wrong schema, binary-garbage input all yield
43
+ # `nil` + an info log. This is the invariant the fuzz spec
44
+ # asserts across 1000 iterations.
45
+ #
46
+ # Encoding: every read and write passes `encoding: 'UTF-8'`.
47
+ # Fixes the `Encoding::InvalidByteSequenceError` that bit the
48
+ # dogfood path when an example title contained a non-ASCII byte on
49
+ # a US-ASCII-defaulted filesystem.
50
+ # rubocop:disable Metrics/ClassLength
51
+ class JsonBackend
52
+ # On-disk filenames under the default `:json` serializer. This
53
+ # is the user-facing surface documented in
54
+ # USER_FACING_SURFACE.md section 6 - external tooling that walks
55
+ # `rspec_tracer_cache/` relies on exactly these names.
56
+ # The `:msgpack` serializer substitutes `.msgpack.gz` for the
57
+ # `.json` suffix (one file per field on disk); the file stems
58
+ # and per-field semantics do not change.
59
+ # boot_set.json lands at the end of the list - additive w.r.t.
60
+ # 1.x and v2 readers that walked this enumeration. It carries
61
+ # the project's transitive boot-load set (schema_version 3).
62
+ # wsi_snapshot.json persists the WholeSuiteInvalidators
63
+ # digest_snapshot so warm runs can tell whether Gemfile.lock /
64
+ # .ruby-version / .rspec-tracer / tracer-gem identity changed
65
+ # since the previous run. Without it, warm runs always saw a
66
+ # nil previous and treated every run as a cold first run.
67
+ # Missing file deserializes to `{}` so older caches still load -
68
+ # the fallback path fires one full re-run (safe).
69
+ # env_snapshot.json persists the `Tracker::EnvSnapshot` digest
70
+ # map for env-var values the per-example `tracks: { env: ... }`
71
+ # DSL declares. Same missing-coerces-to-`{}` fallback as
72
+ # wsi_snapshot - no schema bump.
73
+ # env_dependency.json persists the per-example tracked-env
74
+ # attribution map that reporters need for the Examples Dependency
75
+ # report. Missing file coerces to `{}`; older caches load
76
+ # without a cold re-run.
77
+ FILENAMES = %w[
78
+ all_examples.json
79
+ duplicate_examples.json
80
+ interrupted_examples.json
81
+ flaky_examples.json
82
+ failed_examples.json
83
+ pending_examples.json
84
+ skipped_examples.json
85
+ all_files.json
86
+ dependency.json
87
+ reverse_dependency.json
88
+ examples_coverage.json
89
+ boot_set.json
90
+ wsi_snapshot.json
91
+ env_snapshot.json
92
+ env_dependency.json
93
+ cache_hit_reason.json
94
+ ].freeze
95
+
96
+ # Internal constant.
97
+ # @api private
98
+ LAST_RUN_FILENAME = 'last_run.json'
99
+ # Internal constant.
100
+ # @api private
101
+ LOCK_FILENAME = '.rspec_tracer.lock'
102
+ # Internal constant.
103
+ # @api private
104
+ ENCODING = 'UTF-8'
105
+
106
+ # Known snapshot field symbols. Hand-maintained mirror of the FIELD_KINDS keys
107
+ # below (the write-side and read-side shape tables both enumerate
108
+ # the same set, so a divergence would already blow up write
109
+ # paths). Kept as an Array of Symbol so `#read_field` can dispatch
110
+ # without constructing a per-serializer filename table; the
111
+ # filename is computed as "#{field}.#{@serializer.extension}".
112
+ FIELD_NAMES = %i[
113
+ all_examples
114
+ duplicate_examples
115
+ interrupted_examples
116
+ flaky_examples
117
+ failed_examples
118
+ pending_examples
119
+ skipped_examples
120
+ all_files
121
+ dependency
122
+ reverse_dependency
123
+ examples_coverage
124
+ boot_set
125
+ wsi_snapshot
126
+ env_snapshot
127
+ env_dependency
128
+ cache_hit_reason
129
+ ].freeze
130
+
131
+ # Binds a backend + run directory so `LazySnapshot` readers
132
+ # call exactly one public entry point (`backend.read_field`).
133
+ # Keeping this as a nested class (not a Proc) so mutant can
134
+ # introspect the reader contract.
135
# Adapter handed to `LazySnapshot`: it remembers which backend and which
# run directory a snapshot belongs to and forwards every field lookup to
# the backend's public `read_field`.
# @api private
class FieldReader
  # @param backend [#read_field] object that performs the actual read
  # @param dir [String] run directory passed as the first argument of
  #   every `read_field` call
  def initialize(backend:, dir:)
    @store = backend
    @run_dir = dir
  end

  # Fetch one snapshot field from the bound run directory.
  # @param field [Symbol] field name, forwarded unchanged
  # @return [Object] whatever `backend.read_field(dir, field)` returns
  def read(field)
    @store.read_field(@run_dir, field)
  end
end
149
+
150
+ # Write-side field groups. Each group dispatches to one
151
+ # serializer (Hash pass-through, Set->sorted Array, or the
152
+ # Hash[id => Set<path>] -> Hash[id => Array<path>] flavor
153
+ # shared by dependency + reverse_dependency). Kept data-driven
154
+ # so a schema_version bump adds one entry instead of a new
155
+ # branch. Read-side uses FIELD_KINDS below.
156
+ ID_SET_FIELDS = %w[
157
+ interrupted_examples flaky_examples failed_examples pending_examples skipped_examples
158
+ ].freeze
159
+ # Internal constant.
160
+ # @api private
161
+ HASH_FIELDS = %w[
162
+ all_examples duplicate_examples all_files examples_coverage
163
+ boot_set wsi_snapshot env_snapshot env_dependency cache_hit_reason
164
+ ].freeze
165
+ # Internal constant.
166
+ # @api private
167
+ DEPENDENCY_FIELDS = %w[dependency reverse_dependency].freeze
168
+
169
+ # Read-side field -> deserializer-kind map. Drives
170
+ # `decode_field` so the lazy reader looks up one shape
171
+ # per field instead of spelling out a case/when that
172
+ # has to stay in sync with FILENAMES. `:symbolized` =
173
+ # Hash whose inner Hash values get symbolized keys
174
+ # (1.x's all_examples / all_files convention);
175
+ # `:dupe_examples` = same but Array-of-inner-Hash;
176
+ # `:id_set` = Array on disk -> Set in memory;
177
+ # `:dependency` = Hash[id => Array] -> Hash[id => Set];
178
+ # `:plain_hash` = pass-through (examples_coverage, the
179
+ # digest maps, env_dependency).
180
+ FIELD_KINDS = {
181
+ all_examples: :symbolized,
182
+ all_files: :symbolized,
183
+ duplicate_examples: :dupe_examples,
184
+ interrupted_examples: :id_set,
185
+ flaky_examples: :id_set,
186
+ failed_examples: :id_set,
187
+ pending_examples: :id_set,
188
+ skipped_examples: :id_set,
189
+ dependency: :dependency,
190
+ reverse_dependency: :dependency,
191
+ examples_coverage: :plain_hash,
192
+ boot_set: :plain_hash,
193
+ wsi_snapshot: :plain_hash,
194
+ env_snapshot: :plain_hash,
195
+ env_dependency: :plain_hash,
196
+ cache_hit_reason: :plain_hash
197
+ }.freeze
198
+
199
+ # Internal attribute.
200
+ # @api private
201
+ attr_reader :cache_path, :serializer, :serializer_name
202
+
203
+ # rubocop:disable Metrics/ParameterLists
204
def initialize(cache_path:, logger: nil, retention_local_count: nil,
               warn_per_file_mb: nil, warn_total_mb: nil, serializer: :json)
  # rubocop:enable Metrics/ParameterLists
  # Build a backend rooted at `cache_path`.
  #
  # cache_path            - cache root; stored in expanded (absolute) form.
  # logger                - optional logger; every use in this class is nil-safe.
  # retention_local_count - when truthy, `save_graph` prunes older run
  #                         directories down to this count.
  # warn_per_file_mb /
  # warn_total_mb         - size-budget thresholds in MiB; only positive
  #                         Integers enable the corresponding warning.
  # serializer            - serializer name; resolved once here and also kept
  #                         verbatim so peer backends can be built with it.
  @logger = logger
  @cache_path = File.expand_path(cache_path)

  # Tuning knobs are stored exactly as given; validation happens at use.
  @retention_local_count = retention_local_count
  @warn_per_file_mb = warn_per_file_mb
  @warn_total_mb = warn_total_mb

  @serializer = resolve_serializer(serializer)
  @serializer_name = serializer
end
215
+
216
+ # Internal method on the tracer pipeline.
217
+ # @api private
218
def last_run_id
  # Returns the `run_id` recorded in the last-run manifest, or nil when the
  # manifest is not a Hash or the recorded id is nil / stringifies to "".
  manifest = read_last_run_manifest
  recorded = manifest.is_a?(Hash) ? manifest['run_id'] : nil

  recorded unless recorded.nil? || recorded.to_s.empty?
end
227
+
228
+ # Internal method on the tracer pipeline.
229
+ # @api private
230
def load_graph(schema_version:)
  # Resolve the last committed run into a lazily-read snapshot.
  #
  # Returns nil (a "cold run") when the manifest is not a Hash, the stored
  # schema version is unsupported or differs from `schema_version`, the
  # run id is blank, or the run directory is missing. Any StandardError
  # raised along the way is logged through `info` and also yields nil, so
  # this method never raises for a damaged cache.
  manifest = read_last_run_manifest
  return unless manifest.is_a?(Hash)

  stored = manifest['schema_version']
  compatible = Schema.supported?(stored) && stored == schema_version
  unless compatible
    info("schema_version mismatch (stored=#{stored.inspect}, expected=#{schema_version}); cold run")
    return
  end

  run_id = manifest['run_id']
  return if run_id.nil? || run_id.to_s.empty?

  run_dir = File.join(@cache_path, run_id)
  return unless File.directory?(run_dir)

  # Field files are not read here; the reader defers each read until asked.
  reader = FieldReader.new(backend: self, dir: run_dir)
  LazySnapshot.new(schema_version: stored, run_id: run_id, reader: reader)
rescue StandardError => e
  info("failed to load cache: #{e.class}: #{e.message}; cold run")
  nil
end
254
+
255
+ # Read and deserialize one per-run field. Public so
256
+ # `FieldReader` (constructed by `load_graph`) can dispatch.
257
+ # Missing file -> same default value the eager read previously
258
+ # produced (Set.new for ID-set fields, {} for hashes) -
259
+ # preserves the "malformed cache loads gracefully" contract.
260
+ #
261
+ # `deep_intern` runs before the decode so String dedup
262
+ # happens once per on-disk path / example_id regardless of
263
+ # how many times the value appears in the parsed tree.
264
+ # RAM win on large caches is the whole point of this method;
265
+ # see json_backend_spec.rb "string interning" for the
266
+ # measurable assertion.
267
def read_field(dir, field)
  # Read one per-run field file from `dir` and return its decoded value.
  # Raises ArgumentError for a field that is not a FIELD_KINDS key.
  unless FIELD_KINDS.key?(field)
    raise ArgumentError, "unknown snapshot field: #{field.inspect}"
  end

  payload = read_run_file(dir, field_filename(field))
  # Interning happens on the raw parsed tree, before shape decoding.
  interned = deep_intern(payload)
  decode_field(field, interned)
end
273
+
274
+ # Per-serializer on-disk filename for a snapshot field.
275
+ # `:json` -> `all_examples.json`; `:msgpack` ->
276
+ # `all_examples.msgpack.gz`. Public so integration specs /
277
+ # reporters can resolve the expected path without reaching
278
+ # into @serializer.
279
def field_filename(field)
  # File name for `field` under the active serializer: the field name, a
  # dot, then the serializer's extension.
  format('%<stem>s.%<ext>s', stem: field, ext: @serializer.extension)
end
282
+
283
+ # Internal method on the tracer pipeline.
284
+ # @api private
285
def save_graph(snapshot, schema_version:)
  # Persist `snapshot` under `<cache_path>/<run_id>/` and then publish it
  # by rewriting the last-run manifest, all inside `transactional_save`.
  # Pruning and size-budget warnings run afterwards.
  #
  # Returns the snapshot that was passed in.
  # Raises ArgumentError when the snapshot is nil, the schema version is
  # unsupported, or `snapshot.run_id` is not a non-empty String.
  raise ArgumentError, 'snapshot must not be nil' if snapshot.nil?

  unless Schema.supported?(schema_version)
    raise ArgumentError, "unsupported schema_version: #{schema_version.inspect}"
  end

  run_id = snapshot.run_id
  # The id becomes a path segment. Enforce what the message promises here,
  # so a Symbol or Integer id is rejected before the lock is taken instead
  # of failing later inside File.join with a TypeError.
  unless run_id.is_a?(String) && !run_id.empty?
    raise ArgumentError, 'snapshot.run_id must be a non-empty string'
  end

  transactional_save do
    dir = File.join(@cache_path, run_id)
    FileUtils.mkdir_p(dir)
    write_run_files(dir, snapshot)
    # The manifest is written last: it is the commit point readers rely on.
    write_last_run_atomic(schema_version: schema_version, run_id: run_id)
  end

  maybe_prune_after_save
  maybe_warn_size_budget(run_id)
  snapshot
end
306
+
307
+ # Retain the `keep` most-recently-modified run-id directories
308
+ # under cache_path and delete older ones. Always preserves the
309
+ # run-id that `last_run.json` points at (deleting it would make
310
+ # the next reader cold-run). Returns the count removed. Never
311
+ # raises - a prune failure is logged at warn level and treated
312
+ # as best-effort cleanup, same graceful-degradation contract
313
+ # the remote cache backends use.
314
+ #
315
+ # `keep` nil / non-positive -> no-op. Called automatically from
316
+ # `save_graph` when the backend was constructed with
317
+ # `retention_local_count:`; also exposed via `rake
318
+ # rspec_tracer:cache:gc` for one-off cleanup.
319
def prune_run_dirs!(keep:)
  # Delete the run directories that `partition_dirs_to_prune` selects for
  # removal and return how many were deleted. A nil or non-positive `keep`,
  # a missing cache directory, or an empty candidate list is a no-op
  # returning 0. Any StandardError is logged at warn level and also
  # reported as 0.
  return 0 if keep.nil? || keep <= 0
  return 0 unless File.directory?(@cache_path)

  active_run = last_run_id
  run_dirs = collect_run_dirs
  return 0 if run_dirs.empty?

  _retained, doomed = partition_dirs_to_prune(run_dirs, keep: keep, current: active_run)
  doomed.each do |run_dir|
    FileUtils.rm_rf(run_dir)
  end
  doomed.size
rescue StandardError => e
  @logger&.warn("rspec-tracer cache gc: prune failed (#{e.class}: #{e.message})")
  0
end
334
+
335
+ # Internal method on the tracer pipeline.
336
+ # @api private
337
def transactional_save(&block)
  # Run the given block while holding an exclusive flock on the lock file
  # under the cache root (created on demand, mode 0644). The lock is held
  # for the duration of the block. Returns the block's value; raises
  # ArgumentError when called without a block.
  raise ArgumentError, 'block required' if block.nil?

  FileUtils.mkdir_p(@cache_path)
  open_flags = File::RDWR | File::CREAT
  File.open(lock_path, open_flags, 0o644) do |lock_file|
    lock_file.flock(File::LOCK_EX)
    block.call
  end
end
346
+
347
+ # Internal method on the tracer pipeline.
348
+ # @api private
349
def clear!
  # Remove the whole cache root, but only when it currently exists as a
  # directory; otherwise do nothing and return nil.
  FileUtils.rm_rf(@cache_path) if File.directory?(@cache_path)
end
354
+
355
+ # Merge per-worker snapshots (written to `peer_cache_paths`) into
356
+ # this backend's top-level cache and persist via `save_graph`.
357
+ # Read each peer via `load_graph` so schema + corruption policy
358
+ # (missing files yield nil, malformed JSON logs + returns nil)
359
+ # flows through the same path as a normal load.
360
+ #
361
+ # No peers / every peer nil -> no-op returns nil. Partial peers
362
+ # merge what's available; graceful degradation is the entire
363
+ # point of running this at at_exit time.
364
+ #
365
+ # `schema_version` is passed through so peers saved under a
366
+ # different schema version are rejected without side effects
367
+ # (same semantics as a warm run under a mismatched cache).
368
def merge_from_peers(peer_cache_paths, schema_version:)
  # Load a snapshot from every peer cache path (each through a fresh
  # backend of this same class, sharing this backend's logger and
  # serializer name), merge whatever loaded, save the merged snapshot
  # here, and return it. Returns nil without saving when no peer yields a
  # snapshot.
  snapshots = peer_cache_paths.each_with_object([]) do |peer_path, loaded|
    peer = self.class.new(cache_path: peer_path, logger: @logger, serializer: @serializer_name)
    snapshot = peer.load_graph(schema_version: schema_version)
    loaded << snapshot if snapshot
  end

  return nil if snapshots.empty?

  Merger.call(snapshots, schema_version: schema_version).tap do |merged|
    save_graph(merged, schema_version: schema_version)
  end
end
380
+
381
+ # Stateless snapshot union. parallel_tests partitions spec files
382
+ # across workers, so example IDs are disjoint in practice - the
383
+ # merge collision rules (first-wins for metadata, sum-of-ints for
384
+ # per-line coverage) only fire on collaborating workers that
385
+ # happened to observe the same input file.
386
# Stateless union of several snapshots into one.
#
# Combine rules, per field:
# * all_examples / all_files          - first value seen for a key wins
# * duplicate_examples                - per-id entry lists are concatenated
# * the five example-id sets          - set union
# * dependency                        - per-id set union
# * reverse_dependency                - recomputed from the merged dependency
# * examples_coverage                 - per-line strengths are summed
# * boot_set / wsi_snapshot /
#   env_snapshot                      - Hash#merge! (later value wins)
# * env_dependency                    - per-id sorted union of names
# * cache_hit_reason                  - per-reason counts are summed
#
# Every container built here is a plain Hash or Set with no default value
# or default proc, so looking up an unknown key on the merged snapshot
# returns nil and never inserts a key.
module Merger
  # Merge `snapshots` into a new Snapshot.
  #
  # @param snapshots [Enumerable] objects answering the snapshot field
  #   readers (`all_examples`, `dependency`, ...); nil fields are treated
  #   as empty
  # @param schema_version [Object] stamped on the result unchanged
  # @return [Snapshot] run_id is the MD5 hex digest of the sorted merged
  #   example ids serialized as JSON
  # @api private
  def self.call(snapshots, schema_version:)
    state = empty_state
    snapshots.each { |peer| absorb(state, peer) }

    state[:reverse_dependency] = reverse_of(state[:dependency])
    state[:run_id] = Digest::MD5.hexdigest(state[:all_examples].keys.sort.to_json)

    Snapshot.new(
      schema_version: schema_version,
      run_id: state[:run_id],
      all_examples: state[:all_examples],
      duplicate_examples: state[:duplicate_examples],
      interrupted_examples: state[:interrupted_examples],
      flaky_examples: state[:flaky_examples],
      failed_examples: state[:failed_examples],
      pending_examples: state[:pending_examples],
      skipped_examples: state[:skipped_examples],
      all_files: state[:all_files],
      dependency: state[:dependency],
      reverse_dependency: state[:reverse_dependency],
      examples_coverage: state[:examples_coverage],
      boot_set: state[:boot_set],
      wsi_snapshot: state[:wsi_snapshot],
      env_snapshot: state[:env_snapshot],
      env_dependency: state[:env_dependency],
      cache_hit_reason: state[:cache_hit_reason]
    )
  end

  # Fresh accumulator. All hashes are plain (no default proc / default
  # value) so nothing auto-vivifying can leak into the final Snapshot.
  # @return [Hash{Symbol => Hash, Set}]
  # @api private
  def self.empty_state
    {
      all_examples: {},
      duplicate_examples: {},
      interrupted_examples: Set.new,
      flaky_examples: Set.new,
      failed_examples: Set.new,
      pending_examples: Set.new,
      skipped_examples: Set.new,
      all_files: {},
      dependency: {},
      examples_coverage: {},
      boot_set: {},
      wsi_snapshot: {},
      env_snapshot: {},
      env_dependency: {},
      cache_hit_reason: {}
    }
  end

  # Fold one snapshot into `state` using the per-field combine rules
  # listed on the module. The snapshot itself is not modified.
  # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  def self.absorb(state, snapshot)
    state[:all_examples].merge!(snapshot.all_examples || {}) { |_, kept, _| kept }
    (snapshot.duplicate_examples || {}).each do |id, entries|
      (state[:duplicate_examples][id] ||= []).concat(entries)
    end
    state[:interrupted_examples].merge(snapshot.interrupted_examples || Set.new)
    state[:flaky_examples].merge(snapshot.flaky_examples || Set.new)
    state[:failed_examples].merge(snapshot.failed_examples || Set.new)
    state[:pending_examples].merge(snapshot.pending_examples || Set.new)
    state[:skipped_examples].merge(snapshot.skipped_examples || Set.new)
    state[:all_files].merge!(snapshot.all_files || {}) { |_, kept, _| kept }
    (snapshot.dependency || {}).each do |id, paths|
      (state[:dependency][id] ||= Set.new).merge(paths)
    end
    merge_examples_coverage!(state[:examples_coverage], snapshot.examples_coverage || {})
    state[:boot_set].merge!(snapshot.boot_set || {})
    state[:wsi_snapshot].merge!(snapshot.wsi_snapshot || {})
    state[:env_snapshot].merge!(snapshot.env_snapshot || {})
    merge_env_dependency!(state[:env_dependency], snapshot.env_dependency || {})
    merge_cache_hit_reason!(state[:cache_hit_reason], snapshot.cache_hit_reason || {})
  end
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

  # Union the env-var names recorded per example id; the stored list is
  # sorted. `Array(names)` lets a nil or scalar value through.
  # @api private
  def self.merge_env_dependency!(target, source)
    source.each do |id, names|
      existing = target[id] || []
      target[id] = (existing | Array(names)).sort
    end
  end

  # Add each reason's count onto `target`. A nil count contributes 0,
  # the same tolerance coverage strengths get.
  # @api private
  def self.merge_cache_hit_reason!(target, source)
    source.each do |reason, count|
      target[reason] = (target[reason] || 0) + (count || 0)
    end
  end

  # Sum per-line coverage strengths, example by example and file by file.
  # A nil per-example or per-file entry is treated as empty; a nil
  # strength counts as 0.
  # @api private
  def self.merge_examples_coverage!(target, source)
    source.each do |id, per_file|
      entry = target[id] ||= {}
      (per_file || {}).each do |file_path, lines|
        file_entry = entry[file_path] ||= {}
        (lines || {}).each do |line_key, strength|
          file_entry[line_key] = (file_entry[line_key] || 0) + (strength || 0)
        end
      end
    end
  end

  # Invert `{ example_id => files }` into `{ file => Set<example_id> }`.
  # @return [Hash] plain Hash (no default proc)
  # @api private
  def self.reverse_of(dependency)
    dependency.each_with_object({}) do |(id, file_names), reverse|
      file_names.each { |name| (reverse[name] ||= Set.new) << id }
    end
  end
end
514
+
515
+ private
516
+
517
+ # Internal method on the tracer pipeline.
518
+ # @api private
519
def last_run_path
  # Path of the last-run manifest directly under the cache root.
  manifest_name = LAST_RUN_FILENAME
  File.join(@cache_path, manifest_name)
end
522
+
523
+ # Internal method on the tracer pipeline.
524
+ # @api private
525
def lock_path
  # Path of the writer lock file directly under the cache root.
  lock_name = LOCK_FILENAME
  File.join(@cache_path, lock_name)
end
528
+
529
+ # Internal method on the tracer pipeline.
530
+ # @api private
531
def maybe_prune_after_save
  # Prune only when a retention count was configured; otherwise return nil.
  limit = @retention_local_count
  return unless limit

  prune_run_dirs!(keep: limit)
end
534
+
535
+ # Internal constant.
536
+ # @api private
537
+ BYTES_PER_MB = 1_048_576
538
+ private_constant :BYTES_PER_MB
539
+
540
+ # Emit a warn-level log line for each just-saved file that
541
+ # exceeded the per-file budget, and one total-budget line when
542
+ # the whole cache tree exceeds that threshold. Both thresholds
543
+ # are MiB; 0 / nil disables. The warning suggests the most
544
+ # effective remediations in order: add_filter for vendor paths
545
+ # (usually the biggest win), transitive_load_tracking off
546
+ # (cuts the constants-blind-spot overhead), and the `:msgpack`
547
+ # serializer (PR B). Budgets surface B11 symptoms (issue #15 /
548
+ # #20) without forcing behavior change.
549
def maybe_warn_size_budget(run_id)
  # Run the per-file check for `run_id` when the per-file threshold is
  # enabled, then the whole-cache check when the total threshold is
  # enabled. Each threshold is tested independently.
  if positive_threshold?(@warn_per_file_mb)
    warn_oversized_run_files(run_id)
  end
  return unless positive_threshold?(@warn_total_mb)

  warn_oversized_cache_total
end
553
+
554
+ # Internal method on the tracer pipeline.
555
+ # @api private
556
def positive_threshold?(value)
  # True only for an Integer greater than zero; nil, zero, negatives and
  # every non-Integer value disable the budget check.
  return false unless value.is_a?(::Integer)

  value.positive?
end
559
+
560
+ # Internal method on the tracer pipeline.
561
+ # @api private
562
def warn_oversized_run_files(run_id)
  # Log one warn-level line for every file of run `run_id` (as listed by
  # `per_file_glob`) whose size exceeds the per-file threshold
  # (`@warn_per_file_mb` MiB). Does nothing when the run directory is
  # missing.
  run_dir = File.join(@cache_path, run_id)
  return unless File.directory?(run_dir)

  threshold_bytes = @warn_per_file_mb * BYTES_PER_MB
  per_file_glob(run_dir).each do |path|
    # File.size? returns nil instead of raising when the file vanished
    # between the glob and the stat (for example a concurrent prune), so
    # this best-effort warning can never abort the save that triggered it.
    size = File.size?(path)
    next unless size && size > threshold_bytes

    @logger&.warn(
      "rspec-tracer cache: #{File.basename(path)} is #{format_mib(size)} " \
      "(> #{@warn_per_file_mb} MiB per-file threshold); remediations (in order): " \
      'add_filter for vendor paths, `transitive_load_tracking false`, ' \
      '`storage_backend :json, serializer: :msgpack` for disk reduction'
    )
  end
end
579
+
580
+ # Internal method on the tracer pipeline.
581
+ # @api private
582
def warn_oversized_cache_total
  # Emits a single warn-level line when the summed cache size is strictly
  # greater than the total budget (@warn_total_mb, in MiB).
  cache_bytes = total_cache_size_bytes
  limit_bytes = @warn_total_mb * BYTES_PER_MB
  if cache_bytes > limit_bytes
    @logger&.warn(
      "rspec-tracer cache: total size is #{format_mib(cache_bytes)} " \
      "(> #{@warn_total_mb} MiB total threshold); remediations (in order): " \
      '`cache_retention_local_count N` to cap history, ' \
      'add_filter for vendor paths, ' \
      '`storage_backend :json, serializer: :msgpack` for disk reduction'
    )
  end
end
595
+
596
+ # Glob matching this backend's serializer extension. Surfaces
597
+ # the active on-disk layout so size-budget warnings stay
598
+ # accurate when the user switches to `:msgpack` (.msgpack.gz
599
+ # files instead of .json).
600
def per_file_glob(run_dir)
  # Returns the paths of the "*.<extension>" entries directly inside
  # run_dir, where the extension comes from the active serializer.
  #
  # The directory is handed to Dir.glob as `base:` instead of being
  # interpolated into the pattern, so glob metacharacters in the cache path
  # ([], {}, *, ?) are taken literally rather than silently matching nothing.
  Dir.glob("*.#{@serializer.extension}", base: run_dir).map do |name|
    File.join(run_dir, name)
  end
end
603
+
604
+ # Internal method on the tracer pipeline.
605
+ # @api private
606
def total_cache_size_bytes
  # Sums the byte size of every regular file under @cache_path (at any
  # depth) whose name ends in the active serializer's extension.
  #
  # - The cache path is passed as `base:` so glob metacharacters in it are
  #   treated literally.
  # - File.size? answers nil for a file that vanished after the glob (or is
  #   empty); that file contributes 0 instead of discarding the whole total.
  # - Any other failure still degrades to 0: this feeds a best-effort warning.
  pattern = File.join('**', "*.#{@serializer.extension}")
  Dir.glob(pattern, base: @cache_path).sum do |relative|
    path = File.join(@cache_path, relative)
    File.file?(path) ? (File.size?(path) || 0) : 0
  end
rescue StandardError
  0
end
615
+
616
+ # Internal method on the tracer pipeline.
617
+ # @api private
618
def format_mib(bytes)
  # Byte count rendered as MiB, rounded to one decimal place.
  mebibytes = bytes.to_f / BYTES_PER_MB
  rounded = mebibytes.round(1)
  "#{rounded} MiB"
end
621
+
622
+ # Enumerate run-id subdirectories of cache_path, newest first
623
+ # by mtime. Non-directory children (last_run.json, lock file)
624
+ # and dotfiles are excluded so a stray `.DS_Store` or editor
625
+ # swap file doesn't confuse the prune math.
626
def collect_run_dirs
  # Visible (non-dot) directory children of @cache_path as joined paths,
  # ordered by descending mtime.
  run_dirs = Dir.children(@cache_path)
                .reject { |entry| entry.start_with?('.') }
                .map { |entry| File.join(@cache_path, entry) }
                .select { |candidate| File.directory?(candidate) }
  run_dirs.sort_by { |dir| -File.mtime(dir).to_f }
end
637
+
638
+ # Split a newest-first list of run directories into (keep,
639
+ # prune). The live `current` run-id is always retained even if
640
+ # it fell off the top-N by mtime (defensive: an external
641
+ # `touch` on an old dir must not force deletion of the live
642
+ # one). Inputs are absolute paths; `current` is the basename
643
+ # reported by `last_run_id` (may be nil if last_run.json is
644
+ # missing or corrupt).
645
def partition_dirs_to_prune(candidates, keep:, current:)
  # The first `keep` entries are retained by position; past that point only
  # the entry whose basename equals `current` is retained. Returns
  # [kept_paths, pruned_paths], each in the original order.
  kept, pruned = candidates.each_with_index.partition do |path, position|
    position < keep || File.basename(path) == current
  end
  [kept.map(&:first), pruned.map(&:first)]
end
657
+
658
+ # Internal method on the tracer pipeline.
659
+ # @api private
660
def read_last_run_manifest
  # Parsed manifest at last_run_path, or nil when the file is absent or
  # reading/parsing raises any StandardError.
  File.file?(last_run_path) ? read_json(last_run_path) : nil
rescue StandardError
  nil
end
667
+
668
+ # last_run.json is always plain JSON regardless of serializer -
669
+ # it is the human-debuggable + CI-script-compatible pointer that
670
+ # USER_FACING_SURFACE.md section 6 locks. Helper stays narrow so the
671
+ # serializer dispatch can not accidentally reach it.
672
def read_json(path)
  # Reads the whole file with ENCODING and parses it as JSON; read and
  # parse errors are not rescued here.
  raw_text = File.open(path, 'r', encoding: ENCODING, &:read)
  JSON.parse(raw_text)
end
676
+
677
+ # Internal method on the tracer pipeline.
678
+ # @api private
679
def write_json_atomic(path, data)
  # Pretty-printed JSON is written to a sibling staging file (suffixed with
  # the pid and a random number) and then renamed over `path`.
  staging_path = [path, 'tmp', Process.pid, rand(1_000_000)].join('.')
  File.write(staging_path, JSON.pretty_generate(data), encoding: ENCODING)
  File.rename(staging_path, path)
ensure
  # The staging file only still exists when the rename did not happen.
  File.delete(staging_path) if staging_path && File.file?(staging_path)
end
686
+
687
+ # Internal method on the tracer pipeline.
688
+ # @api private
689
def write_run_files(dir, snapshot)
  # Hash fields are written as read from the snapshot (a nil value becomes
  # {}); id-set and dependency fields pass through their serializers first.
  HASH_FIELDS.each do |field|
    write_run_field(dir, field, snapshot.send(field) || {})
  end
  ID_SET_FIELDS.each do |field|
    write_run_field(dir, field, serialize_id_set(snapshot.send(field)))
  end
  DEPENDENCY_FIELDS.each do |field|
    write_run_field(dir, field, serialize_dependency(snapshot.send(field)))
  end
end
694
+
695
+ # Internal method on the tracer pipeline.
696
+ # @api private
697
def write_run_field(dir, name, payload)
  # The file name comes from field_filename, called with the symbolized
  # field name; the payload is written atomically to that path inside dir.
  target = File.join(dir, field_filename(name.to_sym))
  write_payload_atomic(target, payload)
end
700
+
701
+ # Atomic write for a per-field payload. Encodes via the active
702
+ # serializer; writes binary so msgpack + zlib bytes are not
703
+ # re-encoded by Ruby's IO layer. Same tmp-rename pattern as
704
+ # write_json_atomic (last_run.json commit point).
705
def write_payload_atomic(path, data)
  # The serializer's encoded bytes are written in binary mode to a sibling
  # staging file (suffixed with the pid and a random number), which is then
  # renamed over `path`.
  staging_path = [path, 'tmp', Process.pid, rand(1_000_000)].join('.')
  encoded = @serializer.encode(data)
  File.binwrite(staging_path, encoded)
  File.rename(staging_path, path)
ensure
  # The staging file only still exists when the rename did not happen.
  File.delete(staging_path) if staging_path && File.file?(staging_path)
end
712
+
713
+ # Internal method on the tracer pipeline.
714
+ # @api private
715
def write_last_run_atomic(schema_version:, run_id:)
  # Manifest keys, in order: schema_version, run_id, timestamp (current UTC
  # time in ISO 8601). Written to last_run_path via write_json_atomic.
  stamped_at = Time.now.utc.iso8601
  manifest = {
    'schema_version' => schema_version,
    'run_id' => run_id,
    'timestamp' => stamped_at
  }
  write_json_atomic(last_run_path, manifest)
end
719
+
720
+ # Dispatch one field's raw JSON body through the right
721
+ # deserializer. Paired with FIELD_KINDS. Kept here rather
722
+ # than on the reader so all shape knowledge stays in one
723
+ # class; mutant sees one AST node per kind branch.
724
def decode_field(field, raw)
  # FIELD_KINDS.fetch raises KeyError for an unregistered field; a
  # registered kind with no branch below yields nil.
  kind = FIELD_KINDS.fetch(field)
  case kind
  when :symbolized
    deserialize_symbolized(raw)
  when :dupe_examples
    deserialize_dupe_examples(raw)
  when :id_set
    deserialize_id_set(raw)
  when :dependency
    deserialize_dependency(raw)
  when :plain_hash
    deserialize_plain_hash(raw)
  end
end
733
+
734
+ # Plain-hash round-trip: a failed read yields nil (read_run_file
734
+ # rescues errors to nil), and a valid-but-wrong-shape file would
735
+ # otherwise poison the Snapshot field. Treat anything non-Hash as the empty default.
737
def deserialize_plain_hash(raw)
  # A Hash passes through untouched; anything else becomes a fresh {}.
  return raw if raw.is_a?(Hash)

  {}
end
740
+
741
+ # Walk a parsed JSON tree and replace every String with its
742
+ # frozen-string-table entry via `String#-@`. Idempotent on
743
+ # already-frozen Strings. Portable across every matrix Ruby
744
+ # (json-gem's `freeze: true` option only arrived in 2.8 /
745
+ # Ruby 3.4); the explicit walk keeps behavior identical on
746
+ # Ruby 3.1 + 3.2 cells.
747
+ #
748
+ # Big win on dependency.json where the same file path repeats
749
+ # across every example that depends on it - 2000 unique paths
750
+ # may appear 1M+ times; interning collapses that to 2000
751
+ # objects + refs. Small overhead on fields where strings are
752
+ # unique (description text in all_examples) but the RAM
753
+ # savings on the path-heavy fields dominate.
754
def deep_intern(obj)
  # Strings (including Hash keys) are swapped for their deduplicated frozen
  # form via String#-@; Hashes and Arrays are rebuilt recursively; every
  # other value is returned as-is.
  return -obj if obj.is_a?(String)
  return obj.map { |element| deep_intern(element) } if obj.is_a?(Array)
  return obj unless obj.is_a?(Hash)

  interned = {}
  obj.each do |key, value|
    interned_key = key.is_a?(String) ? -key : key
    interned[interned_key] = deep_intern(value)
  end
  interned
end
766
+
767
+ # Internal method on the tracer pipeline.
768
+ # @api private
769
def read_run_file(dir, name)
  # Decoded content of dir/name via the active serializer, or nil when the
  # file is absent or reading/decoding raises any StandardError.
  path = File.join(dir, name)
  File.file?(path) ? @serializer.decode(File.binread(path)) : nil
rescue StandardError
  nil
end
777
+
778
+ # Sorted Array on disk, Set in memory - matches 1.x report_writer.
779
def serialize_id_set(collection)
  # nil becomes []; anything else is converted with to_a and sorted.
  collection.nil? ? [] : collection.to_a.sort
end
784
+
785
+ # Internal method on the tracer pipeline.
786
+ # @api private
787
def deserialize_id_set(raw)
  # An Array becomes a Set of its elements; any other input an empty Set.
  raw.is_a?(Array) ? Set.new(raw) : Set.new
end
792
+
793
+ # dependency / reverse_dependency: Hash[id => Set<path>] in
794
+ # memory, Hash[id => Array<path>] on disk.
795
def serialize_dependency(collection)
  # nil becomes {}; otherwise every value is coerced with Kernel#Array,
  # keys unchanged.
  if collection.nil?
    {}
  else
    collection.transform_values do |paths|
      Array(paths)
    end
  end
end
800
+
801
+ # Internal method on the tracer pipeline.
802
+ # @api private
803
def deserialize_dependency(raw)
  # A non-Hash input yields {}; otherwise each value is coerced with
  # Kernel#Array and turned into a Set, keys unchanged.
  if raw.is_a?(Hash)
    raw.transform_values do |paths|
      Array(paths).to_set
    end
  else
    {}
  end
end
808
+
809
+ # all_examples / all_files: inner hashes whose keys 1.x
810
+ # symbolizes after parse.
811
def deserialize_symbolized(raw)
  # A non-Hash input yields {}. Outer keys are kept; each inner Hash gets
  # its keys converted with to_sym, and non-Hash inner values pass through.
  return {} unless raw.is_a?(Hash)

  raw.transform_values do |inner|
    next inner unless inner.is_a?(Hash)

    inner.transform_keys { |key| key.to_sym }
  end
end
818
+
819
+ # duplicate_examples: Hash[id => Array<Hash>]; each inner hash's
820
+ # keys symbolized post-parse.
821
def deserialize_dupe_examples(raw)
  # A non-Hash input yields {}. For each Array value, Hash entries get their
  # keys converted with to_sym; non-Array values and non-Hash entries pass
  # through unchanged.
  return {} unless raw.is_a?(Hash)

  raw.transform_values do |list|
    if list.is_a?(Array)
      list.map do |entry|
        next entry unless entry.is_a?(Hash)

        entry.transform_keys(&:to_sym)
      end
    else
      list
    end
  end
end
830
+
831
+ # Internal method on the tracer pipeline.
832
+ # @api private
833
def info(message)
  # Forwards to the logger's info method; a nil logger makes this a no-op.
  return if @logger.nil?

  @logger.info(message)
end
836
+
837
+ # Map the user-facing `:json` / `:msgpack` name onto the
838
+ # concrete Serializer class. A `:msgpack` request with the
839
+ # msgpack gem absent warns once and falls back to `:json` so
840
+ # the user's test suite keeps running (graceful-degradation
841
+ # contract; same posture as the remote-cache optional deps).
842
def resolve_serializer(name)
  # :json and :msgpack are the only accepted names. :msgpack resolves to the
  # result of msgpack_unavailable_fallback when Serializer::Msgpack reports
  # itself unavailable; any other name raises ArgumentError.
  return Serializer::Json if name == :json

  unless name == :msgpack
    raise ArgumentError,
          "unknown serializer: #{name.inspect}; allowed: [:json, :msgpack]"
  end

  Serializer::Msgpack.available? ? Serializer::Msgpack : msgpack_unavailable_fallback
end
853
+
854
+ # Internal method on the tracer pipeline.
855
+ # @api private
856
def msgpack_unavailable_fallback
  # Emits a warn-level notice (when a logger is set) and answers the JSON
  # serializer class.
  notice = 'rspec-tracer cache: msgpack gem is not installed; falling back to :json. ' \
           "Add `gem 'msgpack'` to your Gemfile to use the :msgpack serializer."
  @logger&.warn(notice)
  Serializer::Json
end
863
+ end
864
+ # rubocop:enable Metrics/ClassLength
865
+ end
866
+ end