event_meter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +1081 -0
  4. data/exe/event_meter +5 -0
  5. data/lib/event_meter/auto_cleanup.rb +93 -0
  6. data/lib/event_meter/cli.rb +124 -0
  7. data/lib/event_meter/configuration.rb +244 -0
  8. data/lib/event_meter/errors.rb +9 -0
  9. data/lib/event_meter/event.rb +180 -0
  10. data/lib/event_meter/event_payload.rb +103 -0
  11. data/lib/event_meter/hash_input.rb +20 -0
  12. data/lib/event_meter/index_key.rb +19 -0
  13. data/lib/event_meter/keys.rb +63 -0
  14. data/lib/event_meter/path_name.rb +37 -0
  15. data/lib/event_meter/processor.rb +305 -0
  16. data/lib/event_meter/rails.rb +79 -0
  17. data/lib/event_meter/report_definition.rb +184 -0
  18. data/lib/event_meter/reports.rb +143 -0
  19. data/lib/event_meter/rollup.rb +148 -0
  20. data/lib/event_meter/stores/cleanup_helpers.rb +76 -0
  21. data/lib/event_meter/stores/file_helpers.rb +47 -0
  22. data/lib/event_meter/stores/lock_refresher.rb +75 -0
  23. data/lib/event_meter/stores/namespace.rb +14 -0
  24. data/lib/event_meter/stores/redis_lock.rb +77 -0
  25. data/lib/event_meter/stores/rollup/active_record_postgres.rb +135 -0
  26. data/lib/event_meter/stores/rollup/file.rb +736 -0
  27. data/lib/event_meter/stores/rollup/postgres.rb +813 -0
  28. data/lib/event_meter/stores/rollup/redis.rb +349 -0
  29. data/lib/event_meter/stores/stream/file.rb +98 -0
  30. data/lib/event_meter/stores/stream/redis.rb +79 -0
  31. data/lib/event_meter/time_buckets.rb +56 -0
  32. data/lib/event_meter/version.rb +3 -0
  33. data/lib/event_meter/write_result.rb +26 -0
  34. data/lib/event_meter.rb +150 -0
  35. data/lib/generators/event_meter/install_generator.rb +57 -0
  36. data/lib/generators/event_meter/templates/create_event_meter_tables.rb.erb +12 -0
  37. data/lib/generators/event_meter/templates/event_meter.rb.erb +12 -0
  38. metadata +156 -0
data/lib/event_meter/stores/rollup/file.rb
@@ -0,0 +1,736 @@
+ require "digest"
+ require "fileutils"
+ require "json"
+ require "time"
+
+ require_relative "../../rollup"
+ require_relative "../cleanup_helpers"
+ require_relative "../file_helpers"
+ require_relative "../namespace"
+
+ module EventMeter
+   module Stores
+     module Rollup
+       class File
+         include CleanupHelpers
+         include FileHelpers
+         include Namespace
+
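+         # On-disk layout, relative to the configured path:
+         #   cleanup.json                                             - cleanup watermarks
+         #   rollups/<namespace>/<report>/v<version>/definition.json
+         #   rollups/<namespace>/<report>/v<version>/hashes/<every>/<bucket>.json
+         #   rollups/<namespace>/<report>/v<version>/strings/shards/<shard>.json
+         #   rollups/<namespace>/<report>/v<version>/processed/<stream>.processed.json
+         # Each JSON file is guarded by a sibling "<file>.lock" flock file and
+         # written atomically via atomic_write_json from FileHelpers.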
+         APPLIED_KEY = "_applied"
+         BATCHES_KEY = "batches"
+         PROCESSED_IDS_KEY = "processed_ids"
+         STREAM_FILE_KEY = "stream_file"
+
+         attr_reader :path, :namespace, :report_name, :version
+
+         def initialize(path:, namespace: nil, report_name: nil, version: nil)
+           @path = normalize_file_store_path(path)
+           @namespace = normalize_namespace(namespace) if namespace
+           @report_name = report_name
+           @version = version
+
+           FileUtils.mkdir_p(rollup_path) if scoped?
+         end
+
+         def for_namespace(namespace)
+           namespace = normalize_namespace(namespace)
+           return self if self.namespace == namespace
+
+           if self.namespace
+             raise ConfigurationError, "file rollup storage namespace #{self.namespace.inspect} does not match #{namespace.inspect}"
+           end
+
+           @namespace = namespace
+           FileUtils.mkdir_p(rollup_path) if scoped?
+           self
+         end
+
+         def for_report(name:, version:)
+           self.class.new(
+             path: path,
+             namespace: namespace,
+             report_name: name.to_s,
+             version: version
+           )
+         end
+
+         def ensure_definition(definition)
+           update_json_file(definition_path, {}) do |stored|
+             if stored.empty?
+               stored.merge!(definition.to_h)
+             else
+               ensure_same_definition!(stored, definition)
+             end
+           end
+         end
+
+         def report_definition(name:, version:)
+           return nil unless scoped_for?(name, version)
+
+           hash_value(read_json_file(definition_path))
+         end
+
+         def processed_ids(ids)
+           ensure_scoped!
+
+           ids.select do |id|
+             processed_sidecar_for(id).processed?(id)
+           end
+         end
+
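+         # Applies a batch of rollup increments and state updates exactly once:
+         # the batch id (a digest of the batch's entry ids) is recorded under
+         # the "_applied" metadata key of every JSON file it touches, so a
+         # replayed batch is skipped, and the processed entry ids are written
+         # to per-stream sidecar files.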
+         def apply(batch)
+           ensure_scoped!
+           return if batch.empty?
+
+           batch_id = transaction_id(batch.entry_ids)
+           applied_paths = []
+           applied_paths.concat(apply_rollups(batch_id, batch))
+           applied_paths.concat(apply_string_updates(batch_id, batch))
+           mark_processed_entries(batch, batch_id, applied_paths.uniq)
+         end
+
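+         # Undoes the bookkeeping for the given entry ids: deletes their
+         # sidecar files and strips the corresponding batch markers from every
+         # data file those batches touched, allowing the entries to be
+         # re-applied.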
+         def forget_processed_ids(ids)
+           ensure_scoped!
+
+           sidecars = ids.map { |id| processed_sidecar_for(id) }.uniq(&:path)
+           applied_paths_by_batch = {}
+
+           sidecars.each do |sidecar|
+             sidecar.batch_paths.each do |batch_id, paths|
+               applied_paths_by_batch[batch_id] ||= []
+               applied_paths_by_batch[batch_id].concat(paths)
+             end
+
+             sidecar.delete
+           end
+
+           applied_paths_by_batch.each do |batch_id, relative_paths|
+             forget_applied_marker(batch_id, relative_paths.uniq)
+           end
+         end
+
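+         # Returns one field hash per rollup key (the name follows Redis's
+         # HGETALL); keys that do not parse as rollup keys for this report
+         # yield an empty hash.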
+         def hgetall_many(keys)
+           ensure_scoped!
+
+           keys.map do |key|
+             rollup = rollup_key_parts(key)
+             next {} unless rollup
+
+             data = read_json_file(rollup_bucket_path(rollup.fetch(:every), rollup.fetch(:bucket)))
+             hash_value(data[rollup.fetch(:index)]).dup
+           end
+         end
+
+         def keys_matching(pattern, limit: nil)
+           ensure_scoped!
+
+           keys = rollup_keys.select { |key| key_matches?(key, pattern) }.sort
+           limit ? keys.first(positive_integer(limit, "limit")) : keys
+         end
+
+         def get(key)
+           ensure_scoped!
+
+           read_json_file(shard_path("strings", key))[key]
+         end
+
+         def cleanup_watermark(key)
+           read_json_file(cleanup_state_path)[key]
+         end
+
+         def write_cleanup_watermark(key, value)
+           update_json_file(cleanup_state_path, {}) do |data|
+             data[key] = value.to_s
+           end
+         end
+
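+         # Takes a non-blocking exclusive flock on the store's lock file and
+         # yields while holding it; returns false immediately if another
+         # process holds the lock. The ttl: argument is unused by the file
+         # store, since the flock is released when the block exits.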
+         def with_lock(ttl:)
+           FileUtils.mkdir_p(::File.dirname(lock_path))
+
+           ::File.open(lock_path, ::File::RDWR | ::File::CREAT, 0o600) do |file|
+             return false unless file.flock(::File::LOCK_EX | ::File::LOCK_NB)
+
+             yield
+             true
+           ensure
+             file&.flock(::File::LOCK_UN)
+           end
+         end
+
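+         # Removes rollup buckets, interval state, and processed-id sidecars
+         # older than `before`. When the store is not scoped to a single
+         # report, every report found under this path is cleaned up in turn.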
+         def cleanup_history(before:, events:, interval_state:)
+           ensure_namespace!
+           return cleanup_all_report_histories(before: before, events: events, interval_state: interval_state) unless scoped?
+
+           cleanup_scoped_history(before: before, events: events, interval_state: interval_state)
+         end
+
+         private
+
+         def apply_rollups(batch_id, batch)
+           batch.rollups.group_by { |key, _rollup| rollup_bucket_file_for_key(key) }.filter_map do |file, entries|
+             next unless file
+
+             apply_once(file, batch_id) do |data|
+               entries.each do |key, rollup|
+                 parts = rollup_key_parts(key)
+                 next unless parts
+
+                 index = parts.fetch(:index)
+                 merged = EventMeter::Rollup.from_hash(hash_value(data[index])).merge!(rollup)
+                 data[index] = merged.fields.transform_values(&:to_s)
+               end
+             end
+           end
+         end
+
+         def apply_string_updates(batch_id, batch)
+           batch.state_updates.group_by { |key, _value| shard_path("strings", key) }.map do |file, entries|
+             apply_once(file, batch_id) do |data|
+               entries.each do |key, value|
+                 data[key] = [integer_value(data[key]), integer_value(value)].compact.max.to_s
+               end
+             end
+           end
+         end
+
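+         # Mutates a JSON data file under its lock, skipping the mutation if
+         # this batch id is already recorded in the file's "_applied" hash,
+         # and returns the file's path relative to the rollup directory so
+         # callers can record which files the batch touched.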
+         def apply_once(file, batch_id)
+           update_json_file(file, {}) do |data|
+             applied = applied_hash(data)
+             next if applied.key?(batch_id)
+
+             yield data
+             applied[batch_id] = current_time.iso8601(6)
+             data[APPLIED_KEY] = applied
+           end
+
+           relative_path(file)
+         end
+
+         def mark_processed_entries(batch, batch_id, applied_paths)
+           timestamp = current_time.iso8601(6)
+
+           batch.entry_ids.group_by { |id| processed_sidecar_for(id) }.each do |sidecar, ids|
+             sidecar.mark(ids, batch_id: batch_id, applied_paths: applied_paths, timestamp: timestamp)
+           end
+         end
+
+         def forget_applied_marker(batch_id, relative_paths)
+           relative_paths.each do |relative_path|
+             file = absolute_rollup_file(relative_path)
+             next unless file.start_with?("#{rollup_path}/")
+             next unless ::File.exist?(file)
+
+             update_json_file(file, {}) do |data|
+               applied = applied_hash(data)
+               applied.delete(batch_id)
+
+               if applied.empty?
+                 data.delete(APPLIED_KEY)
+               else
+                 data[APPLIED_KEY] = applied
+               end
+             end
+           end
+         end
+
+         def cleanup_scoped_history(before:, events:, interval_state:)
+           filter = event_filter(events)
+           result = {
+             rollup_keys_deleted: cleanup_rollups(before, filter),
+             interval_state_keys_deleted: interval_state ? cleanup_interval_state(before, filter) : 0,
+             processed_entries_deleted: cleanup_processed_sidecars(before, filter)
+           }
+           cleanup_old_applied_markers(before)
+
+           result
+         end
+
+         def cleanup_all_report_histories(before:, events:, interval_state:)
+           definition_files.each_with_object(empty_cleanup_result) do |definition_file, total|
+             definition = read_json_file(definition_file)
+             next if definition.empty?
+
+             result = self.class.new(
+               path: path,
+               namespace: namespace,
+               report_name: definition.fetch("name"),
+               version: definition.fetch("version")
+             ).cleanup_history(before: before, events: events, interval_state: interval_state)
+
+             merge_cleanup_result(total, result)
+           rescue KeyError, ArgumentError, TypeError
+             total
+           end
+         end
+
+         def cleanup_rollups(before, event_filter)
+           return 0 if filtered_out?(event_filter)
+
+           deleted = 0
+
+           rollup_bucket_files.each do |file|
+             next unless rollup_file_old?(file, before)
+
+             data = read_json_file(file)
+             deleted += data.keys.reject { |key| metadata_key?(key) }.length
+             FileUtils.rm_f(file)
+             FileUtils.rm_f(lock_file_path(file))
+           end
+
+           deleted
+         end
+
+         def cleanup_interval_state(before, event_filter)
+           before_ms = (before.to_f * 1000).to_i
+           deleted = 0
+
+           shard_files("strings").each do |file|
+             update_json_file(file, {}) do |data|
+               data.keys.grep(/\A#{Regexp.escape(namespace)}:state:/).each do |key|
+                 if state_key_old?(key, before_ms, event_filter, data[key])
+                   data.delete(key)
+                   deleted += 1
+                 end
+               end
+             end
+           end
+
+           deleted
+         end
+
+         def cleanup_processed_sidecars(before, event_filter)
+           return 0 if filtered_out?(event_filter)
+
+           sidecar_files.sum do |file|
+             sidecar = ProcessedSidecar.new(path: file)
+             next 0 unless sidecar.old?(before)
+
+             count = sidecar.processed_count
+             sidecar.delete
+             count
+           end
+         end
+
+         def cleanup_old_applied_markers(before)
+           each_data_file do |file|
+             update_json_file(file, {}) do |data|
+               applied = applied_hash(data)
+               applied.delete_if { |_batch_id, timestamp| processed_entry_old?(timestamp, before) }
+
+               if applied.empty?
+                 data.delete(APPLIED_KEY)
+               else
+                 data[APPLIED_KEY] = applied
+               end
+             end
+           end
+         end
+
+         def processed_entry_old?(timestamp, before)
+           Time.parse(timestamp).utc < before
+         rescue ArgumentError, TypeError, RangeError
+           true
+         end
+
+         def rollup_keys
+           rollup_bucket_files.flat_map do |file|
+             parts = rollup_file_parts(file)
+             next [] unless parts
+
+             read_json_file(file).keys.filter_map do |index|
+               next if metadata_key?(index)
+
+               Keys.rollup(
+                 namespace: namespace,
+                 name: report_name,
+                 version: version,
+                 every: parts.fetch(:every),
+                 bucket: parts.fetch(:time),
+                 index: IndexStruct.new(index)
+               )
+             end
+           end
+         end
+
+         def rollup_file_old?(file, before)
+           parts = rollup_file_parts(file)
+           return false unless parts
+
+           bucket_end_time(parts.fetch(:every).to_s, parts.fetch(:bucket)) <= before
+         rescue ArgumentError, TypeError
+           false
+         end
+
+         def rollup_file_parts(file)
+           relative = relative_path(file)
+           match = relative.match(%r{\Ahashes/(minute|hour)/(\d+)\.json\z})
+           return unless match
+
+           every = match[1].to_sym
+           bucket = match[2]
+
+           {
+             every: every,
+             bucket: bucket,
+             time: bucket_time(every, bucket)
+           }
+         end
+
+         def bucket_time(every, bucket)
+           case every.to_s
+           when "minute"
+             minute_bucket_time(bucket)
+           when "hour"
+             hour_bucket_time(bucket)
+           else
+             raise ArgumentError, "unsupported rollup bucket: #{every.inspect}"
+           end
+         end
+
+         def rollup_bucket_file_for_key(key)
+           parts = rollup_key_parts(key)
+           return unless parts
+
+           rollup_bucket_path(parts.fetch(:every), parts.fetch(:bucket))
+         end
+
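+         # Parses keys of the form
+         #   <namespace>:rollup:<event>:<version>:<every>:<bucket>:<index>
+         # returning nil unless the key belongs to this report and uses a
+         # supported bucket size ("minute" or "hour").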
+         def rollup_key_parts(key)
+           prefix = "#{namespace}:rollup:"
+           return unless key.start_with?(prefix)
+
+           event_name, version_key, every, bucket, index = key.delete_prefix(prefix).split(":", 5)
+           return unless event_name == Keys.event_name(report_name)
+           return unless version_key == Keys.version_key(version)
+           return unless %w[minute hour].include?(every)
+           return unless bucket && index
+
+           {
+             every: every.to_sym,
+             bucket: bucket,
+             index: index
+           }
+         end
+
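+         # Read-modify-write for a JSON file: takes an exclusive flock on the
+         # sibling .lock file, yields the parsed hash for mutation, then
+         # writes it back atomically (or removes the file if the hash ends up
+         # empty).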
+         def update_json_file(file, default)
+           FileUtils.mkdir_p(::File.dirname(file))
+
+           ::File.open(lock_file_path(file), ::File::RDWR | ::File::CREAT, 0o600) do |lock|
+             lock.flock(::File::LOCK_EX)
+             data = read_json_file(file, default)
+             yield data
+             write_or_remove_json(file, data)
+           ensure
+             lock&.flock(::File::LOCK_UN)
+           end
+         end
+
+         def read_json_file(file, default = {})
+           return default.dup unless ::File.exist?(file)
+
+           data = JSON.parse(::File.read(file))
+           data.is_a?(Hash) ? data : default.dup
+         rescue JSON::ParserError, Errno::ENOENT, SystemCallError, IOError
+           default.dup
+         end
+
+         def write_or_remove_json(file, data)
+           if data.empty?
+             FileUtils.rm_f(file)
+           else
+             atomic_write_json(file, data)
+           end
+         end
+
+         def applied_hash(data)
+           hash = hash_value(data[APPLIED_KEY])
+           data[APPLIED_KEY] = hash
+           hash
+         end
+
+         def hash_value(value)
+           value.is_a?(Hash) ? value : {}
+         end
+
+         def integer_value(value)
+           Integer(value)
+         rescue ArgumentError, TypeError, RangeError
+           nil
+         end
+
+         def positive_integer(value, name)
+           integer = Integer(value)
+           return integer if integer.positive?
+
+           raise ArgumentError, "#{name} must be positive"
+         rescue ArgumentError, TypeError, RangeError
+           raise ArgumentError, "#{name} must be positive"
+         end
+
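+         # Deterministic batch id: the SHA-256 of the sorted entry ids, so the
+         # same batch always maps to the same "_applied" marker.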
+         def transaction_id(entry_ids)
+           Digest::SHA256.hexdigest(entry_ids.map(&:to_s).sort.join("\n"))
+         end
+
+         def processed_sidecar_for(entry_id)
+           ProcessedSidecar.new(path: processed_sidecar_path(stream_file_name_for(entry_id)))
+         end
+
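+         # Entry ids are expected to look like "<stream_file>:<sequence>"; the
+         # prefix selects the sidecar file. Ids without such a prefix fall
+         # back to a digest-derived sidecar name.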
+         def stream_file_name_for(entry_id)
+           value = entry_id.to_s
+           separator = value.index(":")
+           return value[0...separator] if separator&.positive?
+
+           "entries-#{Digest::SHA256.hexdigest(value)[0, 16]}"
+         end
+
+         def safe_file_name(value)
+           value = value.to_s
+           return value if value.match?(/\A[a-zA-Z0-9._-]+\z/)
+
+           IndexKey.escape(value)
+         end
+
+         def processed_sidecar_path(stream_file_name)
+           ::File.join(processed_path, "#{safe_file_name(stream_file_name)}.processed.json")
+         end
+
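+         # String state is sharded across up to 256 files keyed by the first
+         # two hex characters of the key's SHA-256 digest.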
+         def shard_path(section, key)
+           ::File.join(rollup_path, section, "shards", "#{shard_id(key)}.json")
+         end
+
+         def shard_id(key)
+           Digest::SHA256.hexdigest(key.to_s)[0, 2]
+         end
+
+         def rollup_bucket_path(every, bucket)
+           ::File.join(rollup_path, "hashes", every.to_s, "#{bucket}.json")
+         end
+
+         def rollup_bucket_files
+           Dir.glob(::File.join(rollup_path, "hashes", "*", "*.json")).sort
+         end
+
+         def shard_files(section)
+           Dir.glob(::File.join(rollup_path, section, "shards", "*.json")).sort
+         end
+
+         def sidecar_files
+           Dir.glob(::File.join(processed_path, "*.processed.json")).sort
+         end
+
+         def each_data_file(&block)
+           (rollup_bucket_files + shard_files("strings")).each(&block)
+         end
+
+         def definition_files
+           Dir.glob(::File.join(path, "rollups", "*", "*", "v*", "definition.json")).sort
+         end
+
+         def filtered_out?(event_filter)
+           event_filter && !event_filter.include?(Keys.event_name(report_name))
+         end
+
+         def metadata_key?(key)
+           key.to_s.start_with?("_")
+         end
+
+         def current_time
+           Time.now.utc
+         end
+
+         def lock_file_path(file)
+           "#{file}.lock"
+         end
+
+         def cleanup_state_path
+           @cleanup_state_path ||= ::File.join(path, "cleanup.json")
+         end
+
+         def relative_path(file)
+           file.delete_prefix("#{rollup_path}/")
+         end
+
+         def absolute_rollup_file(relative_path)
+           ::File.expand_path(::File.join(rollup_path, relative_path.to_s))
+         end
+
+         def empty_cleanup_result
+           {
+             rollup_keys_deleted: 0,
+             interval_state_keys_deleted: 0,
+             processed_entries_deleted: 0
+           }
+         end
+
+         def merge_cleanup_result(total, result)
+           total.each_key do |key|
+             total[key] += result.fetch(key, 0)
+           end
+
+           total
+         end
+
+         def ensure_same_definition!(stored, definition)
+           stored_definition = ReportDefinition.from_h(stored)
+           return if stored_definition.fingerprint == definition.fingerprint
+
+           raise DefinitionChangedError, "#{definition.name} v#{definition.version} changed; bump version"
+         end
+
+         def key_matches?(key, pattern)
+           prefix = "#{namespace}:"
+           return ::File.fnmatch?(pattern, key) unless pattern.start_with?(prefix)
+           return false unless key.start_with?(prefix)
+
+           ::File.fnmatch?(pattern.delete_prefix(prefix), key.delete_prefix(prefix))
+         end
+
+         def rollup_path
+           @rollup_path ||= ::File.join(
+             path,
+             "rollups",
+             PathName.event(namespace),
+             PathName.event(report_name),
+             PathName.version(version)
+           )
+         end
+
+         def processed_path
+           @processed_path ||= ::File.join(rollup_path, "processed")
+         end
+
+         def scoped?
+           namespace && report_name && version
+         end
+
+         def scoped_for?(name, version)
+           scoped? && report_name == name.to_s && self.version.to_i == version.to_i
+         end
+
+         def ensure_scoped!
+           ensure_namespace!
+           return if scoped?
+
+           raise ConfigurationError, "file rollup storage must be scoped with for_report"
+         end
+
+         def ensure_namespace!
+           return if namespace
+
+           raise ConfigurationError, "file rollup storage must be configured with namespace"
+         end
+
+         def definition_path
+           @definition_path ||= ::File.join(rollup_path, "definition.json")
+         end
+
+         def lock_path
+           @lock_path ||= if scoped?
+             ::File.join(rollup_path, "process.lock")
+           else
+             ::File.join(path, "process.lock")
+           end
+         end
+
+         IndexStruct = Struct.new(:key)
+
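+         # Per-stream sidecar recording which entry ids have been processed
+         # and, per batch, which data files that batch touched. Stored as
+         # "<stream>.processed.json" under the processed/ directory and
+         # guarded by its own lock file.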
+         class ProcessedSidecar
+           include FileHelpers
+
+           attr_reader :path
+
+           def initialize(path:)
+             @path = path
+           end
+
+           def processed?(entry_id)
+             hash_value(read[PROCESSED_IDS_KEY]).key?(entry_id.to_s)
+           end
+
+           def mark(entry_ids, batch_id:, applied_paths:, timestamp:)
+             update do |data|
+               data[STREAM_FILE_KEY] ||= stream_file
+               processed = hash_value(data[PROCESSED_IDS_KEY])
+               entry_ids.each { |id| processed[id.to_s] = timestamp }
+               data[PROCESSED_IDS_KEY] = processed
+
+               batches = hash_value(data[BATCHES_KEY])
+               batches[batch_id] = {
+                 "processed_at" => timestamp,
+                 "applied_paths" => applied_paths
+               }
+               data[BATCHES_KEY] = batches
+             end
+           end
+
+           def batch_paths
+             hash_value(read[BATCHES_KEY]).transform_values do |batch|
+               Array(hash_value(batch)["applied_paths"]).map(&:to_s)
+             end
+           end
+
+           def processed_count
+             hash_value(read[PROCESSED_IDS_KEY]).length
+           end
+
+           def old?(before)
+             timestamps = hash_value(read[PROCESSED_IDS_KEY]).values
+             return false if timestamps.empty?
+
+             timestamps.all? { |timestamp| processed_entry_old?(timestamp, before) }
+           end
+
+           def delete
+             ::FileUtils.rm_f(path)
+             ::FileUtils.rm_f(lock_path)
+           end
+
+           private
+
+           def update
+             ::FileUtils.mkdir_p(::File.dirname(path))
+
+             ::File.open(lock_path, ::File::RDWR | ::File::CREAT, 0o600) do |lock|
+               lock.flock(::File::LOCK_EX)
+               data = read
+               yield data
+               write(data)
+             ensure
+               lock&.flock(::File::LOCK_UN)
+             end
+           end
+
+           def read
+             return {} unless ::File.exist?(path)
+
+             data = ::JSON.parse(::File.read(path))
+             data.is_a?(Hash) ? data : {}
+           rescue ::JSON::ParserError, Errno::ENOENT, SystemCallError, IOError
+             {}
+           end
+
+           def write(data)
+             atomic_write_json(path, data)
+           end
+
+           def hash_value(value)
+             value.is_a?(Hash) ? value : {}
+           end
+
+           def stream_file
+             ::File.basename(path).delete_suffix(".processed.json")
+           end
+
+           def lock_path
+             "#{path}.lock"
+           end
+
+           def processed_entry_old?(timestamp, before)
+             ::Time.parse(timestamp).utc < before
+           rescue ArgumentError, TypeError, RangeError
+             true
+           end
+         end
+       end
+     end
+   end
+ end