igniter-ledger 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/README.md +481 -0
  3. data/examples/intelligent_ledger/availability_boundary_ledger.rb +1190 -0
  4. data/examples/intelligent_ledger/availability_deriver.rb +150 -0
  5. data/examples/intelligent_ledger/availability_ledger.rb +197 -0
  6. data/examples/intelligent_ledger/ledger_boundary.rb +180 -0
  7. data/examples/store_poc.rb +45 -0
  8. data/exe/igniter-ledger-server +111 -0
  9. data/exe/igniter-store-server +6 -0
  10. data/ext/igniter_store_native/Cargo.toml +28 -0
  11. data/ext/igniter_store_native/extconf.rb +6 -0
  12. data/ext/igniter_store_native/src/fact.rs +303 -0
  13. data/ext/igniter_store_native/src/fact_log.rs +180 -0
  14. data/ext/igniter_store_native/src/file_backend.rs +91 -0
  15. data/ext/igniter_store_native/src/lib.rs +55 -0
  16. data/lib/igniter/ledger.rb +7 -0
  17. data/lib/igniter/store/access_path.rb +84 -0
  18. data/lib/igniter/store/change_event.rb +65 -0
  19. data/lib/igniter/store/changefeed_buffer.rb +585 -0
  20. data/lib/igniter/store/codecs.rb +253 -0
  21. data/lib/igniter/store/contractable_receipt_sink.rb +172 -0
  22. data/lib/igniter/store/fact.rb +121 -0
  23. data/lib/igniter/store/fact_log.rb +103 -0
  24. data/lib/igniter/store/file_backend.rb +269 -0
  25. data/lib/igniter/store/http_adapter.rb +413 -0
  26. data/lib/igniter/store/igniter_store.rb +838 -0
  27. data/lib/igniter/store/mcp_adapter.rb +403 -0
  28. data/lib/igniter/store/native.rb +80 -0
  29. data/lib/igniter/store/network_backend.rb +159 -0
  30. data/lib/igniter/store/protocol/handlers/access_path_handler.rb +38 -0
  31. data/lib/igniter/store/protocol/handlers/command_handler.rb +59 -0
  32. data/lib/igniter/store/protocol/handlers/derivation_handler.rb +27 -0
  33. data/lib/igniter/store/protocol/handlers/effect_handler.rb +65 -0
  34. data/lib/igniter/store/protocol/handlers/history_handler.rb +24 -0
  35. data/lib/igniter/store/protocol/handlers/projection_handler.rb +41 -0
  36. data/lib/igniter/store/protocol/handlers/relation_handler.rb +43 -0
  37. data/lib/igniter/store/protocol/handlers/store_handler.rb +24 -0
  38. data/lib/igniter/store/protocol/handlers/subscription_handler.rb +24 -0
  39. data/lib/igniter/store/protocol/interpreter.rb +447 -0
  40. data/lib/igniter/store/protocol/receipt.rb +96 -0
  41. data/lib/igniter/store/protocol/sync_profile.rb +53 -0
  42. data/lib/igniter/store/protocol/wire_envelope.rb +214 -0
  43. data/lib/igniter/store/protocol.rb +27 -0
  44. data/lib/igniter/store/read_cache.rb +163 -0
  45. data/lib/igniter/store/schema_graph.rb +248 -0
  46. data/lib/igniter/store/segmented_file_backend.rb +699 -0
  47. data/lib/igniter/store/server_config.rb +55 -0
  48. data/lib/igniter/store/server_logger.rb +64 -0
  49. data/lib/igniter/store/server_metrics.rb +222 -0
  50. data/lib/igniter/store/store_server.rb +597 -0
  51. data/lib/igniter/store/subscription_registry.rb +73 -0
  52. data/lib/igniter/store/tbackend_adapter_descriptor.rb +307 -0
  53. data/lib/igniter/store/tcp_adapter.rb +127 -0
  54. data/lib/igniter/store/wire_protocol.rb +42 -0
  55. data/lib/igniter/store.rb +64 -0
  56. data/lib/igniter-ledger.rb +4 -0
  57. data/lib/igniter-store.rb +5 -0
  58. metadata +212 -0
@@ -0,0 +1,699 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "fileutils"
5
+ require_relative "wire_protocol"
6
+ require_relative "codecs"
7
+
8
+ module Igniter
9
+ module Store
10
+ # Partitioned, manifest-tracked WAL backend with pluggable per-store codecs.
11
+ #
12
+ # A single instance replaces FileBackend for a whole IgniterStore — facts
13
+ # from every store are written into per-store, per-time-bucket segment
14
+ # files under a shared root directory.
15
+ #
16
+ # Layout:
17
+ # {root_dir}/
18
+ # wal/
19
+ # store={name}/
20
+ # date={bucket}/
21
+ # segment-000001.wal
22
+ # segment-000001.wal.manifest.json ← written atomically on seal
23
+ # segment-000002.wal
24
+ #
25
+ # Codec selection:
26
+ #
27
+ # # All stores use the default codec (json_crc32):
28
+ # SegmentedFileBackend.new(root)
29
+ #
30
+ # # All stores use compact_delta:
31
+ # SegmentedFileBackend.new(root, codec: :compact_delta)
32
+ #
33
+ # # Per-store codec map (string or symbol keys):
34
+ # SegmentedFileBackend.new(root,
35
+ # codec: { technician_locations: :compact_delta,
36
+ # vendor_leads: :compact_delta,
37
+ # crm_records: :json_crc32 })
38
+ #
39
+ # compact_delta is recommended for high-frequency History stores (sensor
40
+ # readings, GPS tracks) and gives ~16x size reduction over json_crc32.
41
+ # It is NOT resumable after a crash — any live compact_delta segment is
42
+ # sealed on the next startup and a fresh segment is opened.
43
+ #
44
+ # Public interface is identical to FileBackend: write_fact, replay, close.
45
+ class SegmentedFileBackend
46
+ include WireProtocol
47
+
48
+ MANIFEST_SUFFIX = ".manifest.json"
49
+ PURGED_SUFFIX = ".purged.json"
50
+ QUARANTINE_SUFFIX = ".quarantine.json"
51
+ DEFAULT_MAX_BYTES = 64 * 1024 * 1024 # 64 MB
52
+ DEFAULT_CODEC = :json_crc32
53
+ SCHEMA_VERSION = 1
54
+
55
+ attr_reader :root_dir
56
+
57
# Builds a backend rooted at +root_dir+, creates the "wal" directory, registers
# the given retention policies and seals any segment files found without a manifest.
#
# +root_dir+    — root data directory shared by all stores (kept as a String).
# +max_bytes+   — rotate a segment when its file reaches this size (default 64 MB).
# +time_bucket+ — :day (default), :hour, or :none.
# +codec+       — Symbol or Hash{store_name => Symbol}. See class docs.
# +retention+   — Hash{ store_name => { strategy:, duration: } }; every entry is
#                 passed to #set_retention.
#                 Strategies:
#                   :permanent      — never purge (default when no policy set)
#                   :rolling_window — purge sealed segments where max_timestamp < now - duration (Float seconds)
#                   :ephemeral      — keep only the single newest sealed segment per store
# +flush+       — durability policy applied after every +write_fact+:
#                   :batch         — (default) flush only at BATCH_SIZE, close, or checkpoint.
#                                    compact_delta facts < BATCH_SIZE are lost on a crash.
#                   :on_write      — flush after every single fact (safest, smallest write window).
#                   { every_n: N } — flush after every N facts per store.
#
# json_crc32 writes every fact immediately regardless of the flush setting.
def initialize(root_dir, max_bytes: DEFAULT_MAX_BYTES, time_bucket: :day,
               codec: DEFAULT_CODEC, retention: {}, flush: :batch)
  @root_dir           = root_dir.to_s
  @max_bytes          = max_bytes
  @time_bucket        = time_bucket
  @codec_spec         = codec # Symbol or Hash
  @flush_policy       = flush
  @mutex              = Mutex.new
  @retention_policies = {}
  @segments           = {} # store_name (String) → segment state Hash

  FileUtils.mkdir_p(File.join(@root_dir, "wal"))
  retention.each do |store_name, policy|
    set_retention(store_name, **policy)
  end
  recover_orphaned_segments!
end
88
+
89
# Appends +fact+ to the active segment of its store.
#
# Under the backend mutex this encodes the fact through the segment's codec,
# increments the segment's fact count, widens the tracked min/max transaction
# timestamps and finally applies the configured flush policy.
def write_fact(fact)
  store_name = fact.store.to_s
  @mutex.synchronize do
    segment = active_segment_for(store_name)
    segment[:codec].encode_fact(segment[:file], fact)
    segment[:count] += 1

    stamp = fact.transaction_time.to_f
    # The bounds start out nil; compact drops the missing side on the first write.
    segment[:min_ts] = [segment[:min_ts], stamp].compact.min
    segment[:max_ts] = [segment[:max_ts], stamp].compact.max

    apply_flush_policy(segment)
  end
end
101
+
102
# Returns all facts from matching segments sorted by transaction time.
#
# Facts that share a transaction time keep the order in which they were read
# (segment path order, then position inside the segment). Enumerable#sort_by is
# not guaranteed to be stable, so the read position is used as a tie-breaker.
#
# +store+ — restrict to one store name (Symbol or String); nil = all stores.
# +since+ — skip sealed segments with max_timestamp < since (Float unix sec).
# +as_of+ — skip sealed segments with min_timestamp > as_of (Float unix sec).
def replay(store: nil, since: nil, as_of: nil)
  store_name = store ? store.to_s : nil

  segment_paths_for(store: store_name, since: since, as_of: as_of)
    .flat_map { |path| read_segment(path) }
    .each_with_index
    .sort_by { |fact, position| [fact.transaction_time, position] }
    .map(&:first)
end
111
+
112
# Seals every currently open segment and, for each of those stores, opens a
# fresh segment in its place. Runs under the backend mutex.
def checkpoint!
  @mutex.synchronize do
    previous = @segments.dup
    @segments.clear
    previous.each_pair do |store_name, segment|
      seal_segment!(segment)
      @segments[store_name] = open_new_segment(store_name)
    end
  end
end
123
+
124
# Seals every open segment and forgets them. Runs under the backend mutex.
def close
  @mutex.synchronize do
    @segments.each_value { |segment| seal_segment!(segment) }
    @segments.clear
  end
end
130
+
131
# Number of segment files currently on disk across all stores and buckets.
def segment_count
  all_segment_paths.length
end
134
+
135
# Names of all stores that have a "store=<name>" directory under {root_dir}/wal.
def stored_store_names
  pattern    = File.join(@root_dir, "wal", "store=*")
  store_dirs = Dir.glob(pattern).select { |entry| File.directory?(entry) }
  store_dirs.map { |entry| File.basename(entry).sub("store=", "") }
end
140
+
141
# Registers (or replaces) the retention policy for +store+.
#
# +strategy+ is stored as a Symbol and +duration+ is stored unchanged; the
# policy is keyed by the store name as a String.
def set_retention(store, strategy:, duration: nil)
  @mutex.synchronize do
    policy = { strategy: strategy.to_sym, duration: duration }
    @retention_policies[store.to_s] = policy
  end
end
147
+
148
# Deletes eligible sealed segments for stores that have a retention policy and
# returns an Array of receipt hashes (one per deleted segment).
# Live (unsealed) segments are never touched.
#
# +store+ — restrict the purge to one store; nil = every store with a policy.
def purge!(store: nil)
  @mutex.synchronize do
    store_names = store ? [store.to_s] : @retention_policies.keys
    store_names.flat_map { |name| purge_store!(name) }
  end
end
158
+
159
# Lists purge receipts written by previous purge! calls, ordered by purged_at
# ascending. Receipt files that cannot be read or parsed are skipped.
#
# +store+  — restrict to one store; nil = all stores.
# +since+  — only receipts where purged_at >= since (Float unix sec).
# +until_+ — only receipts where purged_at <= until_ (Float unix sec).
# +limit+  — return at most this many receipts.
def purge_receipts(store: nil, since: nil, until_: nil, limit: nil)
  store_glob    = store ? "store=#{store}" : "store=*"
  receipt_files = Dir[File.join(@root_dir, "wal", store_glob, "**", "*#{PURGED_SUFFIX}")]

  parsed  = receipt_files.map { |file| JSON.parse(File.read(file)) rescue nil }.compact
  ordered = parsed.sort_by { |receipt| receipt["purged_at"] || 0 }

  ordered = ordered.select { |receipt| (receipt["purged_at"] || 0) >= since } if since
  ordered = ordered.select { |receipt| (receipt["purged_at"] || 0) <= until_ } if until_
  limit ? ordered.first(limit) : ordered
end
175
+
176
# Lists quarantine receipts for segments that could not be decoded.
# Receipt files that cannot be read or parsed are skipped.
#
# +store+ — restrict to one store; nil = all stores.
def quarantine_receipts(store: nil)
  store_glob    = store ? "store=#{store}" : "store=*"
  receipt_files = Dir[File.join(@root_dir, "wal", store_glob, "**", "*#{QUARANTINE_SUFFIX}")]
  receipt_files.map { |file| JSON.parse(File.read(file)) rescue nil }.compact
end
184
+
185
# Detailed per-segment manifest for one store or all stores.
# The view includes a "segments" array with one entry per segment (sealed + live).
# Built under the backend mutex, so it is safe to call while the backend is open.
def segment_manifest(store: nil)
  store_name = store ? store.to_s : nil
  @mutex.synchronize do
    build_storage_view(store: store_name, include_segments: true)
  end
end
193
+
194
# Compact aggregate stats for one store or all stores.
# Carries no per-segment detail — suitable for health checks and protocol metadata.
# Built under the backend mutex.
def storage_stats(store: nil)
  store_name = store ? store.to_s : nil
  @mutex.synchronize do
    build_storage_view(store: store_name, include_segments: false)
  end
end
201
+
202
# Returns the current durability posture: the configured flush policy plus a
# per-store breakdown of how many facts are buffered in memory vs. on disk.
#
# Buffered facts are at risk of loss on a process crash. A "flushed" store has
# no buffered facts in its open segment; a "buffered" store has at least one.
# Only stores with an open segment are listed.
def durability_snapshot
  @mutex.synchronize do
    per_store = {}
    @segments.each do |store_name, segment|
      pending = segment[:codec].buffered_count
      per_store[store_name] = {
        "codec"          => segment[:codec_name].to_s,
        "buffered_count" => pending,
        "facts_on_disk"  => segment[:count] - pending,
        "durability"     => pending > 0 ? "buffered" : "flushed"
      }
    end
    { "policy" => flush_policy_name, "stores" => per_store }
  end
end
222
+
223
+ private
224
+
225
+ # ── Retention ────────────────────────────────────────────────────────
226
+
227
# Purges the sealed segments of +store+ that its retention policy selects and
# returns the receipts of the deleted segments. Returns [] when the store has
# no policy. The path of the store's open segment is never deleted.
def purge_store!(store)
  policy = @retention_policies[store]
  return [] unless policy

  now       = Process.clock_gettime(Process::CLOCK_REALTIME)
  live_path = @segments[store]&.dig(:path)
  selected  = select_for_purge(sealed_segment_paths(store), policy, now)

  selected
    .reject { |path| path == live_path }
    .map { |path| delete_segment_with_receipt!(path, policy, now) }
    .compact
end
240
+
241
# Sorted paths of the segment files of +store+ that have a manifest file next
# to them (i.e. sealed segments), searched across all of the store's sub-directories.
def sealed_segment_paths(store)
  pattern = File.join(@root_dir, "wal", "store=#{store}", "**", "segment-*.wal")
  sealed = Dir[pattern].select do |path|
    next false if path.end_with?(MANIFEST_SUFFIX) || path.end_with?(PURGED_SUFFIX)

    File.exist?(path + MANIFEST_SUFFIX)
  end
  sealed.sort
end
247
+
248
# Picks which of the sealed segment +paths+ the retention +policy+ allows to be
# deleted at time +now+ (Float unix seconds).
#
# :rolling_window — segments whose manifest max_timestamp is older than
#                   now - duration. Segments with an unreadable manifest or no
#                   max_timestamp are kept. A policy without a duration selects
#                   nothing: nil.to_f is 0.0, which would otherwise mark every
#                   sealed segment as expired.
# :ephemeral      — every path except the last one of +paths+.
# anything else   — nothing (this includes :permanent).
def select_for_purge(paths, policy, now)
  case policy[:strategy]
  when :rolling_window
    return [] if policy[:duration].nil?

    cutoff = now - policy[:duration].to_f
    paths.select do |path|
      manifest = begin
        JSON.parse(File.read(path + MANIFEST_SUFFIX))
      rescue StandardError
        nil
      end
      next false unless manifest

      newest = manifest["max_timestamp"]
      newest && newest < cutoff
    end
  when :ephemeral
    paths.empty? ? [] : paths[0..-2]
  else
    []
  end
end
266
+
267
# Writes a purge receipt next to the segment at +path+, then removes the segment
# file and its manifest. Returns the receipt Hash.
#
# The receipt is the segment's manifest (or {} when it is missing or unparseable)
# merged with the purge time, the policy's strategy and duration, the segment
# path and a human-readable reason.
def delete_segment_with_receipt!(path, policy, now)
  manifest_path = path + MANIFEST_SUFFIX
  manifest = {}
  manifest = (JSON.parse(File.read(manifest_path)) rescue {}) if File.exist?(manifest_path)

  purge_details = {
    "purged_at"      => now,
    "purge_strategy" => policy[:strategy].to_s,
    "purge_duration" => policy[:duration],
    "segment_path"   => path,
    "reason"         => purge_reason(policy, manifest, now)
  }
  receipt = manifest.merge(purge_details)

  # The receipt is written before anything is deleted.
  File.write(path + PURGED_SUFFIX, JSON.generate(receipt))
  [path, manifest_path].each { |target| FileUtils.rm_f(target) }
  receipt
end
286
+
287
# Builds the human-readable "reason" string stored in a purge receipt.
# Store name and segment id come from +manifest+ and fall back to "unknown".
def purge_reason(policy, manifest, now)
  seg_id     = manifest["segment_id"] || "unknown"
  store_name = manifest["store"] || "unknown"
  strategy   = policy[:strategy].to_sym

  if strategy == :rolling_window
    # A manifest without max_timestamp yields an age of zero.
    age = now - (manifest["max_timestamp"] || now)
    "rolling_window: segment #{seg_id} (store=#{store_name}) max_timestamp #{age.round(1)}s older than retention window of #{policy[:duration]}s"
  elsif strategy == :ephemeral
    "ephemeral: segment #{seg_id} (store=#{store_name}) superseded by newer sealed segment"
  else
    "#{policy[:strategy]}: segment #{seg_id} (store=#{store_name}) purged by policy"
  end
end
300
+
301
+ # ── Flush policy ─────────────────────────────────────────────────────
302
+
303
# Applies the configured flush policy to +seg+ after a fact has been written.
#
# :on_write      — flush the codec and the file every time.
# { every_n: N } — count facts in seg[:facts_since_flush] and flush once the
#                  counter reaches N, then reset it.
# anything else  — (including :batch) no extra flush beyond what the codec
#                  already does at BATCH_SIZE.
def apply_flush_policy(seg)
  push_to_disk = lambda do
    seg[:codec].flush(seg[:file])
    seg[:file].flush
  end

  case @flush_policy
  when :on_write
    push_to_disk.call
  when Hash
    threshold = @flush_policy[:every_n]
    return unless threshold

    seg[:facts_since_flush] = (seg[:facts_since_flush] || 0) + 1
    return unless seg[:facts_since_flush] >= threshold

    push_to_disk.call
    seg[:facts_since_flush] = 0
  end
end
321
+
322
# String label of the configured flush policy: "batch", "on_write",
# "every_n:<N>" for a Hash policy, or the policy's own to_s otherwise.
def flush_policy_name
  policy = @flush_policy
  return "every_n:#{policy[:every_n]}" if policy.is_a?(Hash)

  # :batch and :on_write stringify to exactly their labels.
  policy.to_s
end
330
+
331
+ # ── Codec resolution ─────────────────────────────────────────────────
332
+
333
# Resolves the codec name (Symbol) configured for +store+.
#
# A Symbol/String spec applies to every store. A Hash spec is looked up by the
# store name as a Symbol, then as a String, falling back to DEFAULT_CODEC.
# Any other spec yields DEFAULT_CODEC.
def codec_name_for(store)
  spec = @codec_spec
  return spec.to_sym if spec.is_a?(Symbol) || spec.is_a?(String)
  return DEFAULT_CODEC unless spec.is_a?(Hash)

  chosen = spec[store.to_sym] || spec[store.to_s] || DEFAULT_CODEC
  chosen.to_sym
end
341
+
342
+ # ── Segment lifecycle ─────────────────────────────────────────────────
343
+
344
# Returns the segment state Hash that +store+ should currently write to.
# Opens (or resumes) a segment on first use, then rotates it if needed.
def active_segment_for(store)
  @segments[store] = open_or_resume_segment(store) unless @segments[store]
  rotate_if_needed!(store)
  @segments[store]
end
349
+
350
# Seals the open segment of +store+ and opens a new one when the time bucket
# has changed or the segment file has reached @max_bytes on disk.
def rotate_if_needed!(store)
  current = @segments[store]
  bytes_on_disk = File.size?(current[:path]) || 0
  return unless current_bucket != current[:bucket] || bytes_on_disk >= @max_bytes

  seal_segment!(current)
  @segments[store] = open_new_segment(store)
end
358
+
359
# Opens the segment that +store+ will write to in the current bucket.
#
# If the bucket directory holds an unsealed segment (no manifest next to it)
# and the store's codec is :json_crc32, the highest-numbered one is resumed.
# For any other codec the unsealed segment is not resumable: it is sealed and
# a fresh segment is started.
def open_or_resume_segment(store)
  bucket = current_bucket
  dir    = store_bucket_dir(store, bucket)
  FileUtils.mkdir_p(dir)

  unsealed = Dir[File.join(dir, "segment-*.wal")].reject do |path|
    path.end_with?(MANIFEST_SUFFIX) || File.exist?(path + MANIFEST_SUFFIX)
  end
  live       = unsealed.max_by { |path| segment_number_from_path(path) }
  codec_name = codec_name_for(store)

  return resume_segment(live, store, bucket, codec_name) if live && codec_name == :json_crc32

  seal_orphaned_live!(live, codec_name: codec_name) if live
  open_new_segment_in(store, bucket, codec_name)
end
381
+
382
# Reopens the existing segment file at +path+ for appending and returns its
# segment state Hash. The fact count is taken from the frames already in the
# file; the min/max timestamps start out nil.
def resume_segment(path, store, bucket, codec_name)
  io = File.open(path, "ab")
  io.sync = true
  codec = Codecs.build(codec_name)

  {
    path:       path,
    file:       io,
    store:      store,
    bucket:     bucket,
    number:     segment_number_from_path(path),
    codec_name: codec_name,
    codec:      codec,
    count:      count_frames(path),
    min_ts:     nil,
    max_ts:     nil
  }
end
390
+
391
# Opens a brand-new segment for +store+ in the current bucket, using the
# store's configured codec.
def open_new_segment(store)
  bucket = current_bucket
  open_new_segment_in(store, bucket, codec_name_for(store))
end
394
+
395
# Creates the next-numbered segment file for +store+ in +bucket+, lets the
# codec start the segment, and returns the segment state Hash with a zero count.
def open_new_segment_in(store, bucket, codec_name)
  dir = store_bucket_dir(store, bucket)
  FileUtils.mkdir_p(dir)

  number = segment_numbers_in(dir).max.to_i + 1 # empty directory → 1
  path   = segment_path_for(store, bucket, number)
  io     = File.open(path, "ab")
  io.sync = true

  codec = Codecs.build(codec_name)
  codec.start_segment(io, store: store)

  {
    path:       path,
    file:       io,
    store:      store,
    bucket:     bucket,
    number:     number,
    codec_name: codec_name,
    codec:      codec,
    count:      0,
    min_ts:     nil,
    max_ts:     nil
  }
end
408
+
409
# Seals a live segment that belongs to a previous session or to a codec that
# cannot be resumed. No in-memory metadata is available for it, so only a
# minimal manifest is written: store and bucket are parsed from the path, the
# fact count is recounted from the file, and the timestamps are nil.
# An empty segment file is deleted instead of sealed.
def seal_orphaned_live!(path, codec_name: DEFAULT_CODEC)
  File.open(path, "ab") { |io| io.flush }

  store_name = path.split("store=").last.split("/").first
  bucket     = path.split("date=").last.split("/").first
  number     = segment_number_from_path(path)

  if File.size(path).zero?
    FileUtils.rm_f(path)
    return
  end

  write_manifest(
    path,
    codec:      codec_name.to_s,
    fact_count: count_frames_for_codec(path, codec_name),
    byte_size:  File.size(path),
    min_ts:     nil,
    max_ts:     nil,
    store:      store_name,
    bucket:     bucket,
    number:     number
  )
end
428
+
429
# Seals an open segment: flushes the codec and the file, closes the file and
# writes the manifest. A segment with no facts is deleted instead, without a
# manifest. Does nothing when +seg+ is nil.
def seal_segment!(seg)
  return unless seg

  io = seg[:file]
  seg[:codec].flush(io)
  io.flush
  io.close

  if seg[:count].zero?
    FileUtils.rm_f(seg[:path])
    return
  end

  path = seg[:path]
  write_manifest(
    path,
    codec:      seg[:codec].name,
    fact_count: seg[:count],
    byte_size:  File.size(path),
    min_ts:     seg[:min_ts],
    max_ts:     seg[:max_ts],
    store:      seg[:store],
    bucket:     seg[:bucket],
    number:     seg[:number]
  )
end
448
+
449
# Writes the manifest JSON for the segment at +path+.
# The JSON goes to a ".tmp" file first and is then moved onto the final
# manifest path, so the final path never holds a partially written manifest.
def write_manifest(path, codec:, fact_count:, byte_size:, min_ts:, max_ts:,
                   store:, bucket:, number:)
  final_path = path + MANIFEST_SUFFIX
  staging    = final_path + ".tmp"

  payload = {
    segment_id:    segment_id(store, bucket, number),
    store:         store,
    codec:         codec,
    fact_count:    fact_count,
    byte_size:     byte_size,
    min_timestamp: min_ts,
    max_timestamp: max_ts,
    sealed:        true,
    sealed_at:     Process.clock_gettime(Process::CLOCK_REALTIME)
  }

  File.write(staging, JSON.generate(payload))
  FileUtils.mv(staging, final_path)
end
466
+
467
+ # ── Replay ────────────────────────────────────────────────────────────
468
+
469
# Sorted segment file paths that a replay should read.
#
# +store+ — store name String, or nil for every store.
# +since+ — drop sealed segments whose manifest max_timestamp < since.
# +as_of+ — drop sealed segments whose manifest min_timestamp > as_of.
#
# Segments without a manifest (live segments) are always kept. A segment whose
# manifest cannot be read or parsed is kept as well: without its timestamps it
# cannot be proven out of range, and raising here would abort the whole replay.
def segment_paths_for(store:, since:, as_of:)
  store_glob = store ? "store=#{store}" : "store=*"
  candidates = Dir[File.join(@root_dir, "wal", store_glob, "date=*", "segment-*.wal")]
               .reject { |path| path.end_with?(MANIFEST_SUFFIX) }
               .sort
  return candidates unless since || as_of

  candidates.select do |path|
    manifest_path = path + MANIFEST_SUFFIX
    next true unless File.exist?(manifest_path)

    manifest = begin
      JSON.parse(File.read(manifest_path))
    rescue StandardError
      nil
    end
    next true unless manifest.is_a?(Hash)

    newest = manifest["max_timestamp"]
    oldest = manifest["min_timestamp"]
    next false if since && newest && newest < since
    next false if as_of && oldest && oldest > as_of

    true
  end
end
488
+
489
# Decodes every fact in the segment at +path+ with the codec named in its
# manifest (DEFAULT_CODEC when there is no usable manifest).
#
# Never raises on a StandardError: a decoding failure writes a quarantine
# receipt and yields []. A quarantine receipt is also written when the decode
# comes back empty although the manifest records a positive fact count.
def read_segment(path)
  codec   = Codecs.build(manifest_codec_for(path))
  decoded = File.open(path, "rb") { |io| codec.decode(io) }

  if decoded.empty? && segment_expects_facts?(path)
    write_quarantine_receipt(path, RuntimeError.new("segment not empty but decoded 0 facts"))
  end

  decoded
rescue StandardError => e
  write_quarantine_receipt(path, e)
  []
end
501
+
502
# Codec name (Symbol) recorded in the manifest of the segment at +path+.
# Falls back to DEFAULT_CODEC when the manifest is missing, has no "codec"
# entry, or cannot be read or parsed.
def manifest_codec_for(path)
  manifest_path = path + MANIFEST_SUFFIX
  return DEFAULT_CODEC unless File.exist?(manifest_path)

  recorded = JSON.parse(File.read(manifest_path))["codec"]
  (recorded || DEFAULT_CODEC.to_s).to_sym
rescue StandardError
  DEFAULT_CODEC
end
509
+
510
+ # ── Path helpers ──────────────────────────────────────────────────────
511
+
512
# Directory holding the segments of +store+ for time bucket +bucket+:
# {root_dir}/wal/store=<store>/date=<bucket>.
def store_bucket_dir(store, bucket)
  parts = [@root_dir, "wal", "store=#{store}", "date=#{bucket}"]
  File.join(*parts)
end
515
+
516
# Full path of segment +number+ of +store+ in +bucket+; the number is
# left-padded with zeros to six digits.
def segment_path_for(store, bucket, number)
  padded = number.to_s.rjust(6, "0")
  File.join(store_bucket_dir(store, bucket), "segment-#{padded}.wal")
end
519
+
520
# Logical segment identifier "<store>/<bucket>/<six-digit number>".
def segment_id(store, bucket, number)
  padded = number.to_s.rjust(6, "0")
  "#{store}/#{bucket}/#{padded}"
end
523
+
524
# Integer segment number parsed from a segment path: the text after the last
# "-" of the base name (with a trailing ".wal" removed), converted with to_i.
def segment_number_from_path(path)
  stem = File.basename(path, ".wal")
  stem.split("-").last.to_i
end
527
+
528
# Paths of every segment file on disk, across all stores and buckets.
def all_segment_paths
  pattern = File.join(@root_dir, "wal", "store=*", "date=*", "segment-*.wal")
  Dir.glob(pattern).reject { |path| path.end_with?(MANIFEST_SUFFIX) }
end
532
+
533
# Segment numbers already used in +dir+.
#
# Besides the segment files themselves this also counts their side files
# (manifest, purge receipt, quarantine receipt). A purge deletes the segment
# file but leaves "segment-NNNNNN.wal.purged.json" behind; counting it keeps
# the number from being handed out again, which would give a new segment the
# same segment_id and let a later purge overwrite the earlier receipt.
def segment_numbers_in(dir)
  Dir[File.join(dir, "segment-*.wal*")]
    .map { |path| segment_number_from_path(path) }
    .uniq
end
538
+
539
# Name of the time bucket that new facts belong to right now (UTC):
# "flat" for :none, "YYYY-MM-DDTHH" for :hour, "YYYY-MM-DD" otherwise.
def current_bucket
  return "flat" if @time_bucket == :none

  pattern = @time_bucket == :hour ? "%Y-%m-%dT%H" : "%Y-%m-%d"
  Time.now.utc.strftime(pattern)
end
546
+
547
# Number of frames that read_frame yields from the file at +path+.
# Returns 0 when the file does not exist or any StandardError occurs while reading.
def count_frames(path)
  return 0 unless File.exist?(path)

  File.open(path, "rb") do |io|
    frames = 0
    frames += 1 while read_frame(io)
    frames
  end
rescue StandardError
  0
end
555
+
556
# Number of facts stored in the segment at +path+ for the given codec.
#
# For compact_delta / compact_delta_zlib the first frame is a header and each
# following frame is a batch that starts with a 4-byte big-endian fact count;
# those counts are summed. Every other codec is counted frame by frame via
# count_frames.
#
# A batch frame shorter than its 4-byte count prefix contributes nothing, so one
# truncated batch does not discard the counts of the batches around it.
# Returns 0 when the file is missing or a StandardError occurs.
def count_frames_for_codec(path, codec_name)
  codec = codec_name.to_sym
  return count_frames(path) unless codec == :compact_delta_zlib || codec == :compact_delta
  return 0 unless File.exist?(path)

  File.open(path, "rb") do |io|
    read_frame(io) # skip header
    total = 0
    while (body = read_frame(io))
      prefix = body[0, 4]
      total += prefix.unpack1("N") if prefix && prefix.bytesize == 4
    end
    total
  end
rescue StandardError
  0
end
573
+
574
# Writes a quarantine receipt next to the segment at +path+ describing +error+.
# The receipt is the segment's manifest (or {} when it is missing or
# unparseable) merged with the time, the error class, the first 500 characters
# of the error message and the segment path.
# Never raises on a StandardError: this runs on an error-handling path.
def write_quarantine_receipt(path, error)
  manifest_path = path + MANIFEST_SUFFIX
  manifest = {}
  manifest = (JSON.parse(File.read(manifest_path)) rescue {}) if File.exist?(manifest_path)

  failure = {
    "quarantined_at" => Process.clock_gettime(Process::CLOCK_REALTIME),
    "error_class"    => error.class.to_s,
    "error_message"  => error.message.to_s[0, 500],
    "segment_path"   => path
  }
  File.write(path + QUARANTINE_SUFFIX, JSON.generate(manifest.merge(failure)))
rescue StandardError
  nil
end
587
+
588
# True when the manifest of the segment at +path+ records a positive fact_count.
# False when there is no manifest or it cannot be read or parsed.
def segment_expects_facts?(path)
  manifest_path = path + MANIFEST_SUFFIX
  return false unless File.exist?(manifest_path)

  recorded = JSON.parse(File.read(manifest_path))["fact_count"]
  (recorded || 0).to_i.positive?
rescue StandardError
  false
end
595
+
596
# On startup, seals every segment file that has no manifest next to it, i.e.
# live segments left open by a previous crash.
# The codec is detected by peeking at the first frame rather than relying on
# the current codec config (the store may have been reconfigured between sessions).
def recover_orphaned_segments!
  side_file_suffixes = [MANIFEST_SUFFIX, PURGED_SUFFIX, QUARANTINE_SUFFIX]

  Dir[File.join(@root_dir, "wal", "store=*", "date=*")].each do |bucket_dir|
    orphans = Dir[File.join(bucket_dir, "segment-*.wal")].reject do |path|
      path.end_with?(*side_file_suffixes) || File.exist?(path + MANIFEST_SUFFIX)
    end

    orphans.each do |path|
      seal_orphaned_live!(path, codec_name: detect_segment_codec(path))
    end
  end
end
607
+
608
# Peeks at the first frame of the segment file at +path+ to determine its codec.
#
# A first frame that unpacks (MessagePack) to a Hash with a "fields" key is
# reported as :compact_delta_zlib. Everything else — no frame, an empty frame,
# any other payload, or any StandardError — is reported as DEFAULT_CODEC.
def detect_segment_codec(path)
  first_frame = File.open(path, "rb") { |io| read_frame(io) }
  return DEFAULT_CODEC if first_frame.nil? || first_frame.length.zero?

  header = MessagePack.unpack(first_frame)
  if header.is_a?(Hash) && header.key?("fields")
    :compact_delta_zlib
  else
    DEFAULT_CODEC
  end
rescue StandardError
  DEFAULT_CODEC
end
620
+
621
+ # ── Storage metadata ──────────────────────────────────────────────────
622
+
623
# Builds the storage metadata view: schema version, generation time and one
# stats entry per store, keyed by store name in sorted order.
#
# +store+            — a single store name, or nil for every known store.
# +include_segments+ — passed through to build_store_stats.
def build_storage_view(store:, include_segments:)
  store_names  = store ? [store] : manifest_store_names
  generated_at = Process.clock_gettime(Process::CLOCK_REALTIME)

  per_store = store_names.sort.each_with_object({}) do |name, view|
    view[name] = build_store_stats(name, include_segments: include_segments)
  end

  {
    "schema_version" => SCHEMA_VERSION,
    "generated_at"   => generated_at,
    "stores"         => per_store
  }
end
632
+
633
# Aggregated statistics for +store+, combining the manifests of its sealed
# segments with the in-memory state of its open segment (if any).
#
# Unparseable manifest files are skipped. With +include_segments+ the result
# also carries a "segments" array: one entry per sealed manifest followed by
# one entry for the open segment.
def build_store_stats(store, include_segments:)
  store_root = File.join(@root_dir, "wal", "store=#{store}")
  manifests = Dir[File.join(store_root, "**", "segment-*.wal#{MANIFEST_SUFFIX}")]
              .sort
              .map { |file| JSON.parse(File.read(file)) rescue nil }
              .compact
  live = @segments[store]

  fact_total  = manifests.sum { |m| m["fact_count"].to_i }
  byte_total  = manifests.sum { |m| m["byte_size"].to_i }
  codec_names = manifests.map { |m| m["codec"] }
  oldest      = manifests.map { |m| m["min_timestamp"] }.compact
  newest      = manifests.map { |m| m["max_timestamp"] }.compact

  if live
    fact_total += live[:count]
    byte_total += (File.size?(live[:path]) || 0)
    codec_names << live[:codec_name].to_s
    oldest << live[:min_ts] if live[:min_ts]
    newest << live[:max_ts] if live[:max_ts]
  end

  live_total = live ? 1 : 0
  stats = {
    "segment_count"            => manifests.size + live_total,
    "sealed_count"             => manifests.size,
    "live_count"               => live_total,
    "codecs"                   => codec_names.uniq.compact.sort,
    "byte_size"                => byte_total,
    "fact_count"               => fact_total,
    "min_timestamp"            => oldest.min,
    "max_timestamp"            => newest.max,
    "purge_receipt_count"      => Dir[File.join(store_root, "**", "*#{PURGED_SUFFIX}")].size,
    "quarantine_receipt_count" => Dir[File.join(store_root, "**", "*#{QUARANTINE_SUFFIX}")].size
  }
  return stats unless include_segments

  segment_rows = manifests.map do |m|
    m.slice("segment_id", "codec", "fact_count", "byte_size",
            "min_timestamp", "max_timestamp", "sealed", "sealed_at")
  end
  if live
    segment_rows << {
      "segment_id"    => segment_id(live[:store], live[:bucket], live[:number]),
      "codec"         => live[:codec_name].to_s,
      "fact_count"    => live[:count],
      "byte_size"     => File.size?(live[:path]) || 0,
      "min_timestamp" => live[:min_ts],
      "max_timestamp" => live[:max_ts],
      "sealed"        => false,
      "sealed_at"     => nil
    }
  end
  stats["segments"] = segment_rows
  stats
end
691
+
692
# Every store name known either from a directory on disk or from an open
# segment, without duplicates (on-disk names first).
def manifest_store_names
  stored_store_names | @segments.keys
end
697
+ end
698
+ end
699
+ end