igniter-ledger 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +481 -0
- data/examples/intelligent_ledger/availability_boundary_ledger.rb +1190 -0
- data/examples/intelligent_ledger/availability_deriver.rb +150 -0
- data/examples/intelligent_ledger/availability_ledger.rb +197 -0
- data/examples/intelligent_ledger/ledger_boundary.rb +180 -0
- data/examples/store_poc.rb +45 -0
- data/exe/igniter-ledger-server +111 -0
- data/exe/igniter-store-server +6 -0
- data/ext/igniter_store_native/Cargo.toml +28 -0
- data/ext/igniter_store_native/extconf.rb +6 -0
- data/ext/igniter_store_native/src/fact.rs +303 -0
- data/ext/igniter_store_native/src/fact_log.rs +180 -0
- data/ext/igniter_store_native/src/file_backend.rs +91 -0
- data/ext/igniter_store_native/src/lib.rs +55 -0
- data/lib/igniter/ledger.rb +7 -0
- data/lib/igniter/store/access_path.rb +84 -0
- data/lib/igniter/store/change_event.rb +65 -0
- data/lib/igniter/store/changefeed_buffer.rb +585 -0
- data/lib/igniter/store/codecs.rb +253 -0
- data/lib/igniter/store/contractable_receipt_sink.rb +172 -0
- data/lib/igniter/store/fact.rb +121 -0
- data/lib/igniter/store/fact_log.rb +103 -0
- data/lib/igniter/store/file_backend.rb +269 -0
- data/lib/igniter/store/http_adapter.rb +413 -0
- data/lib/igniter/store/igniter_store.rb +838 -0
- data/lib/igniter/store/mcp_adapter.rb +403 -0
- data/lib/igniter/store/native.rb +80 -0
- data/lib/igniter/store/network_backend.rb +159 -0
- data/lib/igniter/store/protocol/handlers/access_path_handler.rb +38 -0
- data/lib/igniter/store/protocol/handlers/command_handler.rb +59 -0
- data/lib/igniter/store/protocol/handlers/derivation_handler.rb +27 -0
- data/lib/igniter/store/protocol/handlers/effect_handler.rb +65 -0
- data/lib/igniter/store/protocol/handlers/history_handler.rb +24 -0
- data/lib/igniter/store/protocol/handlers/projection_handler.rb +41 -0
- data/lib/igniter/store/protocol/handlers/relation_handler.rb +43 -0
- data/lib/igniter/store/protocol/handlers/store_handler.rb +24 -0
- data/lib/igniter/store/protocol/handlers/subscription_handler.rb +24 -0
- data/lib/igniter/store/protocol/interpreter.rb +447 -0
- data/lib/igniter/store/protocol/receipt.rb +96 -0
- data/lib/igniter/store/protocol/sync_profile.rb +53 -0
- data/lib/igniter/store/protocol/wire_envelope.rb +214 -0
- data/lib/igniter/store/protocol.rb +27 -0
- data/lib/igniter/store/read_cache.rb +163 -0
- data/lib/igniter/store/schema_graph.rb +248 -0
- data/lib/igniter/store/segmented_file_backend.rb +699 -0
- data/lib/igniter/store/server_config.rb +55 -0
- data/lib/igniter/store/server_logger.rb +64 -0
- data/lib/igniter/store/server_metrics.rb +222 -0
- data/lib/igniter/store/store_server.rb +597 -0
- data/lib/igniter/store/subscription_registry.rb +73 -0
- data/lib/igniter/store/tbackend_adapter_descriptor.rb +307 -0
- data/lib/igniter/store/tcp_adapter.rb +127 -0
- data/lib/igniter/store/wire_protocol.rb +42 -0
- data/lib/igniter/store.rb +64 -0
- data/lib/igniter-ledger.rb +4 -0
- data/lib/igniter-store.rb +5 -0
- metadata +212 -0
|
@@ -0,0 +1,699 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require_relative "wire_protocol"
|
|
6
|
+
require_relative "codecs"
|
|
7
|
+
|
|
8
|
+
module Igniter
|
|
9
|
+
module Store
|
|
10
|
+
# Partitioned, manifest-tracked WAL backend with pluggable per-store codecs.
|
|
11
|
+
#
|
|
12
|
+
# A single instance replaces FileBackend for a whole IgniterStore — facts
|
|
13
|
+
# from every store are written into per-store, per-time-bucket segment
|
|
14
|
+
# files under a shared root directory.
|
|
15
|
+
#
|
|
16
|
+
# Layout:
|
|
17
|
+
# {root_dir}/
|
|
18
|
+
# wal/
|
|
19
|
+
# store={name}/
|
|
20
|
+
# date={bucket}/
|
|
21
|
+
# segment-000001.wal
|
|
22
|
+
# segment-000001.wal.manifest.json ← written atomically on seal
|
|
23
|
+
# segment-000002.wal
|
|
24
|
+
#
|
|
25
|
+
# Codec selection:
|
|
26
|
+
#
|
|
27
|
+
# # All stores use the default codec (json_crc32):
|
|
28
|
+
# SegmentedFileBackend.new(root)
|
|
29
|
+
#
|
|
30
|
+
# # All stores use compact_delta:
|
|
31
|
+
# SegmentedFileBackend.new(root, codec: :compact_delta)
|
|
32
|
+
#
|
|
33
|
+
# # Per-store codec map (string or symbol keys):
|
|
34
|
+
# SegmentedFileBackend.new(root,
|
|
35
|
+
# codec: { technician_locations: :compact_delta,
|
|
36
|
+
# vendor_leads: :compact_delta,
|
|
37
|
+
# crm_records: :json_crc32 })
|
|
38
|
+
#
|
|
39
|
+
# compact_delta is recommended for high-frequency History stores (sensor
|
|
40
|
+
# readings, GPS tracks) and gives ~16x size reduction over json_crc32.
|
|
41
|
+
# It is NOT resumable after a crash — any live compact_delta segment is
|
|
42
|
+
# sealed on the next startup and a fresh segment is opened.
|
|
43
|
+
#
|
|
44
|
+
# Public interface is identical to FileBackend: write_fact, replay, close.
|
|
45
|
+
class SegmentedFileBackend
|
|
46
|
+
include WireProtocol
|
|
47
|
+
|
|
48
|
+
MANIFEST_SUFFIX = ".manifest.json"
|
|
49
|
+
PURGED_SUFFIX = ".purged.json"
|
|
50
|
+
QUARANTINE_SUFFIX = ".quarantine.json"
|
|
51
|
+
DEFAULT_MAX_BYTES = 64 * 1024 * 1024 # 64 MB
|
|
52
|
+
DEFAULT_CODEC = :json_crc32
|
|
53
|
+
SCHEMA_VERSION = 1
|
|
54
|
+
|
|
55
|
+
attr_reader :root_dir
|
|
56
|
+
|
|
57
|
+
# +root_dir+ — root data directory shared by all stores.
|
|
58
|
+
# +max_bytes+ — rotate segment when file reaches this size (default 64 MB).
|
|
59
|
+
# +time_bucket+ — :day (default), :hour, or :none.
|
|
60
|
+
# +codec+ — Symbol or Hash{store_name => Symbol}. See class docs.
|
|
61
|
+
# +retention+ — Hash{ store_name => { strategy:, duration: } }
|
|
62
|
+
# Strategies:
|
|
63
|
+
# :permanent — never purge (default when no policy set)
|
|
64
|
+
# :rolling_window — purge sealed segments where max_timestamp < now - duration (Float seconds)
|
|
65
|
+
# :ephemeral — keep only the single newest sealed segment per store
|
|
66
|
+
# +flush+ — durability policy applied after every +write_fact+:
|
|
67
|
+
# :batch — (default) flush only at BATCH_SIZE, close, or checkpoint.
|
|
68
|
+
# compact_delta facts < BATCH_SIZE are lost on a crash.
|
|
69
|
+
# :on_write — flush after every single fact (safest, smallest write window).
|
|
70
|
+
# { every_n: N } — flush after every N facts per store.
|
|
71
|
+
#
|
|
72
|
+
# json_crc32 writes every fact immediately regardless of this setting.
|
|
73
|
+
def initialize(root_dir, max_bytes: DEFAULT_MAX_BYTES, time_bucket: :day,
               codec: DEFAULT_CODEC, retention: {}, flush: :batch)
  # Configuration, stored exactly as supplied (codec may be a Symbol or a Hash).
  @root_dir     = root_dir.to_s
  @max_bytes    = max_bytes
  @time_bucket  = time_bucket
  @codec_spec   = codec
  @flush_policy = flush

  # Runtime state. The mutex must exist before set_retention is called below.
  @mutex              = Mutex.new
  @segments           = {} # store name (String) => live segment state Hash
  @retention_policies = {}

  FileUtils.mkdir_p(File.join(@root_dir, "wal"))
  retention.each do |store_name, policy|
    set_retention(store_name, **policy)
  end
  recover_orphaned_segments!
end
|
|
88
|
+
|
|
89
|
+
# Appends one fact to the live segment of the fact's store and updates that
# segment's fact count and min/max transaction timestamps, then applies the
# configured flush policy. Runs entirely under the backend mutex.
def write_fact(fact)
  store_key = fact.store.to_s
  @mutex.synchronize do
    segment = active_segment_for(store_key)
    segment[:codec].encode_fact(segment[:file], fact)
    segment[:count] += 1

    stamp = fact.transaction_time.to_f
    # compact drops the nil that is present before the first fact is recorded.
    segment[:min_ts] = [segment[:min_ts], stamp].compact.min
    segment[:max_ts] = [segment[:max_ts], stamp].compact.max

    apply_flush_policy(segment)
  end
end
|
|
101
|
+
|
|
102
|
+
# Returns all facts from matching segments sorted by timestamp.
|
|
103
|
+
# +store+ — restrict to one store name (Symbol or String); nil = all stores.
|
|
104
|
+
# +since+ — skip sealed segments with max_timestamp < since (Float unix sec).
|
|
105
|
+
# +as_of+ — skip sealed segments with min_timestamp > as_of (Float unix sec).
|
|
106
|
+
def replay(store: nil, since: nil, as_of: nil)
  store_filter = store.to_s if store
  paths = segment_paths_for(store: store_filter, since: since, as_of: as_of)
  facts = paths.flat_map { |segment_path| read_segment(segment_path) }
  facts.sort_by { |fact| fact.transaction_time }
end
|
|
111
|
+
|
|
112
|
+
# Seal every open segment and open a fresh one per store.
|
|
113
|
+
def checkpoint!
  @mutex.synchronize do
    # Work from a copy so the registry can be rebuilt while iterating.
    previous = @segments.dup
    @segments.clear
    previous.each_pair do |store_name, segment|
      seal_segment!(segment)
      @segments[store_name] = open_new_segment(store_name)
    end
  end
end
|
|
123
|
+
|
|
124
|
+
# Seals every live segment and forgets them; no replacement segments are opened.
def close
  @mutex.synchronize do
    @segments.each_value { |segment| seal_segment!(segment) }
    @segments.clear
  end
end
|
|
130
|
+
|
|
131
|
+
# Number of segment files currently found under the WAL root.
def segment_count
  all_segment_paths.length
end
|
|
134
|
+
|
|
135
|
+
# Names of the stores that have a `store=<name>` directory under the WAL root.
def stored_store_names
  store_dirs = Dir[File.join(@root_dir, "wal", "store=*")].select do |entry|
    File.directory?(entry)
  end
  store_dirs.map { |entry| File.basename(entry).sub("store=", "") }
end
|
|
140
|
+
|
|
141
|
+
# Register (or replace) the retention policy for a store.
|
|
142
|
+
def set_retention(store, strategy:, duration: nil)
  policy = { strategy: strategy.to_sym, duration: duration }
  @mutex.synchronize { @retention_policies[store.to_s] = policy }
end
|
|
147
|
+
|
|
148
|
+
# Delete eligible sealed segments for stores that have a policy.
|
|
149
|
+
# Returns an Array of receipt hashes (one per deleted segment).
|
|
150
|
+
# Live (unsealed) segments are never touched.
|
|
151
|
+
# +store+ — restrict purge to one store; nil = all stores with a policy.
|
|
152
|
+
def purge!(store: nil)
  @mutex.synchronize do
    store_names = if store
                    [store.to_s]
                  else
                    @retention_policies.keys
                  end
    store_names.flat_map { |name| purge_store!(name) }
  end
end
|
|
158
|
+
|
|
159
|
+
# List purge receipts written by previous purge! calls.
|
|
160
|
+
# +store+ — restrict to one store; nil = all stores.
|
|
161
|
+
# +since+ — only receipts where purged_at >= since (Float unix sec).
|
|
162
|
+
# +until_+ — only receipts where purged_at <= until_ (Float unix sec).
|
|
163
|
+
# +limit+ — return at most this many, ordered by purged_at ascending.
|
|
164
|
+
def purge_receipts(store: nil, since: nil, until_: nil, limit: nil)
  store_dir = store ? "store=#{store}" : "store=*"
  pattern = File.join(@root_dir, "wal", store_dir, "**", "*#{PURGED_SUFFIX}")

  # Receipt files that cannot be read or parsed are skipped, not raised.
  parsed = Dir[pattern].map do |receipt_path|
    begin
      JSON.parse(File.read(receipt_path))
    rescue StandardError
      nil
    end
  end

  # A receipt without "purged_at" sorts and filters as timestamp 0.
  purged_at = ->(receipt) { receipt["purged_at"] || 0 }
  matches = parsed.compact.sort_by { |receipt| purged_at.call(receipt) }
  matches = matches.select { |receipt| purged_at.call(receipt) >= since } if since
  matches = matches.select { |receipt| purged_at.call(receipt) <= until_ } if until_
  limit ? matches.first(limit) : matches
end
|
|
175
|
+
|
|
176
|
+
# List quarantine receipts for segments that could not be decoded.
|
|
177
|
+
# +store+ — restrict to one store; nil = all stores.
|
|
178
|
+
def quarantine_receipts(store: nil)
  store_dir = store ? "store=#{store}" : "store=*"
  pattern = File.join(@root_dir, "wal", store_dir, "**", "*#{QUARANTINE_SUFFIX}")

  receipts = Dir[pattern].map do |receipt_path|
    begin
      JSON.parse(File.read(receipt_path))
    rescue StandardError
      nil # unreadable receipt files are left out of the result
    end
  end
  receipts.compact
end
|
|
184
|
+
|
|
185
|
+
# Detailed per-segment manifest for one or all stores.
|
|
186
|
+
# Includes a "segments" array with one entry per segment (sealed + live).
|
|
187
|
+
# Safe to call while the backend is open.
|
|
188
|
+
def segment_manifest(store: nil)
  store_filter = store.to_s if store
  @mutex.synchronize do
    build_storage_view(store: store_filter, include_segments: true)
  end
end
|
|
193
|
+
|
|
194
|
+
# Compact aggregate stats for one or all stores.
|
|
195
|
+
# No per-segment detail — suitable for health checks and protocol metadata.
|
|
196
|
+
def storage_stats(store: nil)
  store_filter = store.to_s if store
  @mutex.synchronize do
    build_storage_view(store: store_filter, include_segments: false)
  end
end
|
|
201
|
+
|
|
202
|
+
# Returns the current durability posture: configured policy plus a per-store
|
|
203
|
+
# breakdown showing how many facts are buffered in memory vs. on disk.
|
|
204
|
+
#
|
|
205
|
+
# Buffered facts are at risk of loss on a process crash. A "flushed" store
|
|
206
|
+
# has all accepted facts on disk; a "buffered" store has unflushed in-memory
|
|
207
|
+
# facts that would be lost if the process were killed right now.
|
|
208
|
+
def durability_snapshot
  @mutex.synchronize do
    per_store = {}
    @segments.each do |store_name, segment|
      pending = segment[:codec].buffered_count
      state = pending > 0 ? "buffered" : "flushed"
      per_store[store_name] = {
        "codec" => segment[:codec_name].to_s,
        "buffered_count" => pending,
        # Facts accepted by the segment minus those the codec still buffers.
        "facts_on_disk" => segment[:count] - pending,
        "durability" => state
      }
    end
    { "policy" => flush_policy_name, "stores" => per_store }
  end
end
|
|
222
|
+
|
|
223
|
+
private
|
|
224
|
+
|
|
225
|
+
# ── Retention ────────────────────────────────────────────────────────
|
|
226
|
+
|
|
227
|
+
# Applies one store's retention policy: picks eligible sealed segments,
# deletes each one (never the currently live segment path) and returns the
# receipts of the deletions. Returns [] when the store has no policy.
def purge_store!(store)
  policy = @retention_policies[store]
  return [] unless policy

  now = Process.clock_gettime(Process::CLOCK_REALTIME)
  live_path = @segments[store]&.dig(:path)
  candidates = select_for_purge(sealed_segment_paths(store), policy, now)

  receipts = []
  candidates.each do |segment_path|
    next if segment_path == live_path

    receipt = delete_segment_with_receipt!(segment_path, policy, now)
    receipts << receipt unless receipt.nil?
  end
  receipts
end
|
|
240
|
+
|
|
241
|
+
# Sorted paths of a store's segment files that have a manifest beside them.
def sealed_segment_paths(store)
  pattern = File.join(@root_dir, "wal", "store=#{store}", "**", "segment-*.wal")
  sealed = Dir[pattern].select do |candidate|
    !candidate.end_with?(MANIFEST_SUFFIX, PURGED_SUFFIX) &&
      File.exist?(candidate + MANIFEST_SUFFIX)
  end
  sealed.sort
end
|
|
247
|
+
|
|
248
|
+
# Picks which sealed segment paths a retention policy allows to be purged.
#
# +paths+  — sealed segment paths; for :ephemeral the last entry is the one
#            that is kept, so callers pass them in ascending sorted order.
# +policy+ — Hash with :strategy and, for :rolling_window, :duration (seconds).
# +now+    — current wall-clock time as Float unix seconds.
#
# Returns an Array of paths (possibly empty). :permanent and unrecognised
# strategies select nothing.
def select_for_purge(paths, policy, now)
  case policy[:strategy]
  when :rolling_window
    # A rolling window without a duration is a misconfiguration. nil.to_f is
    # 0.0, which would make every sealed segment look expired, so purge nothing.
    return [] if policy[:duration].nil?

    cutoff = now - policy[:duration].to_f
    paths.select do |segment_path|
      newest = begin
        JSON.parse(File.read(segment_path + MANIFEST_SUFFIX))["max_timestamp"]
      rescue StandardError
        nil # unreadable or malformed manifest: keep the segment
      end
      # Segments whose manifest records no max_timestamp are never selected.
      newest && newest < cutoff
    end
  when :ephemeral
    # Everything except the final (newest) path; [] for zero or one path.
    paths[0...-1]
  else
    []
  end
end
|
|
266
|
+
|
|
267
|
+
# Writes a purge receipt beside the segment (the manifest's fields plus purge
# details), then removes the segment file and its manifest. Returns the receipt.
def delete_segment_with_receipt!(path, policy, now)
  manifest_path = path + MANIFEST_SUFFIX
  manifest =
    if File.exist?(manifest_path)
      begin
        JSON.parse(File.read(manifest_path))
      rescue StandardError
        {} # unreadable manifest: the receipt carries only the purge fields
      end
    else
      {}
    end

  purge_fields = {
    "purged_at" => now,
    "purge_strategy" => policy[:strategy].to_s,
    "purge_duration" => policy[:duration],
    "segment_path" => path,
    "reason" => purge_reason(policy, manifest, now)
  }
  receipt = manifest.merge(purge_fields)

  # The receipt is written before anything is deleted.
  File.write(path + PURGED_SUFFIX, JSON.generate(receipt))
  [path, manifest_path].each { |victim| FileUtils.rm_f(victim) }
  receipt
end
|
|
286
|
+
|
|
287
|
+
# Builds the human-readable "reason" string stored in a purge receipt.
# Missing manifest fields are reported as "unknown".
def purge_reason(policy, manifest, now)
  strategy = policy[:strategy].to_sym
  label = "segment #{manifest["segment_id"] || "unknown"} (store=#{manifest["store"] || "unknown"})"

  if strategy == :rolling_window
    # Seconds between now and the segment's newest fact (0 when unrecorded).
    age = now - (manifest["max_timestamp"] || now)
    "rolling_window: #{label} max_timestamp #{age.round(1)}s older than retention window of #{policy[:duration]}s"
  elsif strategy == :ephemeral
    "ephemeral: #{label} superseded by newer sealed segment"
  else
    "#{policy[:strategy]}: #{label} purged by policy"
  end
end
|
|
300
|
+
|
|
301
|
+
# ── Flush policy ─────────────────────────────────────────────────────
|
|
302
|
+
|
|
303
|
+
# Applies the configured flush policy to a segment after a write.
# :on_write flushes every time; a Hash with :every_n flushes once that many
# facts have accumulated since the last flush; anything else (including
# :batch) adds no flush here.
def apply_flush_policy(seg)
  policy = @flush_policy
  if policy == :on_write
    seg[:codec].flush(seg[:file])
    seg[:file].flush
  elsif policy.is_a?(Hash)
    threshold = policy[:every_n]
    return unless threshold

    pending = (seg[:facts_since_flush] || 0) + 1
    seg[:facts_since_flush] = pending
    return if pending < threshold

    seg[:codec].flush(seg[:file])
    seg[:file].flush
    # Reset only after both flushes have gone through.
    seg[:facts_since_flush] = 0
  end
end
|
|
321
|
+
|
|
322
|
+
# String form of the configured flush policy, as reported by durability_snapshot.
def flush_policy_name
  policy = @flush_policy
  return "every_n:#{policy[:every_n]}" if policy.is_a?(Hash)

  # :batch and :on_write stringify to "batch" / "on_write"; so does anything else.
  policy.to_s
end
|
|
330
|
+
|
|
331
|
+
# ── Codec resolution ─────────────────────────────────────────────────
|
|
332
|
+
|
|
333
|
+
# Resolves the codec name (Symbol) for a store from the configured codec spec:
# a single Symbol/String applies to every store, a Hash is looked up by the
# store name as Symbol then as String, and anything else yields the default.
def codec_name_for(store)
  spec = @codec_spec
  if spec.is_a?(Symbol) || spec.is_a?(String)
    spec.to_sym
  elsif spec.is_a?(Hash)
    chosen = spec[store.to_sym] || spec[store.to_s] || DEFAULT_CODEC
    chosen.to_sym
  else
    DEFAULT_CODEC
  end
end
|
|
341
|
+
|
|
342
|
+
# ── Segment lifecycle ─────────────────────────────────────────────────
|
|
343
|
+
|
|
344
|
+
# Returns the live segment for a store, opening (or resuming) one on first
# use and rotating it first when rotate_if_needed! decides to.
def active_segment_for(store)
  @segments[store] = open_or_resume_segment(store) unless @segments[store]
  rotate_if_needed!(store)
  @segments[store]
end
|
|
349
|
+
|
|
350
|
+
# Seals the store's live segment and opens a new one when the time bucket has
# changed or the segment file has reached @max_bytes; otherwise does nothing.
def rotate_if_needed!(store)
  segment = @segments[store]
  bytes_on_disk = File.size?(segment[:path]) || 0
  return if current_bucket == segment[:bucket] && bytes_on_disk < @max_bytes

  seal_segment!(segment)
  @segments[store] = open_new_segment(store)
end
|
|
358
|
+
|
|
359
|
+
# Resume a live (unsealed) json_crc32 segment if one exists in the
|
|
360
|
+
# current bucket. compact_delta segments are NOT resumable — any live
|
|
361
|
+
# segment is sealed and a fresh one is started.
|
|
362
|
+
def open_or_resume_segment(store)
  bucket = current_bucket
  bucket_dir = store_bucket_dir(store, bucket)
  FileUtils.mkdir_p(bucket_dir)

  # Live segments are the .wal files in this bucket without a manifest beside them.
  unsealed = Dir[File.join(bucket_dir, "segment-*.wal")].reject do |candidate|
    candidate.end_with?(MANIFEST_SUFFIX) || File.exist?(candidate + MANIFEST_SUFFIX)
  end
  newest_live = unsealed.max_by { |candidate| segment_number_from_path(candidate) }
  codec = codec_name_for(store)

  return resume_segment(newest_live, store, bucket, codec) if newest_live && codec == :json_crc32

  # Any other codec: seal what was left behind and start a fresh segment.
  seal_orphaned_live!(newest_live, codec_name: codec) if newest_live
  open_new_segment_in(store, bucket, codec)
end
|
|
381
|
+
|
|
382
|
+
# Reopens an existing segment file for appending and rebuilds its in-memory
# state. The fact count comes from counting the frames already in the file;
# min/max timestamps start out nil.
def resume_segment(path, store, bucket, codec_name)
  io = File.open(path, "ab")
  io.sync = true
  codec = Codecs.build(codec_name)

  {
    path: path,
    file: io,
    store: store,
    bucket: bucket,
    number: segment_number_from_path(path),
    codec_name: codec_name,
    codec: codec,
    count: count_frames(path),
    min_ts: nil,
    max_ts: nil
  }
end
|
|
390
|
+
|
|
391
|
+
# Opens a fresh segment for the store in the current time bucket, using the
# codec configured for that store.
def open_new_segment(store)
  bucket = current_bucket
  codec = codec_name_for(store)
  open_new_segment_in(store, bucket, codec)
end
|
|
394
|
+
|
|
395
|
+
# Creates the next-numbered segment file in the store's bucket directory,
# lets the codec write its segment start, and returns the new segment state.
def open_new_segment_in(store, bucket, codec_name)
  bucket_dir = store_bucket_dir(store, bucket)
  FileUtils.mkdir_p(bucket_dir)

  # One past the highest segment number already present (1 for an empty dir).
  number = (segment_numbers_in(bucket_dir).max || 0) + 1
  segment_path = segment_path_for(store, bucket, number)

  io = File.open(segment_path, "ab")
  io.sync = true
  codec = Codecs.build(codec_name)
  codec.start_segment(io, store: store)

  {
    path: segment_path,
    file: io,
    store: store,
    bucket: bucket,
    number: number,
    codec_name: codec_name,
    codec: codec,
    count: 0,
    min_ts: nil,
    max_ts: nil
  }
end
|
|
408
|
+
|
|
409
|
+
# Seal a live segment that belongs to a previous session or a codec
|
|
410
|
+
# that cannot be resumed (compact_delta). No manifest metadata is
|
|
411
|
+
# available so we only write a minimal one.
|
|
412
|
+
def seal_orphaned_live!(path, codec_name: DEFAULT_CODEC)
  # Open for append, flush and close, exactly as the live writer would.
  File.open(path, "ab") { |io| io.flush }

  byte_size = File.size(path)
  if byte_size == 0
    # Nothing was ever written: drop the file instead of sealing it.
    FileUtils.rm_f(path)
    return
  end

  # Store and bucket are recovered from the "store=…/date=…" path components.
  store_name = path.split("store=").last.split("/").first
  bucket = path.split("date=").last.split("/").first

  write_manifest(path,
                 codec: codec_name.to_s,
                 fact_count: count_frames_for_codec(path, codec_name),
                 byte_size: byte_size,
                 min_ts: nil,
                 max_ts: nil,
                 store: store_name,
                 bucket: bucket,
                 number: segment_number_from_path(path))
end
|
|
428
|
+
|
|
429
|
+
# Flushes and closes a live segment, then writes its manifest. A segment that
# received no facts is deleted instead. A nil segment is ignored.
def seal_segment!(seg)
  return unless seg

  io = seg[:file]
  seg[:codec].flush(io)
  io.flush
  io.close

  segment_path = seg[:path]
  if seg[:count] == 0
    FileUtils.rm_f(segment_path)
    return
  end

  write_manifest(segment_path,
                 codec: seg[:codec].name,
                 fact_count: seg[:count],
                 byte_size: File.size(segment_path),
                 min_ts: seg[:min_ts],
                 max_ts: seg[:max_ts],
                 store: seg[:store],
                 bucket: seg[:bucket],
                 number: seg[:number])
end
|
|
448
|
+
|
|
449
|
+
# Writes the manifest JSON for a sealed segment next to the segment file.
# The content goes to a ".tmp" sibling first and is then moved into place.
def write_manifest(path, codec:, fact_count:, byte_size:, min_ts:, max_ts:,
                   store:, bucket:, number:)
  final_path = path + MANIFEST_SUFFIX
  staging_path = final_path + ".tmp"

  manifest = {
    segment_id: segment_id(store, bucket, number),
    store: store,
    codec: codec,
    fact_count: fact_count,
    byte_size: byte_size,
    min_timestamp: min_ts,
    max_timestamp: max_ts,
    sealed: true,
    sealed_at: Process.clock_gettime(Process::CLOCK_REALTIME)
  }

  File.write(staging_path, JSON.generate(manifest))
  FileUtils.mv(staging_path, final_path)
end
|
|
466
|
+
|
|
467
|
+
# ── Replay ────────────────────────────────────────────────────────────
|
|
468
|
+
|
|
469
|
+
# Lists the segment files a replay should read, in sorted path order.
#
# +store+ — store name (String) to restrict to, or nil for every store.
# +since+ — when set, sealed segments whose manifest max_timestamp is below
#           it are skipped.
# +as_of+ — when set, sealed segments whose manifest min_timestamp is above
#           it are skipped.
#
# Segments without a manifest are always included. A manifest that cannot be
# read or parsed no longer aborts the listing: the segment is included, the
# same as when no bounds are known.
def segment_paths_for(store:, since:, as_of:)
  store_glob = store ? "store=#{store}" : "store=*"
  candidates = Dir[File.join(@root_dir, "wal", store_glob, "date=*", "segment-*.wal")]
               .reject { |segment_path| segment_path.end_with?(MANIFEST_SUFFIX) }
               .sort
  return candidates unless since || as_of

  candidates.select do |segment_path|
    manifest_path = segment_path + MANIFEST_SUFFIX
    next true unless File.exist?(manifest_path) # unsealed: no bounds recorded

    manifest = begin
      JSON.parse(File.read(manifest_path))
    rescue StandardError
      nil
    end
    # Unknown bounds: err on the side of reading the segment.
    next true unless manifest.is_a?(Hash)

    newest = manifest["max_timestamp"]
    oldest = manifest["min_timestamp"]
    next false if since && newest && newest < since
    next false if as_of && oldest && oldest > as_of

    true
  end
end
|
|
488
|
+
|
|
489
|
+
# Decodes every fact in one segment file using the codec named in its
# manifest. Any decoding error, or an empty result where the manifest
# records facts, writes a quarantine receipt; errors yield [].
def read_segment(path)
  decoder = Codecs.build(manifest_codec_for(path))
  decoded = File.open(path, "rb") { |stream| decoder.decode(stream) }

  if decoded.empty? && segment_expects_facts?(path)
    write_quarantine_receipt(path, RuntimeError.new("segment not empty but decoded 0 facts"))
  end

  decoded
rescue StandardError => error
  write_quarantine_receipt(path, error)
  []
end
|
|
501
|
+
|
|
502
|
+
# Codec name (Symbol) recorded in a segment's manifest. Falls back to
# DEFAULT_CODEC when there is no manifest, it has no "codec" entry, or it
# cannot be read.
def manifest_codec_for(path)
  manifest_path = path + MANIFEST_SUFFIX
  if File.exist?(manifest_path)
    recorded = JSON.parse(File.read(manifest_path))["codec"]
    (recorded || DEFAULT_CODEC.to_s).to_sym
  else
    DEFAULT_CODEC
  end
rescue StandardError
  DEFAULT_CODEC
end
|
|
509
|
+
|
|
510
|
+
# ── Path helpers ──────────────────────────────────────────────────────
|
|
511
|
+
|
|
512
|
+
# Directory holding one store's segments for one time bucket.
def store_bucket_dir(store, bucket)
  components = [@root_dir, "wal", "store=#{store}", "date=#{bucket}"]
  File.join(*components)
end
|
|
515
|
+
|
|
516
|
+
# Full path of a segment file; the number is left-padded with zeros to six digits.
def segment_path_for(store, bucket, number)
  padded = number.to_s.rjust(6, "0")
  File.join(store_bucket_dir(store, bucket), "segment-#{padded}.wal")
end
|
|
519
|
+
|
|
520
|
+
# Identifier of a segment: "<store>/<bucket>/<six-digit number>".
def segment_id(store, bucket, number)
  padded = number.to_s.rjust(6, "0")
  "#{store}/#{bucket}/#{padded}"
end
|
|
523
|
+
|
|
524
|
+
# Integer parsed from the text after the last "-" of the file name
# (with ".wal" removed); 0 when that text is not numeric.
def segment_number_from_path(path)
  stem = File.basename(path, ".wal")
  stem.split("-").last.to_i
end
|
|
527
|
+
|
|
528
|
+
# Every segment file under the WAL root, across all stores and buckets.
def all_segment_paths
  pattern = File.join(@root_dir, "wal", "store=*", "date=*", "segment-*.wal")
  Dir[pattern].select { |candidate| !candidate.end_with?(MANIFEST_SUFFIX) }
end
|
|
532
|
+
|
|
533
|
+
# Segment numbers of the segment files found directly inside +dir+.
def segment_numbers_in(dir)
  segment_files = Dir[File.join(dir, "segment-*.wal")].reject do |candidate|
    candidate.end_with?(MANIFEST_SUFFIX)
  end
  segment_files.map { |candidate| segment_number_from_path(candidate) }
end
|
|
538
|
+
|
|
539
|
+
# Name of the time bucket for "now" in UTC: hourly for :hour, the fixed name
# "flat" for :none, and daily for any other setting.
def current_bucket
  return "flat" if @time_bucket == :none

  pattern = @time_bucket == :hour ? "%Y-%m-%dT%H" : "%Y-%m-%d"
  Time.now.utc.strftime(pattern)
end
|
|
546
|
+
|
|
547
|
+
# Counts the frames read_frame can read from the file. Returns 0 when the
# file is missing or any error is raised while reading.
def count_frames(path)
  return 0 unless File.exist?(path)

  File.open(path, "rb") do |io|
    frames = 0
    frames += 1 while read_frame(io)
    frames
  end
rescue StandardError
  0
end
|
|
555
|
+
|
|
556
|
+
# For compact_delta the first frame is a header, subsequent frames are batches.
|
|
557
|
+
# Each batch carries a count prefix — sum those instead of counting raw frames.
|
|
558
|
+
def count_frames_for_codec(path, codec_name)
  codec = codec_name.to_sym
  # Only the compact_delta codecs need batch-aware counting.
  return count_frames(path) unless codec == :compact_delta_zlib || codec == :compact_delta
  return 0 unless File.exist?(path)

  File.open(path, "rb") do |io|
    read_frame(io) # skip header
    total = 0
    while (batch = read_frame(io))
      # The first four bytes are read as a big-endian fact count. unpack1
      # returns nil when the body has fewer than four bytes; previously that
      # nil made the addition raise and the whole count fall back to 0.
      # A batch whose prefix cannot be read now contributes 0 instead.
      declared = begin
        batch[0, 4].unpack1("N")
      rescue StandardError
        nil
      end
      total += declared.to_i
    end
    total
  end
rescue StandardError
  0
end
|
|
573
|
+
|
|
574
|
+
# Records that a segment could not be decoded: writes a quarantine receipt
# (manifest fields plus error details) next to the segment file.
def write_quarantine_receipt(path, error)
  manifest_path = path + MANIFEST_SUFFIX
  manifest =
    if File.exist?(manifest_path)
      begin
        JSON.parse(File.read(manifest_path))
      rescue StandardError
        {}
      end
    else
      {}
    end

  error_fields = {
    "quarantined_at" => Process.clock_gettime(Process::CLOCK_REALTIME),
    "error_class" => error.class.to_s,
    "error_message" => error.message.to_s[0, 500], # capped at 500 characters
    "segment_path" => path
  }
  File.write(path + QUARANTINE_SUFFIX, JSON.generate(manifest.merge(error_fields)))
rescue StandardError
  nil # never raise from error-handler path
end
|
|
587
|
+
|
|
588
|
+
# True when the segment's manifest records a positive fact_count. False when
# there is no manifest or it cannot be read.
def segment_expects_facts?(path)
  manifest_path = path + MANIFEST_SUFFIX
  return false unless File.exist?(manifest_path)

  recorded = JSON.parse(File.read(manifest_path))["fact_count"] || 0
  recorded.to_i > 0
rescue StandardError
  false
end
|
|
595
|
+
|
|
596
|
+
# On startup, seal any live segments that were left open by a previous crash.
|
|
597
|
+
# Codec is detected by peeking at the first frame rather than relying on the
|
|
598
|
+
# current codec config (the store may have been reconfigured between sessions).
|
|
599
|
+
def recover_orphaned_segments!
  bucket_dirs = Dir[File.join(@root_dir, "wal", "store=*", "date=*")]
  bucket_dirs.each do |bucket_dir|
    Dir[File.join(bucket_dir, "segment-*.wal")].each do |candidate|
      next if candidate.end_with?(MANIFEST_SUFFIX, PURGED_SUFFIX, QUARANTINE_SUFFIX)
      next if File.exist?(candidate + MANIFEST_SUFFIX) # already sealed

      seal_orphaned_live!(candidate, codec_name: detect_segment_codec(candidate))
    end
  end
end
|
|
607
|
+
|
|
608
|
+
# Peek at the first frame of a segment file and determine its codec.
|
|
609
|
+
def detect_segment_codec(path)
  first_frame = File.open(path, "rb") { |io| read_frame(io) }
  return DEFAULT_CODEC if first_frame.nil? || !(first_frame.length > 0)

  # A first frame that unpacks to a Hash with a "fields" key is treated as a
  # compact_delta header; everything else is assumed to be the default codec.
  header = MessagePack.unpack(first_frame)
  if header.is_a?(Hash) && header.key?("fields")
    :compact_delta_zlib
  else
    DEFAULT_CODEC
  end
rescue StandardError
  DEFAULT_CODEC
end
|
|
620
|
+
|
|
621
|
+
# ── Storage metadata ──────────────────────────────────────────────────
|
|
622
|
+
|
|
623
|
+
# Assembles the storage report: schema version, generation time and one
# stats entry per store, keyed by store name in sorted order.
def build_storage_view(store:, include_segments:)
  store_names = store ? [store] : manifest_store_names
  generated_at = Process.clock_gettime(Process::CLOCK_REALTIME)

  per_store = store_names.sort.each_with_object({}) do |name, view|
    view[name] = build_store_stats(name, include_segments: include_segments)
  end

  {
    "schema_version" => SCHEMA_VERSION,
    "generated_at" => generated_at,
    "stores" => per_store
  }
end
|
|
632
|
+
|
|
633
|
+
# Aggregates one store's statistics from its sealed-segment manifests on disk
# plus its live segment (if any). With include_segments, a "segments" array
# with one entry per sealed segment and the live one is added.
def build_store_stats(store, include_segments:)
  store_root = File.join(@root_dir, "wal", "store=#{store}")

  manifest_paths = Dir[File.join(store_root, "**", "segment-*.wal#{MANIFEST_SUFFIX}")].sort
  sealed = manifest_paths.map do |manifest_path|
    begin
      JSON.parse(File.read(manifest_path))
    rescue StandardError
      nil # unreadable manifests are left out of the totals
    end
  end.compact

  live = @segments[store]
  live_bytes = live ? (File.size?(live[:path]) || 0) : 0

  fact_total = sealed.sum { |manifest| manifest["fact_count"].to_i }
  fact_total += live[:count] if live
  byte_total = sealed.sum { |manifest| manifest["byte_size"].to_i } + live_bytes

  codec_names = sealed.map { |manifest| manifest["codec"] }
  codec_names << live[:codec_name].to_s if live
  codec_names = codec_names.uniq.compact.sort

  oldest = sealed.map { |manifest| manifest["min_timestamp"] }
  newest = sealed.map { |manifest| manifest["max_timestamp"] }
  if live
    oldest << live[:min_ts]
    newest << live[:max_ts]
  end

  purged = Dir[File.join(store_root, "**", "*#{PURGED_SUFFIX}")]
  quarantined = Dir[File.join(store_root, "**", "*#{QUARANTINE_SUFFIX}")]
  live_count = live ? 1 : 0

  stats = {
    "segment_count" => sealed.size + live_count,
    "sealed_count" => sealed.size,
    "live_count" => live_count,
    "codecs" => codec_names,
    "byte_size" => byte_total,
    "fact_count" => fact_total,
    "min_timestamp" => oldest.compact.min,
    "max_timestamp" => newest.compact.max,
    "purge_receipt_count" => purged.size,
    "quarantine_receipt_count" => quarantined.size
  }
  return stats unless include_segments

  entries = sealed.map do |manifest|
    manifest.slice("segment_id", "codec", "fact_count", "byte_size",
                   "min_timestamp", "max_timestamp", "sealed", "sealed_at")
  end
  if live
    entries << {
      "segment_id" => segment_id(live[:store], live[:bucket], live[:number]),
      "codec" => live[:codec_name].to_s,
      "fact_count" => live[:count],
      "byte_size" => live_bytes,
      "min_timestamp" => live[:min_ts],
      "max_timestamp" => live[:max_ts],
      "sealed" => false,
      "sealed_at" => nil
    }
  end
  stats["segments"] = entries
  stats
end
|
|
691
|
+
|
|
692
|
+
# Store names known either from directories on disk or from live segments,
# without duplicates (on-disk names first).
def manifest_store_names
  stored_store_names | @segments.keys
end
|
|
697
|
+
end
|
|
698
|
+
end
|
|
699
|
+
end
|