igniter-ledger 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +481 -0
- data/examples/intelligent_ledger/availability_boundary_ledger.rb +1190 -0
- data/examples/intelligent_ledger/availability_deriver.rb +150 -0
- data/examples/intelligent_ledger/availability_ledger.rb +197 -0
- data/examples/intelligent_ledger/ledger_boundary.rb +180 -0
- data/examples/store_poc.rb +45 -0
- data/exe/igniter-ledger-server +111 -0
- data/exe/igniter-store-server +6 -0
- data/ext/igniter_store_native/Cargo.toml +28 -0
- data/ext/igniter_store_native/extconf.rb +6 -0
- data/ext/igniter_store_native/src/fact.rs +303 -0
- data/ext/igniter_store_native/src/fact_log.rs +180 -0
- data/ext/igniter_store_native/src/file_backend.rs +91 -0
- data/ext/igniter_store_native/src/lib.rs +55 -0
- data/lib/igniter/ledger.rb +7 -0
- data/lib/igniter/store/access_path.rb +84 -0
- data/lib/igniter/store/change_event.rb +65 -0
- data/lib/igniter/store/changefeed_buffer.rb +585 -0
- data/lib/igniter/store/codecs.rb +253 -0
- data/lib/igniter/store/contractable_receipt_sink.rb +172 -0
- data/lib/igniter/store/fact.rb +121 -0
- data/lib/igniter/store/fact_log.rb +103 -0
- data/lib/igniter/store/file_backend.rb +269 -0
- data/lib/igniter/store/http_adapter.rb +413 -0
- data/lib/igniter/store/igniter_store.rb +838 -0
- data/lib/igniter/store/mcp_adapter.rb +403 -0
- data/lib/igniter/store/native.rb +80 -0
- data/lib/igniter/store/network_backend.rb +159 -0
- data/lib/igniter/store/protocol/handlers/access_path_handler.rb +38 -0
- data/lib/igniter/store/protocol/handlers/command_handler.rb +59 -0
- data/lib/igniter/store/protocol/handlers/derivation_handler.rb +27 -0
- data/lib/igniter/store/protocol/handlers/effect_handler.rb +65 -0
- data/lib/igniter/store/protocol/handlers/history_handler.rb +24 -0
- data/lib/igniter/store/protocol/handlers/projection_handler.rb +41 -0
- data/lib/igniter/store/protocol/handlers/relation_handler.rb +43 -0
- data/lib/igniter/store/protocol/handlers/store_handler.rb +24 -0
- data/lib/igniter/store/protocol/handlers/subscription_handler.rb +24 -0
- data/lib/igniter/store/protocol/interpreter.rb +447 -0
- data/lib/igniter/store/protocol/receipt.rb +96 -0
- data/lib/igniter/store/protocol/sync_profile.rb +53 -0
- data/lib/igniter/store/protocol/wire_envelope.rb +214 -0
- data/lib/igniter/store/protocol.rb +27 -0
- data/lib/igniter/store/read_cache.rb +163 -0
- data/lib/igniter/store/schema_graph.rb +248 -0
- data/lib/igniter/store/segmented_file_backend.rb +699 -0
- data/lib/igniter/store/server_config.rb +55 -0
- data/lib/igniter/store/server_logger.rb +64 -0
- data/lib/igniter/store/server_metrics.rb +222 -0
- data/lib/igniter/store/store_server.rb +597 -0
- data/lib/igniter/store/subscription_registry.rb +73 -0
- data/lib/igniter/store/tbackend_adapter_descriptor.rb +307 -0
- data/lib/igniter/store/tcp_adapter.rb +127 -0
- data/lib/igniter/store/wire_protocol.rb +42 -0
- data/lib/igniter/store.rb +64 -0
- data/lib/igniter-ledger.rb +4 -0
- data/lib/igniter-store.rb +5 -0
- metadata +212 -0
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
5
|
+
module Igniter
|
|
6
|
+
module Store
|
|
7
|
+
# Bounded in-memory Changefeed buffer with async per-subscriber fan-out,
|
|
8
|
+
# delivery policies, and production diagnostics.
|
|
9
|
+
#
|
|
10
|
+
# Receives committed facts via +emit+, builds ChangeEvent objects with
|
|
11
|
+
# monotonic sequence cursors, retains recent events in a bounded ring, and
|
|
12
|
+
# fans out to registered subscriber handlers via per-subscriber bounded
|
|
13
|
+
# queues and worker threads so that slow subscribers never stall +emit+.
|
|
14
|
+
#
|
|
15
|
+
# Delivery semantics: async best-effort push.
|
|
16
|
+
# - Fan-out enqueues to a per-subscriber SubscriberQueue; emit returns quickly.
|
|
17
|
+
# - Each subscriber has one worker thread draining its queue.
|
|
18
|
+
# - When a subscriber queue is full, overflow policy determines which event
|
|
19
|
+
# is dropped (see +overflow:+ option).
|
|
20
|
+
# - A handler that raises is removed, counted as failed, and its worker exits.
|
|
21
|
+
# - When the ring is full the oldest retained event is dropped and
|
|
22
|
+
# +dropped_total+ is incremented.
|
|
23
|
+
# - No durable checkpoints in this v0 slice.
|
|
24
|
+
#
|
|
25
|
+
# Overflow policies (subscriber queue full):
|
|
26
|
+
# - +:drop_oldest+ — remove the oldest queued event; add the incoming event.
|
|
27
|
+
# - +:drop_newest+ — discard the incoming event; queue unchanged.
|
|
28
|
+
#
|
|
29
|
+
# Close policies (Subscription#close):
|
|
30
|
+
# - +:drain+ — deliver all queued events before stopping the worker.
|
|
31
|
+
# - +:discard+ — clear the queue immediately; worker exits after current event.
|
|
32
|
+
#
|
|
33
|
+
# Alert thresholds (optional, checked at each #snapshot call):
|
|
34
|
+
# - +:failed_total+ — fires :changefeed_subscriber_failures
|
|
35
|
+
# - +:overflow_dropped_total+ — fires :changefeed_overflow_drops
|
|
36
|
+
# - +:total_queued+ — fires :changefeed_queue_pressure (aggregate)
|
|
37
|
+
# - +:queue_pressure_ratio+ — fires :changefeed_queue_pressure (per-subscriber)
|
|
38
|
+
#
|
|
39
|
+
# Diagnostics ring records bounded lifecycle/failure events:
|
|
40
|
+
# - :subscriber_subscribed / :subscriber_closed / :subscriber_failed
|
|
41
|
+
# - :subscriber_overflow
|
|
42
|
+
#
|
|
43
|
+
# Ordering policy:
|
|
44
|
+
# - Sequences are assigned in emit-call order (monotonically increasing).
|
|
45
|
+
# - IgniterStore emits the source fact BEFORE triggering derivations/scatters,
|
|
46
|
+
# so subscribers always see cause before effects within their queue.
|
|
47
|
+
#
|
|
48
|
+
# Replay cursor semantics (see #replay):
|
|
49
|
+
# - nil cursor → all retained events from oldest retained sequence.
|
|
50
|
+
# - {sequence: N} → events with sequence > N.
|
|
51
|
+
# - N < oldest-1 → :cursor_too_old (gap due to ring overflow).
|
|
52
|
+
# - N >= newest → empty :ok (caller is already at the head).
|
|
53
|
+
#
|
|
54
|
+
# Usage:
|
|
55
|
+
# buf = ChangefeedBuffer.new(max_size: 1_000)
|
|
56
|
+
# handle = buf.subscribe(stores: [:tasks]) { |event| deliver(event) }
|
|
57
|
+
# buf.emit(fact) # enqueues to matching subscriber queues; returns quickly
|
|
58
|
+
# handle.close # respects close_policy (drain or discard), joins worker
|
|
59
|
+
class ChangefeedBuffer
|
|
60
|
+
# Default capacity of the retained event ring.
DEFAULT_MAX_SIZE = 1_000
# Default capacity of each subscriber's delivery queue.
DEFAULT_SUBSCRIBER_QUEUE_SIZE = 100
# Default policy applied when a subscriber queue is full.
DEFAULT_OVERFLOW = :drop_oldest
# Default policy applied when a subscription is closed.
DEFAULT_CLOSE_POLICY = :drain
# Default capacity of the diagnostics ring.
DEFAULT_DIAGNOSTIC_RING_SIZE = 100

# Values accepted for the +overflow:+ option.
VALID_OVERFLOW_POLICIES = [:drop_oldest, :drop_newest].freeze
# Values accepted for the +close_policy:+ option.
VALID_CLOSE_POLICIES = [:drain, :discard].freeze
# Keys accepted in the +alert_thresholds:+ option.
VALID_THRESHOLD_KEYS = [
  :total_queued,
  :overflow_dropped_total,
  :failed_total,
  :queue_pressure_ratio
].freeze
|
|
69
|
+
|
|
70
|
+
# Bounded FIFO queue for one subscriber's async delivery pipeline.
|
|
71
|
+
#
|
|
72
|
+
# +push+ is non-blocking and returns +true+ when an overflow drop occurs.
|
|
73
|
+
# +pop+ blocks until an event is available or the queue is closed.
|
|
74
|
+
# Once closed, +pop+ drains remaining items (unless discard was requested)
|
|
75
|
+
# then returns +nil+.
|
|
76
|
+
class SubscriberQueue
  # +max_size+ caps the number of queued events; +overflow+ selects which
  # event is sacrificed when a push arrives while the queue is full.
  def initialize(max_size:, overflow: :drop_oldest)
    @closed = false
    @items = []
    @max_size = max_size
    @overflow = overflow
    @mu = Mutex.new
    @cond = ConditionVariable.new
  end

  # Non-blocking enqueue. Returns +true+ when the queue was full (so an
  # event was dropped) and +false+ otherwise. A closed queue ignores the
  # event and also returns +false+.
  def push(event)
    @mu.synchronize do
      if @closed
        false
      else
        full = @items.size >= @max_size
        # A full queue only admits the event under :drop_oldest, after
        # evicting its head; any other policy discards the newcomer.
        if !full || @overflow == :drop_oldest
          @items.shift if full
          @items.push(event)
          @cond.signal
        end
        full
      end
    end
  end

  # Waits until an event is queued or the queue has been closed, then
  # returns the head event. Returns nil once closed and empty.
  def pop
    @mu.synchronize do
      until @closed || !@items.empty?
        @cond.wait(@mu)
      end
      @items.shift
    end
  end

  # Marks the queue closed and wakes every waiter. With +discard: true+
  # the pending events are thrown away first.
  def close(discard: false)
    @mu.synchronize do
      @items.clear if discard
      @closed = true
      @cond.broadcast
    end
  end

  # Number of events currently queued.
  def size
    @mu.synchronize { @items.length }
  end
end
|
|
128
|
+
|
|
129
|
+
# Bounded ring buffer for structured diagnostic entries.
|
|
130
|
+
# All push/snapshot operations are thread-safe.
|
|
131
|
+
# Oldest entries are evicted when +max_size+ is exceeded;
|
|
132
|
+
# +dropped_diagnostics_total+ counts evictions.
|
|
133
|
+
class DiagnosticRing
  # +max_size+ is the maximum number of entries retained at once.
  def initialize(max_size)
    @mu = Mutex.new
    @max_size = max_size
    @entries = []
    @total = 0
    @dropped = 0
  end

  # Appends +entry+, evicting the oldest retained entry first when the
  # ring is already at capacity.
  def push(entry)
    @mu.synchronize do
      @total += 1
      unless @entries.size < @max_size
        @entries.shift
        @dropped += 1
      end
      @entries.push(entry)
    end
  end

  # Returns a Hash with:
  #   recent                    — copy of the retained entries, oldest first
  #   recent_count              — number of entries ever pushed (not the
  #                               size of +recent+)
  #   dropped_diagnostics_total — number of entries evicted so far
  def snapshot
    @mu.synchronize do
      retained = @entries.dup
      { recent: retained, recent_count: @total, dropped_diagnostics_total: @dropped }
    end
  end
end
|
|
163
|
+
|
|
164
|
+
# Returned by #subscribe. Call #close to stop delivery and release resources.
|
|
165
|
+
# Close behavior is governed by the buffer's +close_policy+:
|
|
166
|
+
# - +:drain+ — pending events are delivered before worker stops.
|
|
167
|
+
# - +:discard+ — pending events are dropped; worker stops after current event.
|
|
168
|
+
# Calling #close is idempotent.
|
|
169
|
+
class Subscription
  # +record+ is the subscriber's record; +buffer+ is the owning buffer.
  def initialize(record, buffer)
    @buffer = buffer
    @record = record
  end

  # Asks the buffer to remove this subscriber, then waits up to two
  # seconds for its worker thread to finish.
  def close
    # remove_record is invoked via __send__, which bypasses method visibility.
    @buffer.__send__(:remove_record, @record)
    begin
      # Any StandardError raised while joining (for example when the
      # worker thread is the calling thread) is deliberately swallowed.
      @record.thread&.join(2)
    rescue StandardError
      nil
    end
  end
end
|
|
180
|
+
|
|
181
|
+
# Mutable per-subscriber bookkeeping, built with keyword arguments.
SubscriptionRecord = Struct.new(
  :id,                      # subscriber identifier
  :stores,                  # store names subscribed to ([] matches every store)
  :handler,                 # block invoked for each delivered event
  :queue,                   # the subscriber's delivery queue
  :thread,                  # worker thread draining the queue
  :overflow,                # overflow policy in effect for this subscriber
  :close_policy,            # close policy in effect for this subscriber
  :delivered_total,         # events handled without raising
  :overflow_dropped_total,  # events dropped because the queue was full
  :failed_total,            # handler failures
  :status,                  # lifecycle status (:active, :failed)
  keyword_init: true
)
|
|
188
|
+
|
|
189
|
+
# Validates the options and sets up empty buffer state.
#
# Raises ArgumentError for an unknown overflow or close policy, unknown
# alert threshold keys, a non-positive or non-Integer size, or a
# +queue_pressure_ratio+ outside 0.0..1.0.
def initialize(max_size: DEFAULT_MAX_SIZE,
               subscriber_queue_size: DEFAULT_SUBSCRIBER_QUEUE_SIZE,
               overflow: DEFAULT_OVERFLOW,
               close_policy: DEFAULT_CLOSE_POLICY,
               diagnostic_ring_size: DEFAULT_DIAGNOSTIC_RING_SIZE,
               alert_thresholds: {})
  listing = ->(values) { values.map(&:inspect).join(", ") }

  unless VALID_OVERFLOW_POLICIES.include?(overflow)
    raise ArgumentError,
          "unknown overflow policy: #{overflow.inspect}. Valid: #{listing.call(VALID_OVERFLOW_POLICIES)}"
  end

  unless VALID_CLOSE_POLICIES.include?(close_policy)
    raise ArgumentError,
          "unknown close_policy: #{close_policy.inspect}. Valid: #{listing.call(VALID_CLOSE_POLICIES)}"
  end

  # Threshold keys may be given as strings or symbols; nil means "none".
  thresholds = (alert_thresholds || {}).transform_keys(&:to_sym)
  unrecognized = thresholds.keys - VALID_THRESHOLD_KEYS
  unless unrecognized.empty?
    raise ArgumentError,
          "unknown alert_threshold keys: #{listing.call(unrecognized)}. Valid: #{listing.call(VALID_THRESHOLD_KEYS)}"
  end

  # The three capacities share one rule and are checked in declaration order.
  sizes = {
    max_size: max_size,
    subscriber_queue_size: subscriber_queue_size,
    diagnostic_ring_size: diagnostic_ring_size
  }
  sizes.each do |option, value|
    next if value.is_a?(Integer) && value > 0

    raise ArgumentError, "#{option} must be a positive integer, got #{value.inspect}"
  end

  ratio = thresholds[:queue_pressure_ratio]
  if ratio
    in_range = ratio.is_a?(Numeric) && ratio >= 0.0 && ratio <= 1.0
    raise ArgumentError, "queue_pressure_ratio must be between 0.0 and 1.0, got #{ratio.inspect}" unless in_range
  end

  # Configuration.
  @max_size = max_size
  @subscriber_queue_size = subscriber_queue_size
  @overflow = overflow
  @close_policy = close_policy
  @alert_thresholds = thresholds

  # Runtime state.
  @diagnostics = DiagnosticRing.new(diagnostic_ring_size)
  @ring = []
  @records = []
  @mutex = Mutex.new

  # Counters.
  @sequence = 0
  @emitted_total = 0
  @delivered_total = 0
  @dropped_total = 0
  @overflow_dropped_total = 0
  @failed_total = 0
end
|
|
241
|
+
|
|
242
|
+
# Register a subscriber handler for one or more store names.
|
|
243
|
+
# +stores:+ — Array of store name symbols/strings, or [] for all stores (wildcard).
|
|
244
|
+
# Returns a Subscription handle; call handle.close to unsubscribe.
|
|
245
|
+
def subscribe(stores:, &handler)
  raise ArgumentError, "subscribe requires a block" unless handler

  queue = SubscriberQueue.new(max_size: @subscriber_queue_size, overflow: @overflow)
  record = SubscriptionRecord.new(
    id: SecureRandom.hex(8),
    stores: Array(stores).map(&:to_s),
    handler: handler,
    queue: queue,
    thread: nil,
    overflow: @overflow,
    close_policy: @close_policy,
    delivered_total: 0,
    overflow_dropped_total: 0,
    failed_total: 0,
    status: :active
  )

  # One worker per subscriber: it drains the queue until pop returns nil
  # (queue closed and empty) or the handler raises.
  worker = Thread.new do
    until (event = queue.pop).nil?
      begin
        handler.call(event)
        @mutex.synchronize do
          @delivered_total += 1
          record.delivered_total += 1
        end
      rescue StandardError => error
        failed_at = Process.clock_gettime(Process::CLOCK_REALTIME)
        @mutex.synchronize do
          @failed_total += 1
          record.failed_total += 1
          record.status = :failed
        end
        @diagnostics.push({
          type: :subscriber_failed,
          subscriber_id: record.id,
          stores: record.stores,
          error_class: error.class.name,
          message: error.message.to_s.slice(0, 200),
          ts: failed_at
        })
        # The failure entry above replaces the generic closed entry.
        remove_record(record, record_diagnostic: false)
        break
      end
    end
  end
  record.thread = worker

  @mutex.synchronize { @records.push(record) }
  @diagnostics.push({
    type: :subscriber_subscribed,
    subscriber_id: record.id,
    stores: record.stores,
    ts: Process.clock_gettime(Process::CLOCK_REALTIME)
  })

  Subscription.new(record, self)
end
|
|
305
|
+
|
|
306
|
+
# Build a ChangeEvent from +fact+, add to the ring buffer, and enqueue to
|
|
307
|
+
# matching subscriber queues. Returns the emitted ChangeEvent immediately.
|
|
308
|
+
def emit(fact)
  emitted = nil

  @mutex.synchronize do
    @sequence += 1
    emitted = ChangeEvent.from_fact(fact, sequence: @sequence)
    @emitted_total += 1

    # Ring at capacity: evict the oldest retained event and count the drop.
    unless @ring.size < @max_size
      @ring.shift
      @dropped_total += 1
    end
    @ring.push(emitted)
  end

  # Fan-out runs after the buffer lock has been released.
  fan_out(emitted)
  emitted
end
|
|
324
|
+
|
|
325
|
+
# Number of active subscribers, optionally filtered by store name.
|
|
326
|
+
# Wildcard subscribers (stores == []) are counted for every store.
|
|
327
|
+
def subscriber_count(store = nil)
  @mutex.synchronize do
    # No store given: every registered subscriber counts.
    next @records.size unless store

    wanted = store.to_s
    @records.count { |rec| rec.stores.empty? || rec.stores.include?(wanted) }
  end
end
|
|
336
|
+
|
|
337
|
+
# Replay retained ChangeEvents from the in-memory ring.
|
|
338
|
+
#
|
|
339
|
+
# +cursor+ — nil or { sequence: Integer }
|
|
340
|
+
# +stores+ — nil (all) or Array of store name symbols/strings to filter
|
|
341
|
+
# +limit+ — nil (all matching) or Integer cap on returned events
|
|
342
|
+
#
|
|
343
|
+
# Returns a Hash:
|
|
344
|
+
# {
|
|
345
|
+
# status: :ok | :cursor_too_old,
|
|
346
|
+
# events: [ChangeEvent, ...],
|
|
347
|
+
# cursor: { sequence: N } | nil,
|
|
348
|
+
# oldest_cursor: { sequence: N } | nil,
|
|
349
|
+
# newest_cursor: { sequence: N } | nil,
|
|
350
|
+
# dropped_total: Integer
|
|
351
|
+
# }
|
|
352
|
+
def replay(cursor: nil, stores: nil, limit: nil)
  @mutex.synchronize do
    if @ring.empty?
      return {
        status: :ok,
        events: [],
        cursor: nil,
        oldest_cursor: nil,
        newest_cursor: nil,
        dropped_total: @dropped_total
      }
    end

    oldest_seq = @ring.first.cursor[:sequence]
    newest_seq = @ring.last.cursor[:sequence]

    # Every non-empty-ring result has the same shape; only the status,
    # the events and the resume position vary.
    build = lambda do |status, events, resume_at|
      {
        status: status,
        events: events,
        cursor: { sequence: resume_at },
        oldest_cursor: { sequence: oldest_seq },
        newest_cursor: { sequence: newest_seq },
        dropped_total: @dropped_total
      }
    end

    if cursor.nil?
      # A nil cursor is equivalent to standing just before the oldest
      # retained event.
      requested_seq = oldest_seq - 1
      candidates = @ring.dup
    else
      requested_seq = Integer(cursor[:sequence])

      # The caller missed events that have already left the ring.
      return build.call(:cursor_too_old, [], newest_seq) if requested_seq < oldest_seq - 1
      # The caller is already at (or past) the head.
      return build.call(:ok, [], newest_seq) if requested_seq >= newest_seq

      candidates = @ring.select { |event| event.cursor[:sequence] > requested_seq }
    end

    if stores && !stores.empty?
      wanted = Array(stores).map(&:to_s)
      candidates = candidates.select { |event| wanted.include?(event.store.to_s) }
    end

    matched_any = !candidates.empty?
    candidates = candidates.first(limit) if limit

    resume_at =
      if candidates.last
        candidates.last.cursor[:sequence]
      elsif matched_any
        # Matching events exist but +limit+ (0) returned none of them.
        # Keep the caller where it was; advancing to the head would make
        # it skip every one of those events on the next call.
        requested_seq
      else
        # Nothing after the cursor matches the filter, so moving to the
        # head loses nothing.
        newest_seq
      end

    build.call(:ok, candidates, resume_at)
  end
end
|
|
423
|
+
|
|
424
|
+
# Compact snapshot of current changefeed state for observability.
|
|
425
|
+
# Includes +alerts+ (evaluated against configured thresholds) and
|
|
426
|
+
# +diagnostics+ (recent bounded ring of lifecycle/failure entries).
|
|
427
|
+
#
|
|
428
|
+
# +dropped_total+ — retained ring drops (ring full)
|
|
429
|
+
# +overflow_dropped_total+ — subscriber queue drops (slow consumer)
|
|
430
|
+
# +total_queued+ — sum of all active subscriber queue sizes (backpressure)
|
|
431
|
+
def snapshot
  @mutex.synchronize do
    queued = @records.sum { |rec| rec.queue.size }
    # Sequence of a ring entry, or nil when the ring is empty.
    sequence_of = ->(event) { event&.cursor&.fetch(:sequence, nil) }

    report = {
      emitted_total: @emitted_total,
      delivered_total: @delivered_total,
      dropped_total: @dropped_total,
      overflow_dropped_total: @overflow_dropped_total,
      failed_total: @failed_total,
      buffered: @ring.size,
      max_size: @max_size,
      subscriber_count: @records.size,
      subscriber_queue_size: @subscriber_queue_size,
      overflow: @overflow,
      close_policy: @close_policy,
      total_queued: queued,
      oldest_sequence: sequence_of.call(@ring.first),
      newest_sequence: sequence_of.call(@ring.last)
    }

    # Alerts are computed from the counters gathered above, before the
    # :alerts and :diagnostics keys are attached.
    report[:alerts] = compute_alerts(report)
    report[:diagnostics] = @diagnostics.snapshot
    report
  end
end
|
|
455
|
+
|
|
456
|
+
# Per-subscriber state snapshot for diagnosing slow/failing consumers.
|
|
457
|
+
#
|
|
458
|
+
# Returns an Array of Hashes — one per active subscriber — with fields:
|
|
459
|
+
# id, stores, queue_size, queue_max_size, overflow, close_policy,
|
|
460
|
+
# status, delivered_total, overflow_dropped_total, failed_total
|
|
461
|
+
#
|
|
462
|
+
# Subscribers that have already failed or been closed are not listed.
|
|
463
|
+
def subscriber_snapshot
  @mutex.synchronize do
    @records.each_with_object([]) do |rec, rows|
      rows << {
        id: rec.id,
        stores: rec.stores,
        queue_size: rec.queue.size,
        # The queue capacity comes from the buffer-wide setting.
        queue_max_size: @subscriber_queue_size,
        overflow: rec.overflow,
        close_policy: rec.close_policy,
        status: rec.status,
        delivered_total: rec.delivered_total,
        overflow_dropped_total: rec.overflow_dropped_total,
        failed_total: rec.failed_total
      }
    end
  end
end
|
|
481
|
+
|
|
482
|
+
protected
|
|
483
|
+
|
|
484
|
+
# Removes +record+ from the active list and closes its queue.
|
|
485
|
+
# Respects the record's +close_policy+ (:drain or :discard).
|
|
486
|
+
# Does not join the worker thread — safe to call from inside the worker.
|
|
487
|
+
# Subscription#close handles the join for external callers.
|
|
488
|
+
#
|
|
489
|
+
# +record_diagnostic:+ — when true (default), records a :subscriber_closed
|
|
490
|
+
# diagnostic entry. Pass false when the caller has already recorded a more
|
|
491
|
+
# specific entry (e.g., :subscriber_failed from the worker rescue block).
|
|
492
|
+
def remove_record(record, record_diagnostic: true)
  return unless record

  # True only when this call took the record off the active list. It is
  # false on a repeated close, or when the record was already removed
  # (for example by its worker after a handler failure).
  removed = @mutex.synchronize do
    before = @records.size
    @records.reject! { |r| r.equal?(record) }
    @records.size < before
  end

  # Closing is repeated unconditionally so the worker is always released;
  # the discard flag follows the record's own close policy.
  record.queue&.close(discard: record.close_policy == :discard)

  # Record :subscriber_closed at most once per subscriber. Repeated closes
  # would otherwise add duplicate entries and evict real diagnostics.
  return unless record_diagnostic && removed

  @diagnostics.push({
    type: :subscriber_closed,
    subscriber_id: record.id,
    stores: record.stores,
    ts: Process.clock_gettime(Process::CLOCK_REALTIME)
  })
end
|
|
504
|
+
|
|
505
|
+
private
|
|
506
|
+
|
|
507
|
+
# Enqueues +event+ to every subscriber whose store list is empty (wildcard)
# or contains the event's store, then accounts for any overflow drops.
def fan_out(event)
  store_name = event.store.to_s
  targets = @mutex.synchronize do
    @records.select { |rec| rec.stores.empty? || rec.stores.include?(store_name) }
  end

  # Pushes happen outside the buffer lock; push returns true when the
  # subscriber's queue overflowed.
  overflowed = targets.select { |rec| rec.queue.push(event) }
  return if overflowed.empty?

  @mutex.synchronize do
    overflowed.each do |rec|
      @overflow_dropped_total += 1
      rec.overflow_dropped_total += 1
    end
  end

  # One timestamp is shared by all overflow entries of this fan-out.
  stamp = Process.clock_gettime(Process::CLOCK_REALTIME)
  overflowed.each do |rec|
    @diagnostics.push({
      type: :subscriber_overflow,
      subscriber_id: rec.id,
      ts: stamp
    })
  end
end
|
|
533
|
+
|
|
534
|
+
# Evaluate configured alert thresholds against the current snapshot values.
|
|
535
|
+
# Called from within #snapshot while @mutex is held.
|
|
536
|
+
# Per-subscriber ratio alert accesses subscriber queue sizes (safe by lock order).
|
|
537
|
+
def compute_alerts(snap)
  # Aggregate thresholds, in the order their alerts are reported. Each
  # snapshot counter is compared against the threshold of the same name.
  aggregate_codes = {
    failed_total: :changefeed_subscriber_failures,
    overflow_dropped_total: :changefeed_overflow_drops,
    total_queued: :changefeed_queue_pressure
  }

  alerts = aggregate_codes.each_with_object([]) do |(metric, code), fired|
    limit = @alert_thresholds[metric]
    next unless limit && snap[metric] >= limit

    fired << { code: code, severity: :warning, value: snap[metric], threshold: limit }
  end

  ratio_limit = @alert_thresholds[:queue_pressure_ratio]
  return alerts unless ratio_limit && @subscriber_queue_size > 0

  # Per-subscriber pressure: one alert for each queue whose fill ratio
  # reaches the configured limit.
  @records.each do |rec|
    fill = rec.queue.size.to_f / @subscriber_queue_size
    next if fill < ratio_limit

    alerts << {
      code: :changefeed_queue_pressure,
      severity: :warning,
      subscriber_id: rec.id,
      value: fill.round(3),
      threshold: ratio_limit
    }
  end

  alerts
end
|
|
583
|
+
end
|
|
584
|
+
end
|
|
585
|
+
end
|