ruby_reactor 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,8 +16,10 @@ module RubyReactor
16
16
 
17
17
  def store_context(context_id, serialized_context, reactor_class_name)
18
18
  key = context_key(context_id, reactor_class_name)
19
- # Use standard SET for compatibility (ReJSON not strictly required for full docs)
20
- @redis.set(key, serialized_context, ex: 86_400) # 24h TTL
19
+ # Use standard SET for compatibility (ReJSON not strictly required for full docs).
20
+ # TTL is re-stamped on every write so long-running / snoozed contexts
21
+ # never expire mid-flight (Phase 4).
22
+ @redis.set(key, serialized_context, ex: durability_ttl)
21
23
  end
22
24
 
23
25
  def retrieve_context(context_id, reactor_class_name)
@@ -28,52 +30,84 @@ module RubyReactor
28
30
  JSON.parse(json)
29
31
  end
30
32
 
33
+ # Durable map storage is ALWAYS index-keyed (HSET), regardless of
34
+ # strict_ordering. The index->slot mapping makes completion recoverable
35
+ # (missing = (0...count) - HKEYS) and re-dispatch idempotent (re-running
36
+ # index i overwrites slot i, never duplicates). strict_ordering is now only
37
+ # a read-order convenience, not a storage-layout switch (Phase 5).
38
+ # rubocop:disable Lint/UnusedMethodArgument
31
39
  def store_map_result(map_id, index, serialized_result, reactor_class_name, strict_ordering: true)
32
40
  key = map_results_key(map_id, reactor_class_name)
33
-
34
- if strict_ordering
35
- # Use Hash for strict ordering by index
36
- # HSET key index serialized_result
37
- @redis.hset(key, index.to_s, serialized_result.to_json)
38
- else
39
- # Loose ordering: just push to list
40
- @redis.rpush(key, serialized_result.to_json)
41
- end
42
-
43
- @redis.expire(key, 86_400)
41
+ @redis.hset(key, index.to_s, serialized_result.to_json)
42
+ @redis.expire(key, durability_ttl)
44
43
  end
45
44
 
46
45
  def retrieve_map_results(map_id, reactor_class_name, strict_ordering: true)
46
+ # rubocop:enable Lint/UnusedMethodArgument
47
47
  key = map_results_key(map_id, reactor_class_name)
48
+ results = @redis.hgetall(key)
49
+ # Index-keyed for both modes; sort by index so reads are deterministic.
50
+ results.keys.sort_by(&:to_i).map { |k| JSON.parse(results[k]) }
51
+ end
48
52
 
49
- if strict_ordering
50
- results = @redis.hgetall(key)
51
- # Sort by index (key)
52
- results.keys.sort_by(&:to_i).map { |k| JSON.parse(results[k]) }
53
- else
54
- results = @redis.lrange(key, 0, -1)
55
- results.map { |r| JSON.parse(r) }
56
- end
53
+ # Indices that have NO stored result yet: the authoritative, idempotent
54
+ # signal for what the map sweeper must (re)dispatch.
55
+ def missing_map_indices(map_id, count, reactor_class_name)
56
+ key = map_results_key(map_id, reactor_class_name)
57
+ present = @redis.hkeys(key).map(&:to_i)
58
+ (0...count).to_a - present
57
59
  end
58
60
 
59
61
  def set_map_counter(map_id, count, reactor_class_name)
60
62
  key = map_counter_key(map_id, reactor_class_name)
61
- @redis.set(key, count, ex: 86_400)
63
+ @redis.set(key, count, ex: durability_ttl)
62
64
  end
63
65
 
64
- def initialize_map_operation(map_id, count, parent_reactor_class_name, reactor_class_info:, strict_ordering: true)
66
+ # rubocop:disable Metrics/ParameterLists
67
+ def initialize_map_operation(map_id, count, parent_reactor_class_name, reactor_class_info:, strict_ordering: true,
68
+ parent_context_id: nil, step_name: nil, parent_is_map_element: false,
69
+ outer_map_id: nil, outer_index: nil)
65
70
  # Ensure counter is set
66
71
  set_map_counter(map_id, count, parent_reactor_class_name)
67
72
 
68
- # Store metadata
73
+ # Store metadata. parent_context_id/step_name let the map sweeper recover
74
+ # without re-deriving the map_id (which is brittle to split on ':'). The
75
+ # nested-map fields (parent_is_map_element + outer_map_id/outer_index)
76
+ # record which liveness lock the parent actually holds (N1): a nested
77
+ # map's parent is itself a map element running under a `map_element:` lock,
78
+ # not an `async:` lock.
69
79
  key = "reactor:#{parent_reactor_class_name}:map:#{map_id}:metadata"
70
80
  metadata = {
81
+ map_id: map_id,
71
82
  count: count,
72
83
  strict_ordering: strict_ordering,
73
84
  reactor_class_info: reactor_class_info,
85
+ parent_context_id: parent_context_id,
86
+ parent_reactor_class_name: parent_reactor_class_name,
87
+ step_name: step_name,
88
+ parent_is_map_element: parent_is_map_element,
89
+ outer_map_id: outer_map_id,
90
+ outer_index: outer_index,
74
91
  created_at: Time.now.to_i
75
92
  }
76
- @redis.set(key, metadata.to_json, ex: 86_400)
93
+ @redis.set(key, metadata.to_json, ex: durability_ttl)
94
+ end
95
+ # rubocop:enable Metrics/ParameterLists
96
+
97
+ # Enumerate active map operations for the map sweeper (Phase 5d). Returns
98
+ # the parsed metadata hash for each (includes map_id, count,
99
+ # parent_context_id, step_name, parent_reactor_class_name, and the nested-map
100
+ # lock fields). Bounded by `count` to keep a sweep cheap.
101
+ def scan_maps(count: 1000)
102
+ results = []
103
+ @redis.scan_each(match: "reactor:*:map:*:metadata", count: 100) do |key|
104
+ json = @redis.get(key)
105
+ next unless json
106
+
107
+ results << JSON.parse(json)
108
+ return results if results.size >= count
109
+ end
110
+ results
77
111
  end
78
112
 
79
113
  def retrieve_map_metadata(map_id, reactor_class_name)
@@ -87,7 +121,7 @@ module RubyReactor
87
121
  def increment_map_counter(map_id, reactor_class_name)
88
122
  key = map_counter_key(map_id, reactor_class_name)
89
123
  @redis.incr(key)
90
- @redis.expire(key, 86_400)
124
+ @redis.expire(key, durability_ttl)
91
125
  end
92
126
 
93
127
  def decrement_map_counter(map_id, reactor_class_name)
@@ -97,7 +131,7 @@ module RubyReactor
97
131
 
98
132
  def set_last_queued_index(map_id, index, reactor_class_name)
99
133
  key = map_last_queued_index_key(map_id, reactor_class_name)
100
- @redis.set(key, index, ex: 86_400)
134
+ @redis.set(key, index, ex: durability_ttl)
101
135
  end
102
136
 
103
137
  def increment_last_queued_index(map_id, reactor_class_name)
@@ -109,7 +143,7 @@ module RubyReactor
109
143
  key = correlation_id_key(correlation_id, reactor_class_name)
110
144
  # Store mapping correlation_id -> context_id
111
145
  # Try to set if not exists
112
- success = @redis.set(key, context_id, nx: true, ex: 86_400) # 24h TTL
146
+ success = @redis.set(key, context_id, nx: true, ex: durability_ttl)
113
147
 
114
148
  return if success
115
149
 
@@ -118,7 +152,7 @@ module RubyReactor
118
152
 
119
153
  if existing_context_id == context_id
120
154
  # Refresh TTL
121
- @redis.expire(key, 86_400)
155
+ @redis.expire(key, durability_ttl)
122
156
  return
123
157
  end
124
158
 
@@ -216,7 +250,7 @@ module RubyReactor
216
250
  def store_map_element_context_id(map_id, context_id, reactor_class_name)
217
251
  key = map_element_contexts_key(map_id, reactor_class_name)
218
252
  @redis.rpush(key, context_id)
219
- @redis.expire(key, 86_400)
253
+ @redis.expire(key, durability_ttl)
220
254
  end
221
255
 
222
256
  def retrieve_map_element_context_ids(map_id, reactor_class_name)
@@ -232,7 +266,7 @@ module RubyReactor
232
266
  def store_map_failed_context_id(map_id, context_id, reactor_class_name)
233
267
  key = map_failed_context_key(map_id, reactor_class_name)
234
268
  # Only store the first failure (nx: true)
235
- @redis.set(key, context_id, nx: true, ex: 86_400)
269
+ @redis.set(key, context_id, nx: true, ex: durability_ttl)
236
270
  end
237
271
 
238
272
  def retrieve_map_failed_context_id(map_id, reactor_class_name)
@@ -242,12 +276,12 @@ module RubyReactor
242
276
 
243
277
  def set_map_offset(map_id, offset, reactor_class_name)
244
278
  key = map_offset_key(map_id, reactor_class_name)
245
- @redis.set(key, offset, ex: 86_400)
279
+ @redis.set(key, offset, ex: durability_ttl)
246
280
  end
247
281
 
248
282
  def set_map_offset_if_not_exists(map_id, offset, reactor_class_name)
249
283
  key = map_offset_key(map_id, reactor_class_name)
250
- @redis.set(key, offset, nx: true, ex: 86_400)
284
+ @redis.set(key, offset, nx: true, ex: durability_ttl)
251
285
  end
252
286
 
253
287
  def retrieve_map_offset(map_id, reactor_class_name)
@@ -260,43 +294,32 @@ module RubyReactor
260
294
  @redis.incrby(key, increment)
261
295
  end
262
296
 
297
+ # rubocop:disable Lint/UnusedMethodArgument
263
298
  def retrieve_map_results_batch(map_id, reactor_class_name, offset:, limit:, strict_ordering: true)
299
+ # Always index-keyed now (Phase 5): HMGET the contiguous index window.
264
300
  key = map_results_key(map_id, reactor_class_name)
265
-
266
- if strict_ordering
267
- # For Hash based results (indexed), we can use HMGET if we know the keys.
268
- # Since we use 0-based index keys, we can generate the keys for the batch.
269
- fields = (offset...(offset + limit)).map(&:to_s)
270
- results = @redis.hmget(key, *fields)
271
-
272
- # HMGET returns nil for missing fields, compact them?
273
- # Or should we respect the holes?
274
- # Map results are usually dense.
275
- results.compact.map { |r| JSON.parse(r) }
276
- else
277
- # For List based results
278
- # LRANGE uses inclusive ending index
279
- end_index = offset + limit - 1
280
- results = @redis.lrange(key, offset, end_index)
281
- results.map { |r| JSON.parse(r) }
282
- end
301
+ fields = (offset...(offset + limit)).map(&:to_s)
302
+ results = @redis.hmget(key, *fields)
303
+ results.compact.map { |r| JSON.parse(r) }
283
304
  end
305
+ # rubocop:enable Lint/UnusedMethodArgument
284
306
 
285
307
  def count_map_results(map_id, reactor_class_name)
286
308
  key = map_results_key(map_id, reactor_class_name)
287
- type = @redis.type(key)
288
-
289
- if type == "hash"
290
- @redis.hlen(key)
291
- elsif type == "list"
292
- @redis.llen(key)
293
- else
294
- 0
295
- end
309
+ @redis.hlen(key)
296
310
  end
297
311
 
298
312
  private
299
313
 
314
+ # Single source of truth for the retention window of all durability-bearing
315
+ # state (context blob, map results/counters/metadata/offsets, correlation
316
+ # ids). Map state is load-bearing for resume exactly like the context, so it
317
+ # must share the context's configurable TTL — a shorter map TTL would expire
318
+ # map results mid-flight and break recovery. Re-stamped on every write.
319
+ def durability_ttl
320
+ RubyReactor.configuration.context_ttl
321
+ end
322
+
300
323
  def fetch_and_filter_reactors(keys)
301
324
  return [] if keys.empty?
302
325
 
@@ -210,6 +210,14 @@ module RubyReactor
210
210
  # rate-limit/period state without leaking Redis-specific calls into
211
211
  # test code.
212
212
 
213
+ # Liveness check for a logical lock by its BARE key (e.g. "async:<id>").
214
+ # Prepends the "lock:" prefix that Lock applies. True while a worker holds
215
+ # (and auto-extends) the lock; false once it expires — the sweeper's
216
+ # "worker died" signal.
217
+ def lock_held?(key)
218
+ @redis.exists?("lock:#{key}")
219
+ end
220
+
213
221
  # Returns { owner:, count: } for a held lock, or nil if free.
214
222
  # `prefixed_key` is the full key (e.g. "lock:order:42").
215
223
  def lock_info(prefixed_key)
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyReactor
4
+ # Re-enqueues non-terminal top-level reactor contexts whose worker died.
5
+ #
6
+ # The per-context liveness lock (`async:<id>`, Phase 1) is the signal: a live
7
+ # worker holds and auto-extends it, so its ABSENCE on a context still marked
8
+ # `running` means the worker crashed without finishing. The sweeper re-enqueues
9
+ # such contexts by id (identity-only payload, Phase 2).
10
+ #
11
+ # `run_once` is pure and idempotent — call it periodically; the cadence is the
12
+ # host's to wire (sidekiq-cron, sidekiq-scheduler, a self-rescheduling worker,
13
+ # or external cron). The interval bounds recovery latency. No scheduling
14
+ # dependency is added to the gem.
15
+ #
16
+ # Safety depends on Phase 1: if a context is mis-judged dead (GC pause, liveness
17
+ # race) and re-enqueued while its worker is actually alive, the duplicate hits
18
+ # the live lock -> ContextLockContention -> uncapped snooze -> no double run.
19
+ #
20
+ # Map fan-out (element/collector jobs) is NOT covered here — those contexts
21
+ # carry parent_context_id and scan_reactors filters them out (F6). The map
22
+ # sweeper (Phase 5) owns them.
23
+ class Sweeper
24
+ # Default upper bound on contexts inspected per sweep. scan_reactors caps its
25
+ # result at this count; a host with more in-flight reactors than this should
26
+ # raise it (or sweep more frequently).
27
+ DEFAULT_LIMIT = 1000
28
+
29
+ def self.run_once(limit: DEFAULT_LIMIT)
30
+ new.run_once(limit: limit)
31
+ end
32
+
33
+ def initialize(storage: nil, async_router: nil, logger: nil)
34
+ @storage = storage || RubyReactor.configuration.storage_adapter
35
+ @async_router = async_router || RubyReactor.configuration.async_router
36
+ @logger = logger || RubyReactor.configuration.logger
37
+ end
38
+
39
+ # Scans stored top-level reactors and re-enqueues the running-but-unlocked
40
+ # ones. Returns the number of contexts re-enqueued.
41
+ def run_once(limit: DEFAULT_LIMIT)
42
+ reenqueued = 0
43
+
44
+ @storage.scan_reactors(count: limit).each do |reactor|
45
+ next unless reactor[:status] == "running" # non-terminal only
46
+ next if @storage.lock_held?("async:#{reactor[:id]}") # worker alive -> leave alone
47
+
48
+ @async_router.perform_async(reactor[:id], reactor[:class])
49
+ reenqueued += 1
50
+ rescue StandardError => e
51
+ # One bad record must not abort the whole sweep.
52
+ @logger.warn("RubyReactor::Sweeper failed to re-enqueue #{reactor[:id]}: #{e.class}: #{e.message}")
53
+ end
54
+
55
+ reenqueued
56
+ end
57
+ end
58
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RubyReactor
4
- VERSION = "0.5.2"
4
+ VERSION = "0.5.3"
5
5
  end
data/lib/ruby_reactor.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "zeitwerk"
4
4
  require "pathname"
5
+ require "securerandom"
5
6
  require_relative "ruby_reactor/registry"
6
7
  require_relative "ruby_reactor/utils/code_extractor"
7
8
  require_relative "ruby_reactor/dsl/lockable" # Add this
@@ -330,6 +331,47 @@ module RubyReactor
330
331
  Configuration.instance
331
332
  end
332
333
 
334
+ # The name under which a reactor class's durable state is keyed in storage
335
+ # (`reactor:<name>:context:<id>`, map metadata, etc.). MUST be stable across
336
+ # processes: the enqueuing process writes the blob under this name and a
337
+ # *different* worker process reads it back by the same name. So an anonymous
338
+ # class falls back to a fixed constant, NOT `object_id` — object_id is
339
+ # process-local and would make the worker's read key miss the writer's key.
340
+ # The context_id in the key still disambiguates distinct anonymous reactors.
341
+ # (A truly anonymous class can't be reconstituted by name in another process,
342
+ # so cross-process resume of one is inherently unsupported; this only keeps
343
+ # the keys self-consistent within a process — e.g. inline tests.)
344
+ def self.reactor_storage_name(reactor_class)
345
+ return "AnonymousReactor" if reactor_class.nil?
346
+
347
+ reactor_class.name || "AnonymousReactor"
348
+ end
349
+
350
+ # Kick the self-rescheduling recovery sweeper chain. Call once per cluster —
351
+ # typically from an initializer (`RubyReactor.start_sweeper!`). Idempotent:
352
+ # calling it on every process boot is safe because the worker claims each tick
353
+ # by time-window, so duplicate kicks collapse to a single chain. No-op when
354
+ # `config.sweeper_enabled` is false. Returns the scheduled job id, or nil when
355
+ # disabled or when this window's tick was already claimed by another caller.
356
+ def self.start_sweeper!
357
+ return unless configuration.sweeper_enabled
358
+
359
+ SidekiqWorkers::SweeperWorker.schedule_next
360
+ end
361
+
362
+ # Run both recovery sweepers exactly once and return their counts. The
363
+ # synchronous escape hatch for hosts that schedule recovery with their own
364
+ # cron / k8s CronJob instead of the in-cluster chain (set
365
+ # `config.sweeper_enabled = false` and call this from `rake ruby_reactor:sweep`
366
+ # or a binstub).
367
+ def self.sweep_once(limit: nil)
368
+ limit ||= configuration.sweeper_limit
369
+ {
370
+ reactors: Sweeper.run_once(limit: limit),
371
+ maps: Map::Sweeper.run_once(limit: limit)
372
+ }
373
+ end
374
+
333
375
  def self.root
334
376
  Pathname.new(File.expand_path("..", __dir__))
335
377
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_reactor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Artur
@@ -136,6 +136,7 @@ files:
136
136
  - lib/ruby_reactor/map/element_executor.rb
137
137
  - lib/ruby_reactor/map/helpers.rb
138
138
  - lib/ruby_reactor/map/result_enumerator.rb
139
+ - lib/ruby_reactor/map/sweeper.rb
139
140
  - lib/ruby_reactor/max_retries_exhausted_failure.rb
140
141
  - lib/ruby_reactor/middleware.rb
141
142
  - lib/ruby_reactor/middleware_runner.rb
@@ -159,6 +160,7 @@ files:
159
160
  - lib/ruby_reactor/sidekiq_adapter.rb
160
161
  - lib/ruby_reactor/sidekiq_workers/map_collector_worker.rb
161
162
  - lib/ruby_reactor/sidekiq_workers/map_element_worker.rb
163
+ - lib/ruby_reactor/sidekiq_workers/sweeper_worker.rb
162
164
  - lib/ruby_reactor/sidekiq_workers/worker.rb
163
165
  - lib/ruby_reactor/step.rb
164
166
  - lib/ruby_reactor/step/compose_step.rb
@@ -168,6 +170,7 @@ files:
168
170
  - lib/ruby_reactor/storage/redis_adapter.rb
169
171
  - lib/ruby_reactor/storage/redis_locking.rb
170
172
  - lib/ruby_reactor/storage/redis_ordered_locking.rb
173
+ - lib/ruby_reactor/sweeper.rb
171
174
  - lib/ruby_reactor/template/base.rb
172
175
  - lib/ruby_reactor/template/dynamic_source.rb
173
176
  - lib/ruby_reactor/template/element.rb