RubyGems - ruby_reactor - Versions diffs - 0.5.2 → 0.5.3 - Mend

ruby_reactor 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/.release-please-manifest.json +1 -1
data/CHANGELOG.md +7 -0
data/README.md +147 -34
data/lib/ruby_reactor/configuration.rb +66 -2
data/lib/ruby_reactor/context_serializer.rb +9 -4
data/lib/ruby_reactor/executor/ordered_lock_support.rb +1 -1
data/lib/ruby_reactor/executor/retry_manager.rb +7 -2
data/lib/ruby_reactor/executor/step_executor.rb +25 -5
data/lib/ruby_reactor/executor.rb +85 -3
data/lib/ruby_reactor/lock.rb +13 -0
data/lib/ruby_reactor/map/collector.rb +41 -0
data/lib/ruby_reactor/map/dispatcher.rb +42 -0
data/lib/ruby_reactor/map/element_executor.rb +39 -0
data/lib/ruby_reactor/map/helpers.rb +10 -3
data/lib/ruby_reactor/map/sweeper.rb +110 -0
data/lib/ruby_reactor/reactor.rb +7 -5
data/lib/ruby_reactor/sidekiq_adapter.rb +9 -8
data/lib/ruby_reactor/sidekiq_workers/sweeper_worker.rb +73 -0
data/lib/ruby_reactor/sidekiq_workers/worker.rb +42 -34
data/lib/ruby_reactor/step/map_step.rb +18 -2
data/lib/ruby_reactor/storage/redis_adapter.rb +83 -60
data/lib/ruby_reactor/storage/redis_locking.rb +8 -0
data/lib/ruby_reactor/sweeper.rb +58 -0
data/lib/ruby_reactor/version.rb +1 -1
data/lib/ruby_reactor.rb +42 -0
metadata +4 -1

data/lib/ruby_reactor/storage/redis_adapter.rb CHANGED Viewed

@@ -16,8 +16,10 @@ module RubyReactor
       def store_context(context_id, serialized_context, reactor_class_name)
         key = context_key(context_id, reactor_class_name)
-        # Use standard SET for compatibility (ReJSON not strictly required for full docs)
-        @redis.set(key, serialized_context, ex: 86_400) # 24h TTL
+        # Use standard SET for compatibility (ReJSON not strictly required for full docs).
+        # TTL is re-stamped on every write so long-running / snoozed contexts
+        # never expire mid-flight (Phase 4).
+        @redis.set(key, serialized_context, ex: durability_ttl)
       end
       def retrieve_context(context_id, reactor_class_name)
@@ -28,52 +30,84 @@ module RubyReactor
         JSON.parse(json)
       end
+      # Durable map storage is ALWAYS index-keyed (HSET), regardless of
+      # strict_ordering. The index->slot mapping makes completion recoverable
+      # (missing = (0...count) - HKEYS) and re-dispatch idempotent (re-running
+      # index i overwrites slot i, never duplicates). strict_ordering is now only
+      # a read-order convenience, not a storage-layout switch (Phase 5).
+      # rubocop:disable Lint/UnusedMethodArgument
       def store_map_result(map_id, index, serialized_result, reactor_class_name, strict_ordering: true)
         key = map_results_key(map_id, reactor_class_name)
-        if strict_ordering
-          # Use Hash for strict ordering by index
-          # HSET key index serialized_result
-          @redis.hset(key, index.to_s, serialized_result.to_json)
-        else
-          # Loose ordering: just push to list
-          @redis.rpush(key, serialized_result.to_json)
-        end
-        @redis.expire(key, 86_400)
+        @redis.hset(key, index.to_s, serialized_result.to_json)
+        @redis.expire(key, durability_ttl)
       end
       def retrieve_map_results(map_id, reactor_class_name, strict_ordering: true)
+        # rubocop:enable Lint/UnusedMethodArgument
         key = map_results_key(map_id, reactor_class_name)
+        results = @redis.hgetall(key)
+        # Index-keyed for both modes; sort by index so reads are deterministic.
+        results.keys.sort_by(&:to_i).map { |k| JSON.parse(results[k]) }
+      end
-        if strict_ordering
-          results = @redis.hgetall(key)
-          # Sort by index (key)
-          results.keys.sort_by(&:to_i).map { |k| JSON.parse(results[k]) }
-        else
-          results = @redis.lrange(key, 0, -1)
-          results.map { |r| JSON.parse(r) }
-        end
+      # Indices that have NO stored result yet: the authoritative, idempotent
+      # signal for what the map sweeper must (re)dispatch.
+      def missing_map_indices(map_id, count, reactor_class_name)
+        key = map_results_key(map_id, reactor_class_name)
+        present = @redis.hkeys(key).map(&:to_i)
+        (0...count).to_a - present
       end
       def set_map_counter(map_id, count, reactor_class_name)
         key = map_counter_key(map_id, reactor_class_name)
-        @redis.set(key, count, ex: 86_400)
+        @redis.set(key, count, ex: durability_ttl)
       end
-      def initialize_map_operation(map_id, count, parent_reactor_class_name, reactor_class_info:, strict_ordering: true)
+      # rubocop:disable Metrics/ParameterLists
+      def initialize_map_operation(map_id, count, parent_reactor_class_name, reactor_class_info:, strict_ordering: true,
+                                   parent_context_id: nil, step_name: nil, parent_is_map_element: false,
+                                   outer_map_id: nil, outer_index: nil)
         # Ensure counter is set
         set_map_counter(map_id, count, parent_reactor_class_name)
-        # Store metadata
+        # Store metadata. parent_context_id/step_name let the map sweeper recover
+        # without re-deriving the map_id (which is brittle to split on ':'). The
+        # nested-map fields (parent_is_map_element + outer_map_id/outer_index)
+        # record which liveness lock the parent actually holds (N1): a nested
+        # map's parent is itself a map element running under a `map_element:` lock,
+        # not an `async:` lock.
         key = "reactor:#{parent_reactor_class_name}:map:#{map_id}:metadata"
         metadata = {
+          map_id: map_id,
           count: count,
           strict_ordering: strict_ordering,
           reactor_class_info: reactor_class_info,
+          parent_context_id: parent_context_id,
+          parent_reactor_class_name: parent_reactor_class_name,
+          step_name: step_name,
+          parent_is_map_element: parent_is_map_element,
+          outer_map_id: outer_map_id,
+          outer_index: outer_index,
           created_at: Time.now.to_i
         }
-        @redis.set(key, metadata.to_json, ex: 86_400)
+        @redis.set(key, metadata.to_json, ex: durability_ttl)
+      end
+      # rubocop:enable Metrics/ParameterLists
+      # Enumerate active map operations for the map sweeper (Phase 5d). Returns
+      # the parsed metadata hash for each (includes map_id, count,
+      # parent_context_id, step_name, parent_reactor_class_name, and the nested-map
+      # lock fields). Bounded by `count` to keep a sweep cheap.
+      def scan_maps(count: 1000)
+        results = []
+        @redis.scan_each(match: "reactor:*:map:*:metadata", count: 100) do |key|
+          json = @redis.get(key)
+          next unless json
+          results << JSON.parse(json)
+          return results if results.size >= count
+        end
+        results
       end
       def retrieve_map_metadata(map_id, reactor_class_name)
@@ -87,7 +121,7 @@ module RubyReactor
       def increment_map_counter(map_id, reactor_class_name)
         key = map_counter_key(map_id, reactor_class_name)
         @redis.incr(key)
-        @redis.expire(key, 86_400)
+        @redis.expire(key, durability_ttl)
       end
       def decrement_map_counter(map_id, reactor_class_name)
@@ -97,7 +131,7 @@ module RubyReactor
       def set_last_queued_index(map_id, index, reactor_class_name)
         key = map_last_queued_index_key(map_id, reactor_class_name)
-        @redis.set(key, index, ex: 86_400)
+        @redis.set(key, index, ex: durability_ttl)
       end
       def increment_last_queued_index(map_id, reactor_class_name)
@@ -109,7 +143,7 @@ module RubyReactor
         key = correlation_id_key(correlation_id, reactor_class_name)
         # Store mapping correlation_id -> context_id
         # Try to set if not exists
-        success = @redis.set(key, context_id, nx: true, ex: 86_400) # 24h TTL
+        success = @redis.set(key, context_id, nx: true, ex: durability_ttl)
         return if success
@@ -118,7 +152,7 @@ module RubyReactor
         if existing_context_id == context_id
           # Refresh TTL
-          @redis.expire(key, 86_400)
+          @redis.expire(key, durability_ttl)
           return
         end
@@ -216,7 +250,7 @@ module RubyReactor
       def store_map_element_context_id(map_id, context_id, reactor_class_name)
         key = map_element_contexts_key(map_id, reactor_class_name)
         @redis.rpush(key, context_id)
-        @redis.expire(key, 86_400)
+        @redis.expire(key, durability_ttl)
       end
       def retrieve_map_element_context_ids(map_id, reactor_class_name)
@@ -232,7 +266,7 @@ module RubyReactor
       def store_map_failed_context_id(map_id, context_id, reactor_class_name)
         key = map_failed_context_key(map_id, reactor_class_name)
         # Only store the first failure (nx: true)
-        @redis.set(key, context_id, nx: true, ex: 86_400)
+        @redis.set(key, context_id, nx: true, ex: durability_ttl)
       end
       def retrieve_map_failed_context_id(map_id, reactor_class_name)
@@ -242,12 +276,12 @@ module RubyReactor
       def set_map_offset(map_id, offset, reactor_class_name)
         key = map_offset_key(map_id, reactor_class_name)
-        @redis.set(key, offset, ex: 86_400)
+        @redis.set(key, offset, ex: durability_ttl)
       end
       def set_map_offset_if_not_exists(map_id, offset, reactor_class_name)
         key = map_offset_key(map_id, reactor_class_name)
-        @redis.set(key, offset, nx: true, ex: 86_400)
+        @redis.set(key, offset, nx: true, ex: durability_ttl)
       end
       def retrieve_map_offset(map_id, reactor_class_name)
@@ -260,43 +294,32 @@ module RubyReactor
         @redis.incrby(key, increment)
       end
+      # rubocop:disable Lint/UnusedMethodArgument
       def retrieve_map_results_batch(map_id, reactor_class_name, offset:, limit:, strict_ordering: true)
+        # Always index-keyed now (Phase 5): HMGET the contiguous index window.
         key = map_results_key(map_id, reactor_class_name)
-        if strict_ordering
-          # For Hash based results (indexed), we can use HMGET if we know the keys.
-          # Since we use 0-based index keys, we can generate the keys for the batch.
-          fields = (offset...(offset + limit)).map(&:to_s)
-          results = @redis.hmget(key, *fields)
-          # HMGET returns nil for missing fields, compact them?
-          # Or should we respect the holes?
-          # Map results are usually dense.
-          results.compact.map { |r| JSON.parse(r) }
-        else
-          # For List based results
-          # LRANGE uses inclusive ending index
-          end_index = offset + limit - 1
-          results = @redis.lrange(key, offset, end_index)
-          results.map { |r| JSON.parse(r) }
-        end
+        fields = (offset...(offset + limit)).map(&:to_s)
+        results = @redis.hmget(key, *fields)
+        results.compact.map { |r| JSON.parse(r) }
       end
+      # rubocop:enable Lint/UnusedMethodArgument
       def count_map_results(map_id, reactor_class_name)
         key = map_results_key(map_id, reactor_class_name)
-        type = @redis.type(key)
-        if type == "hash"
-          @redis.hlen(key)
-        elsif type == "list"
-          @redis.llen(key)
-        else
-          0
-        end
+        @redis.hlen(key)
       end
       private
+      # Single source of truth for the retention window of all durability-bearing
+      # state (context blob, map results/counters/metadata/offsets, correlation
+      # ids). Map state is load-bearing for resume exactly like the context, so it
+      # must share the context's configurable TTL — a shorter map TTL would expire
+      # map results mid-flight and break recovery. Re-stamped on every write.
+      def durability_ttl
+        RubyReactor.configuration.context_ttl
+      end
       def fetch_and_filter_reactors(keys)
         return [] if keys.empty?

data/lib/ruby_reactor/storage/redis_locking.rb CHANGED Viewed

@@ -210,6 +210,14 @@ module RubyReactor
       # rate-limit/period state without leaking Redis-specific calls into
       # test code.
+      # Liveness check for a logical lock by its BARE key (e.g. "async:<id>").
+      # Prepends the "lock:" prefix that Lock applies. True while a worker holds
+      # (and auto-extends) the lock; false once it expires — the sweeper's
+      # "worker died" signal.
+      def lock_held?(key)
+        @redis.exists?("lock:#{key}")
+      end
       # Returns { owner:, count: } for a held lock, or nil if free.
       # `prefixed_key` is the full key (e.g. "lock:order:42").
       def lock_info(prefixed_key)

data/lib/ruby_reactor/sweeper.rb ADDED Viewed

@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+module RubyReactor
+  # Re-enqueues non-terminal top-level reactor contexts whose worker died.
+  #
+  # The per-context liveness lock (`async:<id>`, Phase 1) is the signal: a live
+  # worker holds and auto-extends it, so its ABSENCE on a context still marked
+  # `running` means the worker crashed without finishing. The sweeper re-enqueues
+  # such contexts by id (identity-only payload, Phase 2).
+  #
+  # `run_once` is pure and idempotent — call it periodically; the cadence is the
+  # host's to wire (sidekiq-cron, sidekiq-scheduler, a self-rescheduling worker,
+  # or external cron). The interval bounds recovery latency. No scheduling
+  # dependency is added to the gem.
+  #
+  # Safety depends on Phase 1: if a context is mis-judged dead (GC pause, liveness
+  # race) and re-enqueued while its worker is actually alive, the duplicate hits
+  # the live lock -> ContextLockContention -> uncapped snooze -> no double run.
+  #
+  # Map fan-out (element/collector jobs) is NOT covered here — those contexts
+  # carry parent_context_id and scan_reactors filters them out (F6). The map
+  # sweeper (Phase 5) owns them.
+  class Sweeper
+    # Default upper bound on contexts inspected per sweep. scan_reactors caps its
+    # result at this count; a host with more in-flight reactors than this should
+    # raise it (or sweep more frequently).
+    DEFAULT_LIMIT = 1000
+    def self.run_once(limit: DEFAULT_LIMIT)
+      new.run_once(limit: limit)
+    end
+    def initialize(storage: nil, async_router: nil, logger: nil)
+      @storage = storage || RubyReactor.configuration.storage_adapter
+      @async_router = async_router || RubyReactor.configuration.async_router
+      @logger = logger || RubyReactor.configuration.logger
+    end
+    # Scans stored top-level reactors and re-enqueues the running-but-unlocked
+    # ones. Returns the number of contexts re-enqueued.
+    def run_once(limit: DEFAULT_LIMIT)
+      reenqueued = 0
+      @storage.scan_reactors(count: limit).each do |reactor|
+        next unless reactor[:status] == "running" # non-terminal only
+        next if @storage.lock_held?("async:#{reactor[:id]}") # worker alive -> leave alone
+        @async_router.perform_async(reactor[:id], reactor[:class])
+        reenqueued += 1
+      rescue StandardError => e
+        # One bad record must not abort the whole sweep.
+        @logger.warn("RubyReactor::Sweeper failed to re-enqueue #{reactor[:id]}: #{e.class}: #{e.message}")
+      end
+      reenqueued
+    end
+  end
+end

data/lib/ruby_reactor/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module RubyReactor
-  VERSION = "0.5.2"
+  VERSION = "0.5.3"
 end

data/lib/ruby_reactor.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require "zeitwerk"
 require "pathname"
+require "securerandom"
 require_relative "ruby_reactor/registry"
 require_relative "ruby_reactor/utils/code_extractor"
 require_relative "ruby_reactor/dsl/lockable" # Add this
@@ -330,6 +331,47 @@ module RubyReactor
     Configuration.instance
   end
+  # The name under which a reactor class's durable state is keyed in storage
+  # (`reactor:<name>:context:<id>`, map metadata, etc.). MUST be stable across
+  # processes: the enqueuing process writes the blob under this name and a
+  # *different* worker process reads it back by the same name. So an anonymous
+  # class falls back to a fixed constant, NOT `object_id` — object_id is
+  # process-local and would make the worker's read key miss the writer's key.
+  # The context_id in the key still disambiguates distinct anonymous reactors.
+  # (A truly anonymous class can't be reconstituted by name in another process,
+  # so cross-process resume of one is inherently unsupported; this only keeps
+  # the keys self-consistent within a process — e.g. inline tests.)
+  def self.reactor_storage_name(reactor_class)
+    return "AnonymousReactor" if reactor_class.nil?
+    reactor_class.name || "AnonymousReactor"
+  end
+  # Kick the self-rescheduling recovery sweeper chain. Call once per cluster —
+  # typically from an initializer (`RubyReactor.start_sweeper!`). Idempotent:
+  # calling it on every process boot is safe because the worker claims each tick
+  # by time-window, so duplicate kicks collapse to a single chain. No-op when
+  # `config.sweeper_enabled` is false. Returns the scheduled job id, or nil when
+  # disabled or when this window's tick was already claimed by another caller.
+  def self.start_sweeper!
+    return unless configuration.sweeper_enabled
+    SidekiqWorkers::SweeperWorker.schedule_next
+  end
+  # Run both recovery sweepers exactly once and return their counts. The
+  # synchronous escape hatch for hosts that schedule recovery with their own
+  # cron / k8s CronJob instead of the in-cluster chain (set
+  # `config.sweeper_enabled = false` and call this from `rake ruby_reactor:sweep`
+  # or a binstub).
+  def self.sweep_once(limit: nil)
+    limit ||= configuration.sweeper_limit
+    {
+      reactors: Sweeper.run_once(limit: limit),
+      maps: Map::Sweeper.run_once(limit: limit)
+    }
+  end
   def self.root
     Pathname.new(File.expand_path("..", __dir__))
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby_reactor
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.5.3
 platform: ruby
 authors:
 - Artur
@@ -136,6 +136,7 @@ files:
 - lib/ruby_reactor/map/element_executor.rb
 - lib/ruby_reactor/map/helpers.rb
 - lib/ruby_reactor/map/result_enumerator.rb
+- lib/ruby_reactor/map/sweeper.rb
 - lib/ruby_reactor/max_retries_exhausted_failure.rb
 - lib/ruby_reactor/middleware.rb
 - lib/ruby_reactor/middleware_runner.rb
@@ -159,6 +160,7 @@ files:
 - lib/ruby_reactor/sidekiq_adapter.rb
 - lib/ruby_reactor/sidekiq_workers/map_collector_worker.rb
 - lib/ruby_reactor/sidekiq_workers/map_element_worker.rb
+- lib/ruby_reactor/sidekiq_workers/sweeper_worker.rb
 - lib/ruby_reactor/sidekiq_workers/worker.rb
 - lib/ruby_reactor/step.rb
 - lib/ruby_reactor/step/compose_step.rb
@@ -168,6 +170,7 @@ files:
 - lib/ruby_reactor/storage/redis_adapter.rb
 - lib/ruby_reactor/storage/redis_locking.rb
 - lib/ruby_reactor/storage/redis_ordered_locking.rb
+- lib/ruby_reactor/sweeper.rb
 - lib/ruby_reactor/template/base.rb
 - lib/ruby_reactor/template/dynamic_source.rb
 - lib/ruby_reactor/template/element.rb