RubyGems - chrono_forge - Versions diffs - 0.9.1 → 0.10.0 - Mend

chrono_forge 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

data/lib/chrono_forge/executor/methods/branch.rb ADDED Viewed

@@ -0,0 +1,185 @@
+module ChronoForge
+  module Executor
+    module Methods
+      module Branch
+        # Opens a named branch — a durable fan-out step logged as "branch$<name>". spawn/spawn_each
+        # calls inside the block eagerly create + enqueue child workflows; the branch SEALS (its log is
+        # marked completed) when the block closes. Raises ArgumentError without a block or when nested.
+        # Returns `name` without waiting (branches are concurrent; the join is a separate merge_branches / automerge).
+        def branch(name, automerge: false)
+          raise ArgumentError, "branch requires a block" unless block_given?
+          raise ArgumentError, "branch blocks cannot be nested" if @current_branch
+          validate_step_name_segment!(name)
+          step_name = "branch$#{name}"
+          log = find_or_create_execution_log!(step_name) { |l| l.started_at = Time.current }
+          # The sealed branch log may be a readonly, id-less cache stand-in; fetch
+          # the real id so the registry/merge can scope children to it.
+          log_id = log.id || ExecutionLog.where(workflow: @workflow, step_name: step_name).pick(:id)
+          (@open_branches ||= {})[name.to_s] = {automerge: automerge, log_id: log_id}
+          # ---- THE single most important correctness/performance property ----
+          # A SEALED branch skips its block ENTIRELY. The expensive source
+          # enumeration in spawn_each never re-runs after sealing. Do not move
+          # dispatch out from behind this guard.
+          unless log.completed?
+            @current_branch = {name: name.to_s, log: log}
+            begin
+              yield
+            ensure
+              @current_branch = nil
+            end
+            log.update!(state: :completed, completed_at: Time.current)
+          end
+          # automerge joins the branch inline, the moment its block closes (eager
+          # dispatch + immediate await). Deferred/concurrent joins use an explicit
+          # merge_branches instead. Runs on every pass so replay re-checks via the
+          # merge$<name> log's own idempotency; the inline merge removes the branch
+          # from @open_branches on completion, so the completion gate won't see it.
+          merge_branches(name) if automerge
+          name
+        end
+        # Dispatch a single child into the current branch. Only valid inside a branch block; returns `name`.
+        def spawn(name, workflow_class, **kwargs)
+          cb = current_branch!
+          validate_step_name_segment!(name)
+          child_key = "#{@workflow.key}$#{cb[:name]}$#{name}" # <parent key>$<branch name>$<child name>
+          dispatch_children(cb, [[child_key, workflow_class, kwargs]]) # kwargs are stored on the child row and passed to its job
+          name
+        end
+        # Dispatch one child per item of `source`, streamed. AR relations use
+        # keyset iteration (find_in_batches start:) for constant memory and are
+        # keyed by record id; any other enumerable uses an offset cursor and is
+        # keyed `name_{index}` by position. Either way the source must re-enumerate
+        # identically across replays. For AR sources that additionally means STABLE
+        # MEMBERSHIP: dispatch resumes from the last primary key on crash-recovery,
+        # so a row entering the relation below the cursor after it passed (e.g. a
+        # mutating `where(state:)` scope) never gets a child — point spawn_each at a
+        # set fixed for the branch's lifetime. The block returns [WorkflowClass,
+        # kwargs] (or a bare class). `of:` is the batch size per dispatch + cursor write; returns `name`.
+        def spawn_each(name, source, of: 1000)
+          cb = current_branch!
+          validate_step_name_segment!(name)
+          cursor = cb[:log].metadata&.dig("cursors", name.to_s) || {}
+          n = cursor["n"] || 0 # resume position for non-AR sources (0 when no cursor is stored)
+          if source.is_a?(ActiveRecord::Relation)
+            # spawn_each iterates by primary key (find_in_batches) so the stream
+            # re-enumerates identically across replays. An explicit .order would
+            # make iteration non-deterministic, so reject it up front with a clear
+            # error rather than letting find_in_batches raise deep in the loop.
+            if source.order_values.present?
+              raise NotExecutableError,
+                "spawn_each iterates #{source.model_name} by primary key; remove the " \
+                "explicit .order(...) (or default-scope order) from the source relation"
+            end
+            source.find_in_batches(batch_size: of, start: cursor["pk"]) do |records|
+              entries = records.map do |record|
+                klass, kw = normalize_spawn(yield(record))
+                # Stable per-record key: an inclusive find_in_batches re-yield of the
+                # boundary record on crash-resume produces the SAME key, so insert_all
+                # dedups it (idempotent). Sequential indexing would duplicate it.
+                ck = "#{@workflow.key}$#{cb[:name]}$#{name}_#{record.id}"
+                [ck, klass, kw]
+              end
+              dispatch_children(cb, entries)
+              advance_cursor!(cb, name, pk: records.last.id) # cursor is persisted only after the batch was dispatched
+            end
+          else
+            source.drop(n).each_slice(of) do |slice|
+              entries = slice.map do |item|
+                klass, kw = normalize_spawn(yield(item))
+                ck = "#{@workflow.key}$#{cb[:name]}$#{name}_#{n}"
+                n += 1
+                [ck, klass, kw]
+              end
+              dispatch_children(cb, entries)
+              advance_cursor!(cb, name, n: n) # n now points at the first item not yet dispatched
+            end
+          end
+          name
+        end
+        private
+        def current_branch! # Returns the active branch context; raises NotInBranchError when no branch block is open.
+          @current_branch || raise(NotInBranchError, "spawn/spawn_each may only be called inside a branch block")
+        end
+        # Bulk-create child workflow rows then bulk-enqueue their jobs. `entries` is an array of [child_key, klass, kwargs].
+        # perform_all_later bypasses the class-level perform_later guard, so we
+        # validate the args ourselves before enqueuing.
+        def dispatch_children(cb, entries)
+          return if entries.empty?
+          now = Time.current
+          rows = entries.map do |child_key, klass, kwargs|
+            validate_child_enqueue!(child_key, kwargs)
+            {
+              key: child_key, job_class: klass.to_s,
+              kwargs: kwargs, options: {}, context: {},
+              state: Workflow.states[:idle],
+              parent_execution_log_id: cb[:log].id,
+              created_at: now, updated_at: now
+            }
+          end
+          # On-conflict-ignore makes re-dispatch (crash recovery) idempotent.
+          Workflow.insert_all(rows, unique_by: [:job_class, :key])
+          # Enqueue only children still :idle. On a crash-resume the boundary chunk
+          # is re-dispatched; its rows already exist (insert_all ignored them) and
+          # may already have run — re-enqueuing a completed/running child would only
+          # raise NotExecutableError and dead-letter. Freshly inserted rows are
+          # :idle (we enqueue after inserting, so no worker can have touched them),
+          # so first-time dispatch enqueues the whole batch.
+          keys = entries.map { |child_key, _klass, _kwargs| child_key }
+          idle = Workflow.where(key: keys, state: Workflow.states[:idle]).pluck(:key).to_set
+          jobs = entries.filter_map do |child_key, klass, kwargs|
+            klass.new(child_key, **kwargs) if idle.include?(child_key)
+          end
+          ActiveJob.perform_all_later(jobs) if jobs.any?
+        end
+        # Mirrors the class-level __validate_enqueue! (executor.rb) because
+        # perform_all_later bypasses that guard — the two must stay in sync. Raises ArgumentError on a bad key or reserved kwarg.
+        def validate_child_enqueue!(child_key, kwargs)
+          unless child_key.is_a?(String)
+            raise ArgumentError, "child key must be a String (got #{child_key.inspect})"
+          end
+          reserved = kwargs.keys.map(&:to_sym) & RESERVED_KWARGS # kwarg names (as symbols) that collide with reserved ones
+          if reserved.any?
+            raise ArgumentError, "#{reserved.join(", ")} are reserved ChronoForge keywords"
+          end
+        end
+        # Advance (and persist) a spawn_each cursor on the branch log.
+        # `n` is the running item index; `pk` is the AR keyset position (nil for
+        # plain enumerables). Called by spawn_each after each dispatched batch; nil arguments leave the stored value untouched.
+        def advance_cursor!(cb, spawn_name, n: nil, pk: nil)
+          meta = cb[:log].metadata || {}
+          cursors = meta["cursors"] || {}
+          entry = cursors[spawn_name.to_s] || {}
+          entry["n"] = n unless n.nil?
+          entry["pk"] = pk unless pk.nil?
+          cursors[spawn_name.to_s] = entry
+          meta["cursors"] = cursors
+          cb[:log].update!(metadata: meta)
+        end
+        # Normalize a spawn_each block return ([Klass, kwargs] or a bare Klass) into [klass, kwargs], kwargs defaulting to {}.
+        def normalize_spawn(result)
+          klass, kwargs = Array(result) # Array() wraps a bare class into a one-element array
+          unless klass.is_a?(Module)
+            raise ArgumentError,
+              "spawn_each block must return a workflow class or [class, kwargs] (got #{result.inspect})"
+          end
+          [klass, kwargs || {}]
+        end
+      end
+    end
+  end
+end

data/lib/chrono_forge/executor/methods/durably_execute.rb CHANGED Viewed

@@ -9,19 +9,22 @@ module ChronoForge
         # execution log, ensuring idempotent behavior during workflow replays.
         #
         # @param method [Symbol] The name of the instance method to execute
-        # @param max_attempts [Integer] Maximum retry attempts before failing (default: 3)
+        # @param retry_policy [RetryPolicy, nil] Per-call retry policy. When nil,
+        #   uses the class-level `retry_policy` default, then the step built-in
+        #   (RetryPolicy.step_default: 3 attempts, exponential backoff capped at 30s).
         # @param name [String, nil] Custom name for the execution step. Defaults to method name.
         #   Used to create unique step names for execution logs.
         #
         # @return [nil]
         #
-        # @raise [ExecutionFailedError] When the method fails after max_attempts
+        # @raise [ExecutionFailedError] When the method fails after the policy's max_attempts
         #
         # @example Basic usage
         #   durably_execute :send_welcome_email
         #
-        # @example With custom retry attempts
-        #   durably_execute :critical_payment_processing, max_attempts: 5
+        # @example With a custom retry policy
+        #   durably_execute :critical_payment_processing,
+        #     retry_policy: RetryPolicy.new(max_attempts: 5)
         #
         # @example With custom name for tracking
         #   durably_execute :complex_calculation, name: "phase_1_calculation"
@@ -33,7 +36,7 @@ module ChronoForge
         #     Rails.logger.info "Successfully uploaded file to S3"
         #   end
         #
-        #   durably_execute :upload_to_s3, max_attempts: 5
+        #   durably_execute :upload_to_s3, retry_policy: RetryPolicy.new(max_attempts: 5)
         #
         # == Behavior
         #
@@ -43,9 +46,9 @@ module ChronoForge
         # already completed, it will be skipped.
         #
         # === Retry Logic
-        # - Failed executions are automatically retried with exponential backoff
-        # - Backoff calculation: 2^attempt seconds (capped at 2^5 = 32 seconds)
-        # - After max_attempts, ExecutionFailedError is raised
+        # - Failed executions are retried per the resolved RetryPolicy
+        # - Backoff and attempt cap come from that policy (see RetryPolicy)
+        # - After the policy's max_attempts, ExecutionFailedError is raised
         #
         # === Error Handling
         # - All exceptions except HaltExecutionFlow are caught and handled
@@ -59,7 +62,8 @@ module ChronoForge
         # - Stores error details when failures occur
         # - Enables monitoring and debugging of execution history
         #
-        def durably_execute(method, max_attempts: 3, name: nil)
+        def durably_execute(method, retry_policy: nil, name: nil)
+          policy = step_retry_policy(retry_policy)
           validate_step_name_segment!(name || method)
           step_name = "durably_execute$#{name || method}"
           # Find or create execution log
@@ -97,16 +101,14 @@ module ChronoForge
             self.class::ExecutionTracker.track_error(workflow, e, execution_log: execution_log)
             # Optional retry logic
-            if execution_log.attempts < max_attempts
-              # Reschedule with exponential backoff
-              backoff = (2**[execution_log.attempts, 5].min).seconds
-              self.class
-                .set(wait: backoff)
-                .perform_later(
-                  @workflow.key,
-                  retry_method: method
-                )
+            backoff = policy.retry_backoff(e, attempts: execution_log.attempts) do |policy_key|
+              bump_retry_count!(execution_log, policy_key)
+            end
+            if backoff
+              # Reschedule with the policy's backoff (published after lock release).
+              # The workflow replays on resume and skips completed steps, so the
+              # rescheduled run picks this step up again by its execution log.
+              enqueue_continuation(wait: backoff)
               # Halt current execution
               halt_execution!

data/lib/chrono_forge/executor/methods/durably_repeat.rb CHANGED Viewed

@@ -14,10 +14,12 @@ module ChronoForge
         # @param till [Symbol, Proc] The condition to check for stopping repetition. Should return
         #   true when repetition should stop. Can be a symbol for instance methods or a callable.
         # @param start_at [Time, nil] When to start the periodic task. Defaults to coordination_log.created_at + every
-        # @param max_attempts [Integer] Maximum retry attempts per individual execution (default: 3)
+        # @param retry_policy [RetryPolicy, nil] Per-call retry policy for an individual
+        #   execution. When nil, uses the class-level `retry_policy` default, then the
+        #   step built-in (RetryPolicy.step_default: 3 attempts, backoff capped at 30s).
         # @param timeout [ActiveSupport::Duration] How long after scheduled time an execution is
         #   considered stale and skipped (default: 1.hour). This enables catch-up behavior.
-        # @param on_error [Symbol] How to handle repetition failures after max_attempts. Options:
+        # @param on_error [Symbol] How to handle repetition failures after the policy's max_attempts. Options:
         #   - :continue (default): Log failure and continue with next scheduled execution
         #   - :fail_workflow: Raise ExecutionFailedError to fail the entire workflow
         # @param name [String, nil] Custom name for the periodic task. Defaults to method name.
@@ -60,7 +62,7 @@ module ChronoForge
         #     every: 1.day,
         #     till: :reports_complete?,
         #     start_at: Date.tomorrow.beginning_of_day,
-        #     max_attempts: 5,
+        #     retry_policy: RetryPolicy.new(max_attempts: 5),
         #     timeout: 2.hours,
         #     on_error: :fail_workflow,
         #     name: "daily_reports"
@@ -89,7 +91,7 @@ module ChronoForge
         # - Eventually reaches current/future execution times
         #
         # === Error Handling
-        # - Individual execution failures are retried up to `max_attempts` with exponential backoff
+        # - Individual execution failures are retried per the resolved RetryPolicy
         # - After max attempts, behavior depends on `on_error` parameter:
         #   - `:continue`: Failed execution is logged, next execution is scheduled
         #   - `:fail_workflow`: ExecutionFailedError is raised, failing the entire workflow
@@ -100,7 +102,8 @@ module ChronoForge
         # - Coordination log: `durably_repeat$#{name}` - tracks overall periodic task state
         # - Repetition logs: `durably_repeat$#{name}$#{timestamp}` - tracks individual executions
         #
-        def durably_repeat(method, every:, till:, start_at: nil, max_attempts: 3, timeout: 1.hour, on_error: :continue, name: nil)
+        def durably_repeat(method, every:, till:, start_at: nil, retry_policy: nil, timeout: 1.hour, on_error: :continue, name: nil)
+          policy = step_retry_policy(retry_policy)
           validate_step_name_segment!(name || method)
           step_name = "durably_repeat$#{name || method}"
@@ -145,13 +148,108 @@ module ChronoForge
             coordination_log.created_at + every
           end
-          execute_or_schedule_repetition(method, coordination_log, next_execution_at, every, max_attempts, timeout, on_error)
+          next_execution_at = fast_forward_expired_prefix(coordination_log, next_execution_at, every, timeout)
+          execute_or_schedule_repetition(method, coordination_log, next_execution_at, every, policy, timeout, on_error)
           nil
         end
         private
-        def execute_or_schedule_repetition(method, coordination_log, next_execution_at, every, max_attempts, timeout, on_error)
+        # Catch-up fast-forward. A tick `t` is expired (its work is skipped) iff
+        # `Time.current > t + timeout`, i.e. `t < now - timeout`. Rather than
+        # walking one zero-delay job per expired tick, jump straight to the first
+        # non-expired tick on the same grid (see #advance_to_first_valid_tick).
+        #
+        # Anchoring the arithmetic on `next_execution_at` (already on the canonical
+        # grid: start_at / created_at+every / last_execution_at+every all land on
+        # it, because last_execution_at stores the *scheduled* time, not wall-clock)
+        # keeps the result exactly on the grid — no drift, for fixed AND calendar
+        # intervals.
+        #
+        # Returns `next_execution_at` unchanged when nothing is expired. Otherwise
+        # advances the coordination log's last_execution_at so a replay recomputes
+        # the same first tick, and writes ONE summary ExecutionLog for the whole
+        # skipped prefix (no per-tick timeout rows), then returns the first non-expired tick.
+        def fast_forward_expired_prefix(coordination_log, next_execution_at, every, timeout)
+          cutoff = Time.current - timeout
+          return next_execution_at if next_execution_at >= cutoff
+          first_valid, n = advance_to_first_valid_tick(next_execution_at, every, cutoff)
+          last_skipped = first_valid - every # NOTE(review): with clamping calendar steps (e.g. 1.month) this may differ from the last tick actually stepped over — confirm
+          Rails.logger.info {
+            "ChronoForge:#{self.class}(#{@workflow.key}) durably_repeat fast-forwarded " \
+            "#{n} expired tick(s) to #{first_valid.iso8601}"
+          }
+          # Single summary row for the skipped prefix, on the last skipped grid
+          # tick. This never collides with the first_valid repetition row, but it
+          # CAN reuse a prior cycle's pending repetition log at the same tick
+          # (e.g. a tick that was scheduled-for-later then later fast-forwarded
+          # over). Write the metadata in the update! so the fast_forward summary
+          # fields are present whether the row is newly created or reused.
+          summary_step = "#{coordination_log.step_name}$#{last_skipped.to_i}"
+          summary_log = find_or_create_execution_log!(summary_step) do |log|
+            log.started_at = Time.current
+          end
+          summary_log.update!(
+            state: :failed,
+            error_class: "TimeoutError",
+            error_message: "Fast-forwarded #{n} expired tick(s)",
+            completed_at: Time.current,
+            metadata: (summary_log.metadata || {}).merge(
+              "fast_forwarded" => n,
+              "from" => next_execution_at.iso8601,
+              "to" => last_skipped.iso8601,
+              "scheduled_for" => last_skipped.iso8601,
+              "timeout_at" => (last_skipped + timeout).iso8601,
+              "parent_id" => coordination_log.id
+            )
+          )
+          # Record progress: a replay recomputes naive_next = last + every = first_valid.
+          # Use .iso8601 (second precision) to match the existing last_execution_at
+          # format so resumed pre-existing workflows keep the same on-disk grid.
+          coordination_log.update!(
+            metadata: coordination_log.metadata.merge("last_execution_at" => last_skipped.iso8601) # assumes metadata is non-nil here — TODO confirm
+          )
+          first_valid
+        end
+        # Walk the canonical grid from `from` to the first tick at/after `cutoff`,
+        # returning [first_valid_tick, ticks_skipped].
+        #
+        # The split is at one day, which is exactly where ActiveSupport switches
+        # arithmetic:
+        #
+        # - Sub-day intervals (hours/minutes/seconds) are absolute (seconds-based):
+        #   `from + n*every` is mathematically exact, no DST or clamping. These are
+        #   also the only intervals whose missed-tick count can explode (1.second
+        #   dormant a year ≈ 31M ticks), so we MUST jump in closed form.
+        #
+        # - Day-and-larger intervals go through calendar arithmetic (a "day" across
+        #   DST is 23h/25h; months clamp at end-of-month), so `from + n*every` can
+        #   drift off the grid (Jan 31 + 3.months = Apr 30, but stepping +1.month
+        #   three times lands on Apr 28). Their count over any realistic dormancy is
+        #   small (daily over a decade ≈ 3650), so we step the grid exactly.
+        def advance_to_first_valid_tick(from, every, cutoff)
+          if every < 1.day
+            n = ((cutoff - from) / every.to_f).ceil # smallest n with from + n*every >= cutoff
+            [from + (n * every), n]
+          else
+            tick = from
+            n = 0
+            while tick < cutoff # step one interval at a time so calendar clamping matches the real grid
+              tick += every
+              n += 1
+            end
+            [tick, n]
+          end
+        end
+        def execute_or_schedule_repetition(method, coordination_log, next_execution_at, every, policy, timeout, on_error)
           step_name = "#{coordination_log.step_name}$#{next_execution_at.to_i}"
           # Create execution log for this specific repetition
@@ -175,7 +273,7 @@ module ChronoForge
           # Check if it's time to execute this repetition
           if next_execution_at <= Time.current
-            execute_repetition_now(method, repetition_log, coordination_log, next_execution_at, every, max_attempts, timeout, on_error)
+            execute_repetition_now(method, repetition_log, coordination_log, next_execution_at, every, policy, timeout, on_error)
           else
             schedule_repetition_for_later(repetition_log, next_execution_at)
           end
@@ -185,16 +283,14 @@ module ChronoForge
           # Calculate delay until execution time
           delay = [next_execution_at - Time.current, 0].max.seconds
-          # Schedule the workflow to run at the specified time
-          self.class
-            .set(wait: delay)
-            .perform_later(@workflow.key)
+          # Schedule the workflow to run at the specified time (published after release).
+          enqueue_continuation(wait: delay)
           # Halt current execution until scheduled time
           halt_execution!
         end
-        def execute_repetition_now(method, repetition_log, coordination_log, execution_time, every, max_attempts, timeout, on_error)
+        def execute_repetition_now(method, repetition_log, coordination_log, execution_time, every, policy, timeout, on_error)
           # Check for timeout
           if Time.current > repetition_log.metadata["timeout_at"]
             repetition_log.update!(
@@ -223,13 +319,12 @@ module ChronoForge
           self.class::ExecutionTracker.track_error(@workflow, e, execution_log: repetition_log)
           # Handle retry logic for this specific repetition
-          if repetition_log.attempts < max_attempts
-            # Reschedule this same repetition with exponential backoff
-            backoff = (2**[repetition_log.attempts, 5].min).seconds
-            self.class
-              .set(wait: backoff)
-              .perform_later(@workflow.key)
+          backoff = policy.retry_backoff(e, attempts: repetition_log.attempts) do |policy_key|
+            bump_retry_count!(repetition_log, policy_key)
+          end
+          if backoff
+            # Reschedule this same repetition with the policy's backoff (after release).
+            enqueue_continuation(wait: backoff)
             # Halt current execution
             halt_execution!
@@ -243,7 +338,7 @@ module ChronoForge
             # Handle failure based on on_error setting
             if on_error == :fail_workflow
-              raise ExecutionFailedError, "Periodic task #{method} failed after #{max_attempts} attempts: #{e.message}"
+              raise ExecutionFailedError, "Periodic task #{method} failed after #{repetition_log.attempts} attempts: #{e.message}"
             else
               # Continue with next execution despite this failure
               schedule_next_execution_after_completion(coordination_log, execution_time, every)
@@ -279,10 +374,8 @@ module ChronoForge
           # Calculate delay until next execution
           delay = [next_execution_time - Time.current, 0].max.seconds
-          # Schedule the workflow to run for the next periodic execution
-          self.class
-            .set(wait: delay)
-            .perform_later(@workflow.key)
+          # Schedule the next periodic execution (published after lock release).
+          enqueue_continuation(wait: delay)
           # Halt current execution
           halt_execution!

data/lib/chrono_forge/executor/methods/merge_branches.rb ADDED Viewed

@@ -0,0 +1,83 @@
+module ChronoForge
+  module Executor
+    module Methods
+      module MergeBranches
+        # Join one or more named branches (logged as "merge$<sorted names joined by ','>"). Separate from dispatch so
+        # branches run concurrently. Does one immediate check; if not done, hands off to the
+        # lightweight BranchMergeJob and halts (the heavy parent is not replayed
+        # per poll). Cadence clamps between min/max, scaled by pending. Raises on invalid names, min > max, or an unopened branch.
+        def merge_branches(*names, min_interval: 5.seconds, max_interval: 5.minutes)
+          names.each do |nm|
+            validate_step_name_segment!(nm)  # rejects "$"
+            if nm.to_s.include?(",")
+              raise InvalidStepName,
+                "branch name may not contain ',' (reserved merge separator): #{nm.inspect}"
+            end
+          end
+          # Validate cadence here, in the parent, so a misconfiguration fails at the
+          # call site instead of deep inside the poller — where (pending * FACTOR)
+          # .clamp(min, max) would raise ArgumentError, a non-transient error that
+          # dead-letters BranchMergeJob and orphans the parent.
+          if min_interval > max_interval
+            raise ArgumentError,
+              "min_interval (#{min_interval}) must be <= max_interval (#{max_interval})"
+          end
+          names = names.map(&:to_s).uniq # duplicates collapse to one entry
+          step_name = "merge$#{names.sort.join(",")}" # sorted, so argument order does not change the log key
+          log = find_or_create_execution_log!(step_name) { |l| l.started_at = Time.current }
+          if log.completed?
+            # Already done — remove from registry so the completion gate does not
+            # see these as unmerged, then skip.
+            names.each { |nm| @open_branches&.delete(nm.to_s) }
+            return
+          end
+          branch_log_ids = names.map { |nm| open_branch!(nm)[:log_id] }
+          if branches_done?(branch_log_ids)
+            names.each { |nm| @open_branches&.delete(nm.to_s) }
+            log.update!(state: :completed, completed_at: Time.current)
+            return
+          end
+          enqueue_branch_merge_job(branch_log_ids, min_interval, max_interval)
+          halt_execution!
+        end
+        alias_method :merge_branch, :merge_branches
+        private
+        def open_branch!(name) # Returns the @open_branches entry for `name`; raises UnknownBranchError when none is registered.
+          (@open_branches || {}).fetch(name.to_s) do
+            raise UnknownBranchError, "no open branch named #{name.inspect} (open it with `branch #{name.inspect} do … end` first)"
+          end
+        end
+        def branches_done?(branch_log_ids) # True only when BranchProbe.done? holds for every given branch log id.
+          branch_log_ids.all? { |id| BranchProbe.done?(id) }
+        end
+        def enqueue_branch_merge_job(branch_log_ids, min_interval, max_interval)
+          # Mint a fresh fencing token and stamp it on each branch log under a row
+          # lock — the read-modify-write must not clobber a concurrent poll-state
+          # write from an in-flight poller. Rotating the token orphans any prior
+          # poller chain (its token no longer matches), so only the chain we enqueue
+          # below drives the merge. See BranchMergeJob#superseded?.
+          token = SecureRandom.uuid
+          ExecutionLog.where(id: branch_log_ids).find_each do |log|
+            log.with_lock do
+              log.update!(metadata: (log.metadata || {}).merge("poll_token" => token))
+            end
+          end
+          BranchMergeJob.perform_later(
+            @workflow.key, self.class.to_s, branch_log_ids,
+            min_interval.to_i, max_interval.to_i, token # intervals are coerced with to_i before being passed as job arguments
+          )
+        end
+      end
+    end
+  end
+end

data/lib/chrono_forge/executor/methods/wait.rb CHANGED Viewed

@@ -102,10 +102,8 @@ module ChronoForge
             last_executed_at: Time.current
           )
-          # Reschedule the job
-          self.class
-            .set(wait: duration)
-            .perform_later(@workflow.key)
+          # Record the reschedule; the executor publishes it after lock release.
+          enqueue_continuation(wait: duration)
           # Halt current execution
           halt_execution!