PyPI - brawny - Versions diffs - 0.1.13__py3-none-any.whl → 0.1.22__py3-none-any.whl - Mend

brawny 0.1.13-py3-none-any.whl → 0.1.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

brawny/__init__.py +2 -0
brawny/_context.py +5 -5
brawny/_rpc/__init__.py +36 -12
brawny/_rpc/broadcast.py +14 -13
brawny/_rpc/caller.py +243 -0
brawny/_rpc/client.py +539 -0
brawny/_rpc/clients.py +11 -11
brawny/_rpc/context.py +23 -0
brawny/_rpc/errors.py +465 -31
brawny/_rpc/gas.py +7 -6
brawny/_rpc/pool.py +18 -0
brawny/_rpc/retry.py +266 -0
brawny/_rpc/retry_policy.py +81 -0
brawny/accounts.py +28 -9
brawny/alerts/__init__.py +15 -18
brawny/alerts/abi_resolver.py +212 -36
brawny/alerts/base.py +2 -2
brawny/alerts/contracts.py +77 -10
brawny/alerts/errors.py +30 -3
brawny/alerts/events.py +38 -5
brawny/alerts/health.py +19 -13
brawny/alerts/send.py +513 -55
brawny/api.py +39 -11
brawny/assets/AGENTS.md +325 -0
brawny/async_runtime.py +48 -0
brawny/chain.py +3 -3
brawny/cli/commands/__init__.py +2 -0
brawny/cli/commands/console.py +69 -19
brawny/cli/commands/contract.py +2 -2
brawny/cli/commands/controls.py +121 -0
brawny/cli/commands/health.py +2 -2
brawny/cli/commands/job_dev.py +6 -5
brawny/cli/commands/jobs.py +99 -2
brawny/cli/commands/maintenance.py +13 -29
brawny/cli/commands/migrate.py +1 -0
brawny/cli/commands/run.py +10 -3
brawny/cli/commands/script.py +8 -3
brawny/cli/commands/signer.py +143 -26
brawny/cli/helpers.py +0 -3
brawny/cli_templates.py +25 -349
brawny/config/__init__.py +4 -1
brawny/config/models.py +43 -57
brawny/config/parser.py +268 -57
brawny/config/validation.py +52 -15
brawny/daemon/context.py +4 -2
brawny/daemon/core.py +185 -63
brawny/daemon/loops.py +166 -98
brawny/daemon/supervisor.py +261 -0
brawny/db/__init__.py +14 -26
brawny/db/base.py +248 -151
brawny/db/global_cache.py +11 -1
brawny/db/migrate.py +175 -28
brawny/db/migrations/001_init.sql +4 -3
brawny/db/migrations/010_add_nonce_gap_index.sql +1 -1
brawny/db/migrations/011_add_job_logs.sql +1 -2
brawny/db/migrations/012_add_claimed_by.sql +2 -2
brawny/db/migrations/013_attempt_unique.sql +10 -0
brawny/db/migrations/014_add_lease_expires_at.sql +5 -0
brawny/db/migrations/015_add_signer_alias.sql +14 -0
brawny/db/migrations/016_runtime_controls_and_quarantine.sql +32 -0
brawny/db/migrations/017_add_job_drain.sql +6 -0
brawny/db/migrations/018_add_nonce_reset_audit.sql +20 -0
brawny/db/migrations/019_add_job_cooldowns.sql +8 -0
brawny/db/migrations/020_attempt_unique_initial.sql +7 -0
brawny/db/ops/__init__.py +3 -25
brawny/db/ops/logs.py +1 -2
brawny/db/queries.py +47 -91
brawny/db/serialized.py +65 -0
brawny/db/sqlite/__init__.py +1001 -0
brawny/db/sqlite/connection.py +231 -0
brawny/db/sqlite/execute.py +116 -0
brawny/db/sqlite/mappers.py +190 -0
brawny/db/sqlite/repos/attempts.py +372 -0
brawny/db/sqlite/repos/block_state.py +102 -0
brawny/db/sqlite/repos/cache.py +104 -0
brawny/db/sqlite/repos/intents.py +1021 -0
brawny/db/sqlite/repos/jobs.py +200 -0
brawny/db/sqlite/repos/maintenance.py +182 -0
brawny/db/sqlite/repos/signers_nonces.py +566 -0
brawny/db/sqlite/tx.py +119 -0
brawny/http.py +194 -0
brawny/invariants.py +11 -24
brawny/jobs/base.py +8 -0
brawny/jobs/job_validation.py +2 -1
brawny/keystore.py +83 -7
brawny/lifecycle.py +64 -12
brawny/logging.py +0 -2
brawny/metrics.py +84 -12
brawny/model/contexts.py +111 -9
brawny/model/enums.py +1 -0
brawny/model/errors.py +18 -0
brawny/model/types.py +47 -131
brawny/network_guard.py +133 -0
brawny/networks/__init__.py +5 -5
brawny/networks/config.py +1 -7
brawny/networks/manager.py +14 -11
brawny/runtime_controls.py +74 -0
brawny/scheduler/poller.py +11 -7
brawny/scheduler/reorg.py +95 -39
brawny/scheduler/runner.py +442 -168
brawny/scheduler/shutdown.py +3 -3
brawny/script_tx.py +3 -3
brawny/telegram.py +53 -7
brawny/testing.py +1 -0
brawny/timeout.py +38 -0
brawny/tx/executor.py +922 -308
brawny/tx/intent.py +54 -16
brawny/tx/monitor.py +31 -12
brawny/tx/nonce.py +212 -90
brawny/tx/replacement.py +69 -18
brawny/tx/retry_policy.py +24 -0
brawny/tx/stages/types.py +75 -0
brawny/types.py +18 -0
brawny/utils.py +41 -0
{brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/METADATA +3 -3
brawny-0.1.22.dist-info/RECORD +163 -0
brawny/_rpc/manager.py +0 -982
brawny/_rpc/selector.py +0 -156
brawny/db/base_new.py +0 -165
brawny/db/mappers.py +0 -182
brawny/db/migrations/008_add_transactions.sql +0 -72
brawny/db/ops/attempts.py +0 -108
brawny/db/ops/blocks.py +0 -83
brawny/db/ops/cache.py +0 -93
brawny/db/ops/intents.py +0 -296
brawny/db/ops/jobs.py +0 -110
brawny/db/ops/nonces.py +0 -322
brawny/db/postgres.py +0 -2535
brawny/db/postgres_new.py +0 -196
brawny/db/sqlite.py +0 -2733
brawny/db/sqlite_new.py +0 -191
brawny-0.1.13.dist-info/RECORD +0 -141
{brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/WHEEL +0 -0
{brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/entry_points.txt +0 -0
{brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/top_level.txt +0 -0

brawny/tx/executor.py CHANGED Viewed

@@ -16,6 +16,7 @@ works with already-persisted intents.
 from __future__ import annotations
 import random
+import time
 from dataclasses import dataclass
 from datetime import datetime, timezone, timedelta
 from enum import Enum
@@ -26,8 +27,24 @@ from web3 import Web3
 from brawny.logging import LogEvents, get_logger
 from brawny.tx.utils import normalize_tx_dict
+from brawny.tx.stages.types import (
+    Fail,
+    Ok,
+    Retry,
+    RetryDecision,
+    RunContext,
+    StageName,
+    StageResult,
+    StageOutcome,
+)
+from brawny.tx import retry_policy
 from brawny.metrics import (
-    ATTEMPT_WRITE_FAILURES,
+    EXECUTOR_ATTEMPT_DURATION_SECONDS,
+    EXECUTOR_STAGE_OUTCOME,
+    EXECUTOR_STAGE_STARTED,
+    EXECUTOR_STAGE_TIMEOUTS,
+    CLAIM_RELEASED_PRE_ATTEMPT,
+    CLAIM_RELEASE_SKIPPED,
     SIMULATION_NETWORK_ERRORS,
     SIMULATION_RETRIES,
     SIMULATION_REVERTED,
@@ -41,15 +58,24 @@ from brawny.model.errors import (
     DatabaseError,
     FailureStage,
     FailureType,
+    InvariantViolation,
     SimulationNetworkError,
     SimulationReverted,
 )
 from brawny.model.types import GasParams, TxAttempt, TxIntent
-from brawny._rpc.context import set_job_context as set_rpc_job_context, reset_job_context as reset_rpc_job_context
-from brawny._rpc.errors import RPCError
+from brawny.types import ClaimedIntent
+from brawny._rpc.context import (
+    set_job_context as set_rpc_job_context,
+    reset_job_context as reset_rpc_job_context,
+    set_intent_budget_context as set_rpc_intent_budget_context,
+    reset_intent_budget_context as reset_rpc_intent_budget_context,
+)
+from brawny._rpc.errors import RPCError, RPCRetryableError, RpcErrorKind
 from brawny.tx.nonce import NonceManager
 from brawny.tx.intent import transition_intent
-from brawny.utils import ensure_utc, utc_now
+from brawny.timeout import Deadline
+from brawny.utils import ensure_utc, utc_now, serialize_error
+from brawny.config.validation import canonicalize_endpoints
 if TYPE_CHECKING:
     from brawny.config import Config
@@ -57,13 +83,79 @@ if TYPE_CHECKING:
     from brawny.jobs.base import Job
     from brawny.keystore import Keystore
     from brawny.lifecycle import LifecycleDispatcher
-    from brawny._rpc.manager import RPCManager
+    from brawny._rpc.clients import ReadClient
 logger = get_logger(__name__)
 # Simulation retry settings
 MAX_SIMULATION_RETRIES = 2  # Total attempts = 3 (1 initial + 2 retries)
+STAGE_BUILD_TX = "build_tx"
+STAGE_SIGN = "sign"
+STAGE_CREATE_ATTEMPT = "create_attempt"
+STAGE_BROADCAST = "broadcast"
+STAGE_TIMEOUT_SECONDS: dict[StageName, float] = {
+    StageName.GAP_CHECK: 5.0,
+    StageName.RESERVE_NONCE: 5.0,
+    StageName.BUILD_TX: 10.0,
+    StageName.SIMULATE: 20.0,
+    StageName.SIGN: 2.0,
+    StageName.BROADCAST: 20.0,
+    StageName.MONITOR_TICK: 10.0,
+    StageName.FINALIZE: 5.0,
+}
+def maybe_release_pre_attempt_claim(
+    db: Database,
+    claimed: ClaimedIntent,
+    exc: Exception,
+    stage: str,
+) -> bool:
+    """Release claim if no attempt exists and token matches.
+    Returns True if claim was released, False otherwise.
+    Never raises - swallows DB errors to avoid masking original exception.
+    """
+    try:
+        released = db.release_claim_if_token_and_no_attempts(
+            intent_id=claimed.intent_id,
+            claim_token=claimed.claim_token,
+        )
+        # If monotonic is already captured at claim time, prefer it for elapsed_ms.
+        claimed_at = ensure_utc(claimed.claimed_at)
+        elapsed_ms = (utc_now() - claimed_at).total_seconds() * 1000
+        metrics = get_metrics()
+        if released:
+            logger.exception(
+                "claim.released_pre_attempt",
+                intent_id=str(claimed.intent_id),
+                stage=stage,
+                exc_type=type(exc).__name__,
+                elapsed_ms=elapsed_ms,
+            )
+            metrics.counter(CLAIM_RELEASED_PRE_ATTEMPT).inc(stage=stage)
+        else:
+            logger.debug(
+                "claim.release_skipped",
+                intent_id=str(claimed.intent_id),
+                stage=stage,
+            )
+            metrics.counter(CLAIM_RELEASE_SKIPPED).inc(stage=stage)
+        return released
+    except Exception as db_error:
+        logger.error(
+            "claim.release_db_error",
+            intent_id=str(claimed.intent_id),
+            error=str(db_error),
+        )
+        return False
 class ExecutionResult(str, Enum):
     """Result of transaction execution."""
@@ -106,7 +198,7 @@ class TxExecutor:
     def __init__(
         self,
         db: Database,
-        rpc: RPCManager,
+        rpc: ReadClient,
         keystore: Keystore,
         config: Config,
         lifecycle: "LifecycleDispatcher | None" = None,
@@ -131,17 +223,55 @@ class TxExecutor:
         self._jobs = jobs
         self._chain_id = config.chain_id
+    def _error_from_data(self, data: dict[str, object]) -> Exception | None:
+        error_obj = data.get("exception")
+        if isinstance(error_obj, Exception):
+            return error_obj
+        error = data.get("error")
+        if isinstance(error, dict):
+            message = error.get("error") or str(error)
+            return RuntimeError(message)
+        if error is None:
+            return None
+        return RuntimeError(str(error))
     @property
     def nonce_manager(self) -> NonceManager:
         """Get the nonce manager."""
         return self._nonce_manager
+    def process_claimed_intent(
+        self,
+        claimed: ClaimedIntent,
+        *,
+        intent: TxIntent | None = None,
+    ) -> ExecutionOutcome:
+        """Process a claimed intent with safe pre-attempt claim release."""
+        stage = "unknown"
+        try:
+            if intent is None:
+                intent = self._db.get_intent(claimed.intent_id)
+            if intent is None:
+                raise RuntimeError(f"Claimed intent not found: {claimed.intent_id}")
+            stage = STAGE_BUILD_TX
+            return self.execute(intent)
+        except Exception as exc:
+            maybe_release_pre_attempt_claim(
+                db=self._db,
+                claimed=claimed,
+                exc=exc,
+                stage=stage,
+            )
+            raise
     # =========================================================================
     # Nonce Gap Detection (Pre-flight check)
     # =========================================================================
     def _check_nonce_gap(
-        self, signer_address: str
+        self,
+        signer_address: str,
+        deadline: Deadline | None = None,
     ) -> tuple[bool, int | None, float | None]:
         """Check if signer is blocked by a nonce gap.
@@ -151,7 +281,11 @@ class TxExecutor:
         """
         from brawny.model.enums import NonceStatus
-        chain_pending = self._rpc.get_transaction_count(signer_address, "pending")
+        chain_pending = self._rpc.get_transaction_count(
+            signer_address,
+            "pending",
+            deadline=deadline,
+        )
         # Get all active reservations (RESERVED or IN_FLIGHT)
         active = self._nonce_manager.get_active_reservations(signer_address)
@@ -229,6 +363,7 @@ class TxExecutor:
         signer_address: str | None = None,
         to_address: str | None = None,
         job: "Job | None" = None,
+        deadline: Deadline | None = None,
     ) -> GasParams:
         """Estimate gas for a transaction intent.
@@ -265,9 +400,16 @@ class TxExecutor:
                 if intent.data:
                     tx_params["data"] = intent.data
-                estimated = self._rpc.estimate_gas(tx_params)
+                estimated = self._rpc.estimate_gas(tx_params, deadline=deadline)
                 gas_limit = int(estimated * self._config.gas_limit_multiplier)
             except Exception as e:
+                if isinstance(e, RPCError) and e.code == RpcErrorKind.EXECUTION_REVERTED.value:
+                    logger.warning(
+                        "gas.estimate_reverted",
+                        intent_id=str(intent.intent_id),
+                        error=str(e),
+                    )
+                    raise
                 logger.warning(
                     "gas.estimate_failed",
                     intent_id=str(intent.intent_id),
@@ -289,7 +431,7 @@ class TxExecutor:
             max_fee = int(intent.max_fee_per_gas)
         else:
             # Compute from quote (sync cache only)
-            quote = self._rpc.gas_quote_sync()
+            quote = self._rpc.gas_quote_sync(deadline=deadline)
             if quote is None:
                 # No cached quote - raise retriable error (don't guess)
@@ -435,6 +577,13 @@ class TxExecutor:
         finally:
             reset_rpc_job_context(rpc_ctx_token)
+    def _resolve_deadline(self, intent: TxIntent) -> Deadline:
+        """Resolve overall deadline for an intent."""
+        if intent.deadline_ts:
+            remaining = (intent.deadline_ts - datetime.now(timezone.utc)).total_seconds()
+            return Deadline.from_seconds(remaining)
+        return Deadline.from_seconds(self._config.default_deadline_seconds)
     def _execute_with_context(
         self,
         intent: TxIntent,
@@ -476,417 +625,879 @@ class TxExecutor:
                 address=signer_address,
             )
-        # Ensure to_address is checksummed
-        to_address = Web3.to_checksum_address(intent.to_address)
+        # Set RPC budget key after signer resolution (uses canonical signer)
+        budget_key = f"{self._chain_id}:{signer_address.lower()}:{intent.intent_id}"
+        budget_token = set_rpc_intent_budget_context(budget_key)
+        try:
+            # Ensure to_address is checksummed
+            to_address = Web3.to_checksum_address(intent.to_address)
+            job = self._jobs.get(intent.job_id) if self._jobs else None
-        # 1. Validate deadline
-        if intent.deadline_ts:
-            if datetime.now(timezone.utc) > intent.deadline_ts:
+            deadline = self._resolve_deadline(intent)
+            ctx = RunContext(
+                intent=intent,
+                chain_id=self._chain_id,
+                signer_address=signer_address,
+                to_address=to_address,
+                job=job,
+                logger=logger,
+                config=self._config,
+                rpc=self._rpc,
+                db=self._db,
+                nonce_manager=self._nonce_manager,
+                keystore=self._keystore,
+                lifecycle=self._lifecycle,
+                deadline=deadline,
+            )
+            stage = StageName.GAP_CHECK
+            data: dict[str, object] = {}
+            while True:
+                result = self._run_stage(stage, ctx, data)
+                outcome = self._apply_result(stage, result, ctx, _retry_intent)
+                if outcome.done:
+                    return outcome.final
+                stage = outcome.next_stage
+                data = outcome.data or {}
+        finally:
+            reset_rpc_intent_budget_context(budget_token)
+    def _run_stage(self, stage: StageName, ctx: RunContext, data: dict[str, object]) -> StageResult:
+        metrics = get_metrics()
+        stage_start = time.monotonic()
+        metrics.counter(EXECUTOR_STAGE_STARTED).inc(stage=stage.value)
+        if ctx.deadline.expired():
+            err = TimeoutError("Intent deadline expired")
+            ctx.logger.warning(
+                "executor.deadline_expired",
+                intent_id=str(ctx.intent.intent_id),
+                job_id=ctx.intent.job_id,
+                stage=stage.value,
+                deadline_remaining_seconds=ctx.deadline.remaining(),
+            )
+            return Fail(
+                "deadline_expired",
+                True,
+                {
+                    "execution_result": ExecutionResult.DEADLINE_EXPIRED,
+                    "error": serialize_error(err),
+                    "exception": err,
+                },
+            )
+        stage_budget = STAGE_TIMEOUT_SECONDS.get(stage, 10.0)
+        stage_deadline = ctx.deadline.child(stage_budget)
+        if stage_deadline.expired():
+            metrics.counter(EXECUTOR_STAGE_TIMEOUTS).inc(stage=stage.value)
+            err = TimeoutError(f"Stage timeout: {stage.value}")
+            ctx.logger.warning(
+                "executor.stage_timeout",
+                intent_id=str(ctx.intent.intent_id),
+                job_id=ctx.intent.job_id,
+                stage=stage.value,
+                deadline_remaining_seconds=ctx.deadline.remaining(),
+            )
+            return Retry(
+                stage,
+                RetryDecision(None, reason="stage_timeout"),
+                {"error": serialize_error(err), "exception": err},
+            )
+        if stage == StageName.GAP_CHECK:
+            result = self._stage_gap_check(ctx, stage_deadline)
+        elif stage == StageName.RESERVE_NONCE:
+            result = self._stage_reserve_nonce(ctx, stage_deadline)
+        elif stage == StageName.BUILD_TX:
+            result = self._stage_build_tx(ctx, data, stage_deadline)
+        elif stage == StageName.SIMULATE:
+            result = self._stage_simulate(ctx, data, stage_deadline)
+        elif stage == StageName.SIGN:
+            result = self._stage_sign(ctx, data, stage_deadline)
+        elif stage == StageName.BROADCAST:
+            result = self._stage_broadcast(ctx, data, stage_deadline)
+        elif stage == StageName.MONITOR_TICK:
+            result = self._stage_monitor_tick(ctx, data, stage_deadline)
+        elif stage == StageName.FINALIZE:
+            result = Ok(StageName.FINALIZE, data)
+        else:
+            err = RuntimeError(f"Unknown stage {stage}")
+            result = Fail("unknown_stage", True, {"error": serialize_error(err), "exception": err})
+        outcome_label = "ok"
+        if isinstance(result, Retry):
+            outcome_label = "retry"
+        elif isinstance(result, Fail):
+            outcome_label = "fail"
+        metrics.counter(EXECUTOR_STAGE_OUTCOME).inc(
+            stage=stage.value,
+            outcome=outcome_label,
+        )
+        metrics.histogram(EXECUTOR_ATTEMPT_DURATION_SECONDS).observe(
+            time.monotonic() - stage_start,
+            stage=stage.value,
+        )
+        return result
+    def _apply_result(
+        self,
+        stage: StageName,
+        result: StageResult,
+        ctx: RunContext,
+        _retry_intent: Callable[[str], None],
+    ) -> StageOutcome:
+        intent = ctx.intent
+        if isinstance(result, Ok):
+            if stage == StageName.SIGN:
+                return self._apply_sign_result(ctx, result.data, _retry_intent)
+            if stage == StageName.BROADCAST:
+                return self._apply_broadcast_result(ctx, result.data, _retry_intent)
+            if stage == StageName.MONITOR_TICK:
+                return StageOutcome(done=True, final=result.data.get("execution_outcome"))
+            if stage == StageName.FINALIZE:
+                return StageOutcome(done=True, final=result.data.get("execution_outcome"))
+            return StageOutcome(done=False, next_stage=result.next_stage, data=result.data)
+        if isinstance(result, Retry):
+            error = result.data.get("error")
+            error_obj = self._error_from_data(result.data)
+            nonce = result.data.get("nonce")
+            if nonce is not None and result.data.get("release_nonce"):
+                ctx.nonce_manager.release(ctx.signer_address, int(nonce))
+            failure_type = result.data.get("failure_type")
+            failure_stage = result.data.get("failure_stage")
+            if error_obj is not None and failure_type and ctx.lifecycle:
+                ctx.lifecycle.on_failed(
+                    intent, None, error_obj,
+                    failure_type=failure_type,
+                    failure_stage=failure_stage or FailureStage.PRE_BROADCAST,
+                    cleanup_trigger=False,
+                )
+            _retry_intent(result.retry.reason or "retry")
+            return StageOutcome(
+                done=True,
+                final=ExecutionOutcome(
+                    result=ExecutionResult.FAILED,
+                    intent=intent,
+                    attempt=None,
+                    error=error_obj,
+                ),
+            )
+        if isinstance(result, Fail):
+            error = result.data.get("error")
+            error_obj = self._error_from_data(result.data)
+            execution_result = result.data.get("execution_result")
+            nonce = result.data.get("nonce")
+            if nonce is not None and result.data.get("release_nonce"):
+                ctx.nonce_manager.release(ctx.signer_address, int(nonce))
+            if result.data.get("simulation_error") is not None:
+                simulation_error = result.data["simulation_error"]
+                job = ctx.job
+                return StageOutcome(done=True, final=self._handle_simulation_failure(job, intent, simulation_error))
+            if execution_result == ExecutionResult.DEADLINE_EXPIRED:
                 transition_intent(
-                    self._db,
+                    ctx.db,
                     intent.intent_id,
                     IntentStatus.ABANDONED,
                     "deadline_expired",
-                    chain_id=self._chain_id,
+                    chain_id=ctx.chain_id,
                 )
-                if self._lifecycle:
-                    self._lifecycle.on_failed(
+                if ctx.lifecycle:
+                    ctx.lifecycle.on_failed(
                         intent,
                         None,
-                        TimeoutError("Intent deadline expired"),
+                        error_obj or TimeoutError("Intent deadline expired"),
                         failure_type=FailureType.DEADLINE_EXPIRED,
                         failure_stage=FailureStage.PRE_BROADCAST,
                     )
-                return ExecutionOutcome(
-                    result=ExecutionResult.DEADLINE_EXPIRED,
+                return StageOutcome(
+                    done=True,
+                    final=ExecutionOutcome(
+                        result=ExecutionResult.DEADLINE_EXPIRED,
+                        intent=intent,
+                        attempt=None,
+                        error=error_obj or TimeoutError("Intent deadline expired"),
+                    ),
+                )
+            if execution_result == ExecutionResult.BLOCKED:
+                return StageOutcome(
+                    done=True,
+                    final=ExecutionOutcome(
+                        result=ExecutionResult.BLOCKED,
+                        intent=intent,
+                        attempt=None,
+                        error=error_obj,
+                    ),
+                )
+            failure_type = result.data.get("failure_type")
+            failure_stage = result.data.get("failure_stage")
+            if error_obj is not None and failure_type and ctx.lifecycle:
+                ctx.lifecycle.on_failed(
+                    intent, None, error_obj,
+                    failure_type=failure_type,
+                    failure_stage=failure_stage or FailureStage.PRE_BROADCAST,
+                    cleanup_trigger=False,
+                )
+            if not result.fatal:
+                _retry_intent(result.reason)
+            return StageOutcome(
+                done=True,
+                final=ExecutionOutcome(
+                    result=ExecutionResult.FAILED,
                     intent=intent,
                     attempt=None,
-                    error=TimeoutError("Intent deadline expired"),
-                )
+                    error=error_obj,
+                ),
+            )
+        return StageOutcome(
+            done=True,
+            final=ExecutionOutcome(
+                result=ExecutionResult.FAILED,
+                intent=intent,
+                attempt=None,
+                error=RuntimeError("Unknown stage result"),
+            ),
+        )
+    def _stage_gap_check(self, ctx: RunContext, deadline: Deadline) -> StageResult:
+        intent = ctx.intent
+        if intent.deadline_ts and datetime.now(timezone.utc) > intent.deadline_ts:
+            err = TimeoutError("Intent deadline expired")
+            return Fail(
+                "deadline_expired",
+                True,
+                {
+                    "execution_result": ExecutionResult.DEADLINE_EXPIRED,
+                    "error": serialize_error(err),
+                    "exception": err,
+                },
+            )
-        # 1.5 Pre-flight gap check - don't reserve if signer is blocked
         try:
-            is_blocked, oldest_nonce, oldest_age = self._check_nonce_gap(signer_address)
+            is_blocked, oldest_nonce, oldest_age = self._check_nonce_gap(
+                ctx.signer_address,
+                deadline=deadline,
+            )
         except Exception as e:
-            # Fail-safe: if we cannot validate nonce-gap safety, do NOT proceed
-            logger.warning(
+            ctx.logger.warning(
                 "nonce.gap_check_failed",
                 intent_id=str(intent.intent_id),
-                signer=signer_address,
+                signer=ctx.signer_address,
                 error=str(e)[:100],
             )
-            _retry_intent("nonce_gap_check_failed")
-            return ExecutionOutcome(
-                result=ExecutionResult.FAILED,
-                intent=intent,
-                attempt=None,
-                error=e,
+            decision = retry_policy.decide(StageName.GAP_CHECK.value, e)
+            return Retry(
+                StageName.GAP_CHECK,
+                decision or RetryDecision(None, reason="nonce_gap_check_failed"),
+                {"error": serialize_error(e), "exception": e},
             )
         if is_blocked:
-            gap_duration = self._get_gap_duration(signer_address)
-            logger.warning(
+            err = RuntimeError(
+                f"Nonce gap detected for {ctx.signer_address}, waiting for TxReplacer"
+            )
+            gap_duration = self._get_gap_duration(ctx.signer_address)
+            ctx.logger.warning(
                 "nonce.gap_blocked",
                 intent_id=str(intent.intent_id),
                 job_id=intent.job_id,
-                signer=signer_address,
+                signer=ctx.signer_address,
                 blocked_duration_seconds=gap_duration,
                 oldest_in_flight_nonce=oldest_nonce,
                 oldest_in_flight_age_seconds=oldest_age,
             )
-            # Check config for unsafe reset mode
-            if self._config.allow_unsafe_nonce_reset:
-                logger.warning("nonce.unsafe_reset_triggered", signer=signer_address)
-                self._nonce_manager.reconcile(signer_address)
-                self._clear_gap_tracking(signer_address)
-                # Fall through to normal execution
+            if ctx.config.allow_unsafe_nonce_reset:
+                ctx.logger.warning("nonce.unsafe_reset_triggered", signer=ctx.signer_address)
+                ctx.nonce_manager.force_reset(
+                    ctx.signer_address,
+                    source="executor",
+                    reason=f"allow_unsafe_nonce_reset=True, gap_duration={gap_duration}s",
+                )
             else:
-                # Alert if blocked too long
-                if gap_duration > self._config.nonce_gap_alert_seconds:
-                    self._alert_nonce_gap(signer_address, gap_duration, oldest_nonce, oldest_age)
-                # Return BLOCKED - don't reserve, don't retry immediately
-                # Let TxReplacer handle recovery via fee bumping
-                return ExecutionOutcome(
-                    result=ExecutionResult.BLOCKED,
-                    intent=intent,
-                    attempt=None,
-                    error=RuntimeError(
-                        f"Nonce gap detected for {signer_address}, waiting for TxReplacer"
-                    ),
+                if gap_duration > ctx.config.nonce_gap_alert_seconds:
+                    self._alert_nonce_gap(ctx.signer_address, gap_duration, oldest_nonce, oldest_age)
+                return Fail(
+                    "nonce_gap_blocked",
+                    True,
+                    {
+                        "execution_result": ExecutionResult.BLOCKED,
+                        "error": serialize_error(err),
+                        "exception": err,
+                    },
                 )
-        # 2. Reserve nonce
+        return Ok(StageName.RESERVE_NONCE, {})
+    def _stage_reserve_nonce(self, ctx: RunContext, deadline: Deadline) -> StageResult:
         try:
-            nonce = self._nonce_manager.reserve_nonce(
-                signer_address,
-                intent_id=intent.intent_id,
+            nonce = ctx.nonce_manager.reserve_nonce(
+                ctx.signer_address,
+                intent_id=ctx.intent.intent_id,
+                deadline=deadline,
             )
         except Exception as e:
-            logger.error(
+            ctx.logger.error(
                 "nonce.reservation_failed",
-                intent_id=str(intent.intent_id),
-                signer=signer_address,
+                intent_id=str(ctx.intent.intent_id),
+                signer=ctx.signer_address,
                 error=str(e),
             )
-            if self._lifecycle:
-                self._lifecycle.on_failed(
-                    intent, None, e,
-                    failure_type=FailureType.NONCE_FAILED,
-                    failure_stage=FailureStage.PRE_BROADCAST,
-                    cleanup_trigger=False,
-                )
-            _retry_intent("nonce_reservation_failed")
-            return ExecutionOutcome(
-                result=ExecutionResult.FAILED,
-                intent=intent,
-                attempt=None,
-                error=e,
+            decision = retry_policy.decide(StageName.RESERVE_NONCE.value, e)
+            return Retry(
+                StageName.RESERVE_NONCE,
+                decision or RetryDecision(None, reason="nonce_reservation_failed"),
+                {
+                    "error": serialize_error(e),
+                    "exception": e,
+                    "failure_type": FailureType.NONCE_FAILED,
+                    "failure_stage": FailureStage.PRE_BROADCAST,
+                },
             )
-        # NOTE: Gap detection moved to pre-flight check (step 1.5)
-        # The pre-flight check returns BLOCKED if there's a nonce gap,
-        # allowing TxReplacer to handle recovery instead of auto-abandoning.
+        return Ok(StageName.BUILD_TX, {"nonce": nonce})
-        # 3. Estimate gas
-        job = self._jobs.get(intent.job_id) if self._jobs else None
+    def _stage_build_tx(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
+        nonce = int(data["nonce"])
         try:
-            gas_params = self.estimate_gas(intent, signer_address, to_address, job=job)
+            gas_params = self.estimate_gas(
+                ctx.intent,
+                ctx.signer_address,
+                ctx.to_address,
+                job=ctx.job,
+                deadline=deadline,
+            )
         except Exception as e:
+            if isinstance(e, RPCRetryableError):
+                decision = retry_policy.decide(StageName.BUILD_TX.value, e)
+                return Retry(
+                    StageName.BUILD_TX,
+                    decision or RetryDecision(None, reason="rpc_timeout"),
+                    {
+                        "error": serialize_error(e),
+                        "exception": e,
+                        "nonce": nonce,
+                        "release_nonce": True,
+                    },
+                )
             if "RetriableExecutionError" in type(e).__name__ or "No gas quote" in str(e):
-                logger.warning(
+                ctx.logger.warning(
                     "gas.no_quote_available",
-                    intent_id=str(intent.intent_id),
-                    job_id=intent.job_id,
+                    intent_id=str(ctx.intent.intent_id),
+                    job_id=ctx.intent.job_id,
                     error=str(e),
                 )
-                # Release nonce before retry
-                self._nonce_manager.release(signer_address, nonce)
-                _retry_intent("no_gas_quote")
-                return ExecutionOutcome(
-                    result=ExecutionResult.FAILED,
-                    intent=intent,
-                    attempt=None,
-                    error=e,
+                decision = retry_policy.decide(StageName.BUILD_TX.value, e)
+                return Retry(
+                    StageName.BUILD_TX,
+                    decision or RetryDecision(None, reason="no_gas_quote"),
+                    {
+                        "error": serialize_error(e),
+                        "exception": e,
+                        "nonce": nonce,
+                        "release_nonce": True,
+                    },
                 )
-            raise
+            return Fail(
+                "estimate_gas_failed",
+                True,
+                {
+                    "error": serialize_error(e),
+                    "exception": e,
+                    "nonce": nonce,
+                    "release_nonce": True,
+                },
+            )
-        # 4. Build tx dict for simulation
-        tx_dict = self._build_tx_dict(intent, nonce, gas_params, to_address)
-        tx_dict["from"] = signer_address  # Required for simulation
+        tx_dict = self._build_tx_dict(ctx.intent, nonce, gas_params, ctx.to_address)
+        tx_dict["from"] = ctx.signer_address
+        return Ok(StageName.SIMULATE, {"nonce": nonce, "gas_params": gas_params, "tx_dict": tx_dict})
-        # 5. Simulation step (runs unless job opts out)
-        if job and not getattr(job, "disable_simulation", False):
-            try:
-                self._simulate_with_retry(job, intent, tx_dict)
-            except (SimulationReverted, SimulationNetworkError) as e:
-                # Release nonce on simulation failure
-                self._nonce_manager.release(signer_address, nonce)
-                return self._handle_simulation_failure(job, intent, e)
+    def _stage_simulate(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
+        if ctx.job is None or getattr(ctx.job, "disable_simulation", False):
+            return Ok(StageName.SIGN, data)
-        # 6. Sign transaction (only if simulation passed)
         try:
-            signed_tx = self._keystore.sign_transaction(
-                tx_dict,
-                signer_address,
+            self._simulate_with_retry(ctx.job, ctx.intent, data["tx_dict"], deadline)
+        except (SimulationReverted, SimulationNetworkError) as e:
+            return Fail(
+                "simulation_failed",
+                True,
+                {"simulation_error": e, "nonce": data.get("nonce"), "release_nonce": True},
             )
+        return Ok(StageName.SIGN, data)
+    def _stage_sign(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
+        nonce = int(data["nonce"])
+        gas_params = data["gas_params"]
+        tx_dict = data["tx_dict"]
+        try:
+            signed_tx = ctx.keystore.sign_transaction(tx_dict, ctx.signer_address)
         except Exception as e:
-            logger.error(
+            ctx.logger.error(
                 "tx.sign_failed",
-                intent_id=str(intent.intent_id),
-                job_id=intent.job_id,
+                intent_id=str(ctx.intent.intent_id),
+                job_id=ctx.intent.job_id,
                 error=str(e),
             )
-            # Release nonce on sign failure
-            self._nonce_manager.release(signer_address, nonce)
-            if self._lifecycle:
-                self._lifecycle.on_failed(
-                    intent, None, e,
-                    failure_type=FailureType.SIGN_FAILED,
-                    failure_stage=FailureStage.PRE_BROADCAST,
-                    cleanup_trigger=False,
-                )
-            _retry_intent("sign_failed")
-            return ExecutionOutcome(
-                result=ExecutionResult.FAILED,
-                intent=intent,
-                attempt=None,
-                error=e,
+            decision = retry_policy.decide(StageName.SIGN.value, e)
+            return Retry(
+                StageName.SIGN,
+                decision or RetryDecision(None, reason="sign_failed"),
+                {
+                    "error": serialize_error(e),
+                    "exception": e,
+                    "nonce": nonce,
+                    "release_nonce": True,
+                    "failure_type": FailureType.SIGN_FAILED,
+                    "failure_stage": FailureStage.PRE_BROADCAST,
+                },
             )
-        # Warn if priority fee is suspiciously low (< 0.1 gwei)
+        tx_hash = self._compute_signed_tx_hash(signed_tx)
         if gas_params.max_priority_fee_per_gas < 100_000_000:
-            logger.warning(
+            ctx.logger.warning(
                 "gas.priority_fee_very_low",
-                intent_id=str(intent.intent_id),
-                job_id=intent.job_id,
+                intent_id=str(ctx.intent.intent_id),
+                job_id=ctx.intent.job_id,
                 priority_fee_wei=gas_params.max_priority_fee_per_gas,
                 priority_fee_gwei=gas_params.max_priority_fee_per_gas / 1e9,
                 hint="Transaction may not be included - validators receive almost no tip",
             )
-        logger.info(
+        ctx.logger.info(
             LogEvents.TX_SIGN,
-            intent_id=str(intent.intent_id),
-            job_id=intent.job_id,
-            signer=signer_address,
+            intent_id=str(ctx.intent.intent_id),
+            job_id=ctx.intent.job_id,
+            signer=ctx.signer_address,
             nonce=nonce,
             gas_limit=gas_params.gas_limit,
             max_fee=gas_params.max_fee_per_gas,
             priority_fee=gas_params.max_priority_fee_per_gas,
         )
-        # 7. Broadcast with RPC group routing
-        attempt: TxAttempt | None = None
-        attempt_id = uuid4()
-        tx_hash: str | None = None
-        endpoint_url: str | None = None
-        try:
-            # Update intent status to sending
-            if not transition_intent(
-                self._db,
-                intent.intent_id,
-                IntentStatus.SENDING,
-                "broadcast_start",
-                chain_id=self._chain_id,
-            ):
-                raise RuntimeError("Intent status not claimable for sending")
-            # Check for existing binding (for retry isolation)
-            binding = self._db.get_broadcast_binding(intent.intent_id)
-            job_id = job.job_id if job else None
-            if binding is not None:
-                # RETRY: Use persisted endpoints (NEVER current config)
-                group_name, endpoints = binding
-                is_first_broadcast = False
-                # Advisory log if job's config changed
-                if job:
-                    from brawny.config.routing import resolve_job_groups
-                    _, job_broadcast_group = resolve_job_groups(self._config, job)
-                    if job_broadcast_group != group_name:
-                        logger.warning(
-                            "broadcast_group_mismatch",
-                            intent_id=str(intent.intent_id),
-                            job_id=job_id,
-                            persisted_group=group_name,
-                            current_job_group=job_broadcast_group,
-                        )
-            else:
-                # FIRST BROADCAST: Resolve group + endpoints from config (no silent fallback)
-                if job is None:
-                    from brawny.config.routing import resolve_default_group
-                    group_name = resolve_default_group(self._config)
-                else:
-                    from brawny.config.routing import resolve_job_groups
-                    _, group_name = resolve_job_groups(self._config, job)
-                endpoints = self._config.rpc_groups[group_name].endpoints
-                is_first_broadcast = True
+        data.update({"signed_tx": signed_tx, "tx_hash": tx_hash, "nonce": nonce, "gas_params": gas_params})
+        return Ok(StageName.BROADCAST, data)
-            # Broadcast transaction using RPC groups
-            from brawny._rpc.broadcast import broadcast_transaction
-            from brawny._rpc.errors import RPCGroupUnavailableError
+    def _stage_broadcast(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
+        signed_tx = data["signed_tx"]
+        endpoints = data["endpoints"]
+        group_name = data["broadcast_group"]
+        job_id = ctx.job.job_id if ctx.job else None
+        resume_pending_send = bool(data.get("resume_pending_send"))
+        if resume_pending_send and data.get("tx_hash"):
             try:
-                tx_hash, endpoint_url = broadcast_transaction(
-                    raw_tx=signed_tx.raw_transaction,
-                    endpoints=endpoints,
-                    group_name=group_name,
-                    config=self._config,
+                exists = self._probe_pending_send(str(data["tx_hash"]), deadline)
+            except Exception as e:
+                ctx.logger.warning(
+                    "broadcast.probe_failed",
+                    intent_id=str(ctx.intent.intent_id),
                     job_id=job_id,
+                    error=str(e)[:200],
                 )
-            except RPCGroupUnavailableError as e:
-                logger.error(
-                    "broadcast_unavailable",
-                    intent_id=str(intent.intent_id),
-                    job_id=job_id,
-                    broadcast_group=group_name,
-                    endpoints=endpoints,
-                    error=str(e.last_error) if e.last_error else None,
+                return Retry(
+                    StageName.BROADCAST,
+                    RetryDecision(None, reason="probe_unknown"),
+                    {
+                        "error": serialize_error(e),
+                        "exception": e,
+                        "nonce": data.get("nonce"),
+                        "attempt_id": data.get("attempt_id"),
+                    },
                 )
-                raise
+            if exists:
+                return Ok(StageName.FINALIZE, {**data, "already_known": True, "endpoint_url": None})
-            # Create attempt record (+ binding if first broadcast)
-            current_block = self._rpc.get_block_number()
-            attempt = self._db.create_attempt(
-                attempt_id=attempt_id,
-                intent_id=intent.intent_id,
-                nonce=nonce,
-                gas_params_json=gas_params.to_json(),
-                status=AttemptStatus.BROADCAST.value,
-                tx_hash=tx_hash,
-                broadcast_group=group_name,
-                endpoint_url=endpoint_url,
-                binding=(group_name, endpoints) if is_first_broadcast else None,
+        nonce = int(data["nonce"])
+        try:
+            ctx.db.require_bound_and_attempt(ctx.intent.intent_id, nonce, endpoints)
+        except InvariantViolation as exc:
+            ctx.logger.error(
+                "broadcast.invariant_violation",
+                intent_id=str(ctx.intent.intent_id),
+                job_id=job_id,
+                error=str(exc)[:200],
+            )
+            transition_intent(
+                ctx.db,
+                ctx.intent.intent_id,
+                IntentStatus.FAILED,
+                "missing_binding_or_attempt",
+                chain_id=ctx.chain_id,
+            )
+            return Fail(
+                "missing_binding_or_attempt",
+                True,
+                {
+                    "error": serialize_error(exc),
+                    "exception": exc,
+                    "failure_type": FailureType.UNKNOWN,
+                    "failure_stage": FailureStage.PRE_BROADCAST,
+                },
             )
-            # Update attempt with broadcast block and time
-            self._db.update_attempt_status(
-                attempt_id,
-                AttemptStatus.BROADCAST.value,
-                broadcast_block=current_block,
-                broadcast_at=datetime.now(timezone.utc),
+        from brawny._rpc.broadcast import broadcast_transaction
+        from brawny._rpc.errors import RPCGroupUnavailableError
+        try:
+            tx_hash, endpoint_url = broadcast_transaction(
+                raw_tx=signed_tx.raw_transaction,
+                endpoints=endpoints,
+                group_name=group_name,
+                config=ctx.config,
+                job_id=job_id,
+                deadline=deadline,
+            )
+        except RPCGroupUnavailableError as e:
+            ctx.logger.error(
+                "broadcast_unavailable",
+                intent_id=str(ctx.intent.intent_id),
+                job_id=job_id,
+                broadcast_group=group_name,
+                endpoints=endpoints,
+                error=str(e.last_error) if e.last_error else None,
+            )
+            return Fail(
+                "broadcast_failed",
+                False,
+                {
+                    "error": serialize_error(e),
+                    "exception": e,
+                    "nonce": data.get("nonce"),
+                    "attempt_id": data.get("attempt_id"),
+                },
+            )
+        except (RPCError, DatabaseError, OSError, ValueError, RuntimeError) as e:
+            ctx.logger.error(
+                "tx.broadcast_failed",
+                intent_id=str(ctx.intent.intent_id),
+                job_id=job_id,
+                attempt_id=str(data.get("attempt_id")) if data.get("attempt_id") else None,
+                error=str(e),
+            )
+            return Fail(
+                "broadcast_failed",
+                False,
+                {
+                    "error": serialize_error(e),
+                    "exception": e,
+                    "nonce": data.get("nonce"),
+                    "attempt_id": data.get("attempt_id"),
+                },
             )
-            # Mark nonce as in-flight
-            self._nonce_manager.mark_in_flight(signer_address, nonce, intent.intent_id)
+        return Ok(StageName.FINALIZE, {**data, "endpoint_url": endpoint_url, "tx_hash_rpc": tx_hash})
-            # Update intent to pending
-            if not transition_intent(
-                self._db,
+    def _stage_monitor_tick(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
+        """Pass-through stage: hand ``data`` to FINALIZE unchanged.
+
+        Neither ``ctx`` nor ``deadline`` is consulted here.
+        """
+        next_stage = StageName.FINALIZE
+        return Ok(next_stage, data)
+    def _apply_sign_result(
+        self,
+        ctx: RunContext,
+        data: dict[str, object],
+        _retry_intent: Callable[[str], None],
+    ) -> StageOutcome:
+        intent = ctx.intent
+        nonce = int(data["nonce"])
+        gas_params = data["gas_params"]
+        tx_hash = data["tx_hash"]
+        if not transition_intent(
+            ctx.db,
+            intent.intent_id,
+            IntentStatus.SENDING,
+            "broadcast_start",
+            chain_id=ctx.chain_id,
+        ):
+            _retry_intent("broadcast_start_failed")
+            return StageOutcome(
+                done=True,
+                final=ExecutionOutcome(
+                    result=ExecutionResult.FAILED,
+                    intent=intent,
+                    attempt=None,
+                    error=RuntimeError("Intent status not claimable for sending"),
+                ),
+            )
+        attempt = self._find_attempt_by_hash(intent.intent_id, tx_hash)
+        attempt_preexisting = attempt is not None
+        if attempt and attempt.status in (
+            AttemptStatus.BROADCAST.value,
+            AttemptStatus.PENDING.value,
+            AttemptStatus.CONFIRMED.value,
+        ):
+            ctx.nonce_manager.mark_in_flight(ctx.signer_address, nonce, intent.intent_id)
+            transition_intent(
+                ctx.db,
                 intent.intent_id,
                 IntentStatus.PENDING,
                 "broadcast_complete",
-                chain_id=self._chain_id,
-            ):
-                raise RuntimeError("Intent status not in sending state")
-            logger.info(
-                LogEvents.TX_BROADCAST,
-                intent_id=str(intent.intent_id),
-                job_id=intent.job_id,
-                attempt_id=str(attempt_id),
-                tx_hash=tx_hash,
-                signer=signer_address,
-                nonce=nonce,
-                broadcast_group=group_name,
-                endpoint_url=endpoint_url[:50] if endpoint_url else None,
+                chain_id=ctx.chain_id,
             )
-            metrics = get_metrics()
-            metrics.counter(TX_BROADCAST).inc(
-                chain_id=self._chain_id,
-                job_id=intent.job_id,
+            return StageOutcome(
+                done=True,
+                final=ExecutionOutcome(
+                    result=ExecutionResult.PENDING,
+                    intent=intent,
+                    attempt=attempt,
+                    tx_hash=attempt.tx_hash,
+                ),
             )
-            # Refresh attempt
-            attempt = self._db.get_attempt(attempt_id)
-            if self._lifecycle and attempt is not None:
-                self._lifecycle.on_submitted(intent, attempt)
+        group_name, endpoints = self._resolve_broadcast_binding(ctx)
-        except (RPCError, DatabaseError, OSError, ValueError, RuntimeError) as e:
-            # Expected broadcast-related errors - handle gracefully
-            logger.error(
-                "tx.broadcast_failed",
-                intent_id=str(intent.intent_id),
-                job_id=intent.job_id,
-                attempt_id=str(attempt_id),
-                error=str(e),
-            )
+        attempt_id = attempt.attempt_id if attempt else uuid4()
+        if attempt is None:
+            try:
+                attempt = ctx.db.create_attempt_once(
+                    attempt_id=attempt_id,
+                    intent_id=intent.intent_id,
+                    nonce=nonce,
+                    gas_params_json=gas_params.to_json(),
+                    status=AttemptStatus.PENDING_SEND.value,
+                    tx_hash=tx_hash,
+                    broadcast_group=group_name,
+                    endpoint_url=None,
+                    binding=(group_name, endpoints),
+                    actor=intent.job_id,
+                    reason="initial_attempt",
+                    source="executor",
+                )
+            except InvariantViolation as e:
+                ctx.logger.error(
+                    "broadcast.binding_failed",
+                    intent_id=str(intent.intent_id),
+                    job_id=intent.job_id,
+                    error=str(e)[:200],
+                )
+                transition_intent(
+                    ctx.db,
+                    intent.intent_id,
+                    IntentStatus.FAILED,
+                    "binding_failed",
+                    chain_id=ctx.chain_id,
+                )
+                return StageOutcome(
+                    done=True,
+                    final=ExecutionOutcome(
+                        result=ExecutionResult.FAILED,
+                        intent=intent,
+                        attempt=None,
+                        error=e,
+                    ),
+                )
+            except Exception as e:
+                ctx.logger.error(
+                    "attempt.create_failed",
+                    intent_id=str(intent.intent_id),
+                    job_id=intent.job_id,
+                    error=str(e)[:200],
+                )
+                _retry_intent("attempt_create_failed")
+                return StageOutcome(
+                    done=True,
+                    final=ExecutionOutcome(
+                        result=ExecutionResult.FAILED,
+                        intent=intent,
+                        attempt=None,
+                        error=e,
+                    ),
+                )
+        data.update(
+            {
+                "attempt_id": attempt_id,
+                "broadcast_group": group_name,
+                "endpoints": endpoints,
+                "resume_pending_send": attempt_preexisting and attempt.status in (
+                    AttemptStatus.PENDING_SEND.value,
+                    AttemptStatus.SIGNED.value,
+                ),
+            }
+        )
+        return StageOutcome(done=False, next_stage=StageName.BROADCAST, data=data)
+    def _apply_broadcast_result(
+        self,
+        ctx: RunContext,
+        data: dict[str, object],
+        _retry_intent: Callable[[str], None],
+    ) -> StageOutcome:
+        intent = ctx.intent
+        attempt_id = data.get("attempt_id")
+        nonce = int(data["nonce"])
+        if data.get("error") is not None:
+            error = data["error"]
+            error_obj = self._error_from_data(data)
             metrics = get_metrics()
             metrics.counter(TX_FAILED).inc(
-                chain_id=self._chain_id,
+                chain_id=ctx.chain_id,
                 job_id=intent.job_id,
                 reason="broadcast_failed",
             )
-            # Create failed attempt record if we haven't yet
-            if attempt is None:
-                try:
-                    attempt = self._db.create_attempt(
-                        attempt_id=attempt_id,
-                        intent_id=intent.intent_id,
-                        nonce=nonce,
-                        gas_params_json=gas_params.to_json(),
-                        status=AttemptStatus.FAILED.value,
-                    )
-                except Exception as attempt_error:
-                    # Never silently swallow - log with full context for reconstruction
-                    # exc_info=True captures attempt_error traceback (current exception)
-                    logger.error(
-                        "attempt.write_failed",
-                        intent_id=str(intent.intent_id),
-                        nonce=nonce,
-                        tx_hash=tx_hash if "tx_hash" in dir() else None,
-                        original_error=str(e),
-                        attempt_error=str(attempt_error),
-                        attempt_error_type=type(attempt_error).__name__,
-                        exc_info=True,
-                    )
-                    metrics.counter(ATTEMPT_WRITE_FAILURES).inc(stage="broadcast_failure")
-                    # Continue with cleanup - attempt is None but we have logs
-            if attempt is not None:
-                self._db.update_attempt_status(
+            if attempt_id is not None:
+                ctx.db.update_attempt_status(
                     attempt_id,
                     AttemptStatus.FAILED.value,
                     error_code="broadcast_failed",
-                    error_detail=str(e)[:500],
+                    error_detail=str(error_obj or error)[:500],
                 )
-            # Release nonce on broadcast failure
-            self._nonce_manager.release(signer_address, nonce)
-            if self._lifecycle:
-                self._lifecycle.on_failed(
-                    intent, attempt, e,
+            ctx.nonce_manager.release(ctx.signer_address, nonce)
+            if ctx.lifecycle:
+                ctx.lifecycle.on_failed(
+                    intent, None, error_obj or RuntimeError(str(error)),
                     failure_type=FailureType.BROADCAST_FAILED,
                     failure_stage=FailureStage.BROADCAST,
                     cleanup_trigger=False,
                 )
             _retry_intent("broadcast_failed")
+            return StageOutcome(
+                done=True,
+                final=ExecutionOutcome(
+                    result=ExecutionResult.FAILED,
+                    intent=intent,
+                    attempt=None,
+                    error=error_obj or RuntimeError(str(error)),
+                ),
+            )
-            return ExecutionOutcome(
-                result=ExecutionResult.FAILED,
-                intent=intent,
-                attempt=attempt,
-                error=e,
+        tx_hash = data.get("tx_hash_rpc") or data.get("tx_hash")
+        endpoint_url = data.get("endpoint_url")
+        if attempt_id is not None:
+            broadcast_deadline = ctx.deadline.child(STAGE_TIMEOUT_SECONDS[StageName.BROADCAST])
+            current_block = ctx.rpc.get_block_number(deadline=broadcast_deadline)
+            ctx.db.update_attempt_status(
+                attempt_id,
+                AttemptStatus.BROADCAST.value,
+                tx_hash=str(tx_hash) if tx_hash else None,
+                broadcast_block=current_block,
+                broadcast_at=datetime.now(timezone.utc),
+                endpoint_url=endpoint_url,
             )
-        return ExecutionOutcome(
-            result=ExecutionResult.PENDING,
-            intent=intent,
-            attempt=attempt,
+        ctx.nonce_manager.mark_in_flight(ctx.signer_address, nonce, intent.intent_id)
+        if not transition_intent(
+            ctx.db,
+            intent.intent_id,
+            IntentStatus.PENDING,
+            "broadcast_complete",
+            chain_id=ctx.chain_id,
+        ):
+            return StageOutcome(
+                done=True,
+                final=ExecutionOutcome(
+                    result=ExecutionResult.FAILED,
+                    intent=intent,
+                    attempt=None,
+                    error=RuntimeError("Intent status not in sending state"),
+                ),
+            )
+        ctx.logger.info(
+            LogEvents.TX_BROADCAST,
+            intent_id=str(intent.intent_id),
+            job_id=intent.job_id,
+            attempt_id=str(attempt_id) if attempt_id else None,
             tx_hash=tx_hash,
+            signer=ctx.signer_address,
+            nonce=nonce,
+            broadcast_group=data.get("broadcast_group"),
+            endpoint_url=str(endpoint_url)[:50] if endpoint_url else None,
+        )
+        metrics = get_metrics()
+        metrics.counter(TX_BROADCAST).inc(
+            chain_id=ctx.chain_id,
+            job_id=intent.job_id,
         )
+        attempt = ctx.db.get_attempt(attempt_id) if attempt_id else None
+        if ctx.lifecycle and attempt is not None:
+            ctx.lifecycle.on_submitted(intent, attempt)
+        return StageOutcome(
+            done=True,
+            final=ExecutionOutcome(
+                result=ExecutionResult.PENDING,
+                intent=intent,
+                attempt=attempt,
+                tx_hash=tx_hash,
+            ),
+        )
+    def _resolve_broadcast_binding(
+        self,
+        ctx: RunContext,
+    ) -> tuple[str, list[str]]:
+        """Choose the RPC group and endpoints used to broadcast this intent.
+
+        A binding already stored for the intent wins over the current
+        configuration. If the job's configured broadcast group differs from
+        the stored one, a ``broadcast_group_mismatch`` warning is logged and
+        the stored binding is still returned. Without a stored binding, the
+        group is resolved from the job's routing, or from the default group
+        when the intent has no job.
+
+        Returns:
+            ``(group_name, endpoints)``, with the endpoints passed through
+            ``canonicalize_endpoints``.
+        """
+        persisted = ctx.db.get_broadcast_binding(ctx.intent.intent_id)
+        job_id = ctx.job.job_id if ctx.job else None
+        if persisted is None:
+            # No stored binding: resolve the group from current configuration.
+            if ctx.job is None:
+                from brawny.config.routing import resolve_default_group
+                group_name = resolve_default_group(ctx.config)
+            else:
+                from brawny.config.routing import resolve_job_groups
+                _, group_name = resolve_job_groups(ctx.config, ctx.job)
+            configured_endpoints = ctx.config.rpc_groups[group_name].endpoints
+            return group_name, canonicalize_endpoints(configured_endpoints)
+        group_name, endpoints = persisted
+        if ctx.job:
+            from brawny.config.routing import resolve_job_groups
+            _, configured_group = resolve_job_groups(ctx.config, ctx.job)
+            if configured_group != group_name:
+                # Advisory only: the stored binding is still the one used.
+                ctx.logger.warning(
+                    "broadcast_group_mismatch",
+                    intent_id=str(ctx.intent.intent_id),
+                    job_id=job_id,
+                    persisted_group=group_name,
+                    current_job_group=configured_group,
+                )
+        return group_name, canonicalize_endpoints(endpoints)
+    def _find_attempt_by_hash(self, intent_id: UUID, tx_hash: str) -> TxAttempt | None:
+        """Return the first attempt of ``intent_id`` whose hash equals ``tx_hash``.
+
+        Hashes are compared case-insensitively and attempts without a hash
+        are skipped. Returns ``None`` when nothing matches.
+        """
+        candidates = self._db.get_attempts_for_intent(intent_id)
+        return next(
+            (
+                candidate
+                for candidate in candidates
+                if candidate.tx_hash and candidate.tx_hash.lower() == tx_hash.lower()
+            ),
+            None,
+        )
+    def _compute_signed_tx_hash(self, signed_tx: object) -> str:
+        """Return the hash of ``signed_tx`` as a hex string with one ``0x`` prefix.
+
+        Uses ``signed_tx.hash`` when the object has one; otherwise hashes
+        ``signed_tx.raw_transaction`` with ``Web3.keccak``. A ``hash`` value
+        that has no ``hex()`` method is returned through ``str()`` unchanged.
+
+        Args:
+            signed_tx: Signed-transaction object exposing ``hash`` and/or
+                ``raw_transaction``.
+
+        Returns:
+            The transaction hash as a string.
+        """
+        if hasattr(signed_tx, "hash"):
+            tx_hash = signed_tx.hash
+            if not hasattr(tx_hash, "hex"):
+                return str(tx_hash)
+        else:
+            tx_hash = Web3.keccak(signed_tx.raw_transaction)
+        digest = tx_hash.hex()
+        # Some bytes-like types (e.g. HexBytes releases before 1.0) already
+        # include "0x" in hex(); prefixing again would produce "0x0x...",
+        # which never matches a stored or on-chain hash.
+        return digest if digest.startswith("0x") else f"0x{digest}"
+    def _probe_pending_send(self, tx_hash: str, deadline: Deadline) -> bool:
+        """Report whether the RPC client can already see ``tx_hash``.
+
+        Checks for a receipt first and only falls back to a transaction
+        lookup when no receipt comes back. Both calls receive the same
+        ``deadline``.
+        """
+        if self._rpc.get_transaction_receipt(tx_hash, deadline=deadline):
+            return True
+        return self._rpc.get_transaction_by_hash(tx_hash, deadline=deadline) is not None
     def _build_tx_dict(
         self,
         intent: TxIntent,
@@ -930,6 +1541,7 @@ class TxExecutor:
         job: "Job",
         intent: TxIntent,
         tx: dict,
+        deadline: Deadline,
     ) -> str:
         """Simulate transaction with retry on network errors.
@@ -952,8 +1564,10 @@ class TxExecutor:
         for attempt in range(MAX_SIMULATION_RETRIES + 1):
             try:
+                if deadline.expired():
+                    raise SimulationNetworkError("Simulation deadline exhausted")
                 # Run simulation (uses job RPC if specified)
-                output = self._rpc.simulate_transaction(tx, rpc_url=rpc_url)
+                output = self._rpc.simulate_transaction(tx, rpc_url=rpc_url, deadline=deadline)
                 # Run job's custom validation (if defined)
                 if hasattr(job, "validate_simulation"):

brawny 0.1.13__py3-none-any.whl → 0.1.22__py3-none-any.whl

brawny 0.1.13py3-none-any.whl → 0.1.22py3-none-any.whl