brawny-0.1.13-py3-none-any.whl → brawny-0.1.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. brawny/__init__.py +2 -0
  2. brawny/_context.py +5 -5
  3. brawny/_rpc/__init__.py +36 -12
  4. brawny/_rpc/broadcast.py +14 -13
  5. brawny/_rpc/caller.py +243 -0
  6. brawny/_rpc/client.py +539 -0
  7. brawny/_rpc/clients.py +11 -11
  8. brawny/_rpc/context.py +23 -0
  9. brawny/_rpc/errors.py +465 -31
  10. brawny/_rpc/gas.py +7 -6
  11. brawny/_rpc/pool.py +18 -0
  12. brawny/_rpc/retry.py +266 -0
  13. brawny/_rpc/retry_policy.py +81 -0
  14. brawny/accounts.py +28 -9
  15. brawny/alerts/__init__.py +15 -18
  16. brawny/alerts/abi_resolver.py +212 -36
  17. brawny/alerts/base.py +2 -2
  18. brawny/alerts/contracts.py +77 -10
  19. brawny/alerts/errors.py +30 -3
  20. brawny/alerts/events.py +38 -5
  21. brawny/alerts/health.py +19 -13
  22. brawny/alerts/send.py +513 -55
  23. brawny/api.py +39 -11
  24. brawny/assets/AGENTS.md +325 -0
  25. brawny/async_runtime.py +48 -0
  26. brawny/chain.py +3 -3
  27. brawny/cli/commands/__init__.py +2 -0
  28. brawny/cli/commands/console.py +69 -19
  29. brawny/cli/commands/contract.py +2 -2
  30. brawny/cli/commands/controls.py +121 -0
  31. brawny/cli/commands/health.py +2 -2
  32. brawny/cli/commands/job_dev.py +6 -5
  33. brawny/cli/commands/jobs.py +99 -2
  34. brawny/cli/commands/maintenance.py +13 -29
  35. brawny/cli/commands/migrate.py +1 -0
  36. brawny/cli/commands/run.py +10 -3
  37. brawny/cli/commands/script.py +8 -3
  38. brawny/cli/commands/signer.py +143 -26
  39. brawny/cli/helpers.py +0 -3
  40. brawny/cli_templates.py +25 -349
  41. brawny/config/__init__.py +4 -1
  42. brawny/config/models.py +43 -57
  43. brawny/config/parser.py +268 -57
  44. brawny/config/validation.py +52 -15
  45. brawny/daemon/context.py +4 -2
  46. brawny/daemon/core.py +185 -63
  47. brawny/daemon/loops.py +166 -98
  48. brawny/daemon/supervisor.py +261 -0
  49. brawny/db/__init__.py +14 -26
  50. brawny/db/base.py +248 -151
  51. brawny/db/global_cache.py +11 -1
  52. brawny/db/migrate.py +175 -28
  53. brawny/db/migrations/001_init.sql +4 -3
  54. brawny/db/migrations/010_add_nonce_gap_index.sql +1 -1
  55. brawny/db/migrations/011_add_job_logs.sql +1 -2
  56. brawny/db/migrations/012_add_claimed_by.sql +2 -2
  57. brawny/db/migrations/013_attempt_unique.sql +10 -0
  58. brawny/db/migrations/014_add_lease_expires_at.sql +5 -0
  59. brawny/db/migrations/015_add_signer_alias.sql +14 -0
  60. brawny/db/migrations/016_runtime_controls_and_quarantine.sql +32 -0
  61. brawny/db/migrations/017_add_job_drain.sql +6 -0
  62. brawny/db/migrations/018_add_nonce_reset_audit.sql +20 -0
  63. brawny/db/migrations/019_add_job_cooldowns.sql +8 -0
  64. brawny/db/migrations/020_attempt_unique_initial.sql +7 -0
  65. brawny/db/ops/__init__.py +3 -25
  66. brawny/db/ops/logs.py +1 -2
  67. brawny/db/queries.py +47 -91
  68. brawny/db/serialized.py +65 -0
  69. brawny/db/sqlite/__init__.py +1001 -0
  70. brawny/db/sqlite/connection.py +231 -0
  71. brawny/db/sqlite/execute.py +116 -0
  72. brawny/db/sqlite/mappers.py +190 -0
  73. brawny/db/sqlite/repos/attempts.py +372 -0
  74. brawny/db/sqlite/repos/block_state.py +102 -0
  75. brawny/db/sqlite/repos/cache.py +104 -0
  76. brawny/db/sqlite/repos/intents.py +1021 -0
  77. brawny/db/sqlite/repos/jobs.py +200 -0
  78. brawny/db/sqlite/repos/maintenance.py +182 -0
  79. brawny/db/sqlite/repos/signers_nonces.py +566 -0
  80. brawny/db/sqlite/tx.py +119 -0
  81. brawny/http.py +194 -0
  82. brawny/invariants.py +11 -24
  83. brawny/jobs/base.py +8 -0
  84. brawny/jobs/job_validation.py +2 -1
  85. brawny/keystore.py +83 -7
  86. brawny/lifecycle.py +64 -12
  87. brawny/logging.py +0 -2
  88. brawny/metrics.py +84 -12
  89. brawny/model/contexts.py +111 -9
  90. brawny/model/enums.py +1 -0
  91. brawny/model/errors.py +18 -0
  92. brawny/model/types.py +47 -131
  93. brawny/network_guard.py +133 -0
  94. brawny/networks/__init__.py +5 -5
  95. brawny/networks/config.py +1 -7
  96. brawny/networks/manager.py +14 -11
  97. brawny/runtime_controls.py +74 -0
  98. brawny/scheduler/poller.py +11 -7
  99. brawny/scheduler/reorg.py +95 -39
  100. brawny/scheduler/runner.py +442 -168
  101. brawny/scheduler/shutdown.py +3 -3
  102. brawny/script_tx.py +3 -3
  103. brawny/telegram.py +53 -7
  104. brawny/testing.py +1 -0
  105. brawny/timeout.py +38 -0
  106. brawny/tx/executor.py +922 -308
  107. brawny/tx/intent.py +54 -16
  108. brawny/tx/monitor.py +31 -12
  109. brawny/tx/nonce.py +212 -90
  110. brawny/tx/replacement.py +69 -18
  111. brawny/tx/retry_policy.py +24 -0
  112. brawny/tx/stages/types.py +75 -0
  113. brawny/types.py +18 -0
  114. brawny/utils.py +41 -0
  115. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/METADATA +3 -3
  116. brawny-0.1.22.dist-info/RECORD +163 -0
  117. brawny/_rpc/manager.py +0 -982
  118. brawny/_rpc/selector.py +0 -156
  119. brawny/db/base_new.py +0 -165
  120. brawny/db/mappers.py +0 -182
  121. brawny/db/migrations/008_add_transactions.sql +0 -72
  122. brawny/db/ops/attempts.py +0 -108
  123. brawny/db/ops/blocks.py +0 -83
  124. brawny/db/ops/cache.py +0 -93
  125. brawny/db/ops/intents.py +0 -296
  126. brawny/db/ops/jobs.py +0 -110
  127. brawny/db/ops/nonces.py +0 -322
  128. brawny/db/postgres.py +0 -2535
  129. brawny/db/postgres_new.py +0 -196
  130. brawny/db/sqlite.py +0 -2733
  131. brawny/db/sqlite_new.py +0 -191
  132. brawny-0.1.13.dist-info/RECORD +0 -141
  133. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/WHEEL +0 -0
  134. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/entry_points.txt +0 -0
  135. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/top_level.txt +0 -0
brawny/tx/executor.py CHANGED
@@ -16,6 +16,7 @@ works with already-persisted intents.
16
16
  from __future__ import annotations
17
17
 
18
18
  import random
19
+ import time
19
20
  from dataclasses import dataclass
20
21
  from datetime import datetime, timezone, timedelta
21
22
  from enum import Enum
@@ -26,8 +27,24 @@ from web3 import Web3
26
27
 
27
28
  from brawny.logging import LogEvents, get_logger
28
29
  from brawny.tx.utils import normalize_tx_dict
30
+ from brawny.tx.stages.types import (
31
+ Fail,
32
+ Ok,
33
+ Retry,
34
+ RetryDecision,
35
+ RunContext,
36
+ StageName,
37
+ StageResult,
38
+ StageOutcome,
39
+ )
40
+ from brawny.tx import retry_policy
29
41
  from brawny.metrics import (
30
- ATTEMPT_WRITE_FAILURES,
42
+ EXECUTOR_ATTEMPT_DURATION_SECONDS,
43
+ EXECUTOR_STAGE_OUTCOME,
44
+ EXECUTOR_STAGE_STARTED,
45
+ EXECUTOR_STAGE_TIMEOUTS,
46
+ CLAIM_RELEASED_PRE_ATTEMPT,
47
+ CLAIM_RELEASE_SKIPPED,
31
48
  SIMULATION_NETWORK_ERRORS,
32
49
  SIMULATION_RETRIES,
33
50
  SIMULATION_REVERTED,
@@ -41,15 +58,24 @@ from brawny.model.errors import (
41
58
  DatabaseError,
42
59
  FailureStage,
43
60
  FailureType,
61
+ InvariantViolation,
44
62
  SimulationNetworkError,
45
63
  SimulationReverted,
46
64
  )
47
65
  from brawny.model.types import GasParams, TxAttempt, TxIntent
48
- from brawny._rpc.context import set_job_context as set_rpc_job_context, reset_job_context as reset_rpc_job_context
49
- from brawny._rpc.errors import RPCError
66
+ from brawny.types import ClaimedIntent
67
+ from brawny._rpc.context import (
68
+ set_job_context as set_rpc_job_context,
69
+ reset_job_context as reset_rpc_job_context,
70
+ set_intent_budget_context as set_rpc_intent_budget_context,
71
+ reset_intent_budget_context as reset_rpc_intent_budget_context,
72
+ )
73
+ from brawny._rpc.errors import RPCError, RPCRetryableError, RpcErrorKind
50
74
  from brawny.tx.nonce import NonceManager
51
75
  from brawny.tx.intent import transition_intent
52
- from brawny.utils import ensure_utc, utc_now
76
+ from brawny.timeout import Deadline
77
+ from brawny.utils import ensure_utc, utc_now, serialize_error
78
+ from brawny.config.validation import canonicalize_endpoints
53
79
 
54
80
  if TYPE_CHECKING:
55
81
  from brawny.config import Config
@@ -57,13 +83,79 @@ if TYPE_CHECKING:
57
83
  from brawny.jobs.base import Job
58
84
  from brawny.keystore import Keystore
59
85
  from brawny.lifecycle import LifecycleDispatcher
60
- from brawny._rpc.manager import RPCManager
86
+ from brawny._rpc.clients import ReadClient
61
87
 
62
88
  logger = get_logger(__name__)
63
89
 
64
90
  # Simulation retry settings
65
91
  MAX_SIMULATION_RETRIES = 2 # Total attempts = 3 (1 initial + 2 retries)
66
92
 
93
+ STAGE_BUILD_TX = "build_tx"
94
+ STAGE_SIGN = "sign"
95
+ STAGE_CREATE_ATTEMPT = "create_attempt"
96
+ STAGE_BROADCAST = "broadcast"
97
+
98
+ STAGE_TIMEOUT_SECONDS: dict[StageName, float] = {
99
+ StageName.GAP_CHECK: 5.0,
100
+ StageName.RESERVE_NONCE: 5.0,
101
+ StageName.BUILD_TX: 10.0,
102
+ StageName.SIMULATE: 20.0,
103
+ StageName.SIGN: 2.0,
104
+ StageName.BROADCAST: 20.0,
105
+ StageName.MONITOR_TICK: 10.0,
106
+ StageName.FINALIZE: 5.0,
107
+ }
108
+
109
+
110
+ def maybe_release_pre_attempt_claim(
111
+ db: Database,
112
+ claimed: ClaimedIntent,
113
+ exc: Exception,
114
+ stage: str,
115
+ ) -> bool:
116
+ """Release claim if no attempt exists and token matches.
117
+
118
+ Returns True if claim was released, False otherwise.
119
+ Never raises - swallows DB errors to avoid masking original exception.
120
+ """
121
+ try:
122
+ released = db.release_claim_if_token_and_no_attempts(
123
+ intent_id=claimed.intent_id,
124
+ claim_token=claimed.claim_token,
125
+ )
126
+
127
+ # If monotonic is already captured at claim time, prefer it for elapsed_ms.
128
+ claimed_at = ensure_utc(claimed.claimed_at)
129
+ elapsed_ms = (utc_now() - claimed_at).total_seconds() * 1000
130
+ metrics = get_metrics()
131
+
132
+ if released:
133
+ logger.exception(
134
+ "claim.released_pre_attempt",
135
+ intent_id=str(claimed.intent_id),
136
+ stage=stage,
137
+ exc_type=type(exc).__name__,
138
+ elapsed_ms=elapsed_ms,
139
+ )
140
+ metrics.counter(CLAIM_RELEASED_PRE_ATTEMPT).inc(stage=stage)
141
+ else:
142
+ logger.debug(
143
+ "claim.release_skipped",
144
+ intent_id=str(claimed.intent_id),
145
+ stage=stage,
146
+ )
147
+ metrics.counter(CLAIM_RELEASE_SKIPPED).inc(stage=stage)
148
+
149
+ return released
150
+
151
+ except Exception as db_error:
152
+ logger.error(
153
+ "claim.release_db_error",
154
+ intent_id=str(claimed.intent_id),
155
+ error=str(db_error),
156
+ )
157
+ return False
158
+
67
159
 
68
160
  class ExecutionResult(str, Enum):
69
161
  """Result of transaction execution."""
@@ -106,7 +198,7 @@ class TxExecutor:
106
198
  def __init__(
107
199
  self,
108
200
  db: Database,
109
- rpc: RPCManager,
201
+ rpc: ReadClient,
110
202
  keystore: Keystore,
111
203
  config: Config,
112
204
  lifecycle: "LifecycleDispatcher | None" = None,
@@ -131,17 +223,55 @@ class TxExecutor:
131
223
  self._jobs = jobs
132
224
  self._chain_id = config.chain_id
133
225
 
226
+ def _error_from_data(self, data: dict[str, object]) -> Exception | None:
227
+ error_obj = data.get("exception")
228
+ if isinstance(error_obj, Exception):
229
+ return error_obj
230
+ error = data.get("error")
231
+ if isinstance(error, dict):
232
+ message = error.get("error") or str(error)
233
+ return RuntimeError(message)
234
+ if error is None:
235
+ return None
236
+ return RuntimeError(str(error))
237
+
134
238
  @property
135
239
  def nonce_manager(self) -> NonceManager:
136
240
  """Get the nonce manager."""
137
241
  return self._nonce_manager
138
242
 
243
+ def process_claimed_intent(
244
+ self,
245
+ claimed: ClaimedIntent,
246
+ *,
247
+ intent: TxIntent | None = None,
248
+ ) -> ExecutionOutcome:
249
+ """Process a claimed intent with safe pre-attempt claim release."""
250
+ stage = "unknown"
251
+ try:
252
+ if intent is None:
253
+ intent = self._db.get_intent(claimed.intent_id)
254
+ if intent is None:
255
+ raise RuntimeError(f"Claimed intent not found: {claimed.intent_id}")
256
+ stage = STAGE_BUILD_TX
257
+ return self.execute(intent)
258
+ except Exception as exc:
259
+ maybe_release_pre_attempt_claim(
260
+ db=self._db,
261
+ claimed=claimed,
262
+ exc=exc,
263
+ stage=stage,
264
+ )
265
+ raise
266
+
139
267
  # =========================================================================
140
268
  # Nonce Gap Detection (Pre-flight check)
141
269
  # =========================================================================
142
270
 
143
271
  def _check_nonce_gap(
144
- self, signer_address: str
272
+ self,
273
+ signer_address: str,
274
+ deadline: Deadline | None = None,
145
275
  ) -> tuple[bool, int | None, float | None]:
146
276
  """Check if signer is blocked by a nonce gap.
147
277
 
@@ -151,7 +281,11 @@ class TxExecutor:
151
281
  """
152
282
  from brawny.model.enums import NonceStatus
153
283
 
154
- chain_pending = self._rpc.get_transaction_count(signer_address, "pending")
284
+ chain_pending = self._rpc.get_transaction_count(
285
+ signer_address,
286
+ "pending",
287
+ deadline=deadline,
288
+ )
155
289
 
156
290
  # Get all active reservations (RESERVED or IN_FLIGHT)
157
291
  active = self._nonce_manager.get_active_reservations(signer_address)
@@ -229,6 +363,7 @@ class TxExecutor:
229
363
  signer_address: str | None = None,
230
364
  to_address: str | None = None,
231
365
  job: "Job | None" = None,
366
+ deadline: Deadline | None = None,
232
367
  ) -> GasParams:
233
368
  """Estimate gas for a transaction intent.
234
369
 
@@ -265,9 +400,16 @@ class TxExecutor:
265
400
  if intent.data:
266
401
  tx_params["data"] = intent.data
267
402
 
268
- estimated = self._rpc.estimate_gas(tx_params)
403
+ estimated = self._rpc.estimate_gas(tx_params, deadline=deadline)
269
404
  gas_limit = int(estimated * self._config.gas_limit_multiplier)
270
405
  except Exception as e:
406
+ if isinstance(e, RPCError) and e.code == RpcErrorKind.EXECUTION_REVERTED.value:
407
+ logger.warning(
408
+ "gas.estimate_reverted",
409
+ intent_id=str(intent.intent_id),
410
+ error=str(e),
411
+ )
412
+ raise
271
413
  logger.warning(
272
414
  "gas.estimate_failed",
273
415
  intent_id=str(intent.intent_id),
@@ -289,7 +431,7 @@ class TxExecutor:
289
431
  max_fee = int(intent.max_fee_per_gas)
290
432
  else:
291
433
  # Compute from quote (sync cache only)
292
- quote = self._rpc.gas_quote_sync()
434
+ quote = self._rpc.gas_quote_sync(deadline=deadline)
293
435
 
294
436
  if quote is None:
295
437
  # No cached quote - raise retriable error (don't guess)
@@ -435,6 +577,13 @@ class TxExecutor:
435
577
  finally:
436
578
  reset_rpc_job_context(rpc_ctx_token)
437
579
 
580
+ def _resolve_deadline(self, intent: TxIntent) -> Deadline:
581
+ """Resolve overall deadline for an intent."""
582
+ if intent.deadline_ts:
583
+ remaining = (intent.deadline_ts - datetime.now(timezone.utc)).total_seconds()
584
+ return Deadline.from_seconds(remaining)
585
+ return Deadline.from_seconds(self._config.default_deadline_seconds)
586
+
438
587
  def _execute_with_context(
439
588
  self,
440
589
  intent: TxIntent,
@@ -476,417 +625,879 @@ class TxExecutor:
476
625
  address=signer_address,
477
626
  )
478
627
 
479
- # Ensure to_address is checksummed
480
- to_address = Web3.to_checksum_address(intent.to_address)
628
+ # Set RPC budget key after signer resolution (uses canonical signer)
629
+ budget_key = f"{self._chain_id}:{signer_address.lower()}:{intent.intent_id}"
630
+ budget_token = set_rpc_intent_budget_context(budget_key)
631
+ try:
632
+ # Ensure to_address is checksummed
633
+ to_address = Web3.to_checksum_address(intent.to_address)
634
+ job = self._jobs.get(intent.job_id) if self._jobs else None
481
635
 
482
- # 1. Validate deadline
483
- if intent.deadline_ts:
484
- if datetime.now(timezone.utc) > intent.deadline_ts:
636
+ deadline = self._resolve_deadline(intent)
637
+ ctx = RunContext(
638
+ intent=intent,
639
+ chain_id=self._chain_id,
640
+ signer_address=signer_address,
641
+ to_address=to_address,
642
+ job=job,
643
+ logger=logger,
644
+ config=self._config,
645
+ rpc=self._rpc,
646
+ db=self._db,
647
+ nonce_manager=self._nonce_manager,
648
+ keystore=self._keystore,
649
+ lifecycle=self._lifecycle,
650
+ deadline=deadline,
651
+ )
652
+
653
+ stage = StageName.GAP_CHECK
654
+ data: dict[str, object] = {}
655
+
656
+ while True:
657
+ result = self._run_stage(stage, ctx, data)
658
+ outcome = self._apply_result(stage, result, ctx, _retry_intent)
659
+ if outcome.done:
660
+ return outcome.final
661
+ stage = outcome.next_stage
662
+ data = outcome.data or {}
663
+ finally:
664
+ reset_rpc_intent_budget_context(budget_token)
665
+
666
+ def _run_stage(self, stage: StageName, ctx: RunContext, data: dict[str, object]) -> StageResult:
667
+ metrics = get_metrics()
668
+ stage_start = time.monotonic()
669
+ metrics.counter(EXECUTOR_STAGE_STARTED).inc(stage=stage.value)
670
+
671
+ if ctx.deadline.expired():
672
+ err = TimeoutError("Intent deadline expired")
673
+ ctx.logger.warning(
674
+ "executor.deadline_expired",
675
+ intent_id=str(ctx.intent.intent_id),
676
+ job_id=ctx.intent.job_id,
677
+ stage=stage.value,
678
+ deadline_remaining_seconds=ctx.deadline.remaining(),
679
+ )
680
+ return Fail(
681
+ "deadline_expired",
682
+ True,
683
+ {
684
+ "execution_result": ExecutionResult.DEADLINE_EXPIRED,
685
+ "error": serialize_error(err),
686
+ "exception": err,
687
+ },
688
+ )
689
+
690
+ stage_budget = STAGE_TIMEOUT_SECONDS.get(stage, 10.0)
691
+ stage_deadline = ctx.deadline.child(stage_budget)
692
+ if stage_deadline.expired():
693
+ metrics.counter(EXECUTOR_STAGE_TIMEOUTS).inc(stage=stage.value)
694
+ err = TimeoutError(f"Stage timeout: {stage.value}")
695
+ ctx.logger.warning(
696
+ "executor.stage_timeout",
697
+ intent_id=str(ctx.intent.intent_id),
698
+ job_id=ctx.intent.job_id,
699
+ stage=stage.value,
700
+ deadline_remaining_seconds=ctx.deadline.remaining(),
701
+ )
702
+ return Retry(
703
+ stage,
704
+ RetryDecision(None, reason="stage_timeout"),
705
+ {"error": serialize_error(err), "exception": err},
706
+ )
707
+
708
+ if stage == StageName.GAP_CHECK:
709
+ result = self._stage_gap_check(ctx, stage_deadline)
710
+ elif stage == StageName.RESERVE_NONCE:
711
+ result = self._stage_reserve_nonce(ctx, stage_deadline)
712
+ elif stage == StageName.BUILD_TX:
713
+ result = self._stage_build_tx(ctx, data, stage_deadline)
714
+ elif stage == StageName.SIMULATE:
715
+ result = self._stage_simulate(ctx, data, stage_deadline)
716
+ elif stage == StageName.SIGN:
717
+ result = self._stage_sign(ctx, data, stage_deadline)
718
+ elif stage == StageName.BROADCAST:
719
+ result = self._stage_broadcast(ctx, data, stage_deadline)
720
+ elif stage == StageName.MONITOR_TICK:
721
+ result = self._stage_monitor_tick(ctx, data, stage_deadline)
722
+ elif stage == StageName.FINALIZE:
723
+ result = Ok(StageName.FINALIZE, data)
724
+ else:
725
+ err = RuntimeError(f"Unknown stage {stage}")
726
+ result = Fail("unknown_stage", True, {"error": serialize_error(err), "exception": err})
727
+
728
+ outcome_label = "ok"
729
+ if isinstance(result, Retry):
730
+ outcome_label = "retry"
731
+ elif isinstance(result, Fail):
732
+ outcome_label = "fail"
733
+ metrics.counter(EXECUTOR_STAGE_OUTCOME).inc(
734
+ stage=stage.value,
735
+ outcome=outcome_label,
736
+ )
737
+ metrics.histogram(EXECUTOR_ATTEMPT_DURATION_SECONDS).observe(
738
+ time.monotonic() - stage_start,
739
+ stage=stage.value,
740
+ )
741
+ return result
742
+
743
+ def _apply_result(
744
+ self,
745
+ stage: StageName,
746
+ result: StageResult,
747
+ ctx: RunContext,
748
+ _retry_intent: Callable[[str], None],
749
+ ) -> StageOutcome:
750
+ intent = ctx.intent
751
+
752
+ if isinstance(result, Ok):
753
+ if stage == StageName.SIGN:
754
+ return self._apply_sign_result(ctx, result.data, _retry_intent)
755
+ if stage == StageName.BROADCAST:
756
+ return self._apply_broadcast_result(ctx, result.data, _retry_intent)
757
+ if stage == StageName.MONITOR_TICK:
758
+ return StageOutcome(done=True, final=result.data.get("execution_outcome"))
759
+ if stage == StageName.FINALIZE:
760
+ return StageOutcome(done=True, final=result.data.get("execution_outcome"))
761
+ return StageOutcome(done=False, next_stage=result.next_stage, data=result.data)
762
+
763
+ if isinstance(result, Retry):
764
+ error = result.data.get("error")
765
+ error_obj = self._error_from_data(result.data)
766
+ nonce = result.data.get("nonce")
767
+ if nonce is not None and result.data.get("release_nonce"):
768
+ ctx.nonce_manager.release(ctx.signer_address, int(nonce))
769
+
770
+ failure_type = result.data.get("failure_type")
771
+ failure_stage = result.data.get("failure_stage")
772
+ if error_obj is not None and failure_type and ctx.lifecycle:
773
+ ctx.lifecycle.on_failed(
774
+ intent, None, error_obj,
775
+ failure_type=failure_type,
776
+ failure_stage=failure_stage or FailureStage.PRE_BROADCAST,
777
+ cleanup_trigger=False,
778
+ )
779
+
780
+ _retry_intent(result.retry.reason or "retry")
781
+ return StageOutcome(
782
+ done=True,
783
+ final=ExecutionOutcome(
784
+ result=ExecutionResult.FAILED,
785
+ intent=intent,
786
+ attempt=None,
787
+ error=error_obj,
788
+ ),
789
+ )
790
+
791
+ if isinstance(result, Fail):
792
+ error = result.data.get("error")
793
+ error_obj = self._error_from_data(result.data)
794
+ execution_result = result.data.get("execution_result")
795
+
796
+ nonce = result.data.get("nonce")
797
+ if nonce is not None and result.data.get("release_nonce"):
798
+ ctx.nonce_manager.release(ctx.signer_address, int(nonce))
799
+
800
+ if result.data.get("simulation_error") is not None:
801
+ simulation_error = result.data["simulation_error"]
802
+ job = ctx.job
803
+ return StageOutcome(done=True, final=self._handle_simulation_failure(job, intent, simulation_error))
804
+
805
+ if execution_result == ExecutionResult.DEADLINE_EXPIRED:
485
806
  transition_intent(
486
- self._db,
807
+ ctx.db,
487
808
  intent.intent_id,
488
809
  IntentStatus.ABANDONED,
489
810
  "deadline_expired",
490
- chain_id=self._chain_id,
811
+ chain_id=ctx.chain_id,
491
812
  )
492
- if self._lifecycle:
493
- self._lifecycle.on_failed(
813
+ if ctx.lifecycle:
814
+ ctx.lifecycle.on_failed(
494
815
  intent,
495
816
  None,
496
- TimeoutError("Intent deadline expired"),
817
+ error_obj or TimeoutError("Intent deadline expired"),
497
818
  failure_type=FailureType.DEADLINE_EXPIRED,
498
819
  failure_stage=FailureStage.PRE_BROADCAST,
499
820
  )
500
- return ExecutionOutcome(
501
- result=ExecutionResult.DEADLINE_EXPIRED,
821
+ return StageOutcome(
822
+ done=True,
823
+ final=ExecutionOutcome(
824
+ result=ExecutionResult.DEADLINE_EXPIRED,
825
+ intent=intent,
826
+ attempt=None,
827
+ error=error_obj or TimeoutError("Intent deadline expired"),
828
+ ),
829
+ )
830
+
831
+ if execution_result == ExecutionResult.BLOCKED:
832
+ return StageOutcome(
833
+ done=True,
834
+ final=ExecutionOutcome(
835
+ result=ExecutionResult.BLOCKED,
836
+ intent=intent,
837
+ attempt=None,
838
+ error=error_obj,
839
+ ),
840
+ )
841
+
842
+ failure_type = result.data.get("failure_type")
843
+ failure_stage = result.data.get("failure_stage")
844
+ if error_obj is not None and failure_type and ctx.lifecycle:
845
+ ctx.lifecycle.on_failed(
846
+ intent, None, error_obj,
847
+ failure_type=failure_type,
848
+ failure_stage=failure_stage or FailureStage.PRE_BROADCAST,
849
+ cleanup_trigger=False,
850
+ )
851
+
852
+ if not result.fatal:
853
+ _retry_intent(result.reason)
854
+
855
+ return StageOutcome(
856
+ done=True,
857
+ final=ExecutionOutcome(
858
+ result=ExecutionResult.FAILED,
502
859
  intent=intent,
503
860
  attempt=None,
504
- error=TimeoutError("Intent deadline expired"),
505
- )
861
+ error=error_obj,
862
+ ),
863
+ )
864
+
865
+ return StageOutcome(
866
+ done=True,
867
+ final=ExecutionOutcome(
868
+ result=ExecutionResult.FAILED,
869
+ intent=intent,
870
+ attempt=None,
871
+ error=RuntimeError("Unknown stage result"),
872
+ ),
873
+ )
874
+
875
+ def _stage_gap_check(self, ctx: RunContext, deadline: Deadline) -> StageResult:
876
+ intent = ctx.intent
877
+ if intent.deadline_ts and datetime.now(timezone.utc) > intent.deadline_ts:
878
+ err = TimeoutError("Intent deadline expired")
879
+ return Fail(
880
+ "deadline_expired",
881
+ True,
882
+ {
883
+ "execution_result": ExecutionResult.DEADLINE_EXPIRED,
884
+ "error": serialize_error(err),
885
+ "exception": err,
886
+ },
887
+ )
506
888
 
507
- # 1.5 Pre-flight gap check - don't reserve if signer is blocked
508
889
  try:
509
- is_blocked, oldest_nonce, oldest_age = self._check_nonce_gap(signer_address)
890
+ is_blocked, oldest_nonce, oldest_age = self._check_nonce_gap(
891
+ ctx.signer_address,
892
+ deadline=deadline,
893
+ )
510
894
  except Exception as e:
511
- # Fail-safe: if we cannot validate nonce-gap safety, do NOT proceed
512
- logger.warning(
895
+ ctx.logger.warning(
513
896
  "nonce.gap_check_failed",
514
897
  intent_id=str(intent.intent_id),
515
- signer=signer_address,
898
+ signer=ctx.signer_address,
516
899
  error=str(e)[:100],
517
900
  )
518
- _retry_intent("nonce_gap_check_failed")
519
- return ExecutionOutcome(
520
- result=ExecutionResult.FAILED,
521
- intent=intent,
522
- attempt=None,
523
- error=e,
901
+ decision = retry_policy.decide(StageName.GAP_CHECK.value, e)
902
+ return Retry(
903
+ StageName.GAP_CHECK,
904
+ decision or RetryDecision(None, reason="nonce_gap_check_failed"),
905
+ {"error": serialize_error(e), "exception": e},
524
906
  )
525
907
 
526
908
  if is_blocked:
527
- gap_duration = self._get_gap_duration(signer_address)
528
-
529
- logger.warning(
909
+ err = RuntimeError(
910
+ f"Nonce gap detected for {ctx.signer_address}, waiting for TxReplacer"
911
+ )
912
+ gap_duration = self._get_gap_duration(ctx.signer_address)
913
+ ctx.logger.warning(
530
914
  "nonce.gap_blocked",
531
915
  intent_id=str(intent.intent_id),
532
916
  job_id=intent.job_id,
533
- signer=signer_address,
917
+ signer=ctx.signer_address,
534
918
  blocked_duration_seconds=gap_duration,
535
919
  oldest_in_flight_nonce=oldest_nonce,
536
920
  oldest_in_flight_age_seconds=oldest_age,
537
921
  )
538
922
 
539
- # Check config for unsafe reset mode
540
- if self._config.allow_unsafe_nonce_reset:
541
- logger.warning("nonce.unsafe_reset_triggered", signer=signer_address)
542
- self._nonce_manager.reconcile(signer_address)
543
- self._clear_gap_tracking(signer_address)
544
- # Fall through to normal execution
923
+ if ctx.config.allow_unsafe_nonce_reset:
924
+ ctx.logger.warning("nonce.unsafe_reset_triggered", signer=ctx.signer_address)
925
+ ctx.nonce_manager.force_reset(
926
+ ctx.signer_address,
927
+ source="executor",
928
+ reason=f"allow_unsafe_nonce_reset=True, gap_duration={gap_duration}s",
929
+ )
545
930
  else:
546
- # Alert if blocked too long
547
- if gap_duration > self._config.nonce_gap_alert_seconds:
548
- self._alert_nonce_gap(signer_address, gap_duration, oldest_nonce, oldest_age)
549
-
550
- # Return BLOCKED - don't reserve, don't retry immediately
551
- # Let TxReplacer handle recovery via fee bumping
552
- return ExecutionOutcome(
553
- result=ExecutionResult.BLOCKED,
554
- intent=intent,
555
- attempt=None,
556
- error=RuntimeError(
557
- f"Nonce gap detected for {signer_address}, waiting for TxReplacer"
558
- ),
931
+ if gap_duration > ctx.config.nonce_gap_alert_seconds:
932
+ self._alert_nonce_gap(ctx.signer_address, gap_duration, oldest_nonce, oldest_age)
933
+ return Fail(
934
+ "nonce_gap_blocked",
935
+ True,
936
+ {
937
+ "execution_result": ExecutionResult.BLOCKED,
938
+ "error": serialize_error(err),
939
+ "exception": err,
940
+ },
559
941
  )
560
942
 
561
- # 2. Reserve nonce
943
+ return Ok(StageName.RESERVE_NONCE, {})
944
+
945
+ def _stage_reserve_nonce(self, ctx: RunContext, deadline: Deadline) -> StageResult:
562
946
  try:
563
- nonce = self._nonce_manager.reserve_nonce(
564
- signer_address,
565
- intent_id=intent.intent_id,
947
+ nonce = ctx.nonce_manager.reserve_nonce(
948
+ ctx.signer_address,
949
+ intent_id=ctx.intent.intent_id,
950
+ deadline=deadline,
566
951
  )
567
952
  except Exception as e:
568
- logger.error(
953
+ ctx.logger.error(
569
954
  "nonce.reservation_failed",
570
- intent_id=str(intent.intent_id),
571
- signer=signer_address,
955
+ intent_id=str(ctx.intent.intent_id),
956
+ signer=ctx.signer_address,
572
957
  error=str(e),
573
958
  )
574
- if self._lifecycle:
575
- self._lifecycle.on_failed(
576
- intent, None, e,
577
- failure_type=FailureType.NONCE_FAILED,
578
- failure_stage=FailureStage.PRE_BROADCAST,
579
- cleanup_trigger=False,
580
- )
581
- _retry_intent("nonce_reservation_failed")
582
- return ExecutionOutcome(
583
- result=ExecutionResult.FAILED,
584
- intent=intent,
585
- attempt=None,
586
- error=e,
959
+ decision = retry_policy.decide(StageName.RESERVE_NONCE.value, e)
960
+ return Retry(
961
+ StageName.RESERVE_NONCE,
962
+ decision or RetryDecision(None, reason="nonce_reservation_failed"),
963
+ {
964
+ "error": serialize_error(e),
965
+ "exception": e,
966
+ "failure_type": FailureType.NONCE_FAILED,
967
+ "failure_stage": FailureStage.PRE_BROADCAST,
968
+ },
587
969
  )
588
970
 
589
- # NOTE: Gap detection moved to pre-flight check (step 1.5)
590
- # The pre-flight check returns BLOCKED if there's a nonce gap,
591
- # allowing TxReplacer to handle recovery instead of auto-abandoning.
971
+ return Ok(StageName.BUILD_TX, {"nonce": nonce})
592
972
 
593
- # 3. Estimate gas
594
- job = self._jobs.get(intent.job_id) if self._jobs else None
973
+ def _stage_build_tx(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
974
+ nonce = int(data["nonce"])
595
975
  try:
596
- gas_params = self.estimate_gas(intent, signer_address, to_address, job=job)
976
+ gas_params = self.estimate_gas(
977
+ ctx.intent,
978
+ ctx.signer_address,
979
+ ctx.to_address,
980
+ job=ctx.job,
981
+ deadline=deadline,
982
+ )
597
983
  except Exception as e:
984
+ if isinstance(e, RPCRetryableError):
985
+ decision = retry_policy.decide(StageName.BUILD_TX.value, e)
986
+ return Retry(
987
+ StageName.BUILD_TX,
988
+ decision or RetryDecision(None, reason="rpc_timeout"),
989
+ {
990
+ "error": serialize_error(e),
991
+ "exception": e,
992
+ "nonce": nonce,
993
+ "release_nonce": True,
994
+ },
995
+ )
598
996
  if "RetriableExecutionError" in type(e).__name__ or "No gas quote" in str(e):
599
- logger.warning(
997
+ ctx.logger.warning(
600
998
  "gas.no_quote_available",
601
- intent_id=str(intent.intent_id),
602
- job_id=intent.job_id,
999
+ intent_id=str(ctx.intent.intent_id),
1000
+ job_id=ctx.intent.job_id,
603
1001
  error=str(e),
604
1002
  )
605
- # Release nonce before retry
606
- self._nonce_manager.release(signer_address, nonce)
607
- _retry_intent("no_gas_quote")
608
- return ExecutionOutcome(
609
- result=ExecutionResult.FAILED,
610
- intent=intent,
611
- attempt=None,
612
- error=e,
1003
+ decision = retry_policy.decide(StageName.BUILD_TX.value, e)
1004
+ return Retry(
1005
+ StageName.BUILD_TX,
1006
+ decision or RetryDecision(None, reason="no_gas_quote"),
1007
+ {
1008
+ "error": serialize_error(e),
1009
+ "exception": e,
1010
+ "nonce": nonce,
1011
+ "release_nonce": True,
1012
+ },
613
1013
  )
614
- raise
1014
+ return Fail(
1015
+ "estimate_gas_failed",
1016
+ True,
1017
+ {
1018
+ "error": serialize_error(e),
1019
+ "exception": e,
1020
+ "nonce": nonce,
1021
+ "release_nonce": True,
1022
+ },
1023
+ )
615
1024
 
616
- # 4. Build tx dict for simulation
617
- tx_dict = self._build_tx_dict(intent, nonce, gas_params, to_address)
618
- tx_dict["from"] = signer_address # Required for simulation
1025
+ tx_dict = self._build_tx_dict(ctx.intent, nonce, gas_params, ctx.to_address)
1026
+ tx_dict["from"] = ctx.signer_address
1027
+ return Ok(StageName.SIMULATE, {"nonce": nonce, "gas_params": gas_params, "tx_dict": tx_dict})
619
1028
 
620
- # 5. Simulation step (runs unless job opts out)
621
- if job and not getattr(job, "disable_simulation", False):
622
- try:
623
- self._simulate_with_retry(job, intent, tx_dict)
624
- except (SimulationReverted, SimulationNetworkError) as e:
625
- # Release nonce on simulation failure
626
- self._nonce_manager.release(signer_address, nonce)
627
- return self._handle_simulation_failure(job, intent, e)
1029
def _stage_simulate(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
    """Run the pre-sign simulation step, unless it is switched off.

    The simulation is skipped when the context has no job or when the job
    exposes a truthy ``disable_simulation`` attribute. Otherwise
    ``data["tx_dict"]`` is passed to ``_simulate_with_retry`` together with
    ``deadline``.

    Returns:
        ``Ok`` targeting ``StageName.SIGN`` carrying ``data`` unchanged when
        the simulation is skipped or completes, or a ``Fail`` tagged
        ``"simulation_failed"`` when ``SimulationReverted`` or
        ``SimulationNetworkError`` is raised. The failure payload carries the
        exception, the nonce (if present in ``data``) and
        ``release_nonce=True``.
    """
    job = ctx.job
    should_simulate = job is not None and not getattr(job, "disable_simulation", False)

    if should_simulate:
        try:
            self._simulate_with_retry(job, ctx.intent, data["tx_dict"], deadline)
        except (SimulationReverted, SimulationNetworkError) as sim_error:
            failure_payload = {
                "simulation_error": sim_error,
                "nonce": data.get("nonce"),
                "release_nonce": True,
            }
            return Fail("simulation_failed", True, failure_payload)

    return Ok(StageName.SIGN, data)
1042
+
1043
def _stage_sign(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
    """Sign the prepared transaction and route it to the broadcast stage.

    Reads ``nonce``, ``gas_params`` and ``tx_dict`` from ``data``. On success
    ``data`` is mutated in place to carry ``signed_tx`` and ``tx_hash`` (and
    the ``nonce`` / ``gas_params`` values read at the top) and is returned in
    an ``Ok`` targeting ``StageName.BROADCAST``.

    Any exception raised by the keystore is logged as ``tx.sign_failed`` and
    turned into a ``Retry`` for ``StageName.SIGN``; the retry payload carries
    the serialized error, the exception, the nonce, ``release_nonce=True``
    and the sign-failure type/stage markers.

    ``deadline`` is not consulted by this stage.
    """
    nonce = int(data["nonce"])
    gas_params = data["gas_params"]
    tx_dict = data["tx_dict"]
    intent = ctx.intent
    log = ctx.logger

    try:
        signed_tx = ctx.keystore.sign_transaction(tx_dict, ctx.signer_address)
    except Exception as sign_error:
        log.error(
            "tx.sign_failed",
            intent_id=str(intent.intent_id),
            job_id=intent.job_id,
            error=str(sign_error),
        )
        # Fall back to a default decision when the policy has no opinion.
        decision = retry_policy.decide(StageName.SIGN.value, sign_error)
        retry_payload = {
            "error": serialize_error(sign_error),
            "exception": sign_error,
            "nonce": nonce,
            "release_nonce": True,
            "failure_type": FailureType.SIGN_FAILED,
            "failure_stage": FailureStage.PRE_BROADCAST,
        }
        return Retry(
            StageName.SIGN,
            decision or RetryDecision(None, reason="sign_failed"),
            retry_payload,
        )

    tx_hash = self._compute_signed_tx_hash(signed_tx)

    # Warn when the priority fee is below 100_000_000 wei (0.1 gwei).
    priority_fee = gas_params.max_priority_fee_per_gas
    if priority_fee < 100_000_000:
        log.warning(
            "gas.priority_fee_very_low",
            intent_id=str(intent.intent_id),
            job_id=intent.job_id,
            priority_fee_wei=priority_fee,
            priority_fee_gwei=priority_fee / 1e9,
            hint="Transaction may not be included - validators receive almost no tip",
        )

    log.info(
        LogEvents.TX_SIGN,
        intent_id=str(intent.intent_id),
        job_id=intent.job_id,
        signer=ctx.signer_address,
        nonce=nonce,
        gas_limit=gas_params.gas_limit,
        max_fee=gas_params.max_fee_per_gas,
        priority_fee=gas_params.max_priority_fee_per_gas,
    )

    data["signed_tx"] = signed_tx
    data["tx_hash"] = tx_hash
    data["nonce"] = nonce
    data["gas_params"] = gas_params
    return Ok(StageName.BROADCAST, data)
733
1094
 
734
- # Broadcast transaction using RPC groups
735
- from brawny._rpc.broadcast import broadcast_transaction
736
- from brawny._rpc.errors import RPCGroupUnavailableError
1095
def _stage_broadcast(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
    """Send the signed transaction to the bound endpoint group.

    Steps, in order:

    1. If ``data`` marks this as a resumed pending send and carries a
       ``tx_hash``, probe the RPC for that hash. A probe exception yields a
       ``Retry`` (reason ``probe_unknown``); a positive probe short-circuits
       to ``StageName.FINALIZE`` with ``already_known=True``.
    2. Ask the database to confirm the binding/attempt via
       ``require_bound_and_attempt``. An ``InvariantViolation`` moves the
       intent to ``IntentStatus.FAILED`` and yields a ``Fail``.
    3. Call ``broadcast_transaction``. ``RPCGroupUnavailableError`` and the
       listed RPC/DB/OS/value/runtime errors are logged and yield a ``Fail``
       tagged ``"broadcast_failed"``.

    Returns:
        ``Ok`` targeting ``StageName.FINALIZE`` with a copy of ``data``
        extended by ``endpoint_url`` and ``tx_hash_rpc`` on success, otherwise
        the ``Retry`` / ``Fail`` described above.
    """
    signed_tx = data["signed_tx"]
    endpoints = data["endpoints"]
    group_name = data["broadcast_group"]
    job_id = ctx.job.job_id if ctx.job else None

    def _error_payload(exc: Exception) -> dict[str, object]:
        # Payload shape shared by the probe retry and both broadcast failures.
        return {
            "error": serialize_error(exc),
            "exception": exc,
            "nonce": data.get("nonce"),
            "attempt_id": data.get("attempt_id"),
        }

    resuming = bool(data.get("resume_pending_send"))
    pending_hash = data.get("tx_hash") if resuming else None
    if pending_hash:
        try:
            already_on_node = self._probe_pending_send(str(pending_hash), deadline)
        except Exception as probe_error:
            ctx.logger.warning(
                "broadcast.probe_failed",
                intent_id=str(ctx.intent.intent_id),
                job_id=job_id,
                error=str(probe_error)[:200],
            )
            return Retry(
                StageName.BROADCAST,
                RetryDecision(None, reason="probe_unknown"),
                _error_payload(probe_error),
            )
        if already_on_node:
            return Ok(StageName.FINALIZE, {**data, "already_known": True, "endpoint_url": None})

    nonce = int(data["nonce"])
    try:
        ctx.db.require_bound_and_attempt(ctx.intent.intent_id, nonce, endpoints)
    except InvariantViolation as violation:
        ctx.logger.error(
            "broadcast.invariant_violation",
            intent_id=str(ctx.intent.intent_id),
            job_id=job_id,
            error=str(violation)[:200],
        )
        transition_intent(
            ctx.db,
            ctx.intent.intent_id,
            IntentStatus.FAILED,
            "missing_binding_or_attempt",
            chain_id=ctx.chain_id,
        )
        invariant_payload = {
            "error": serialize_error(violation),
            "exception": violation,
            "failure_type": FailureType.UNKNOWN,
            "failure_stage": FailureStage.PRE_BROADCAST,
        }
        return Fail("missing_binding_or_attempt", True, invariant_payload)

    from brawny._rpc.broadcast import broadcast_transaction
    from brawny._rpc.errors import RPCGroupUnavailableError

    try:
        tx_hash, endpoint_url = broadcast_transaction(
            raw_tx=signed_tx.raw_transaction,
            endpoints=endpoints,
            group_name=group_name,
            config=ctx.config,
            job_id=job_id,
            deadline=deadline,
        )
    except RPCGroupUnavailableError as unavailable:
        last_error = unavailable.last_error
        ctx.logger.error(
            "broadcast_unavailable",
            intent_id=str(ctx.intent.intent_id),
            job_id=job_id,
            broadcast_group=group_name,
            endpoints=endpoints,
            error=str(last_error) if last_error else None,
        )
        return Fail("broadcast_failed", False, _error_payload(unavailable))
    except (RPCError, DatabaseError, OSError, ValueError, RuntimeError) as send_error:
        attempt_id = data.get("attempt_id")
        ctx.logger.error(
            "tx.broadcast_failed",
            intent_id=str(ctx.intent.intent_id),
            job_id=job_id,
            attempt_id=str(attempt_id) if attempt_id else None,
            error=str(send_error),
        )
        return Fail("broadcast_failed", False, _error_payload(send_error))

    success_data = {**data, "endpoint_url": endpoint_url, "tx_hash_rpc": tx_hash}
    return Ok(StageName.FINALIZE, success_data)
781
1204
 
782
- # Update intent to pending
783
- if not transition_intent(
784
- self._db,
1205
def _stage_monitor_tick(self, ctx: RunContext, data: dict[str, object], deadline: Deadline) -> StageResult:
    """Forward ``data`` unchanged to ``StageName.FINALIZE``.

    Neither ``ctx`` nor ``deadline`` is used by this stage.
    """
    next_stage = StageName.FINALIZE
    return Ok(next_stage, data)
1207
+
1208
def _apply_sign_result(
    self,
    ctx: RunContext,
    data: dict[str, object],
    _retry_intent: Callable[[str], None],
) -> StageOutcome:
    """Persist the outcome of signing and prepare ``data`` for broadcast.

    Sequence:

    1. Transition the intent to ``IntentStatus.SENDING``; if that is refused,
       call ``_retry_intent("broadcast_start_failed")`` and finish FAILED.
    2. Look up an existing attempt with the same ``tx_hash``. If one exists in
       BROADCAST, PENDING or CONFIRMED status, mark the nonce in flight,
       transition the intent to PENDING and finish with that attempt.
    3. Resolve the broadcast group/endpoints and, when no attempt exists,
       create one in ``PENDING_SEND`` status via ``create_attempt_once``.
       ``InvariantViolation`` fails the intent; any other exception requests
       a retry. Both finish FAILED.
    4. Otherwise store ``attempt_id``, ``broadcast_group``, ``endpoints`` and
       ``resume_pending_send`` in ``data`` and continue to
       ``StageName.BROADCAST``.

    Returns:
        A ``StageOutcome`` that is either terminal (``done=True``) or points
        at the broadcast stage with the updated ``data``.
    """
    intent = ctx.intent
    nonce = int(data["nonce"])
    gas_params = data["gas_params"]
    tx_hash = data["tx_hash"]

    def _failed(error: Exception) -> StageOutcome:
        # Terminal FAILED outcome with no attempt attached.
        return StageOutcome(
            done=True,
            final=ExecutionOutcome(
                result=ExecutionResult.FAILED,
                intent=intent,
                attempt=None,
                error=error,
            ),
        )

    claimed = transition_intent(
        ctx.db,
        intent.intent_id,
        IntentStatus.SENDING,
        "broadcast_start",
        chain_id=ctx.chain_id,
    )
    if not claimed:
        _retry_intent("broadcast_start_failed")
        return _failed(RuntimeError("Intent status not claimable for sending"))

    attempt = self._find_attempt_by_hash(intent.intent_id, tx_hash)
    attempt_preexisting = attempt is not None

    already_sent_statuses = (
        AttemptStatus.BROADCAST.value,
        AttemptStatus.PENDING.value,
        AttemptStatus.CONFIRMED.value,
    )
    if attempt and attempt.status in already_sent_statuses:
        # An attempt with this hash was already sent: do not broadcast again.
        ctx.nonce_manager.mark_in_flight(ctx.signer_address, nonce, intent.intent_id)
        transition_intent(
            ctx.db,
            intent.intent_id,
            IntentStatus.PENDING,
            "broadcast_complete",
            chain_id=ctx.chain_id,
        )
        return StageOutcome(
            done=True,
            final=ExecutionOutcome(
                result=ExecutionResult.PENDING,
                intent=intent,
                attempt=attempt,
                tx_hash=attempt.tx_hash,
            ),
        )

    group_name, endpoints = self._resolve_broadcast_binding(ctx)

    attempt_id = attempt.attempt_id if attempt else uuid4()
    if attempt is None:
        try:
            attempt = ctx.db.create_attempt_once(
                attempt_id=attempt_id,
                intent_id=intent.intent_id,
                nonce=nonce,
                gas_params_json=gas_params.to_json(),
                status=AttemptStatus.PENDING_SEND.value,
                tx_hash=tx_hash,
                broadcast_group=group_name,
                endpoint_url=None,
                binding=(group_name, endpoints),
                actor=intent.job_id,
                reason="initial_attempt",
                source="executor",
            )
        except InvariantViolation as violation:
            ctx.logger.error(
                "broadcast.binding_failed",
                intent_id=str(intent.intent_id),
                job_id=intent.job_id,
                error=str(violation)[:200],
            )
            transition_intent(
                ctx.db,
                intent.intent_id,
                IntentStatus.FAILED,
                "binding_failed",
                chain_id=ctx.chain_id,
            )
            return _failed(violation)
        except Exception as create_error:
            ctx.logger.error(
                "attempt.create_failed",
                intent_id=str(intent.intent_id),
                job_id=intent.job_id,
                error=str(create_error)[:200],
            )
            _retry_intent("attempt_create_failed")
            return _failed(create_error)

    resumable_statuses = (
        AttemptStatus.PENDING_SEND.value,
        AttemptStatus.SIGNED.value,
    )
    data["attempt_id"] = attempt_id
    data["broadcast_group"] = group_name
    data["endpoints"] = endpoints
    # Only an attempt that existed before this call can be a resumed send.
    data["resume_pending_send"] = attempt_preexisting and attempt.status in resumable_statuses
    return StageOutcome(done=False, next_stage=StageName.BROADCAST, data=data)
1334
+
1335
def _apply_broadcast_result(
    self,
    ctx: RunContext,
    data: dict[str, object],
    _retry_intent: Callable[[str], None],
) -> StageOutcome:
    """Record the result of the broadcast stage and finish the run.

    Failure path (``data["error"]`` is not ``None``): bump the ``TX_FAILED``
    counter, mark the attempt FAILED when an ``attempt_id`` is present,
    release the nonce, notify the lifecycle hook, call
    ``_retry_intent("broadcast_failed")`` and finish FAILED.

    Success path: when an ``attempt_id`` is present, fetch the current block
    number and mark the attempt BROADCAST; then mark the nonce in flight and
    transition the intent to PENDING (finishing FAILED if that is refused),
    log and count the broadcast, reload the attempt, notify
    ``on_submitted`` and finish PENDING.

    Returns:
        A terminal ``StageOutcome`` (``done=True``) in every case.
    """
    intent = ctx.intent
    attempt_id = data.get("attempt_id")
    nonce = int(data["nonce"])

    def _failed(error: Exception) -> StageOutcome:
        # Terminal FAILED outcome with no attempt attached.
        return StageOutcome(
            done=True,
            final=ExecutionOutcome(
                result=ExecutionResult.FAILED,
                intent=intent,
                attempt=None,
                error=error,
            ),
        )

    if data.get("error") is not None:
        error = data["error"]
        error_obj = self._error_from_data(data)
        get_metrics().counter(TX_FAILED).inc(
            chain_id=ctx.chain_id,
            job_id=intent.job_id,
            reason="broadcast_failed",
        )
        if attempt_id is not None:
            ctx.db.update_attempt_status(
                attempt_id,
                AttemptStatus.FAILED.value,
                error_code="broadcast_failed",
                error_detail=str(error_obj or error)[:500],
            )
        ctx.nonce_manager.release(ctx.signer_address, nonce)
        # NOTE(review): the hook and the outcome both receive attempt=None
        # even when attempt_id is set — confirm this is intended.
        if ctx.lifecycle:
            ctx.lifecycle.on_failed(
                intent,
                None,
                error_obj or RuntimeError(str(error)),
                failure_type=FailureType.BROADCAST_FAILED,
                failure_stage=FailureStage.BROADCAST,
                cleanup_trigger=False,
            )
        _retry_intent("broadcast_failed")
        return _failed(error_obj or RuntimeError(str(error)))

    # Prefer the hash reported by the RPC; fall back to the locally computed one.
    tx_hash = data.get("tx_hash_rpc") or data.get("tx_hash")
    endpoint_url = data.get("endpoint_url")

    if attempt_id is not None:
        stage_budget = STAGE_TIMEOUT_SECONDS[StageName.BROADCAST]
        broadcast_deadline = ctx.deadline.child(stage_budget)
        current_block = ctx.rpc.get_block_number(deadline=broadcast_deadline)
        ctx.db.update_attempt_status(
            attempt_id,
            AttemptStatus.BROADCAST.value,
            tx_hash=str(tx_hash) if tx_hash else None,
            broadcast_block=current_block,
            broadcast_at=datetime.now(timezone.utc),
            endpoint_url=endpoint_url,
        )

    ctx.nonce_manager.mark_in_flight(ctx.signer_address, nonce, intent.intent_id)

    moved_to_pending = transition_intent(
        ctx.db,
        intent.intent_id,
        IntentStatus.PENDING,
        "broadcast_complete",
        chain_id=ctx.chain_id,
    )
    if not moved_to_pending:
        return _failed(RuntimeError("Intent status not in sending state"))

    ctx.logger.info(
        LogEvents.TX_BROADCAST,
        intent_id=str(intent.intent_id),
        job_id=intent.job_id,
        attempt_id=str(attempt_id) if attempt_id else None,
        tx_hash=tx_hash,
        signer=ctx.signer_address,
        nonce=nonce,
        broadcast_group=data.get("broadcast_group"),
        endpoint_url=str(endpoint_url)[:50] if endpoint_url else None,
    )
    get_metrics().counter(TX_BROADCAST).inc(
        chain_id=ctx.chain_id,
        job_id=intent.job_id,
    )

    attempt = ctx.db.get_attempt(attempt_id) if attempt_id else None
    if ctx.lifecycle and attempt is not None:
        ctx.lifecycle.on_submitted(intent, attempt)

    return StageOutcome(
        done=True,
        final=ExecutionOutcome(
            result=ExecutionResult.PENDING,
            intent=intent,
            attempt=attempt,
            tx_hash=tx_hash,
        ),
    )
1444
+
1445
def _resolve_broadcast_binding(
    self,
    ctx: RunContext,
) -> tuple[str, list[str]]:
    """Return the broadcast group name and canonicalized endpoints for the intent.

    If the database already holds a binding for the intent, that binding is
    used as-is; when a job is present and its currently configured broadcast
    group differs from the persisted one, a ``broadcast_group_mismatch``
    warning is logged but the persisted group is still returned.

    Without a persisted binding, the group comes from
    ``resolve_default_group`` (no job) or ``resolve_job_groups`` (job
    present), and the endpoints are read from ``ctx.config.rpc_groups``.

    Returns:
        ``(group_name, canonicalize_endpoints(endpoints))``.
    """
    persisted = ctx.db.get_broadcast_binding(ctx.intent.intent_id)
    job_id = ctx.job.job_id if ctx.job else None

    if persisted is None:
        # No binding yet: resolve the group from the current configuration.
        if ctx.job is None:
            from brawny.config.routing import resolve_default_group

            fresh_group = resolve_default_group(ctx.config)
        else:
            from brawny.config.routing import resolve_job_groups

            _, fresh_group = resolve_job_groups(ctx.config, ctx.job)
        fresh_endpoints = ctx.config.rpc_groups[fresh_group].endpoints
        return fresh_group, canonicalize_endpoints(fresh_endpoints)

    bound_group, bound_endpoints = persisted
    if ctx.job:
        from brawny.config.routing import resolve_job_groups

        _, configured_group = resolve_job_groups(ctx.config, ctx.job)
        if configured_group != bound_group:
            # Advisory only: the persisted binding is still the one returned.
            ctx.logger.warning(
                "broadcast_group_mismatch",
                intent_id=str(ctx.intent.intent_id),
                job_id=job_id,
                persisted_group=bound_group,
                current_job_group=configured_group,
            )

    return bound_group, canonicalize_endpoints(bound_endpoints)
1478
+
1479
+ def _find_attempt_by_hash(self, intent_id: UUID, tx_hash: str) -> TxAttempt | None:
1480
+ attempts = self._db.get_attempts_for_intent(intent_id)
1481
+ for attempt in attempts:
1482
+ if attempt.tx_hash and attempt.tx_hash.lower() == tx_hash.lower():
1483
+ return attempt
1484
+ return None
1485
+
1486
+ def _compute_signed_tx_hash(self, signed_tx: object) -> str:
1487
+ if hasattr(signed_tx, "hash"):
1488
+ tx_hash = signed_tx.hash
1489
+ if hasattr(tx_hash, "hex"):
1490
+ return f"0x{tx_hash.hex()}"
1491
+ return str(tx_hash)
1492
+ return f"0x{Web3.keccak(signed_tx.raw_transaction).hex()}"
1493
+
1494
+ def _probe_pending_send(self, tx_hash: str, deadline: Deadline) -> bool:
1495
+ receipt = self._rpc.get_transaction_receipt(tx_hash, deadline=deadline)
1496
+ if receipt:
1497
+ return True
1498
+ tx = self._rpc.get_transaction_by_hash(tx_hash, deadline=deadline)
1499
+ return tx is not None
1500
+
890
1501
  def _build_tx_dict(
891
1502
  self,
892
1503
  intent: TxIntent,
@@ -930,6 +1541,7 @@ class TxExecutor:
930
1541
  job: "Job",
931
1542
  intent: TxIntent,
932
1543
  tx: dict,
1544
+ deadline: Deadline,
933
1545
  ) -> str:
934
1546
  """Simulate transaction with retry on network errors.
935
1547
 
@@ -952,8 +1564,10 @@ class TxExecutor:
952
1564
 
953
1565
  for attempt in range(MAX_SIMULATION_RETRIES + 1):
954
1566
  try:
1567
+ if deadline.expired():
1568
+ raise SimulationNetworkError("Simulation deadline exhausted")
955
1569
  # Run simulation (uses job RPC if specified)
956
- output = self._rpc.simulate_transaction(tx, rpc_url=rpc_url)
1570
+ output = self._rpc.simulate_transaction(tx, rpc_url=rpc_url, deadline=deadline)
957
1571
 
958
1572
  # Run job's custom validation (if defined)
959
1573
  if hasattr(job, "validate_simulation"):