brawny 0.1.13__py3-none-any.whl → 0.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. brawny/__init__.py +2 -0
  2. brawny/_context.py +5 -5
  3. brawny/_rpc/__init__.py +36 -12
  4. brawny/_rpc/broadcast.py +14 -13
  5. brawny/_rpc/caller.py +243 -0
  6. brawny/_rpc/client.py +539 -0
  7. brawny/_rpc/clients.py +11 -11
  8. brawny/_rpc/context.py +23 -0
  9. brawny/_rpc/errors.py +465 -31
  10. brawny/_rpc/gas.py +7 -6
  11. brawny/_rpc/pool.py +18 -0
  12. brawny/_rpc/retry.py +266 -0
  13. brawny/_rpc/retry_policy.py +81 -0
  14. brawny/accounts.py +28 -9
  15. brawny/alerts/__init__.py +15 -18
  16. brawny/alerts/abi_resolver.py +212 -36
  17. brawny/alerts/base.py +2 -2
  18. brawny/alerts/contracts.py +77 -10
  19. brawny/alerts/errors.py +30 -3
  20. brawny/alerts/events.py +38 -5
  21. brawny/alerts/health.py +19 -13
  22. brawny/alerts/send.py +513 -55
  23. brawny/api.py +39 -11
  24. brawny/assets/AGENTS.md +325 -0
  25. brawny/async_runtime.py +48 -0
  26. brawny/chain.py +3 -3
  27. brawny/cli/commands/__init__.py +2 -0
  28. brawny/cli/commands/console.py +69 -19
  29. brawny/cli/commands/contract.py +2 -2
  30. brawny/cli/commands/controls.py +121 -0
  31. brawny/cli/commands/health.py +2 -2
  32. brawny/cli/commands/job_dev.py +6 -5
  33. brawny/cli/commands/jobs.py +99 -2
  34. brawny/cli/commands/maintenance.py +13 -29
  35. brawny/cli/commands/migrate.py +1 -0
  36. brawny/cli/commands/run.py +10 -3
  37. brawny/cli/commands/script.py +8 -3
  38. brawny/cli/commands/signer.py +143 -26
  39. brawny/cli/helpers.py +0 -3
  40. brawny/cli_templates.py +25 -349
  41. brawny/config/__init__.py +4 -1
  42. brawny/config/models.py +43 -57
  43. brawny/config/parser.py +268 -57
  44. brawny/config/validation.py +52 -15
  45. brawny/daemon/context.py +4 -2
  46. brawny/daemon/core.py +185 -63
  47. brawny/daemon/loops.py +166 -98
  48. brawny/daemon/supervisor.py +261 -0
  49. brawny/db/__init__.py +14 -26
  50. brawny/db/base.py +248 -151
  51. brawny/db/global_cache.py +11 -1
  52. brawny/db/migrate.py +175 -28
  53. brawny/db/migrations/001_init.sql +4 -3
  54. brawny/db/migrations/010_add_nonce_gap_index.sql +1 -1
  55. brawny/db/migrations/011_add_job_logs.sql +1 -2
  56. brawny/db/migrations/012_add_claimed_by.sql +2 -2
  57. brawny/db/migrations/013_attempt_unique.sql +10 -0
  58. brawny/db/migrations/014_add_lease_expires_at.sql +5 -0
  59. brawny/db/migrations/015_add_signer_alias.sql +14 -0
  60. brawny/db/migrations/016_runtime_controls_and_quarantine.sql +32 -0
  61. brawny/db/migrations/017_add_job_drain.sql +6 -0
  62. brawny/db/migrations/018_add_nonce_reset_audit.sql +20 -0
  63. brawny/db/migrations/019_add_job_cooldowns.sql +8 -0
  64. brawny/db/migrations/020_attempt_unique_initial.sql +7 -0
  65. brawny/db/ops/__init__.py +3 -25
  66. brawny/db/ops/logs.py +1 -2
  67. brawny/db/queries.py +47 -91
  68. brawny/db/serialized.py +65 -0
  69. brawny/db/sqlite/__init__.py +1001 -0
  70. brawny/db/sqlite/connection.py +231 -0
  71. brawny/db/sqlite/execute.py +116 -0
  72. brawny/db/sqlite/mappers.py +190 -0
  73. brawny/db/sqlite/repos/attempts.py +372 -0
  74. brawny/db/sqlite/repos/block_state.py +102 -0
  75. brawny/db/sqlite/repos/cache.py +104 -0
  76. brawny/db/sqlite/repos/intents.py +1021 -0
  77. brawny/db/sqlite/repos/jobs.py +200 -0
  78. brawny/db/sqlite/repos/maintenance.py +182 -0
  79. brawny/db/sqlite/repos/signers_nonces.py +566 -0
  80. brawny/db/sqlite/tx.py +119 -0
  81. brawny/http.py +194 -0
  82. brawny/invariants.py +11 -24
  83. brawny/jobs/base.py +8 -0
  84. brawny/jobs/job_validation.py +2 -1
  85. brawny/keystore.py +83 -7
  86. brawny/lifecycle.py +64 -12
  87. brawny/logging.py +0 -2
  88. brawny/metrics.py +84 -12
  89. brawny/model/contexts.py +111 -9
  90. brawny/model/enums.py +1 -0
  91. brawny/model/errors.py +18 -0
  92. brawny/model/types.py +47 -131
  93. brawny/network_guard.py +133 -0
  94. brawny/networks/__init__.py +5 -5
  95. brawny/networks/config.py +1 -7
  96. brawny/networks/manager.py +14 -11
  97. brawny/runtime_controls.py +74 -0
  98. brawny/scheduler/poller.py +11 -7
  99. brawny/scheduler/reorg.py +95 -39
  100. brawny/scheduler/runner.py +442 -168
  101. brawny/scheduler/shutdown.py +3 -3
  102. brawny/script_tx.py +3 -3
  103. brawny/telegram.py +53 -7
  104. brawny/testing.py +1 -0
  105. brawny/timeout.py +38 -0
  106. brawny/tx/executor.py +922 -308
  107. brawny/tx/intent.py +54 -16
  108. brawny/tx/monitor.py +31 -12
  109. brawny/tx/nonce.py +212 -90
  110. brawny/tx/replacement.py +69 -18
  111. brawny/tx/retry_policy.py +24 -0
  112. brawny/tx/stages/types.py +75 -0
  113. brawny/types.py +18 -0
  114. brawny/utils.py +41 -0
  115. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/METADATA +3 -3
  116. brawny-0.1.22.dist-info/RECORD +163 -0
  117. brawny/_rpc/manager.py +0 -982
  118. brawny/_rpc/selector.py +0 -156
  119. brawny/db/base_new.py +0 -165
  120. brawny/db/mappers.py +0 -182
  121. brawny/db/migrations/008_add_transactions.sql +0 -72
  122. brawny/db/ops/attempts.py +0 -108
  123. brawny/db/ops/blocks.py +0 -83
  124. brawny/db/ops/cache.py +0 -93
  125. brawny/db/ops/intents.py +0 -296
  126. brawny/db/ops/jobs.py +0 -110
  127. brawny/db/ops/nonces.py +0 -322
  128. brawny/db/postgres.py +0 -2535
  129. brawny/db/postgres_new.py +0 -196
  130. brawny/db/sqlite.py +0 -2733
  131. brawny/db/sqlite_new.py +0 -191
  132. brawny-0.1.13.dist-info/RECORD +0 -141
  133. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/WHEEL +0 -0
  134. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/entry_points.txt +0 -0
  135. {brawny-0.1.13.dist-info → brawny-0.1.22.dist-info}/top_level.txt +0 -0
brawny/tx/nonce.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Centralized nonce manager for transaction execution.
2
2
 
3
3
  Implements the nonce management strategy from SPEC 8:
4
- - Reserve nonce with SERIALIZABLE isolation
4
+ - Reserve nonce with SQLite BEGIN EXCLUSIVE locking
5
5
  - Nonce status transitions (reserved → in_flight → released/orphaned)
6
6
  - Reconciliation loop for startup and periodic sync
7
7
  - SQLite-specific locking for development
@@ -11,19 +11,25 @@ Jobs NEVER allocate or set nonces - the nonce manager owns all nonce operations.
11
11
 
12
12
  from __future__ import annotations
13
13
 
14
+ import time
14
15
  from contextlib import contextmanager
15
16
  from typing import TYPE_CHECKING, Generator
16
17
  from uuid import UUID
17
18
 
19
+ from cachetools import TTLCache
18
20
  from web3 import Web3
19
21
 
20
22
  from brawny.logging import LogEvents, get_logger
21
23
  from brawny.model.enums import NonceStatus
24
+ from brawny.timeout import Deadline
25
+
26
+ # Rate limit gap detection logs to once per 60 seconds per signer
27
+ GAP_LOG_RATE_LIMIT_SECONDS = 60
22
28
 
23
29
  if TYPE_CHECKING:
24
30
  from brawny.db.base import Database
25
31
  from brawny.model.types import NonceReservation
26
- from brawny._rpc.manager import RPCManager
32
+ from brawny._rpc.clients import ReadClient
27
33
 
28
34
  logger = get_logger(__name__)
29
35
 
@@ -40,7 +46,7 @@ class NonceManager:
40
46
  def __init__(
41
47
  self,
42
48
  db: Database,
43
- rpc: RPCManager,
49
+ rpc: ReadClient,
44
50
  chain_id: int,
45
51
  ) -> None:
46
52
  """Initialize nonce manager.
@@ -53,11 +59,16 @@ class NonceManager:
53
59
  self._db = db
54
60
  self._rpc = rpc
55
61
  self._chain_id = chain_id
62
+ # Rate limiting for gap detection logs: signer_address -> last_log_time
63
+ # Single-threaded access (from executor) - no lock needed
64
+ # Low cardinality keys (signer addresses): maxsize=1000, ttl=1h
65
+ self._gap_log_times: TTLCache[str, float] = TTLCache(maxsize=1000, ttl=3600)
56
66
 
57
67
  def reserve_nonce(
58
68
  self,
59
69
  signer_address: str,
60
70
  intent_id: UUID | None = None,
71
+ deadline: Deadline | None = None,
61
72
  ) -> int:
62
73
  """Reserve the next available nonce for a signer.
63
74
 
@@ -82,7 +93,9 @@ class NonceManager:
82
93
 
83
94
  try:
84
95
  chain_nonce = self._rpc.get_transaction_count(
85
- Web3.to_checksum_address(signer_address), block_identifier="pending"
96
+ Web3.to_checksum_address(signer_address),
97
+ block_identifier="pending",
98
+ deadline=deadline,
86
99
  )
87
100
  except Exception as e:
88
101
  logger.warning(
@@ -92,6 +105,34 @@ class NonceManager:
92
105
  )
93
106
  chain_nonce = None
94
107
 
108
+ signer_state = self._db.get_signer_state(self._chain_id, signer_address)
109
+ if (
110
+ signer_state
111
+ and chain_nonce is not None
112
+ and signer_state.last_synced_chain_nonce is not None
113
+ and chain_nonce < signer_state.last_synced_chain_nonce
114
+ ):
115
+ self._db.set_signer_quarantined(
116
+ self._chain_id,
117
+ signer_address,
118
+ reason="stale_chain_nonce",
119
+ source="nonce_reserve",
120
+ )
121
+ logger.warning(
122
+ "nonce.stale_chain_nonce",
123
+ signer=signer_address,
124
+ chain_nonce=chain_nonce,
125
+ last_synced_chain_nonce=signer_state.last_synced_chain_nonce,
126
+ )
127
+ raise RuntimeError("Stale chain nonce detected; signer quarantined")
128
+ if signer_state and signer_state.quarantined_at:
129
+ logger.warning(
130
+ "nonce.signer_quarantined",
131
+ signer=signer_address,
132
+ reason=signer_state.quarantine_reason,
133
+ )
134
+ raise RuntimeError("Signer is quarantined; nonce reservation blocked")
135
+
95
136
  nonce = self._db.reserve_nonce_atomic(
96
137
  chain_id=self._chain_id,
97
138
  address=signer_address,
@@ -138,6 +179,8 @@ class NonceManager:
138
179
  self,
139
180
  signer_address: str,
140
181
  nonce: int,
182
+ reason: str | None = None,
183
+ source: str | None = None,
141
184
  ) -> bool:
142
185
  """Release a nonce reservation (after confirm/fail/abandon).
143
186
 
@@ -150,7 +193,11 @@ class NonceManager:
150
193
  """
151
194
  signer_address = signer_address.lower()
152
195
  return self._db.release_nonce_reservation(
153
- self._chain_id, signer_address, nonce
196
+ self._chain_id,
197
+ signer_address,
198
+ nonce,
199
+ reason=reason,
200
+ source=source,
154
201
  )
155
202
 
156
203
  @contextmanager
@@ -265,14 +312,18 @@ class NonceManager:
265
312
  ]
266
313
 
267
314
  def reconcile(self, signer_address: str | None = None) -> dict[str, int]:
268
- """Reconcile nonce reservations with chain state.
315
+ """Reconcile nonce reservations using SAFE operations only.
316
+
317
+ SAFETY INVARIANTS:
318
+ - NEVER mutates signers.next_nonce (use force_reset() for that)
319
+ - Only releases reservations provable from DB state (confirmed intents)
320
+ - Gap detection is observability-only (log + metric, no action)
269
321
 
270
322
  Run at startup and periodically to:
271
- - Reset next_nonce when gap detected (CRITICAL for recovery)
272
- - Update signer's synced chain nonce
273
- - Mark stale reservations as released or orphaned
274
- - Clean up confirmed/used nonces
275
- - Release gap reservations (nonces >= chain_nonce with no tx in mempool)
323
+ - Update signer's synced chain nonce (observability)
324
+ - Detect nonce gaps and emit alerts (no auto-reset)
325
+ - Release reservations for DB-confirmed intents
326
+ - Clean up old orphaned reservations (time-based)
276
327
 
277
328
  Args:
278
329
  signer_address: Optional specific signer to reconcile.
@@ -281,13 +332,14 @@ class NonceManager:
281
332
  Returns:
282
333
  Dictionary with reconciliation stats
283
334
  """
335
+ from brawny.metrics import NONCE_GAP_DETECTED, get_metrics
336
+
284
337
  stats = {
285
338
  "signers_checked": 0,
286
339
  "nonces_released": 0,
287
- "nonces_orphaned": 0,
288
340
  "orphans_cleaned": 0,
289
- "next_nonce_reset": 0,
290
- "gap_reservations_released": 0,
341
+ "gaps_detected": 0,
342
+ "orphans_marked": 0,
291
343
  }
292
344
 
293
345
  if signer_address:
@@ -296,89 +348,70 @@ class NonceManager:
296
348
  else:
297
349
  signers = self._db.get_all_signers(self._chain_id)
298
350
 
351
+ metrics = get_metrics()
352
+
299
353
  for signer in signers:
300
354
  stats["signers_checked"] += 1
301
355
 
302
356
  try:
303
- # Get current chain nonce
357
+ # Get current chain nonce (for observability only)
304
358
  chain_nonce = self._rpc.get_transaction_count(
305
359
  Web3.to_checksum_address(signer.signer_address), block_identifier="pending"
306
360
  )
307
361
 
308
- # Update signer's synced chain nonce
362
+ # Update signer's synced chain nonce (observability only)
309
363
  self._db.update_signer_chain_nonce(
310
364
  self._chain_id, signer.signer_address, chain_nonce
311
365
  )
312
366
 
313
- # CRITICAL FIX: Reset next_nonce when gap detected
314
- # Without this, reserve_nonce_atomic() keeps returning stale nonces
367
+ # Gap detection: log + metric only, NO auto-reset
368
+ # Reason: single-endpoint "pending nonce" is not a truth source.
369
+ # Auto-reset based on RPC can brick the system during RPC incidents.
370
+ # Recovery requires explicit operator action via force_reset().
315
371
  if chain_nonce < signer.next_nonce:
316
372
  gap_size = signer.next_nonce - chain_nonce
317
- logger.warning(
318
- "nonce.gap_reset",
319
- signer=signer.signer_address,
320
- old_next_nonce=signer.next_nonce,
321
- chain_nonce=chain_nonce,
322
- gap_size=gap_size,
323
- )
324
- self._db.update_signer_next_nonce(
325
- self._chain_id, signer.signer_address, chain_nonce
326
- )
327
- stats["next_nonce_reset"] += 1
373
+ stats["gaps_detected"] += 1
328
374
 
329
- # Release all non-released reservations >= chain_nonce
330
- # These are "gap" reservations whose txs are no longer in mempool
331
- gap_reservations = self._db.get_reservations_for_signer(
332
- self._chain_id, signer.signer_address
375
+ # Emit metric for alerting (always - metrics are cheap)
376
+ metrics.counter(NONCE_GAP_DETECTED).inc(
377
+ chain_id=self._chain_id,
378
+ signer=signer.signer_address[:10], # Truncate for cardinality
333
379
  )
334
- for reservation in gap_reservations:
335
- if reservation.status == NonceStatus.RELEASED:
336
- continue
337
- if reservation.nonce >= chain_nonce:
338
- # This reservation is in the gap - tx doesn't exist
339
- self.release(signer.signer_address, reservation.nonce)
340
- stats["gap_reservations_released"] += 1
341
- logger.debug(
342
- "nonce.gap_reservation_released",
343
- signer=signer.signer_address,
344
- nonce=reservation.nonce,
345
- intent_id=str(reservation.intent_id) if reservation.intent_id else None,
346
- )
347
-
348
- # Get stale reservations (nonce < chain_nonce)
349
- stale_reservations = self._db.get_reservations_below_nonce(
350
- self._chain_id, signer.signer_address, chain_nonce
351
- )
352
-
353
- for reservation in stale_reservations:
354
- if reservation.status == NonceStatus.RELEASED:
355
- # Already released, skip
356
- continue
357
380
 
358
- if reservation.intent_id:
359
- # Has associated intent - check if confirmed
360
- attempt = self._db.get_latest_attempt_for_intent(
361
- reservation.intent_id
381
+ # Rate-limited log warning (per signer)
382
+ now = time.monotonic()
383
+ last_log = self._gap_log_times.get(signer.signer_address, 0)
384
+ if now - last_log >= GAP_LOG_RATE_LIMIT_SECONDS:
385
+ self._gap_log_times[signer.signer_address] = now
386
+ logger.warning(
387
+ "nonce.gap_detected",
388
+ signer=signer.signer_address,
389
+ chain_id=self._chain_id,
390
+ db_next_nonce=signer.next_nonce,
391
+ chain_pending_nonce=chain_nonce,
392
+ gap_size=gap_size,
393
+ action="none",
394
+ recovery=f"Run 'brawny signer force-reset {signer.signer_address[:10]}...'",
362
395
  )
363
- if attempt and attempt.status.value == "confirmed":
364
- # Confirmed - release the reservation
365
- self.release(signer.signer_address, reservation.nonce)
366
- stats["nonces_released"] += 1
367
- else:
368
- # Not confirmed but nonce is used - orphaned
369
- self.mark_orphaned(signer.signer_address, reservation.nonce)
370
- stats["nonces_orphaned"] += 1
371
- else:
372
- # No intent - just release
373
- self.release(signer.signer_address, reservation.nonce)
374
- stats["nonces_released"] += 1
396
+
397
+ # SAFE CLEANUP: Only release reservations provable from DB state
398
+ # We iterate all non-released reservations and check DB for confirmation
399
+ released_count = self._release_confirmed_reservations(signer.signer_address)
400
+ stats["nonces_released"] += released_count
401
+
402
+ # Mark reservations below chain nonce with no intent as orphaned.
403
+ # This handles external transactions consuming a nonce.
404
+ orphans_marked = self._mark_orphaned_below_chain_nonce(
405
+ signer.signer_address, chain_nonce
406
+ )
407
+ stats["orphans_marked"] += orphans_marked
375
408
 
376
409
  logger.info(
377
410
  LogEvents.NONCE_RECONCILE,
378
411
  signer=signer.signer_address,
379
412
  chain_nonce=chain_nonce,
380
- stale_count=len(stale_reservations),
381
- next_nonce_was_reset=chain_nonce < signer.next_nonce,
413
+ released_count=released_count,
414
+ orphans_marked=orphans_marked,
382
415
  )
383
416
 
384
417
  except Exception as e:
@@ -388,11 +421,57 @@ class NonceManager:
388
421
  error=str(e),
389
422
  )
390
423
 
391
- # Cleanup old orphaned reservations (24+ hours old)
424
+ # Cleanup old orphaned reservations (24+ hours old, time-based)
392
425
  stats["orphans_cleaned"] = self.cleanup_orphaned()
393
426
 
394
427
  return stats
395
428
 
429
+ def _mark_orphaned_below_chain_nonce(self, signer_address: str, chain_nonce: int) -> int:
430
+ """Mark reservations below chain nonce with no intent as orphaned.
431
+
432
+ Uses chain nonce as a lower bound to prevent reusing externally consumed nonces.
433
+ """
434
+ signer_address = signer_address.lower()
435
+ reservations = self._db.get_reservations_below_nonce(
436
+ self._chain_id, signer_address, chain_nonce
437
+ )
438
+ marked = 0
439
+ for reservation in reservations:
440
+ if reservation.status in (NonceStatus.RELEASED, NonceStatus.ORPHANED):
441
+ continue
442
+ if reservation.intent_id is not None:
443
+ continue
444
+ if self.mark_orphaned(signer_address, reservation.nonce):
445
+ marked += 1
446
+ return marked
447
+
448
+ def _release_confirmed_reservations(self, signer_address: str) -> int:
449
+ """Release reservations for intents that are DB-confirmed.
450
+
451
+ SAFE: Only uses DB state, never RPC.
452
+ """
453
+ signer_address = signer_address.lower()
454
+ reservations = self._db.get_reservations_for_signer(
455
+ self._chain_id, signer_address
456
+ )
457
+
458
+ released = 0
459
+ for reservation in reservations:
460
+ if reservation.status == NonceStatus.RELEASED:
461
+ continue
462
+
463
+ if not reservation.intent_id:
464
+ # No intent attached - skip (could be pre-broadcast)
465
+ continue
466
+
467
+ # Check if intent is confirmed IN THE DATABASE
468
+ attempt = self._db.get_latest_attempt_for_intent(reservation.intent_id)
469
+ if attempt and attempt.status.value == "confirmed":
470
+ self.release(signer_address, reservation.nonce)
471
+ released += 1
472
+
473
+ return released
474
+
396
475
  def cleanup_orphaned(self, older_than_hours: int = 24) -> int:
397
476
  """Delete orphaned nonce reservations older than specified hours.
398
477
 
@@ -447,35 +526,55 @@ class NonceManager:
447
526
 
448
527
  return chain_nonce
449
528
 
450
- def force_reset(self, signer_address: str) -> int:
451
- """Force reset nonce state to match chain. Returns new next_nonce.
529
+ def force_reset(
530
+ self,
531
+ signer_address: str,
532
+ source: str = "unknown",
533
+ reason: str | None = None,
534
+ target_nonce: int | None = None,
535
+ ) -> int:
536
+ """Force reset nonce state. Returns new next_nonce.
452
537
 
453
- USE WITH CAUTION: May cause issues if dropped txs later mine.
538
+ USE WITH CAUTION: Destructive operation that may cause issues if
539
+ dropped txs later mine. This requires explicit operator action.
454
540
 
455
541
  This will:
456
- - Query current chain pending nonce
457
- - Reset local next_nonce to match chain
458
- - Release all reservations with nonce >= chain_pending_nonce
542
+ - Query current chain pending nonce (or use target_nonce if provided)
543
+ - Reset local next_nonce to match
544
+ - Release all reservations with nonce >= target
459
545
  - Clear gap tracking
546
+ - Emit audit log and metric
460
547
 
461
548
  Args:
462
549
  signer_address: Ethereum address of the signer
550
+ source: Where this reset originated ("cli", "executor", "api")
551
+ reason: Human-readable reason for the reset
552
+ target_nonce: Optional explicit target. If None, uses chain pending nonce.
463
553
 
464
554
  Returns:
465
- The new next_nonce (equal to chain pending nonce)
555
+ The new next_nonce
466
556
  """
557
+ from brawny.metrics import NONCE_FORCE_RESET, get_metrics
558
+
467
559
  signer_address = signer_address.lower()
468
- chain_nonce = self._rpc.get_transaction_count(
469
- Web3.to_checksum_address(signer_address), block_identifier="pending"
470
- )
471
560
 
472
- # Release all reservations at or above chain nonce
561
+ # Get target nonce
562
+ if target_nonce is None:
563
+ target_nonce = self._rpc.get_transaction_count(
564
+ Web3.to_checksum_address(signer_address), block_identifier="pending"
565
+ )
566
+
567
+ # Get current state for audit logging
568
+ current_state = self._db.get_signer_state(self._chain_id, signer_address)
569
+ old_next_nonce = current_state.next_nonce if current_state else None
570
+
571
+ # Release all reservations at or above target nonce
473
572
  reservations = self._db.get_reservations_for_signer(
474
573
  self._chain_id, signer_address
475
574
  )
476
575
  released_count = 0
477
576
  for r in reservations:
478
- if r.nonce >= chain_nonce and r.status in (
577
+ if r.nonce >= target_nonce and r.status in (
479
578
  NonceStatus.RESERVED,
480
579
  NonceStatus.IN_FLIGHT,
481
580
  ):
@@ -483,16 +582,39 @@ class NonceManager:
483
582
  released_count += 1
484
583
 
485
584
  # Reset next_nonce
486
- self._db.update_signer_next_nonce(self._chain_id, signer_address, chain_nonce)
585
+ self._db.update_signer_next_nonce(self._chain_id, signer_address, target_nonce)
487
586
 
488
587
  # Clear gap tracking
489
588
  self._db.clear_gap_started_at(self._chain_id, signer_address)
490
589
 
590
+ # Emit metric for observability
591
+ metrics = get_metrics()
592
+ metrics.counter(NONCE_FORCE_RESET).inc(
593
+ chain_id=self._chain_id,
594
+ signer=signer_address[:10], # Truncate for cardinality
595
+ source=source,
596
+ )
597
+
598
+ # Explicit audit log - this is a destructive operation
491
599
  logger.warning(
492
600
  "nonce.force_reset",
493
601
  signer=signer_address,
494
- new_next_nonce=chain_nonce,
602
+ old_next_nonce=old_next_nonce,
603
+ new_next_nonce=target_nonce,
495
604
  released_reservations=released_count,
605
+ source=source,
606
+ reason=reason or "not provided",
496
607
  )
497
608
 
498
- return chain_nonce
609
+ # Durable audit record in DB (survives log rotation)
610
+ self._db.record_nonce_reset_audit(
611
+ chain_id=self._chain_id,
612
+ signer_address=signer_address,
613
+ old_next_nonce=old_next_nonce,
614
+ new_next_nonce=target_nonce,
615
+ released_reservations=released_count,
616
+ source=source,
617
+ reason=reason,
618
+ )
619
+
620
+ return target_nonce
brawny/tx/replacement.py CHANGED
@@ -19,6 +19,7 @@ from web3 import Web3
19
19
  from brawny.logging import LogEvents, get_logger
20
20
  from brawny.metrics import TX_REPLACED, get_metrics
21
21
  from brawny.model.enums import AttemptStatus, IntentStatus
22
+ from brawny.timeout import Deadline
22
23
  from brawny.tx.intent import transition_intent
23
24
  from brawny.tx.utils import normalize_tx_dict
24
25
  from brawny.model.types import GasParams
@@ -29,11 +30,14 @@ if TYPE_CHECKING:
29
30
  from brawny.keystore import Keystore
30
31
  from brawny.lifecycle import LifecycleDispatcher
31
32
  from brawny.model.types import TxAttempt, TxIntent
32
- from brawny._rpc.manager import RPCManager
33
+ from brawny._rpc.clients import BroadcastClient
33
34
  from brawny.tx.nonce import NonceManager
35
+ from brawny.runtime_controls import RuntimeControls
34
36
 
35
37
  logger = get_logger(__name__)
36
38
 
39
+ REPLACER_TICK_TIMEOUT_SECONDS = 10.0
40
+
37
41
 
38
42
  @dataclass
39
43
  class ReplacementResult:
@@ -60,11 +64,12 @@ class TxReplacer:
60
64
  def __init__(
61
65
  self,
62
66
  db: Database,
63
- rpc: RPCManager,
67
+ rpc: BroadcastClient,
64
68
  keystore: Keystore,
65
69
  nonce_manager: NonceManager,
66
70
  config: Config,
67
71
  lifecycle: "LifecycleDispatcher | None" = None,
72
+ controls: "RuntimeControls | None" = None,
68
73
  ) -> None:
69
74
  """Initialize transaction replacer.
70
75
 
@@ -81,6 +86,7 @@ class TxReplacer:
81
86
  self._nonce_manager = nonce_manager
82
87
  self._config = config
83
88
  self._lifecycle = lifecycle
89
+ self._controls = controls
84
90
 
85
91
  def calculate_replacement_fees(self, old_params: GasParams) -> GasParams:
86
92
  """Calculate bumped fees for replacement transaction.
@@ -114,7 +120,12 @@ class TxReplacer:
114
120
  attempts = self._db.get_attempts_for_intent(intent_id)
115
121
  return sum(1 for a in attempts if a.replaces_attempt_id is not None)
116
122
 
117
- def should_replace(self, intent: TxIntent, attempt: TxAttempt) -> bool:
123
+ def should_replace(
124
+ self,
125
+ intent: TxIntent,
126
+ attempt: TxAttempt,
127
+ deadline: Deadline | None,
128
+ ) -> bool:
118
129
  """Check if a transaction should be replaced.
119
130
 
120
131
  Args:
@@ -154,21 +165,30 @@ class TxReplacer:
154
165
  return False
155
166
 
156
167
  # Check if still pending (no receipt)
157
- receipt = self._rpc.get_transaction_receipt(attempt.tx_hash)
168
+ if deadline is not None and deadline.expired():
169
+ return False
170
+ receipt = self._rpc.get_transaction_receipt(attempt.tx_hash, deadline=deadline)
158
171
  if receipt is not None:
159
172
  # Has receipt - don't replace
160
173
  return False
161
174
 
162
175
  # Check blocks elapsed
163
176
  try:
164
- current_block = self._rpc.get_block_number()
177
+ current_block = self._rpc.get_block_number(deadline=deadline)
165
178
  blocks_since = current_block - attempt.broadcast_block
166
179
 
167
180
  required_blocks = self._config.stuck_tx_blocks * wait_multiplier
168
181
  if blocks_since < required_blocks:
169
182
  return False
170
- except Exception:
171
- pass
183
+ except Exception as e:
184
+ logger.debug(
185
+ "replacement.block_number_failed",
186
+ intent_id=str(intent.intent_id),
187
+ attempt_id=str(attempt.attempt_id),
188
+ tx_hash=attempt.tx_hash,
189
+ error=str(e)[:200],
190
+ )
191
+ return False
172
192
 
173
193
  return True
174
194
 
@@ -176,6 +196,7 @@ class TxReplacer:
176
196
  self,
177
197
  intent: TxIntent,
178
198
  attempt: TxAttempt,
199
+ deadline: Deadline | None,
179
200
  ) -> ReplacementResult:
180
201
  """Create a replacement transaction with bumped fees.
181
202
 
@@ -190,7 +211,7 @@ class TxReplacer:
190
211
  """
191
212
  if attempt.tx_hash:
192
213
  try:
193
- receipt = self._rpc.get_transaction_receipt(attempt.tx_hash)
214
+ receipt = self._rpc.get_transaction_receipt(attempt.tx_hash, deadline=deadline)
194
215
  except Exception as e:
195
216
  logger.warning(
196
217
  "replacement.receipt_check_failed",
@@ -266,16 +287,22 @@ class TxReplacer:
266
287
  intent_id=intent.intent_id,
267
288
  nonce=attempt.nonce, # Same nonce
268
289
  gas_params_json=new_gas_params.to_json(),
269
- status=AttemptStatus.SIGNED.value,
290
+ status=AttemptStatus.PENDING_SEND.value,
270
291
  replaces_attempt_id=attempt.attempt_id,
292
+ actor=intent.job_id,
293
+ reason="replacement_attempt",
294
+ source="replacer",
271
295
  )
272
296
 
273
297
  try:
274
298
  # Broadcast replacement
275
- tx_hash, _endpoint_url = self._rpc.send_raw_transaction(signed_tx.raw_transaction)
299
+ tx_hash, _endpoint_url = self._rpc.send_raw_transaction(
300
+ signed_tx.raw_transaction,
301
+ deadline=deadline,
302
+ )
276
303
 
277
304
  # Update new attempt with tx_hash
278
- current_block = self._rpc.get_block_number()
305
+ current_block = self._rpc.get_block_number(deadline=deadline)
279
306
  self._db.update_attempt_status(
280
307
  new_attempt_id,
281
308
  AttemptStatus.BROADCAST.value,
@@ -403,16 +430,32 @@ class TxReplacer:
403
430
  "checked": 0,
404
431
  "replaced": 0,
405
432
  "abandoned": 0,
433
+ "paused": 0,
406
434
  "errors": 0,
407
435
  }
408
436
 
437
+ if self._controls and self._controls.is_active("pause_replacements"):
438
+ logger.warning("replacement.paused_globally")
439
+ return results
440
+
409
441
  # Get pending intents
410
442
  pending_intents = self._db.get_intents_by_status(
411
443
  IntentStatus.PENDING.value,
412
444
  chain_id=self._config.chain_id,
413
445
  )
414
446
 
447
+ deadline = Deadline.from_seconds(REPLACER_TICK_TIMEOUT_SECONDS)
448
+
415
449
  for intent in pending_intents:
450
+ if deadline.expired():
451
+ logger.warning("replacement.tick_timeout", pending_remaining=len(pending_intents))
452
+ break
453
+ signer_state = self._db.get_signer_state(
454
+ self._config.chain_id,
455
+ intent.signer_address,
456
+ )
457
+ if signer_state and signer_state.replacements_paused:
458
+ continue
416
459
  attempt = self._db.get_latest_attempt_for_intent(intent.intent_id)
417
460
  if not attempt or not attempt.tx_hash:
418
461
  continue
@@ -420,20 +463,28 @@ class TxReplacer:
420
463
  results["checked"] += 1
421
464
 
422
465
  try:
423
- if self.should_replace(intent, attempt):
466
+ if self.should_replace(intent, attempt, deadline):
424
467
  # Check if we've exceeded max replacements
425
468
  replacement_count = self.get_replacement_count(intent.intent_id)
426
469
  if replacement_count >= self._config.max_replacement_attempts:
427
- self.abandon_intent(
428
- intent,
429
- attempt,
430
- f"Max replacement attempts ({self._config.max_replacement_attempts}) exceeded",
470
+ self._db.set_replacements_paused(
471
+ self._config.chain_id,
472
+ intent.signer_address,
473
+ True,
474
+ reason="replacement_budget_exceeded",
475
+ source="replacer",
476
+ )
477
+ logger.warning(
478
+ "replacement.paused_signer",
479
+ intent_id=str(intent.intent_id),
480
+ signer=intent.signer_address,
481
+ count=replacement_count,
431
482
  )
432
- results["abandoned"] += 1
483
+ results["paused"] += 1
433
484
  continue
434
485
 
435
486
  # Attempt replacement
436
- result = self.replace_transaction(intent, attempt)
487
+ result = self.replace_transaction(intent, attempt, deadline)
437
488
  if result.success:
438
489
  results["replaced"] += 1
439
490
  else: