brawny 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. brawny/__init__.py +106 -0
  2. brawny/_context.py +232 -0
  3. brawny/_rpc/__init__.py +38 -0
  4. brawny/_rpc/broadcast.py +172 -0
  5. brawny/_rpc/clients.py +98 -0
  6. brawny/_rpc/context.py +49 -0
  7. brawny/_rpc/errors.py +252 -0
  8. brawny/_rpc/gas.py +158 -0
  9. brawny/_rpc/manager.py +982 -0
  10. brawny/_rpc/selector.py +156 -0
  11. brawny/accounts.py +534 -0
  12. brawny/alerts/__init__.py +132 -0
  13. brawny/alerts/abi_resolver.py +530 -0
  14. brawny/alerts/base.py +152 -0
  15. brawny/alerts/context.py +271 -0
  16. brawny/alerts/contracts.py +635 -0
  17. brawny/alerts/encoded_call.py +201 -0
  18. brawny/alerts/errors.py +267 -0
  19. brawny/alerts/events.py +680 -0
  20. brawny/alerts/function_caller.py +364 -0
  21. brawny/alerts/health.py +185 -0
  22. brawny/alerts/routing.py +118 -0
  23. brawny/alerts/send.py +364 -0
  24. brawny/api.py +660 -0
  25. brawny/chain.py +93 -0
  26. brawny/cli/__init__.py +16 -0
  27. brawny/cli/app.py +17 -0
  28. brawny/cli/bootstrap.py +37 -0
  29. brawny/cli/commands/__init__.py +41 -0
  30. brawny/cli/commands/abi.py +93 -0
  31. brawny/cli/commands/accounts.py +632 -0
  32. brawny/cli/commands/console.py +495 -0
  33. brawny/cli/commands/contract.py +139 -0
  34. brawny/cli/commands/health.py +112 -0
  35. brawny/cli/commands/init_project.py +86 -0
  36. brawny/cli/commands/intents.py +130 -0
  37. brawny/cli/commands/job_dev.py +254 -0
  38. brawny/cli/commands/jobs.py +308 -0
  39. brawny/cli/commands/logs.py +87 -0
  40. brawny/cli/commands/maintenance.py +182 -0
  41. brawny/cli/commands/migrate.py +51 -0
  42. brawny/cli/commands/networks.py +253 -0
  43. brawny/cli/commands/run.py +249 -0
  44. brawny/cli/commands/script.py +209 -0
  45. brawny/cli/commands/signer.py +248 -0
  46. brawny/cli/helpers.py +265 -0
  47. brawny/cli_templates.py +1445 -0
  48. brawny/config/__init__.py +74 -0
  49. brawny/config/models.py +404 -0
  50. brawny/config/parser.py +633 -0
  51. brawny/config/routing.py +55 -0
  52. brawny/config/validation.py +246 -0
  53. brawny/daemon/__init__.py +14 -0
  54. brawny/daemon/context.py +69 -0
  55. brawny/daemon/core.py +702 -0
  56. brawny/daemon/loops.py +327 -0
  57. brawny/db/__init__.py +78 -0
  58. brawny/db/base.py +986 -0
  59. brawny/db/base_new.py +165 -0
  60. brawny/db/circuit_breaker.py +97 -0
  61. brawny/db/global_cache.py +298 -0
  62. brawny/db/mappers.py +182 -0
  63. brawny/db/migrate.py +349 -0
  64. brawny/db/migrations/001_init.sql +186 -0
  65. brawny/db/migrations/002_add_included_block.sql +7 -0
  66. brawny/db/migrations/003_add_broadcast_at.sql +10 -0
  67. brawny/db/migrations/004_broadcast_binding.sql +20 -0
  68. brawny/db/migrations/005_add_retry_after.sql +9 -0
  69. brawny/db/migrations/006_add_retry_count_column.sql +11 -0
  70. brawny/db/migrations/007_add_gap_tracking.sql +18 -0
  71. brawny/db/migrations/008_add_transactions.sql +72 -0
  72. brawny/db/migrations/009_add_intent_metadata.sql +5 -0
  73. brawny/db/migrations/010_add_nonce_gap_index.sql +9 -0
  74. brawny/db/migrations/011_add_job_logs.sql +24 -0
  75. brawny/db/migrations/012_add_claimed_by.sql +5 -0
  76. brawny/db/ops/__init__.py +29 -0
  77. brawny/db/ops/attempts.py +108 -0
  78. brawny/db/ops/blocks.py +83 -0
  79. brawny/db/ops/cache.py +93 -0
  80. brawny/db/ops/intents.py +296 -0
  81. brawny/db/ops/jobs.py +110 -0
  82. brawny/db/ops/logs.py +97 -0
  83. brawny/db/ops/nonces.py +322 -0
  84. brawny/db/postgres.py +2535 -0
  85. brawny/db/postgres_new.py +196 -0
  86. brawny/db/queries.py +584 -0
  87. brawny/db/sqlite.py +2733 -0
  88. brawny/db/sqlite_new.py +191 -0
  89. brawny/history.py +126 -0
  90. brawny/interfaces.py +136 -0
  91. brawny/invariants.py +155 -0
  92. brawny/jobs/__init__.py +26 -0
  93. brawny/jobs/base.py +287 -0
  94. brawny/jobs/discovery.py +233 -0
  95. brawny/jobs/job_validation.py +111 -0
  96. brawny/jobs/kv.py +125 -0
  97. brawny/jobs/registry.py +283 -0
  98. brawny/keystore.py +484 -0
  99. brawny/lifecycle.py +551 -0
  100. brawny/logging.py +290 -0
  101. brawny/metrics.py +594 -0
  102. brawny/model/__init__.py +53 -0
  103. brawny/model/contexts.py +319 -0
  104. brawny/model/enums.py +70 -0
  105. brawny/model/errors.py +194 -0
  106. brawny/model/events.py +93 -0
  107. brawny/model/startup.py +20 -0
  108. brawny/model/types.py +483 -0
  109. brawny/networks/__init__.py +96 -0
  110. brawny/networks/config.py +269 -0
  111. brawny/networks/manager.py +423 -0
  112. brawny/obs/__init__.py +67 -0
  113. brawny/obs/emit.py +158 -0
  114. brawny/obs/health.py +175 -0
  115. brawny/obs/heartbeat.py +133 -0
  116. brawny/reconciliation.py +108 -0
  117. brawny/scheduler/__init__.py +19 -0
  118. brawny/scheduler/poller.py +472 -0
  119. brawny/scheduler/reorg.py +632 -0
  120. brawny/scheduler/runner.py +708 -0
  121. brawny/scheduler/shutdown.py +371 -0
  122. brawny/script_tx.py +297 -0
  123. brawny/scripting.py +251 -0
  124. brawny/startup.py +76 -0
  125. brawny/telegram.py +393 -0
  126. brawny/testing.py +108 -0
  127. brawny/tx/__init__.py +41 -0
  128. brawny/tx/executor.py +1071 -0
  129. brawny/tx/fees.py +50 -0
  130. brawny/tx/intent.py +423 -0
  131. brawny/tx/monitor.py +628 -0
  132. brawny/tx/nonce.py +498 -0
  133. brawny/tx/replacement.py +456 -0
  134. brawny/tx/utils.py +26 -0
  135. brawny/utils.py +205 -0
  136. brawny/validation.py +69 -0
  137. brawny-0.1.13.dist-info/METADATA +156 -0
  138. brawny-0.1.13.dist-info/RECORD +141 -0
  139. brawny-0.1.13.dist-info/WHEEL +5 -0
  140. brawny-0.1.13.dist-info/entry_points.txt +2 -0
  141. brawny-0.1.13.dist-info/top_level.txt +1 -0
brawny/obs/__init__.py ADDED
@@ -0,0 +1,67 @@
1
+ """Observability module for brawny.
2
+
3
+ Provides structured logging, liveness heartbeats, and readiness health checks.
4
+
5
+ See LOGGING_METRICS_PLAN.md for design rationale and usage patterns.
6
+
7
+ Quick Reference:
8
+ # Logging via emit() gateway
9
+ from brawny.obs import emit, get_logger, bind_intent
10
+
11
+ log = get_logger(worker_id=1, chain_id=1)
12
+ log = bind_intent(log, intent_id=str(intent.intent_id), job_id=intent.job_id)
13
+ emit(log, level="info", event="tx", result="broadcast", tx_hash=hash)
14
+
15
+ # Heartbeat for liveness
16
+ from brawny.obs import get_heartbeat
17
+
18
+ heartbeat = get_heartbeat("block_poller")
19
+ heartbeat.beat() # Call in loop
20
+
21
+ # Health state for readiness
22
+ from brawny.obs import get_health_state
23
+
24
+ health = get_health_state()
25
+ health.update_db(db.ping())
26
+ if not health.is_ready():
27
+ return 503
28
+ """
29
+
30
+ from brawny.obs.emit import (
31
+ ALLOWED,
32
+ RUN_ID,
33
+ bind_attempt,
34
+ bind_intent,
35
+ emit,
36
+ get_logger,
37
+ )
38
+ from brawny.obs.health import (
39
+ HealthState,
40
+ get_health_state,
41
+ reset_health_state,
42
+ )
43
+ from brawny.obs.heartbeat import (
44
+ Heartbeat,
45
+ all_heartbeat_ages,
46
+ any_stale,
47
+ get_heartbeat,
48
+ )
49
+
50
+ __all__ = [
51
+ # emit.py
52
+ "ALLOWED",
53
+ "RUN_ID",
54
+ "bind_attempt",
55
+ "bind_intent",
56
+ "emit",
57
+ "get_logger",
58
+ # health.py
59
+ "HealthState",
60
+ "get_health_state",
61
+ "reset_health_state",
62
+ # heartbeat.py
63
+ "Heartbeat",
64
+ "all_heartbeat_ages",
65
+ "any_stale",
66
+ "get_heartbeat",
67
+ ]
brawny/obs/emit.py ADDED
@@ -0,0 +1,158 @@
1
+ """Structured logging gateway for brawny.
2
+
3
+ The emit() function is the single enforcement choke point for all logging.
4
+ It validates event/result pairs, normalizes error fields, and controls trace inclusion.
5
+
6
+ See LOGGING_METRICS_PLAN.md for design rationale.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import uuid
13
+ from typing import TYPE_CHECKING
14
+
15
+ import structlog
16
+
17
+ if TYPE_CHECKING:
18
+ from typing import Any
19
+
20
# Identifier bound to every logger returned by get_logger(). Read from the
# BRAWNY_RUN_ID environment variable when it is set and non-empty; otherwise a
# random "run_<12 hex chars>" value is generated once, at import time.
RUN_ID = os.environ.get("BRAWNY_RUN_ID") or f"run_{uuid.uuid4().hex[:12]}"

# Allowed event families and their valid results.
# emit() raises ValueError for any (event, result) pair not listed here.
ALLOWED: dict[str, set[str]] = {
    "job.check": {"triggered", "skipped", "timeout", "error"},
    "intent": {"created", "claimed", "executed", "failed", "status"},
    "tx": {"signed", "broadcast", "confirmed", "failed", "replaced"},
    "rpc": {"ok", "error", "timeout"},
    "nonce": {"reserved", "released", "reconciled"},
    "block": {"processed", "reorg"},
    "system": {"started", "draining", "shutdown"},
}

# Maximum number of characters of str(err) kept in the "error" field.
ERROR_MESSAGE_MAX_LENGTH = 500


def emit(
    log: structlog.stdlib.BoundLogger,
    *,
    level: str,
    event: str,
    result: str,
    err: Exception | None = None,
    is_terminal: bool = False,
    **fields: Any,
) -> None:
    """Emit a structured log event.

    This is the single enforcement choke point for all logging in brawny.
    It validates event/result pairs, normalizes error fields, and controls
    stack trace inclusion.

    Args:
        log: Bound logger instance
        level: Name of the logger method to call, case-insensitive
            ("debug", "info", "warning", "error")
        event: Event family (e.g., "tx", "intent", "job.check")
        result: Event result (e.g., "confirmed", "failed", "triggered")
        err: Optional exception for error events. When given, "error_type"
            and "error" fields are set (overwriting same-named entries in
            ``fields``); the message is cut to ERROR_MESSAGE_MAX_LENGTH
            characters followed by "...".
        is_terminal: If True and err is provided, include stack trace
        **fields: Additional context fields

    Raises:
        ValueError: If (event, result) pair is not in ALLOWED, or if the
            logger has no callable method named after ``level``

    Example:
        emit(log, level="info", event="tx", result="broadcast", tx_hash=hash)
        emit(log, level="error", event="tx", result="failed", err=e, is_terminal=True)
    """
    # Validate event/result pair
    if event not in ALLOWED:
        raise ValueError(f"Invalid event family: {event!r}. Must be one of: {sorted(ALLOWED.keys())}")
    if result not in ALLOWED[event]:
        raise ValueError(
            f"Invalid result {result!r} for event {event!r}. Must be one of: {sorted(ALLOWED[event])}"
        )

    # Resolve the logging method up front so a misspelled level fails with a
    # clear ValueError instead of an AttributeError from getattr().
    log_fn = getattr(log, level.lower(), None)
    if not callable(log_fn):
        raise ValueError(f"Invalid log level: {level!r}")

    # Normalize error fields
    if err is not None:
        msg = str(err)
        if len(msg) > ERROR_MESSAGE_MAX_LENGTH:
            msg = msg[:ERROR_MESSAGE_MAX_LENGTH] + "..."
        fields["error_type"] = type(err).__name__
        fields["error"] = msg
        if is_terminal:
            # Pass the exception object itself rather than True: exc_info=True
            # reads sys.exc_info(), which is empty when emit() is called
            # outside an active ``except`` block, so the trace would be lost.
            fields["exc_info"] = err

    log_fn(event, result=result, **fields)
97
+
98
+
99
def get_logger(**bind: Any) -> structlog.stdlib.BoundLogger:
    """Return the "brawny" structlog logger with ``run_id`` pre-bound.

    Intended as the entry point for obtaining a base logger at component
    boundaries.

    Args:
        **bind: Extra key/value pairs to bind alongside ``run_id``
            (e.g., worker_id, chain_id)

    Returns:
        Bound logger carrying run_id=RUN_ID plus the extra fields

    Example:
        log = get_logger(worker_id=1, chain_id=1)
    """
    base_logger = structlog.get_logger("brawny")
    return base_logger.bind(run_id=RUN_ID, **bind)
114
+
115
+
116
def bind_intent(
    log: structlog.stdlib.BoundLogger,
    *,
    intent_id: str,
    job_id: str,
) -> structlog.stdlib.BoundLogger:
    """Return ``log`` with the intent's identifiers bound.

    Call this once when starting work on a specific intent so every
    subsequent event carries the same correlation fields.

    Args:
        log: Base logger
        intent_id: Intent UUID as string
        job_id: Job identifier

    Returns:
        Result of ``log.bind`` with ``intent_id`` and ``job_id`` set
    """
    intent_context = {"intent_id": intent_id, "job_id": job_id}
    return log.bind(**intent_context)
135
+
136
+
137
def bind_attempt(
    log: structlog.stdlib.BoundLogger,
    *,
    attempt_id: str,
    nonce: int | None = None,
) -> structlog.stdlib.BoundLogger:
    """Return ``log`` with the transaction attempt's identifiers bound.

    Call this when starting work on a specific transaction attempt.

    Args:
        log: Base logger (typically with intent context already bound)
        attempt_id: Attempt UUID as string
        nonce: Transaction nonce (optional, but include when known)

    Returns:
        Result of ``log.bind`` with ``attempt_id`` set, plus ``nonce`` when
        one was supplied
    """
    attempt_context = {"attempt_id": attempt_id}
    # Compare against None explicitly: a nonce of 0 is valid and must be bound.
    if nonce is not None:
        attempt_context["nonce"] = nonce
    return log.bind(**attempt_context)
brawny/obs/health.py ADDED
@@ -0,0 +1,175 @@
1
+ """Cached health state for readiness probes.
2
+
3
+ Readiness probes (/readyz) must be fast and never block on slow checks.
4
+ This module provides cached health state that's updated by background loops.
5
+
6
+ See LOGGING_METRICS_PLAN.md Section 4.1.3 for design rationale.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from threading import Lock
14
+ from typing import TYPE_CHECKING
15
+
16
+ if TYPE_CHECKING:
17
+ from threading import Thread
18
+
19
+
20
@dataclass
class HealthState:
    """Cached health state for readiness checks.

    Updated by background loops, read by /readyz endpoint.
    All fields are protected by a lock for thread safety.

    NOT READY when:
    - shutdown_requested is True (draining)
    - db_ok is False
    - rpc_ok is False
    - workers_ok is False

    Usage:
        # Background loop updates state
        health_state.update_db(db.ping())
        health_state.update_rpc(rpc.any_healthy())
        health_state.update_workers(worker_threads)

        # /readyz reads cached state (fast, never blocks)
        if not health_state.is_ready():
            return Response(status_code=503)
        return Response(status_code=200)
    """

    # Cached component health
    db_ok: bool = field(default=True)
    rpc_ok: bool = field(default=True)
    workers_ok: bool = field(default=True)

    # Draining state
    shutdown_requested: bool = field(default=False)

    # time.time() of the last update_*() call for each component; 0.0 until
    # the first update. Exposed via to_dict(); not consulted by is_ready().
    last_db_check: float = field(default=0.0)
    last_rpc_check: float = field(default=0.0)
    last_workers_check: float = field(default=0.0)

    # Thread safety. compare=False keeps the lock out of the generated
    # __eq__: Lock objects only compare equal to themselves, so including it
    # made two states with identical values always compare unequal.
    _lock: Lock = field(default_factory=Lock, repr=False, compare=False)

    def update_db(self, ok: bool) -> None:
        """Update database health state and stamp the check time."""
        with self._lock:
            self.db_ok = ok
            self.last_db_check = time.time()

    def update_rpc(self, ok: bool) -> None:
        """Update RPC health state and stamp the check time."""
        with self._lock:
            self.rpc_ok = ok
            self.last_rpc_check = time.time()

    def update_workers(self, threads: list["Thread"]) -> None:
        """Update worker health state and stamp the check time.

        Workers are considered healthy when at least one thread reports
        ``is_alive()``; an empty (or falsy) ``threads`` counts as unhealthy.

        Args:
            threads: List of worker threads
        """
        with self._lock:
            self.workers_ok = bool(threads) and any(t.is_alive() for t in threads)
            self.last_workers_check = time.time()

    def request_shutdown(self) -> None:
        """Mark the system as draining.

        Call this at the start of graceful shutdown.
        is_ready() returns False from then on, so /readyz will return 503.
        """
        with self._lock:
            self.shutdown_requested = True

    def _not_ready_reasons(self) -> list[str]:
        """Return the reasons the system is not ready (empty list if ready).

        The caller must already hold ``self._lock``. The lock is not
        reentrant, so this helper must not acquire it itself.
        """
        reasons = []
        if self.shutdown_requested:
            reasons.append("shutdown_requested")
        if not self.db_ok:
            reasons.append("db_unhealthy")
        if not self.rpc_ok:
            reasons.append("rpc_unhealthy")
        if not self.workers_ok:
            reasons.append("no_workers_alive")
        return reasons

    def is_ready(self) -> bool:
        """Check if the system is ready to accept work.

        Returns:
            True if ready, False if not ready (should return 503)
        """
        with self._lock:
            return not self._not_ready_reasons()

    def readiness_reasons(self) -> list[str]:
        """Get human-readable reasons for not being ready.

        Useful for /healthz diagnostics.

        Returns:
            List of reasons why the system is not ready, empty if ready
        """
        with self._lock:
            return self._not_ready_reasons()

    def to_dict(self) -> dict[str, object]:
        """Get full health state as a dictionary.

        Useful for /healthz JSON response.

        Returns:
            Dict with a boolean "ready" plus every cached flag and timestamp,
            all read under a single lock acquisition.
        """
        with self._lock:
            return {
                "ready": not self._not_ready_reasons(),
                "shutdown_requested": self.shutdown_requested,
                "db_ok": self.db_ok,
                "rpc_ok": self.rpc_ok,
                "workers_ok": self.workers_ok,
                "last_db_check": self.last_db_check,
                "last_rpc_check": self.last_rpc_check,
                "last_workers_check": self.last_workers_check,
            }
152
+
153
+
154
# Process-wide HealthState instance, created lazily by get_health_state().
# _health_state_lock guards creation and reset of the instance.
_health_state: HealthState | None = None
_health_state_lock = Lock()


def get_health_state() -> HealthState:
    """Return the global HealthState, creating it on first access.

    Creation happens under ``_health_state_lock`` so concurrent first callers
    all receive the same instance.
    """
    global _health_state
    with _health_state_lock:
        state = _health_state
        if state is None:
            state = _health_state = HealthState()
        return state
169
+
170
+
171
def reset_health_state() -> None:
    """Drop the global health state (for testing).

    The next get_health_state() call builds a fresh HealthState.
    """
    global _health_state
    with _health_state_lock:
        _health_state = None
brawny/obs/heartbeat.py ADDED
@@ -0,0 +1,133 @@
1
+ """Heartbeat-based liveness for brawny.
2
+
3
+ Liveness is NOT "can we respond to HTTP?" - it's "is the core loop making progress?"
4
+
5
+ The Heartbeat class tracks when critical loops last made progress.
6
+ /livez returns 503 when the heartbeat is stale (no progress in 30s).
7
+
8
+ See LOGGING_METRICS_PLAN.md Section 4.1.2 for design rationale.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import time
14
+ from dataclasses import dataclass, field
15
+ from threading import Lock
16
+
17
+
18
@dataclass
class Heartbeat:
    """Track liveness of a critical loop.

    A heartbeat is considered stale if beat() hasn't been called within
    max_age_seconds. This indicates the loop is stuck or deadlocked.

    Thread-safe: beat() and is_stale() can be called from different threads.

    Usage:
        heartbeat = Heartbeat()

        # In the critical loop
        while not stop_event.is_set():
            heartbeat.beat()
            # ... do work ...

        # In /livez endpoint
        if heartbeat.is_stale():
            return Response(status_code=503)
        return Response(status_code=200)
    """

    # time.time() of the most recent beat(); 0.0 means beat() was never called.
    # NOTE(review): this is wall-clock time, so a system clock adjustment
    # shifts the computed age. Confirm whether that matters for /livez.
    last_beat_ts: float = field(default=0.0)
    # compare=False keeps the lock out of the generated __eq__: Lock objects
    # only compare equal to themselves, so including it made two heartbeats
    # with the same timestamp always compare unequal.
    _lock: Lock = field(default_factory=Lock, repr=False, compare=False)

    def beat(self) -> None:
        """Record that the loop is making progress.

        Call this at the start of each loop iteration.
        """
        with self._lock:
            self.last_beat_ts = time.time()

    def is_stale(self, max_age_seconds: float = 30.0) -> bool:
        """Check if the heartbeat is stale.

        Args:
            max_age_seconds: Maximum allowed time since last beat.
                Defaults to 30 seconds.

        Returns:
            True if:
            - beat() was never called (last_beat_ts == 0.0), OR
            - More than max_age_seconds have passed since last beat
        """
        with self._lock:
            if self.last_beat_ts == 0.0:
                # Never started: stale regardless of max_age_seconds.
                return True
            return (time.time() - self.last_beat_ts) > max_age_seconds

    def age_seconds(self) -> float:
        """Get the age of the last heartbeat in seconds.

        Returns:
            Seconds since last beat, or float('inf') if never beat.
        """
        with self._lock:
            if self.last_beat_ts == 0.0:
                return float("inf")
            return time.time() - self.last_beat_ts
80
+
81
+
82
# Registry of named heartbeats for critical loops.
# _heartbeats_lock guards every read and write of the registry dict.
_heartbeats: dict[str, Heartbeat] = {}
_heartbeats_lock = Lock()


def get_heartbeat(name: str) -> Heartbeat:
    """Return the heartbeat registered under ``name``, creating it if absent.

    Use this to track different critical loops:
    - "block_poller" for the block processing loop
    - "monitor" for the transaction monitor loop

    Args:
        name: Identifier for the heartbeat

    Returns:
        The Heartbeat instance for this name (the same object on every call
        with the same name)
    """
    with _heartbeats_lock:
        try:
            return _heartbeats[name]
        except KeyError:
            created = _heartbeats[name] = Heartbeat()
            return created
104
+
105
+
106
def any_stale(max_age_seconds: float = 30.0) -> bool:
    """Report whether at least one registered heartbeat is stale.

    Use this in /livez to check overall system liveness.

    Args:
        max_age_seconds: Maximum allowed time since last beat

    Returns:
        True if any heartbeat is stale. False when none are stale, including
        when no heartbeat has been registered yet (system is starting up).
    """
    with _heartbeats_lock:
        for heartbeat in _heartbeats.values():
            if heartbeat.is_stale(max_age_seconds):
                return True
        return False
122
+
123
+
124
def all_heartbeat_ages() -> dict[str, float]:
    """Snapshot the age of every registered heartbeat.

    Useful for /healthz diagnostics endpoint.

    Returns:
        Dict mapping heartbeat name to age in seconds
    """
    ages = {}
    with _heartbeats_lock:
        for name, heartbeat in _heartbeats.items():
            ages[name] = heartbeat.age_seconds()
        return ages
brawny/reconciliation.py ADDED
@@ -0,0 +1,108 @@
1
+ """Startup reconciliation for detecting and repairing inconsistent state.
2
+
3
+ Phase 1 implementation: runs at startup only.
4
+ Phase 2 will add periodic reconciliation after metrics prove stability.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import asdict, dataclass
10
+ from typing import TYPE_CHECKING
11
+
12
+ from brawny.logging import get_logger
13
+ from brawny.metrics import get_metrics
14
+
15
+ if TYPE_CHECKING:
16
+ from brawny.db.base import Database
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
@dataclass
class ReconciliationStats:
    """Counters produced by one reconciliation run.

    Every counter starts at 0 and is filled in by reconcile_startup().
    """

    # Repairs performed
    orphaned_claims_cleared: int = 0
    orphaned_nonces_released: int = 0
    # Detections only (logged, not repaired)
    pending_without_attempts: int = 0
    stale_claims: int = 0
29
+
30
+
31
def reconcile_startup(db: Database, chain_id: int) -> ReconciliationStats:
    """Run the startup reconciliation pass for one chain.

    Repairs (delegated to the database layer):
    - Orphaned claims older than 2 minutes are cleared
    - Orphaned nonces older than 5 minutes are released

    Detects (logs only, no repair):
    - Pending intents without attempts (data integrity issue)
    - Claimed intents stale for more than 10 minutes (worker may have crashed)

    Each non-zero count is logged, all four counts are published as gauges,
    and a final "reconciliation.completed" event is logged.

    Args:
        db: Database connection
        chain_id: Chain ID to reconcile

    Returns:
        Statistics from the reconciliation run
    """
    stats = ReconciliationStats()

    # Repair: clear orphaned claims (with time guard)
    claims_cleared = db.clear_orphaned_claims(chain_id, older_than_minutes=2)
    stats.orphaned_claims_cleared = claims_cleared
    if claims_cleared > 0:
        logger.warning(
            "reconciliation.orphaned_claims_cleared",
            count=claims_cleared,
            chain_id=chain_id,
        )

    # Repair: release orphaned nonces (with time guard)
    nonces_released = db.release_orphaned_nonces(chain_id, older_than_minutes=5)
    stats.orphaned_nonces_released = nonces_released
    if nonces_released > 0:
        logger.warning(
            "reconciliation.orphaned_nonces_released",
            count=nonces_released,
            chain_id=chain_id,
        )

    # Detect: pending without attempts (log only - needs investigation)
    pending_count = db.count_pending_without_attempts(chain_id)
    stats.pending_without_attempts = pending_count
    if pending_count > 0:
        logger.error(
            "reconciliation.pending_without_attempts",
            count=pending_count,
            chain_id=chain_id,
            action="manual_investigation_required",
        )

    # Detect: stale claims (log only - may self-recover or need intervention)
    stale_count = db.count_stale_claims(chain_id, older_than_minutes=10)
    stats.stale_claims = stale_count
    if stale_count > 0:
        logger.warning(
            "reconciliation.stale_claims",
            count=stale_count,
            chain_id=chain_id,
        )

    # Emit metrics: one gauge per counter, labelled with the chain.
    metrics = get_metrics()
    gauge_values = (
        ("brawny_reconciliation_orphaned_claims", stats.orphaned_claims_cleared),
        ("brawny_reconciliation_orphaned_nonces", stats.orphaned_nonces_released),
        ("brawny_reconciliation_pending_no_attempts", stats.pending_without_attempts),
        ("brawny_reconciliation_stale_claims", stats.stale_claims),
    )
    for gauge_name, value in gauge_values:
        metrics.gauge(gauge_name).set(value, chain_id=chain_id)

    logger.info("reconciliation.completed", **asdict(stats), chain_id=chain_id)
    return stats
brawny/scheduler/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """Block poller, reorg detection, and job scheduler."""
2
+
3
+ from brawny.scheduler.poller import BlockPoller, PollResult
4
+ from brawny.scheduler.reorg import ReorgDetector, ReorgResult
5
+ from brawny.scheduler.runner import BlockResult, JobResult, JobRunner
6
+ from brawny.scheduler.shutdown import ShutdownContext, ShutdownHandler, ShutdownStats
7
+
8
+ __all__ = [
9
+ "BlockPoller",
10
+ "PollResult",
11
+ "ReorgDetector",
12
+ "ReorgResult",
13
+ "JobRunner",
14
+ "JobResult",
15
+ "BlockResult",
16
+ "ShutdownHandler",
17
+ "ShutdownContext",
18
+ "ShutdownStats",
19
+ ]