brawny-0.1.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brawny/__init__.py +106 -0
- brawny/_context.py +232 -0
- brawny/_rpc/__init__.py +38 -0
- brawny/_rpc/broadcast.py +172 -0
- brawny/_rpc/clients.py +98 -0
- brawny/_rpc/context.py +49 -0
- brawny/_rpc/errors.py +252 -0
- brawny/_rpc/gas.py +158 -0
- brawny/_rpc/manager.py +982 -0
- brawny/_rpc/selector.py +156 -0
- brawny/accounts.py +534 -0
- brawny/alerts/__init__.py +132 -0
- brawny/alerts/abi_resolver.py +530 -0
- brawny/alerts/base.py +152 -0
- brawny/alerts/context.py +271 -0
- brawny/alerts/contracts.py +635 -0
- brawny/alerts/encoded_call.py +201 -0
- brawny/alerts/errors.py +267 -0
- brawny/alerts/events.py +680 -0
- brawny/alerts/function_caller.py +364 -0
- brawny/alerts/health.py +185 -0
- brawny/alerts/routing.py +118 -0
- brawny/alerts/send.py +364 -0
- brawny/api.py +660 -0
- brawny/chain.py +93 -0
- brawny/cli/__init__.py +16 -0
- brawny/cli/app.py +17 -0
- brawny/cli/bootstrap.py +37 -0
- brawny/cli/commands/__init__.py +41 -0
- brawny/cli/commands/abi.py +93 -0
- brawny/cli/commands/accounts.py +632 -0
- brawny/cli/commands/console.py +495 -0
- brawny/cli/commands/contract.py +139 -0
- brawny/cli/commands/health.py +112 -0
- brawny/cli/commands/init_project.py +86 -0
- brawny/cli/commands/intents.py +130 -0
- brawny/cli/commands/job_dev.py +254 -0
- brawny/cli/commands/jobs.py +308 -0
- brawny/cli/commands/logs.py +87 -0
- brawny/cli/commands/maintenance.py +182 -0
- brawny/cli/commands/migrate.py +51 -0
- brawny/cli/commands/networks.py +253 -0
- brawny/cli/commands/run.py +249 -0
- brawny/cli/commands/script.py +209 -0
- brawny/cli/commands/signer.py +248 -0
- brawny/cli/helpers.py +265 -0
- brawny/cli_templates.py +1445 -0
- brawny/config/__init__.py +74 -0
- brawny/config/models.py +404 -0
- brawny/config/parser.py +633 -0
- brawny/config/routing.py +55 -0
- brawny/config/validation.py +246 -0
- brawny/daemon/__init__.py +14 -0
- brawny/daemon/context.py +69 -0
- brawny/daemon/core.py +702 -0
- brawny/daemon/loops.py +327 -0
- brawny/db/__init__.py +78 -0
- brawny/db/base.py +986 -0
- brawny/db/base_new.py +165 -0
- brawny/db/circuit_breaker.py +97 -0
- brawny/db/global_cache.py +298 -0
- brawny/db/mappers.py +182 -0
- brawny/db/migrate.py +349 -0
- brawny/db/migrations/001_init.sql +186 -0
- brawny/db/migrations/002_add_included_block.sql +7 -0
- brawny/db/migrations/003_add_broadcast_at.sql +10 -0
- brawny/db/migrations/004_broadcast_binding.sql +20 -0
- brawny/db/migrations/005_add_retry_after.sql +9 -0
- brawny/db/migrations/006_add_retry_count_column.sql +11 -0
- brawny/db/migrations/007_add_gap_tracking.sql +18 -0
- brawny/db/migrations/008_add_transactions.sql +72 -0
- brawny/db/migrations/009_add_intent_metadata.sql +5 -0
- brawny/db/migrations/010_add_nonce_gap_index.sql +9 -0
- brawny/db/migrations/011_add_job_logs.sql +24 -0
- brawny/db/migrations/012_add_claimed_by.sql +5 -0
- brawny/db/ops/__init__.py +29 -0
- brawny/db/ops/attempts.py +108 -0
- brawny/db/ops/blocks.py +83 -0
- brawny/db/ops/cache.py +93 -0
- brawny/db/ops/intents.py +296 -0
- brawny/db/ops/jobs.py +110 -0
- brawny/db/ops/logs.py +97 -0
- brawny/db/ops/nonces.py +322 -0
- brawny/db/postgres.py +2535 -0
- brawny/db/postgres_new.py +196 -0
- brawny/db/queries.py +584 -0
- brawny/db/sqlite.py +2733 -0
- brawny/db/sqlite_new.py +191 -0
- brawny/history.py +126 -0
- brawny/interfaces.py +136 -0
- brawny/invariants.py +155 -0
- brawny/jobs/__init__.py +26 -0
- brawny/jobs/base.py +287 -0
- brawny/jobs/discovery.py +233 -0
- brawny/jobs/job_validation.py +111 -0
- brawny/jobs/kv.py +125 -0
- brawny/jobs/registry.py +283 -0
- brawny/keystore.py +484 -0
- brawny/lifecycle.py +551 -0
- brawny/logging.py +290 -0
- brawny/metrics.py +594 -0
- brawny/model/__init__.py +53 -0
- brawny/model/contexts.py +319 -0
- brawny/model/enums.py +70 -0
- brawny/model/errors.py +194 -0
- brawny/model/events.py +93 -0
- brawny/model/startup.py +20 -0
- brawny/model/types.py +483 -0
- brawny/networks/__init__.py +96 -0
- brawny/networks/config.py +269 -0
- brawny/networks/manager.py +423 -0
- brawny/obs/__init__.py +67 -0
- brawny/obs/emit.py +158 -0
- brawny/obs/health.py +175 -0
- brawny/obs/heartbeat.py +133 -0
- brawny/reconciliation.py +108 -0
- brawny/scheduler/__init__.py +19 -0
- brawny/scheduler/poller.py +472 -0
- brawny/scheduler/reorg.py +632 -0
- brawny/scheduler/runner.py +708 -0
- brawny/scheduler/shutdown.py +371 -0
- brawny/script_tx.py +297 -0
- brawny/scripting.py +251 -0
- brawny/startup.py +76 -0
- brawny/telegram.py +393 -0
- brawny/testing.py +108 -0
- brawny/tx/__init__.py +41 -0
- brawny/tx/executor.py +1071 -0
- brawny/tx/fees.py +50 -0
- brawny/tx/intent.py +423 -0
- brawny/tx/monitor.py +628 -0
- brawny/tx/nonce.py +498 -0
- brawny/tx/replacement.py +456 -0
- brawny/tx/utils.py +26 -0
- brawny/utils.py +205 -0
- brawny/validation.py +69 -0
- brawny-0.1.13.dist-info/METADATA +156 -0
- brawny-0.1.13.dist-info/RECORD +141 -0
- brawny-0.1.13.dist-info/WHEEL +5 -0
- brawny-0.1.13.dist-info/entry_points.txt +2 -0
- brawny-0.1.13.dist-info/top_level.txt +1 -0
brawny/daemon/core.py
ADDED
@@ -0,0 +1,702 @@
"""BrawnyDaemon - Main daemon orchestrator.

Provides the core daemon class that manages all components and threads.
"""

from __future__ import annotations

import asyncio
import itertools
import os
import socket
import threading
import time
from threading import Event, Lock, Thread
from typing import TYPE_CHECKING, Callable

from brawny.alerts.contracts import ContractSystem
from brawny.alerts.health import health_alert
from brawny.alerts.send import create_send_health
from brawny.daemon.context import DaemonContext, DaemonState, RuntimeOverrides
from brawny.daemon.loops import run_monitor, run_worker
from brawny.db import create_database
from brawny.db.migrate import Migrator, verify_critical_schema
from brawny.jobs.discovery import (
    JobDiscoveryFailed,
    JobLoadError,
    auto_discover_jobs,
    discover_jobs,
)
from brawny.jobs.job_validation import validate_all_jobs
from brawny.jobs.registry import get_registry
from brawny.keystore import create_keystore
from brawny.lifecycle import LifecycleDispatcher
from brawny.logging import get_logger
from brawny.metrics import ACTIVE_WORKERS, get_metrics
from brawny.model.enums import IntentStatus
from brawny.model.startup import StartupMessage
from brawny.model.types import BlockInfo
from brawny._rpc import RPCManager
from brawny.scheduler.poller import BlockPoller
from brawny.scheduler.reorg import ReorgDetector
from brawny.scheduler.runner import JobRunner
from brawny.startup import reconcile_pending_intents
from brawny.tx.executor import TxExecutor
from brawny.tx.intent import transition_intent
from brawny.tx.monitor import TxMonitor
from brawny.tx.replacement import TxReplacer
from brawny.validation import validate_job_routing
from brawny.telegram import TelegramBot

if TYPE_CHECKING:
    from brawny.config import Config
    from brawny.config.models import TelegramConfig
    from brawny.db.base import Database
    from brawny.jobs.base import Job
    from brawny.keystore import Keystore


class BrawnyDaemon:
    """Main daemon orchestrator.

    Manages all components, threads, and lifecycle for the brawny daemon.
    """

    def __init__(
        self,
        config: "Config",
        overrides: RuntimeOverrides | None = None,
        extra_modules: list[str] | None = None,
    ) -> None:
        """Initialize the daemon.

        Args:
            config: Application configuration
            overrides: Runtime overrides for dry_run, once, worker_count, etc.
            extra_modules: Additional job modules to discover
        """
        self.config = config
        self.overrides = overrides or RuntimeOverrides()
        self._extra_modules = extra_modules or []
        self._log = get_logger(__name__)

        # Components (initialized in start())
        self._db: Database | None = None
        self._rpc: RPCManager | None = None
        self._keystore: Keystore | None = None
        self._contract_system: ContractSystem | None = None
        self._lifecycle: LifecycleDispatcher | None = None
        self._executor: TxExecutor | None = None
        self._monitor: TxMonitor | None = None
        self._replacer: TxReplacer | None = None
        self._job_runner: JobRunner | None = None
        self._reorg_detector: ReorgDetector | None = None
        self._poller: BlockPoller | None = None

        # Jobs
        self._jobs: dict[str, Job] = {}

        # Telegram (cached instance)
        self._telegram_bot: TelegramBot | None = None

        # Health alerting (initialized in initialize())
        self._health_send_fn: Callable[..., None] | None = None
        self._health_chat_id: str | None = None
        self._health_cooldown: int = 1800

        # Threading
        self._stop = Event()
        self._wakeup_hint = Event()
        self._worker_threads: list[Thread] = []
        self._monitor_thread: Thread | None = None
        self._monitor_stop = Event()

        # Inflight tracking
        self._inflight_lock = Lock()
        self._inflight_count = 0
        self._inflight_zero = Event()
        self._inflight_zero.set()

        # Claim token generation
        self._claim_counter = itertools.count(1)
        self._hostname = socket.gethostname()
        self._pid = os.getpid()

        # Async event loop (owned by daemon, used by runner for async job.check())
        self._loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)  # Make it the current loop for this thread
        self._loop_thread_id: int = threading.get_ident()  # Assert ownership

    @property
    def db(self) -> "Database":
        """Get database connection."""
        assert self._db is not None, "Daemon not started"
        return self._db

    @property
    def rpc(self) -> RPCManager:
        """Get RPC manager."""
        assert self._rpc is not None, "Daemon not started"
        return self._rpc

    @property
    def jobs(self) -> dict[str, "Job"]:
        """Get discovered jobs."""
        return self._jobs

    @property
    def keystore(self) -> "Keystore | None":
        """Get keystore (None in dry_run mode)."""
        return self._keystore

    def _check_schema(self) -> None:
        """Verify critical DB schema columns exist. Hard-fail if not."""
        assert self._db is not None

        try:
            verify_critical_schema(self._db)
        except Exception as exc:
            error_msg = str(exc)
            self._log.critical(
                "schema.validation_failed",
                error=error_msg,
                table="tx_intents",
            )
            health_alert(
                component="brawny.startup.schema",
                chain_id=self.config.chain_id,
                error=error_msg,
                level="critical",
                action="Run: brawny migrate",
                db_dialect=self._db.dialect,
                force_send=True,
                send_fn=self._health_send_fn,
                health_chat_id=self._health_chat_id,
            )
            raise SystemExit(f"DB schema mismatch: {error_msg}. Run: brawny migrate") from exc

    def _make_claim_token(self, worker_id: int) -> str:
        """Generate a unique claim token for a worker."""
        return f"{self._hostname}:{self._pid}:{worker_id}:{next(self._claim_counter)}"

    def _make_claimed_by(self, worker_id: int) -> str:
        """Generate a stable claimed_by identifier for a worker."""
        return f"{self._hostname}:{self._pid}:{worker_id}"

    def _inflight_start(self) -> None:
        """Mark an inflight operation starting."""
        with self._inflight_lock:
            self._inflight_count += 1
            self._inflight_zero.clear()

    def _inflight_done(self) -> None:
        """Mark an inflight operation complete."""
        with self._inflight_lock:
            self._inflight_count = max(0, self._inflight_count - 1)
            if self._inflight_count == 0:
                self._inflight_zero.set()

    def _on_intent_created(self, intent_id: str) -> None:
        """Callback when intent is created."""
        self._wakeup_hint.set()

    def _process_block(self, block: BlockInfo) -> None:
        """Process a single block."""
        assert self._job_runner is not None

        self._log.info(
            "block.ingest.start",
            block_number=block.block_number,
        )

        block_result = self._job_runner.process_block(block)

        self._log.info(
            "block.ingest.done",
            block_number=block.block_number,
            jobs_checked=block_result.jobs_checked,
            jobs_triggered=block_result.jobs_triggered,
            intents_created=block_result.intents_created,
        )

    def _discover_jobs(self) -> list[JobLoadError]:
        """Discover and register jobs based on config.

        Returns:
            List of JobLoadError for any modules that failed to load.
        """
        registry = get_registry()
        registry.clear()  # Start fresh to prevent partial state leakage

        if self._extra_modules:
            discovered, errors = discover_jobs(self._extra_modules)
        else:
            discovered, errors = auto_discover_jobs()

        # Log discovery summary
        self._log.info(
            "job.discovery.complete",
            jobs_loaded=len(discovered),
            jobs_failed=len(errors),
        )

        if errors:
            registry.clear()  # Don't leave partial state
            return errors

        self._jobs = {job.job_id: job for job in registry.get_all()}
        return []

    def _validate_jobs(self) -> tuple[dict[str, list[str]], list[str]]:
        """Validate discovered jobs.

        Returns:
            Tuple of (validation_errors, routing_errors)
        """
        validation_errors: dict[str, list[str]] = {}
        routing_errors: list[str] = []

        if self._jobs:
            keystore = self._keystore if not self.overrides.dry_run else None
            validation_errors = validate_all_jobs(self._jobs, keystore=keystore)
            routing_errors = validate_job_routing(self.config, self._jobs)

        return validation_errors, routing_errors

    def _validate_telegram_config(self) -> list[str]:
        """Validate telegram configuration and routing.

        Returns:
            List of validation errors (empty if valid)
        """
        from brawny.alerts.routing import validate_targets
        from brawny.model.errors import ConfigError

        tg = self.config.telegram
        errors: list[str] = []

        # Check if any routing is configured (use truthiness, not is not None)
        has_routing = bool(tg.default) or any(getattr(j, "_alert_to", None) for j in self._jobs.values())

        # Validate all name references
        valid_names = set(tg.chats.keys())

        # Validate default targets
        invalid = validate_targets(tg.default, valid_names)
        for name in invalid:
            errors.append(f"telegram.default references unknown chat '{name}'")

        # Validate each job's alert_to target
        for job_id, job in self._jobs.items():
            target = getattr(job, "_alert_to", None)
            if target is None:
                continue

            invalid = validate_targets(target, valid_names)
            for name in invalid:
                errors.append(
                    f"Job '{job_id}' references unknown telegram chat '{name}'. "
                    f"Valid names: {sorted(valid_names)}"
                )

        if errors:
            for err in errors:
                self._log.error("telegram.routing.invalid", error=err)
            return errors

        # Warn about configuration issues (non-fatal)
        if has_routing and not tg.bot_token:
            self._log.warning(
                "telegram.bot_token_missing",
                message="Jobs use alert_to= or telegram.default is set, but bot_token is missing",
            )
        elif tg.bot_token and not tg.default and not any(getattr(j, "_alert_to", None) for j in self._jobs.values()):
            self._log.warning(
                "telegram.no_default_targets",
                message="bot_token set but no default targets and no jobs use alert_to=",
            )

        return []

    def _reconcile_startup(self) -> None:
        """Reconcile state on startup."""
        assert self._db is not None
        assert self._monitor is not None or self.overrides.dry_run

        # Reconcile nonces
        if self._executor and self._executor.nonce_manager:
            self._log.info("startup.reconcile_nonces")
            self._executor.nonce_manager.reconcile()

        # Recover SENDING intents
        stuck_sending = self._db.get_intents_by_status(
            IntentStatus.SENDING.value,
            chain_id=self.config.chain_id,
        )
        for intent in stuck_sending:
            attempt = self._db.get_latest_attempt_for_intent(intent.intent_id)
            if attempt and attempt.tx_hash:
                transition_intent(
                    self._db,
                    intent.intent_id,
                    IntentStatus.PENDING,
                    "startup_recover_sending",
                    chain_id=self.config.chain_id,
                )
            else:
                # No tx_hash means intent never got broadcast - reset to CREATED
                if attempt and self._executor and self._executor.nonce_manager:
                    from brawny.model.enums import AttemptStatus
                    self._db.update_attempt_status(
                        attempt.attempt_id,
                        AttemptStatus.FAILED.value,
                        error_code="startup_stuck",
                        error_detail="Stuck in SENDING without broadcast",
                    )
                    self._executor.nonce_manager.release(intent.signer_address, attempt.nonce)
                transition_intent(
                    self._db,
                    intent.intent_id,
                    IntentStatus.CREATED,
                    "startup_recover_sending",
                    chain_id=self.config.chain_id,
                )

        if stuck_sending:
            self._log.warning(
                "startup.recover_sending_intents",
                count=len(stuck_sending),
            )

        # Reconcile pending intents
        if self._monitor:
            reconcile_pending_intents(
                self._db,
                self._monitor,
                self.config.chain_id,
                self._log,
            )

    def _start_workers(self) -> None:
        """Start worker threads."""
        if self.overrides.dry_run:
            return

        worker_count = (
            self.overrides.worker_count
            if self.overrides.worker_count is not None
            else self.config.worker_count
        )

        ctx = DaemonContext(
            config=self.config,
            log=self._log,
            db=self._db,
            rpc=self._rpc,
            executor=self._executor,
            monitor=self._monitor,
            replacer=self._replacer,
            nonce_manager=self._executor.nonce_manager if self._executor else None,
            chain_id=self.config.chain_id,
            health_send_fn=self._health_send_fn,
            health_chat_id=self._health_chat_id,
            health_cooldown=self._health_cooldown,
        )
        state = DaemonState(
            make_claim_token=self._make_claim_token,
            make_claimed_by=self._make_claimed_by,
            inflight_inc=self._inflight_start,
            inflight_dec=self._inflight_done,
        )

        for i in range(worker_count):
            t = Thread(
                target=run_worker,
                args=(i, self._stop, self._wakeup_hint, ctx, state, self.overrides.dry_run),
                daemon=True,
            )
            t.start()
            self._worker_threads.append(t)

        # Start monitor thread
        self._monitor_thread = Thread(
            target=run_monitor,
            args=(self._monitor_stop, ctx, self._worker_threads),
            daemon=True,
        )
        self._monitor_thread.start()

        # Initial gauge
        metrics = get_metrics()
        metrics.gauge(ACTIVE_WORKERS).set(
            len(self._worker_threads),
            chain_id=self.config.chain_id,
        )

    def _shutdown(self) -> None:
        """Shutdown the daemon gracefully."""
        self._log.info("daemon.shutdown.start")

        # Signal stop
        self._stop.set()
        self._wakeup_hint.set()
        self._monitor_stop.set()

        # Wait for inflight
        if not self._inflight_zero.is_set():
            self._log.info(
                "shutdown.await_inflight",
                inflight=self._inflight_count,
                grace_seconds=self.config.shutdown_grace_seconds,
            )
        start_wait = time.time()
        self._inflight_zero.wait(timeout=self.config.shutdown_grace_seconds)
        wait_elapsed = time.time() - start_wait
        remaining = max(0.0, self.config.shutdown_grace_seconds - wait_elapsed)

        # Join workers
        for t in self._worker_threads:
            t.join(timeout=remaining)

        # Join monitor
        if self._monitor_thread:
            self._monitor_thread.join(timeout=5.0)

        # Log any threads still alive
        alive = [t for t in self._worker_threads if t.is_alive()]
        if alive:
            self._log.warning("shutdown.threads_still_alive", count=len(alive))

        # Close event loop
        if self._loop and not self._loop.is_closed():
            self._loop.close()

        self._log.info("daemon.shutdown.complete")

    def initialize(
        self,
    ) -> tuple[dict[str, list[str]], list[str], list["StartupMessage"]]:
        """Initialize all components.

        Returns:
            Tuple of (validation_errors, routing_errors, startup_messages) for jobs
        """
        startup_messages: list[StartupMessage] = []

        # Database
        self._db = create_database(
            self.config.database_url,
            pool_size=self.config.database_pool_size,
            pool_max_overflow=self.config.database_pool_max_overflow,
            pool_timeout=self.config.database_pool_timeout_seconds,
            circuit_breaker_failures=self.config.db_circuit_breaker_failures,
            circuit_breaker_seconds=self.config.db_circuit_breaker_seconds,
        )
        self._db.connect()

        # Migrations
        migrator = Migrator(self._db)
        pending = migrator.pending()
        if pending:
            self._log.info("migrations.applying", count=len(pending))
            migrator.migrate()

        # RPC
        self._rpc = RPCManager.from_config(self.config)

        self._log.info(
            "startup.finality_policy",
            chain_id=self.config.chain_id,
            finality_confirmations=self.config.finality_confirmations,
            read_only=True,
        )

        # Keystore (only in live mode)
        if not self.overrides.dry_run:
            self._keystore = create_keystore(
                self.config.keystore_type,
                keystore_path=self.config.keystore_path,
                allowed_signers=[],
            )
            # Make keystore available for signer_address() helper
            from brawny.api import _set_keystore
            _set_keystore(self._keystore)

            # Collect keystore warnings
            startup_messages.extend(self._keystore.get_warnings())

        # Discover jobs
        load_errors = self._discover_jobs()
        if load_errors:
            for err in load_errors:
                self._log.error(
                    "job.module_load_failed",
                    path=err.path,
                    message=err.message,
                    traceback=err.traceback,
                )
            raise JobDiscoveryFailed(load_errors)

        # Sanity check: don't run with zero jobs
        if not self._jobs:
            raise RuntimeError("No jobs discovered - check your jobs directory")

        validation_errors, routing_errors = self._validate_jobs()

        # Validate telegram routing (fails hard on unknown names)
        telegram_errors = self._validate_telegram_config()
        if telegram_errors:
            from brawny.model.errors import ConfigError
            raise ConfigError(
                f"Invalid telegram routing: {len(telegram_errors)} error(s)\n"
                + "\n".join(f" - {e}" for e in telegram_errors)
            )

        # Cache TelegramBot instance (if configured)
        if self.config.telegram.bot_token:
            self._telegram_bot = TelegramBot(token=self.config.telegram.bot_token)

        # Initialize health alerting
        tg = self.config.telegram
        if tg and tg.health_chat:
            resolved = tg.chats.get(tg.health_chat)
            if resolved:
                self._health_chat_id = resolved
                if self._telegram_bot:
                    self._health_send_fn = create_send_health(self._telegram_bot)
            else:
                # health_chat configured but not found in chats - warn loudly
                self._log.warning(
                    "health_chat_missing",
                    health_chat=tg.health_chat,
                    available_chats=list(tg.chats.keys()),
                )

        if tg:
            self._health_cooldown = tg.health_cooldown_seconds

        # Validate schema (after health is set up so we can alert on failure)
        self._check_schema()

        # Contract system
        self._contract_system = ContractSystem(self._rpc, self.config)

        # Lifecycle
        self._lifecycle = LifecycleDispatcher(
            self._db,
            self._rpc,
            self.config,
            self._jobs,
            contract_system=self._contract_system,
            telegram_bot=self._telegram_bot,
        )

        # TX execution components (only in live mode)
        if self._keystore:
            self._executor = TxExecutor(
                self._db, self._rpc, self._keystore, self.config,
                lifecycle=self._lifecycle,
                jobs=self._jobs,
            )
            self._monitor = TxMonitor(
                self._db, self._rpc, self._executor.nonce_manager, self.config,
                lifecycle=self._lifecycle
            )
            self._replacer = TxReplacer(
                self._db, self._rpc, self._keystore, self._executor.nonce_manager, self.config,
                lifecycle=self._lifecycle
            )

        # Job runner
        self._job_runner = JobRunner(
            self._db,
            self._rpc,
            self.config,
            self._jobs,
            lifecycle=self._lifecycle,
            contract_system=self._contract_system,
            loop=self._loop,
            loop_thread_id=self._loop_thread_id,
        )
        self._job_runner._on_intent_created = self._on_intent_created

        # Reorg detector
        self._reorg_detector = ReorgDetector(
            db=self._db,
            rpc=self._rpc,
            chain_id=self.config.chain_id,
            reorg_depth=self.config.reorg_depth,
            block_hash_history_size=self.config.block_hash_history_size,
            finality_confirmations=self.config.finality_confirmations,
            lifecycle=self._lifecycle,
            health_send_fn=self._health_send_fn,
            health_chat_id=self._health_chat_id,
            health_cooldown=self._health_cooldown,
        )

        # Block poller
        self._poller = BlockPoller(
            self._db, self._rpc, self.config, self._process_block,
            reorg_detector=self._reorg_detector,
            health_send_fn=self._health_send_fn,
            health_chat_id=self._health_chat_id,
            health_cooldown=self._health_cooldown,
        )

        # Register jobs in database
        for job_id, job in self._jobs.items():
            self._db.upsert_job(job_id, job.name, job.check_interval_blocks)

        return validation_errors, routing_errors, startup_messages

    def run(self, blocking: bool = True) -> None:
        """Run the daemon.

        Args:
            blocking: If True, block until shutdown. If False, return immediately.
        """
        assert self._poller is not None, "Daemon not initialized"

        # Startup reconciliation
        self._reconcile_startup()

        # Warm gas cache before workers start (eliminates cold-start race)
        try:
            self._loop.run_until_complete(
                asyncio.wait_for(self._rpc.gas_quote(), timeout=5.0)
            )
            self._log.debug("startup.gas_cache_warmed")
        except Exception as e:
            self._log.warning("startup.gas_cache_warm_failed", error=str(e))

        # Start workers
        self._start_workers()

        try:
            if self.overrides.once:
                # Single iteration mode
                self._poller._poll_once()
            else:
                # Normal polling mode
                try:
                    self._poller.start(blocking=blocking)
                except KeyboardInterrupt:
                    self._log.info("daemon.keyboard_interrupt")
        finally:
            self._shutdown()

    def stop(self, timeout: float = 5.0) -> None:
        """Stop the daemon.

        Signals all components to stop. Called from shutdown handler.

        Args:
            timeout: Timeout for stopping the poller
        """
        # Signal workers and monitor to stop
        self._stop.set()
        self._wakeup_hint.set()
        self._monitor_stop.set()

        if self._poller:
            self._poller.stop(timeout=timeout)