brawny 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. brawny/__init__.py +106 -0
  2. brawny/_context.py +232 -0
  3. brawny/_rpc/__init__.py +38 -0
  4. brawny/_rpc/broadcast.py +172 -0
  5. brawny/_rpc/clients.py +98 -0
  6. brawny/_rpc/context.py +49 -0
  7. brawny/_rpc/errors.py +252 -0
  8. brawny/_rpc/gas.py +158 -0
  9. brawny/_rpc/manager.py +982 -0
  10. brawny/_rpc/selector.py +156 -0
  11. brawny/accounts.py +534 -0
  12. brawny/alerts/__init__.py +132 -0
  13. brawny/alerts/abi_resolver.py +530 -0
  14. brawny/alerts/base.py +152 -0
  15. brawny/alerts/context.py +271 -0
  16. brawny/alerts/contracts.py +635 -0
  17. brawny/alerts/encoded_call.py +201 -0
  18. brawny/alerts/errors.py +267 -0
  19. brawny/alerts/events.py +680 -0
  20. brawny/alerts/function_caller.py +364 -0
  21. brawny/alerts/health.py +185 -0
  22. brawny/alerts/routing.py +118 -0
  23. brawny/alerts/send.py +364 -0
  24. brawny/api.py +660 -0
  25. brawny/chain.py +93 -0
  26. brawny/cli/__init__.py +16 -0
  27. brawny/cli/app.py +17 -0
  28. brawny/cli/bootstrap.py +37 -0
  29. brawny/cli/commands/__init__.py +41 -0
  30. brawny/cli/commands/abi.py +93 -0
  31. brawny/cli/commands/accounts.py +632 -0
  32. brawny/cli/commands/console.py +495 -0
  33. brawny/cli/commands/contract.py +139 -0
  34. brawny/cli/commands/health.py +112 -0
  35. brawny/cli/commands/init_project.py +86 -0
  36. brawny/cli/commands/intents.py +130 -0
  37. brawny/cli/commands/job_dev.py +254 -0
  38. brawny/cli/commands/jobs.py +308 -0
  39. brawny/cli/commands/logs.py +87 -0
  40. brawny/cli/commands/maintenance.py +182 -0
  41. brawny/cli/commands/migrate.py +51 -0
  42. brawny/cli/commands/networks.py +253 -0
  43. brawny/cli/commands/run.py +249 -0
  44. brawny/cli/commands/script.py +209 -0
  45. brawny/cli/commands/signer.py +248 -0
  46. brawny/cli/helpers.py +265 -0
  47. brawny/cli_templates.py +1445 -0
  48. brawny/config/__init__.py +74 -0
  49. brawny/config/models.py +404 -0
  50. brawny/config/parser.py +633 -0
  51. brawny/config/routing.py +55 -0
  52. brawny/config/validation.py +246 -0
  53. brawny/daemon/__init__.py +14 -0
  54. brawny/daemon/context.py +69 -0
  55. brawny/daemon/core.py +702 -0
  56. brawny/daemon/loops.py +327 -0
  57. brawny/db/__init__.py +78 -0
  58. brawny/db/base.py +986 -0
  59. brawny/db/base_new.py +165 -0
  60. brawny/db/circuit_breaker.py +97 -0
  61. brawny/db/global_cache.py +298 -0
  62. brawny/db/mappers.py +182 -0
  63. brawny/db/migrate.py +349 -0
  64. brawny/db/migrations/001_init.sql +186 -0
  65. brawny/db/migrations/002_add_included_block.sql +7 -0
  66. brawny/db/migrations/003_add_broadcast_at.sql +10 -0
  67. brawny/db/migrations/004_broadcast_binding.sql +20 -0
  68. brawny/db/migrations/005_add_retry_after.sql +9 -0
  69. brawny/db/migrations/006_add_retry_count_column.sql +11 -0
  70. brawny/db/migrations/007_add_gap_tracking.sql +18 -0
  71. brawny/db/migrations/008_add_transactions.sql +72 -0
  72. brawny/db/migrations/009_add_intent_metadata.sql +5 -0
  73. brawny/db/migrations/010_add_nonce_gap_index.sql +9 -0
  74. brawny/db/migrations/011_add_job_logs.sql +24 -0
  75. brawny/db/migrations/012_add_claimed_by.sql +5 -0
  76. brawny/db/ops/__init__.py +29 -0
  77. brawny/db/ops/attempts.py +108 -0
  78. brawny/db/ops/blocks.py +83 -0
  79. brawny/db/ops/cache.py +93 -0
  80. brawny/db/ops/intents.py +296 -0
  81. brawny/db/ops/jobs.py +110 -0
  82. brawny/db/ops/logs.py +97 -0
  83. brawny/db/ops/nonces.py +322 -0
  84. brawny/db/postgres.py +2535 -0
  85. brawny/db/postgres_new.py +196 -0
  86. brawny/db/queries.py +584 -0
  87. brawny/db/sqlite.py +2733 -0
  88. brawny/db/sqlite_new.py +191 -0
  89. brawny/history.py +126 -0
  90. brawny/interfaces.py +136 -0
  91. brawny/invariants.py +155 -0
  92. brawny/jobs/__init__.py +26 -0
  93. brawny/jobs/base.py +287 -0
  94. brawny/jobs/discovery.py +233 -0
  95. brawny/jobs/job_validation.py +111 -0
  96. brawny/jobs/kv.py +125 -0
  97. brawny/jobs/registry.py +283 -0
  98. brawny/keystore.py +484 -0
  99. brawny/lifecycle.py +551 -0
  100. brawny/logging.py +290 -0
  101. brawny/metrics.py +594 -0
  102. brawny/model/__init__.py +53 -0
  103. brawny/model/contexts.py +319 -0
  104. brawny/model/enums.py +70 -0
  105. brawny/model/errors.py +194 -0
  106. brawny/model/events.py +93 -0
  107. brawny/model/startup.py +20 -0
  108. brawny/model/types.py +483 -0
  109. brawny/networks/__init__.py +96 -0
  110. brawny/networks/config.py +269 -0
  111. brawny/networks/manager.py +423 -0
  112. brawny/obs/__init__.py +67 -0
  113. brawny/obs/emit.py +158 -0
  114. brawny/obs/health.py +175 -0
  115. brawny/obs/heartbeat.py +133 -0
  116. brawny/reconciliation.py +108 -0
  117. brawny/scheduler/__init__.py +19 -0
  118. brawny/scheduler/poller.py +472 -0
  119. brawny/scheduler/reorg.py +632 -0
  120. brawny/scheduler/runner.py +708 -0
  121. brawny/scheduler/shutdown.py +371 -0
  122. brawny/script_tx.py +297 -0
  123. brawny/scripting.py +251 -0
  124. brawny/startup.py +76 -0
  125. brawny/telegram.py +393 -0
  126. brawny/testing.py +108 -0
  127. brawny/tx/__init__.py +41 -0
  128. brawny/tx/executor.py +1071 -0
  129. brawny/tx/fees.py +50 -0
  130. brawny/tx/intent.py +423 -0
  131. brawny/tx/monitor.py +628 -0
  132. brawny/tx/nonce.py +498 -0
  133. brawny/tx/replacement.py +456 -0
  134. brawny/tx/utils.py +26 -0
  135. brawny/utils.py +205 -0
  136. brawny/validation.py +69 -0
  137. brawny-0.1.13.dist-info/METADATA +156 -0
  138. brawny-0.1.13.dist-info/RECORD +141 -0
  139. brawny-0.1.13.dist-info/WHEEL +5 -0
  140. brawny-0.1.13.dist-info/entry_points.txt +2 -0
  141. brawny-0.1.13.dist-info/top_level.txt +1 -0
brawny/daemon/core.py ADDED
@@ -0,0 +1,702 @@
1
+ """BrawnyDaemon - Main daemon orchestrator.
2
+
3
+ Provides the core daemon class that manages all components and threads.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import itertools
10
+ import os
11
+ import socket
12
+ import threading
13
+ import time
14
+ from threading import Event, Lock, Thread
15
+ from typing import TYPE_CHECKING, Callable
16
+
17
+ from brawny.alerts.contracts import ContractSystem
18
+ from brawny.alerts.health import health_alert
19
+ from brawny.alerts.send import create_send_health
20
+ from brawny.daemon.context import DaemonContext, DaemonState, RuntimeOverrides
21
+ from brawny.daemon.loops import run_monitor, run_worker
22
+ from brawny.db import create_database
23
+ from brawny.db.migrate import Migrator, verify_critical_schema
24
+ from brawny.jobs.discovery import (
25
+ JobDiscoveryFailed,
26
+ JobLoadError,
27
+ auto_discover_jobs,
28
+ discover_jobs,
29
+ )
30
+ from brawny.jobs.job_validation import validate_all_jobs
31
+ from brawny.jobs.registry import get_registry
32
+ from brawny.keystore import create_keystore
33
+ from brawny.lifecycle import LifecycleDispatcher
34
+ from brawny.logging import get_logger
35
+ from brawny.metrics import ACTIVE_WORKERS, get_metrics
36
+ from brawny.model.enums import IntentStatus
37
+ from brawny.model.startup import StartupMessage
38
+ from brawny.model.types import BlockInfo
39
+ from brawny._rpc import RPCManager
40
+ from brawny.scheduler.poller import BlockPoller
41
+ from brawny.scheduler.reorg import ReorgDetector
42
+ from brawny.scheduler.runner import JobRunner
43
+ from brawny.startup import reconcile_pending_intents
44
+ from brawny.tx.executor import TxExecutor
45
+ from brawny.tx.intent import transition_intent
46
+ from brawny.tx.monitor import TxMonitor
47
+ from brawny.tx.replacement import TxReplacer
48
+ from brawny.validation import validate_job_routing
49
+ from brawny.telegram import TelegramBot
50
+
51
+ if TYPE_CHECKING:
52
+ from brawny.config import Config
53
+ from brawny.config.models import TelegramConfig
54
+ from brawny.db.base import Database
55
+ from brawny.jobs.base import Job
56
+ from brawny.keystore import Keystore
57
+
58
+
59
class BrawnyDaemon:
    """Main daemon orchestrator.

    Manages all components, threads, and lifecycle for the brawny daemon.

    Typical use is ``BrawnyDaemon(config)`` followed by ``initialize()`` (which
    builds the database, RPC manager, job registry, and transaction
    components) and then ``run()`` (which reconciles state, starts the worker
    and monitor threads, and drives the block poller until shutdown).
    """

    def __init__(
        self,
        config: "Config",
        overrides: "RuntimeOverrides | None" = None,
        extra_modules: "list[str] | None" = None,
    ) -> None:
        """Initialize the daemon.

        Only cheap, in-process state is created here; the components
        themselves are built later by ``initialize()``.

        Args:
            config: Application configuration
            overrides: Runtime overrides for dry_run, once, worker_count, etc.
            extra_modules: Additional job modules to discover
        """
        self.config = config
        self.overrides = overrides or RuntimeOverrides()
        self._extra_modules = extra_modules or []
        self._log = get_logger(__name__)

        # Components (built in initialize())
        self._db: Database | None = None
        self._rpc: RPCManager | None = None
        self._keystore: Keystore | None = None
        self._contract_system: ContractSystem | None = None
        self._lifecycle: LifecycleDispatcher | None = None
        self._executor: TxExecutor | None = None
        self._monitor: TxMonitor | None = None
        self._replacer: TxReplacer | None = None
        self._job_runner: JobRunner | None = None
        self._reorg_detector: ReorgDetector | None = None
        self._poller: BlockPoller | None = None

        # Jobs, keyed by job_id
        self._jobs: dict[str, Job] = {}

        # Telegram (cached instance)
        self._telegram_bot: TelegramBot | None = None

        # Health alerting (configured in initialize())
        self._health_send_fn: Callable[..., None] | None = None
        self._health_chat_id: str | None = None
        # Seconds; replaced by telegram.health_cooldown_seconds in initialize()
        self._health_cooldown: int = 1800

        # Threading
        self._stop = Event()
        self._wakeup_hint = Event()
        self._worker_threads: list[Thread] = []
        self._monitor_thread: Thread | None = None
        self._monitor_stop = Event()

        # Inflight tracking: the event is set exactly while the count is zero
        self._inflight_lock = Lock()
        self._inflight_count = 0
        self._inflight_zero = Event()
        self._inflight_zero.set()

        # Claim token generation
        self._claim_counter = itertools.count(1)
        self._hostname = socket.gethostname()
        self._pid = os.getpid()

        # Async event loop (owned by daemon, used by runner for async job.check())
        self._loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)  # Make it the current loop for this thread
        self._loop_thread_id: int = threading.get_ident()  # Assert ownership

    @property
    def db(self) -> "Database":
        """Get database connection (asserts that ``initialize()`` has set it)."""
        assert self._db is not None, "Daemon not started"
        return self._db

    @property
    def rpc(self) -> "RPCManager":
        """Get RPC manager (asserts that ``initialize()`` has set it)."""
        assert self._rpc is not None, "Daemon not started"
        return self._rpc

    @property
    def jobs(self) -> "dict[str, Job]":
        """Get discovered jobs, keyed by job_id."""
        return self._jobs

    @property
    def keystore(self) -> "Keystore | None":
        """Get keystore (None in dry_run mode)."""
        return self._keystore

    def _check_schema(self) -> None:
        """Verify critical DB schema columns exist. Hard-fail if not.

        Raises:
            SystemExit: If ``verify_critical_schema`` raises. The failure is
                logged and a forced health alert is sent before exiting.
        """
        assert self._db is not None

        try:
            verify_critical_schema(self._db)
        except Exception as exc:
            error_msg = str(exc)
            self._log.critical(
                "schema.validation_failed",
                error=error_msg,
                table="tx_intents",
            )
            health_alert(
                component="brawny.startup.schema",
                chain_id=self.config.chain_id,
                error=error_msg,
                level="critical",
                action="Run: brawny migrate",
                db_dialect=self._db.dialect,
                force_send=True,
                send_fn=self._health_send_fn,
                health_chat_id=self._health_chat_id,
            )
            raise SystemExit(f"DB schema mismatch: {error_msg}. Run: brawny migrate") from exc

    def _make_claim_token(self, worker_id: int) -> str:
        """Generate a unique claim token for a worker.

        The token is ``hostname:pid:worker_id:counter``; the counter advances
        on every call, so no two calls return the same token.
        """
        return f"{self._hostname}:{self._pid}:{worker_id}:{next(self._claim_counter)}"

    def _make_claimed_by(self, worker_id: int) -> str:
        """Generate a stable claimed_by identifier (``hostname:pid:worker_id``)."""
        return f"{self._hostname}:{self._pid}:{worker_id}"

    def _inflight_start(self) -> None:
        """Mark an inflight operation starting."""
        with self._inflight_lock:
            self._inflight_count += 1
            self._inflight_zero.clear()

    def _inflight_done(self) -> None:
        """Mark an inflight operation complete.

        The count is clamped at zero, so an unmatched call cannot drive it
        negative.
        """
        with self._inflight_lock:
            self._inflight_count = max(0, self._inflight_count - 1)
            if self._inflight_count == 0:
                self._inflight_zero.set()

    def _on_intent_created(self, intent_id: str) -> None:
        """Callback when intent is created: set the worker wakeup hint.

        ``intent_id`` is accepted to match the callback signature but unused.
        """
        self._wakeup_hint.set()

    def _process_block(self, block: "BlockInfo") -> None:
        """Process a single block by handing it to the job runner.

        Logs a start event, runs the job runner, then logs the counters
        reported in the runner's result.
        """
        assert self._job_runner is not None

        self._log.info(
            "block.ingest.start",
            block_number=block.block_number,
        )

        block_result = self._job_runner.process_block(block)

        self._log.info(
            "block.ingest.done",
            block_number=block.block_number,
            jobs_checked=block_result.jobs_checked,
            jobs_triggered=block_result.jobs_triggered,
            intents_created=block_result.intents_created,
        )

    def _discover_jobs(self) -> "list[JobLoadError]":
        """Discover and register jobs based on config.

        Uses the explicitly supplied extra modules when there are any,
        otherwise falls back to auto-discovery. On success ``self._jobs`` is
        rebuilt from the registry; on any load error the registry is cleared
        and ``self._jobs`` is left untouched.

        Returns:
            List of JobLoadError for any modules that failed to load.
        """
        registry = get_registry()
        registry.clear()  # Start fresh to prevent partial state leakage

        if self._extra_modules:
            discovered, errors = discover_jobs(self._extra_modules)
        else:
            discovered, errors = auto_discover_jobs()

        # Log discovery summary
        self._log.info(
            "job.discovery.complete",
            jobs_loaded=len(discovered),
            jobs_failed=len(errors),
        )

        if errors:
            registry.clear()  # Don't leave partial state
            return errors

        self._jobs = {job.job_id: job for job in registry.get_all()}
        return []

    def _validate_jobs(self) -> "tuple[dict[str, list[str]], list[str]]":
        """Validate discovered jobs.

        Nothing is validated when no jobs are loaded. In dry_run mode the
        keystore is withheld from job validation.

        Returns:
            Tuple of (validation_errors, routing_errors)
        """
        validation_errors: dict[str, list[str]] = {}
        routing_errors: list[str] = []

        if self._jobs:
            keystore = self._keystore if not self.overrides.dry_run else None
            validation_errors = validate_all_jobs(self._jobs, keystore=keystore)
            routing_errors = validate_job_routing(self.config, self._jobs)

        return validation_errors, routing_errors

    def _validate_telegram_config(self) -> "list[str]":
        """Validate telegram configuration and routing.

        Every chat name referenced by ``telegram.default`` or by a job's
        ``_alert_to`` must exist in ``telegram.chats``. Unknown names are
        logged and returned as errors. When all names resolve, configuration
        oddities (routing without a bot token, or a bot token without any
        routing) are only logged as warnings.

        Returns:
            List of validation errors (empty if valid)
        """
        from brawny.alerts.routing import validate_targets

        tg = self.config.telegram
        errors: list[str] = []

        # Check if any routing is configured (use truthiness, not is not None)
        jobs_use_alert_to = any(
            getattr(job, "_alert_to", None) for job in self._jobs.values()
        )
        has_routing = bool(tg.default) or jobs_use_alert_to

        # Validate all name references
        valid_names = set(tg.chats.keys())

        # Validate default targets
        for name in validate_targets(tg.default, valid_names):
            errors.append(f"telegram.default references unknown chat '{name}'")

        # Validate each job's alert_to target
        for job_id, job in self._jobs.items():
            target = getattr(job, "_alert_to", None)
            if target is None:
                continue

            for name in validate_targets(target, valid_names):
                errors.append(
                    f"Job '{job_id}' references unknown telegram chat '{name}'. "
                    f"Valid names: {sorted(valid_names)}"
                )

        if errors:
            for err in errors:
                self._log.error("telegram.routing.invalid", error=err)
            return errors

        # Warn about configuration issues (non-fatal)
        if has_routing and not tg.bot_token:
            self._log.warning(
                "telegram.bot_token_missing",
                message="Jobs use alert_to= or telegram.default is set, but bot_token is missing",
            )
        elif tg.bot_token and not has_routing:
            self._log.warning(
                "telegram.no_default_targets",
                message="bot_token set but no default targets and no jobs use alert_to=",
            )

        return []

    def _reconcile_startup(self) -> None:
        """Reconcile state on startup.

        Steps, in order:
          1. Reconcile nonces, when an executor with a nonce manager exists.
          2. Recover intents left in SENDING: those whose latest attempt has a
             tx_hash move to PENDING; the rest are reset to CREATED, with the
             attempt marked FAILED and its nonce released when possible.
          3. Reconcile pending intents through the monitor, when one exists.
        """
        assert self._db is not None
        assert self._monitor is not None or self.overrides.dry_run

        # Reconcile nonces
        if self._executor and self._executor.nonce_manager:
            self._log.info("startup.reconcile_nonces")
            self._executor.nonce_manager.reconcile()

        # Recover SENDING intents
        stuck_sending = self._db.get_intents_by_status(
            IntentStatus.SENDING.value,
            chain_id=self.config.chain_id,
        )
        for intent in stuck_sending:
            attempt = self._db.get_latest_attempt_for_intent(intent.intent_id)
            if attempt and attempt.tx_hash:
                # A tx_hash is recorded for the attempt, so move on to PENDING
                transition_intent(
                    self._db,
                    intent.intent_id,
                    IntentStatus.PENDING,
                    "startup_recover_sending",
                    chain_id=self.config.chain_id,
                )
            else:
                # No tx_hash means intent never got broadcast - reset to CREATED
                if attempt and self._executor and self._executor.nonce_manager:
                    from brawny.model.enums import AttemptStatus

                    self._db.update_attempt_status(
                        attempt.attempt_id,
                        AttemptStatus.FAILED.value,
                        error_code="startup_stuck",
                        error_detail="Stuck in SENDING without broadcast",
                    )
                    self._executor.nonce_manager.release(intent.signer_address, attempt.nonce)
                transition_intent(
                    self._db,
                    intent.intent_id,
                    IntentStatus.CREATED,
                    "startup_recover_sending",
                    chain_id=self.config.chain_id,
                )

        if stuck_sending:
            self._log.warning(
                "startup.recover_sending_intents",
                count=len(stuck_sending),
            )

        # Reconcile pending intents
        if self._monitor:
            reconcile_pending_intents(
                self._db,
                self._monitor,
                self.config.chain_id,
                self._log,
            )

    def _start_workers(self) -> None:
        """Start worker threads and the monitor thread.

        Does nothing in dry_run mode. The worker count comes from the runtime
        override when one is given, otherwise from the config. All threads are
        daemon threads and share one ``DaemonContext`` / ``DaemonState``.
        """
        if self.overrides.dry_run:
            return

        worker_count = (
            self.overrides.worker_count
            if self.overrides.worker_count is not None
            else self.config.worker_count
        )

        ctx = DaemonContext(
            config=self.config,
            log=self._log,
            db=self._db,
            rpc=self._rpc,
            executor=self._executor,
            monitor=self._monitor,
            replacer=self._replacer,
            nonce_manager=self._executor.nonce_manager if self._executor else None,
            chain_id=self.config.chain_id,
            health_send_fn=self._health_send_fn,
            health_chat_id=self._health_chat_id,
            health_cooldown=self._health_cooldown,
        )
        state = DaemonState(
            make_claim_token=self._make_claim_token,
            make_claimed_by=self._make_claimed_by,
            inflight_inc=self._inflight_start,
            inflight_dec=self._inflight_done,
        )

        for i in range(worker_count):
            t = Thread(
                target=run_worker,
                args=(i, self._stop, self._wakeup_hint, ctx, state, self.overrides.dry_run),
                daemon=True,
            )
            t.start()
            self._worker_threads.append(t)

        # Start monitor thread
        self._monitor_thread = Thread(
            target=run_monitor,
            args=(self._monitor_stop, ctx, self._worker_threads),
            daemon=True,
        )
        self._monitor_thread.start()

        # Initial gauge
        metrics = get_metrics()
        metrics.gauge(ACTIVE_WORKERS).set(
            len(self._worker_threads),
            chain_id=self.config.chain_id,
        )

    def _shutdown(self) -> None:
        """Shutdown the daemon gracefully.

        Signals every thread to stop, waits for inflight work to drain, joins
        the worker and monitor threads, and closes the event loop.

        ``config.shutdown_grace_seconds`` is one shared budget: the inflight
        wait and all worker joins count against a single deadline, so the time
        spent on them does not grow with the number of workers. The monitor
        thread gets its own fixed 5 second join.
        """
        self._log.info("daemon.shutdown.start")

        # Signal stop
        self._stop.set()
        self._wakeup_hint.set()
        self._monitor_stop.set()

        grace_seconds = self.config.shutdown_grace_seconds
        # Monotonic clock: the budget must not shift if the wall clock is adjusted
        deadline = time.monotonic() + grace_seconds

        # Wait for inflight
        if not self._inflight_zero.is_set():
            self._log.info(
                "shutdown.await_inflight",
                inflight=self._inflight_count,
                grace_seconds=grace_seconds,
            )
        # Returns immediately when nothing is inflight
        self._inflight_zero.wait(timeout=grace_seconds)

        # Join workers; each join only gets what is left of the shared budget
        for t in self._worker_threads:
            t.join(timeout=max(0.0, deadline - time.monotonic()))

        # Join monitor
        if self._monitor_thread:
            self._monitor_thread.join(timeout=5.0)

        # Log any threads still alive
        alive = [t for t in self._worker_threads if t.is_alive()]
        if alive:
            self._log.warning("shutdown.threads_still_alive", count=len(alive))

        # Close event loop
        if self._loop and not self._loop.is_closed():
            self._loop.close()

        self._log.info("daemon.shutdown.complete")

    def initialize(
        self,
    ) -> "tuple[dict[str, list[str]], list[str], list[StartupMessage]]":
        """Initialize all components.

        Connects the database and applies pending migrations, builds the RPC
        manager, loads the keystore (live mode only), discovers and validates
        jobs, sets up telegram and health alerting, verifies the schema, and
        finally constructs the lifecycle dispatcher, transaction components,
        job runner, reorg detector, and block poller.

        Returns:
            Tuple of (validation_errors, routing_errors, startup_messages) for jobs

        Raises:
            JobDiscoveryFailed: If any job module failed to load.
            RuntimeError: If no jobs were discovered.
            ConfigError: If telegram routing references unknown chat names.
            SystemExit: If the critical schema check fails.
        """
        startup_messages: list[StartupMessage] = []

        # Database
        self._db = create_database(
            self.config.database_url,
            pool_size=self.config.database_pool_size,
            pool_max_overflow=self.config.database_pool_max_overflow,
            pool_timeout=self.config.database_pool_timeout_seconds,
            circuit_breaker_failures=self.config.db_circuit_breaker_failures,
            circuit_breaker_seconds=self.config.db_circuit_breaker_seconds,
        )
        self._db.connect()

        # Migrations
        migrator = Migrator(self._db)
        pending = migrator.pending()
        if pending:
            self._log.info("migrations.applying", count=len(pending))
            migrator.migrate()

        # RPC
        self._rpc = RPCManager.from_config(self.config)

        self._log.info(
            "startup.finality_policy",
            chain_id=self.config.chain_id,
            finality_confirmations=self.config.finality_confirmations,
            read_only=True,
        )

        # Keystore (only in live mode)
        if not self.overrides.dry_run:
            self._keystore = create_keystore(
                self.config.keystore_type,
                keystore_path=self.config.keystore_path,
                allowed_signers=[],
            )
            # Make keystore available for signer_address() helper
            from brawny.api import _set_keystore

            _set_keystore(self._keystore)

            # Collect keystore warnings
            startup_messages.extend(self._keystore.get_warnings())

        # Discover jobs
        load_errors = self._discover_jobs()
        if load_errors:
            for err in load_errors:
                self._log.error(
                    "job.module_load_failed",
                    path=err.path,
                    message=err.message,
                    traceback=err.traceback,
                )
            raise JobDiscoveryFailed(load_errors)

        # Sanity check: don't run with zero jobs
        if not self._jobs:
            raise RuntimeError("No jobs discovered - check your jobs directory")

        validation_errors, routing_errors = self._validate_jobs()

        # Validate telegram routing (fails hard on unknown names)
        telegram_errors = self._validate_telegram_config()
        if telegram_errors:
            from brawny.model.errors import ConfigError

            raise ConfigError(
                f"Invalid telegram routing: {len(telegram_errors)} error(s)\n"
                + "\n".join(f"  - {e}" for e in telegram_errors)
            )

        # Cache TelegramBot instance (if configured)
        if self.config.telegram.bot_token:
            self._telegram_bot = TelegramBot(token=self.config.telegram.bot_token)

        # Initialize health alerting
        tg = self.config.telegram
        if tg and tg.health_chat:
            resolved = tg.chats.get(tg.health_chat)
            if resolved:
                self._health_chat_id = resolved
                # Without a bot there is nothing to send with; the chat id is
                # still recorded.
                if self._telegram_bot:
                    self._health_send_fn = create_send_health(self._telegram_bot)
            else:
                # health_chat configured but not found in chats - warn loudly
                self._log.warning(
                    "health_chat_missing",
                    health_chat=tg.health_chat,
                    available_chats=list(tg.chats.keys()),
                )

        if tg:
            self._health_cooldown = tg.health_cooldown_seconds

        # Validate schema (after health is set up so we can alert on failure)
        self._check_schema()

        # Contract system
        self._contract_system = ContractSystem(self._rpc, self.config)

        # Lifecycle
        self._lifecycle = LifecycleDispatcher(
            self._db,
            self._rpc,
            self.config,
            self._jobs,
            contract_system=self._contract_system,
            telegram_bot=self._telegram_bot,
        )

        # TX execution components (only in live mode)
        if self._keystore:
            self._executor = TxExecutor(
                self._db, self._rpc, self._keystore, self.config,
                lifecycle=self._lifecycle,
                jobs=self._jobs,
            )
            self._monitor = TxMonitor(
                self._db, self._rpc, self._executor.nonce_manager, self.config,
                lifecycle=self._lifecycle,
            )
            self._replacer = TxReplacer(
                self._db, self._rpc, self._keystore, self._executor.nonce_manager, self.config,
                lifecycle=self._lifecycle,
            )

        # Job runner
        self._job_runner = JobRunner(
            self._db,
            self._rpc,
            self.config,
            self._jobs,
            lifecycle=self._lifecycle,
            contract_system=self._contract_system,
            loop=self._loop,
            loop_thread_id=self._loop_thread_id,
        )
        # New intents set the wakeup hint that is handed to the worker threads
        self._job_runner._on_intent_created = self._on_intent_created

        # Reorg detector
        self._reorg_detector = ReorgDetector(
            db=self._db,
            rpc=self._rpc,
            chain_id=self.config.chain_id,
            reorg_depth=self.config.reorg_depth,
            block_hash_history_size=self.config.block_hash_history_size,
            finality_confirmations=self.config.finality_confirmations,
            lifecycle=self._lifecycle,
            health_send_fn=self._health_send_fn,
            health_chat_id=self._health_chat_id,
            health_cooldown=self._health_cooldown,
        )

        # Block poller
        self._poller = BlockPoller(
            self._db, self._rpc, self.config, self._process_block,
            reorg_detector=self._reorg_detector,
            health_send_fn=self._health_send_fn,
            health_chat_id=self._health_chat_id,
            health_cooldown=self._health_cooldown,
        )

        # Register jobs in database
        for job_id, job in self._jobs.items():
            self._db.upsert_job(job_id, job.name, job.check_interval_blocks)

        return validation_errors, routing_errors, startup_messages

    def run(self, blocking: bool = True) -> None:
        """Run the daemon.

        Performs startup reconciliation, warms the gas cache (best effort,
        failures are only logged), starts the worker threads, and then either
        polls once (``overrides.once``) or starts the block poller.
        ``_shutdown()`` always runs when the polling call returns or raises.

        Args:
            blocking: If True, block until shutdown. If False, return immediately.
        """
        assert self._poller is not None, "Daemon not initialized"

        # Startup reconciliation
        self._reconcile_startup()

        # Warm gas cache before workers start (eliminates cold-start race)
        try:
            self._loop.run_until_complete(
                asyncio.wait_for(self._rpc.gas_quote(), timeout=5.0)
            )
            self._log.debug("startup.gas_cache_warmed")
        except Exception as e:
            self._log.warning("startup.gas_cache_warm_failed", error=str(e))

        # Start workers
        self._start_workers()

        # NOTE(review): the finally clause below runs as soon as the polling
        # call returns. If start(blocking=False) returns immediately, the
        # daemon is shut down right away - confirm that this is the intended
        # behavior for non-blocking mode.
        try:
            if self.overrides.once:
                # Single iteration mode
                self._poller._poll_once()
            else:
                # Normal polling mode
                try:
                    self._poller.start(blocking=blocking)
                except KeyboardInterrupt:
                    self._log.info("daemon.keyboard_interrupt")
        finally:
            self._shutdown()

    def stop(self, timeout: float = 5.0) -> None:
        """Stop the daemon.

        Signals all components to stop. Called from shutdown handler.
        This only signals; joining threads and closing the event loop is done
        by ``_shutdown()``.

        Args:
            timeout: Timeout for stopping the poller
        """
        # Signal workers and monitor to stop
        self._stop.set()
        self._wakeup_hint.set()
        self._monitor_stop.set()

        if self._poller:
            self._poller.stop(timeout=timeout)