brawny 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brawny/__init__.py +106 -0
- brawny/_context.py +232 -0
- brawny/_rpc/__init__.py +38 -0
- brawny/_rpc/broadcast.py +172 -0
- brawny/_rpc/clients.py +98 -0
- brawny/_rpc/context.py +49 -0
- brawny/_rpc/errors.py +252 -0
- brawny/_rpc/gas.py +158 -0
- brawny/_rpc/manager.py +982 -0
- brawny/_rpc/selector.py +156 -0
- brawny/accounts.py +534 -0
- brawny/alerts/__init__.py +132 -0
- brawny/alerts/abi_resolver.py +530 -0
- brawny/alerts/base.py +152 -0
- brawny/alerts/context.py +271 -0
- brawny/alerts/contracts.py +635 -0
- brawny/alerts/encoded_call.py +201 -0
- brawny/alerts/errors.py +267 -0
- brawny/alerts/events.py +680 -0
- brawny/alerts/function_caller.py +364 -0
- brawny/alerts/health.py +185 -0
- brawny/alerts/routing.py +118 -0
- brawny/alerts/send.py +364 -0
- brawny/api.py +660 -0
- brawny/chain.py +93 -0
- brawny/cli/__init__.py +16 -0
- brawny/cli/app.py +17 -0
- brawny/cli/bootstrap.py +37 -0
- brawny/cli/commands/__init__.py +41 -0
- brawny/cli/commands/abi.py +93 -0
- brawny/cli/commands/accounts.py +632 -0
- brawny/cli/commands/console.py +495 -0
- brawny/cli/commands/contract.py +139 -0
- brawny/cli/commands/health.py +112 -0
- brawny/cli/commands/init_project.py +86 -0
- brawny/cli/commands/intents.py +130 -0
- brawny/cli/commands/job_dev.py +254 -0
- brawny/cli/commands/jobs.py +308 -0
- brawny/cli/commands/logs.py +87 -0
- brawny/cli/commands/maintenance.py +182 -0
- brawny/cli/commands/migrate.py +51 -0
- brawny/cli/commands/networks.py +253 -0
- brawny/cli/commands/run.py +249 -0
- brawny/cli/commands/script.py +209 -0
- brawny/cli/commands/signer.py +248 -0
- brawny/cli/helpers.py +265 -0
- brawny/cli_templates.py +1445 -0
- brawny/config/__init__.py +74 -0
- brawny/config/models.py +404 -0
- brawny/config/parser.py +633 -0
- brawny/config/routing.py +55 -0
- brawny/config/validation.py +246 -0
- brawny/daemon/__init__.py +14 -0
- brawny/daemon/context.py +69 -0
- brawny/daemon/core.py +702 -0
- brawny/daemon/loops.py +327 -0
- brawny/db/__init__.py +78 -0
- brawny/db/base.py +986 -0
- brawny/db/base_new.py +165 -0
- brawny/db/circuit_breaker.py +97 -0
- brawny/db/global_cache.py +298 -0
- brawny/db/mappers.py +182 -0
- brawny/db/migrate.py +349 -0
- brawny/db/migrations/001_init.sql +186 -0
- brawny/db/migrations/002_add_included_block.sql +7 -0
- brawny/db/migrations/003_add_broadcast_at.sql +10 -0
- brawny/db/migrations/004_broadcast_binding.sql +20 -0
- brawny/db/migrations/005_add_retry_after.sql +9 -0
- brawny/db/migrations/006_add_retry_count_column.sql +11 -0
- brawny/db/migrations/007_add_gap_tracking.sql +18 -0
- brawny/db/migrations/008_add_transactions.sql +72 -0
- brawny/db/migrations/009_add_intent_metadata.sql +5 -0
- brawny/db/migrations/010_add_nonce_gap_index.sql +9 -0
- brawny/db/migrations/011_add_job_logs.sql +24 -0
- brawny/db/migrations/012_add_claimed_by.sql +5 -0
- brawny/db/ops/__init__.py +29 -0
- brawny/db/ops/attempts.py +108 -0
- brawny/db/ops/blocks.py +83 -0
- brawny/db/ops/cache.py +93 -0
- brawny/db/ops/intents.py +296 -0
- brawny/db/ops/jobs.py +110 -0
- brawny/db/ops/logs.py +97 -0
- brawny/db/ops/nonces.py +322 -0
- brawny/db/postgres.py +2535 -0
- brawny/db/postgres_new.py +196 -0
- brawny/db/queries.py +584 -0
- brawny/db/sqlite.py +2733 -0
- brawny/db/sqlite_new.py +191 -0
- brawny/history.py +126 -0
- brawny/interfaces.py +136 -0
- brawny/invariants.py +155 -0
- brawny/jobs/__init__.py +26 -0
- brawny/jobs/base.py +287 -0
- brawny/jobs/discovery.py +233 -0
- brawny/jobs/job_validation.py +111 -0
- brawny/jobs/kv.py +125 -0
- brawny/jobs/registry.py +283 -0
- brawny/keystore.py +484 -0
- brawny/lifecycle.py +551 -0
- brawny/logging.py +290 -0
- brawny/metrics.py +594 -0
- brawny/model/__init__.py +53 -0
- brawny/model/contexts.py +319 -0
- brawny/model/enums.py +70 -0
- brawny/model/errors.py +194 -0
- brawny/model/events.py +93 -0
- brawny/model/startup.py +20 -0
- brawny/model/types.py +483 -0
- brawny/networks/__init__.py +96 -0
- brawny/networks/config.py +269 -0
- brawny/networks/manager.py +423 -0
- brawny/obs/__init__.py +67 -0
- brawny/obs/emit.py +158 -0
- brawny/obs/health.py +175 -0
- brawny/obs/heartbeat.py +133 -0
- brawny/reconciliation.py +108 -0
- brawny/scheduler/__init__.py +19 -0
- brawny/scheduler/poller.py +472 -0
- brawny/scheduler/reorg.py +632 -0
- brawny/scheduler/runner.py +708 -0
- brawny/scheduler/shutdown.py +371 -0
- brawny/script_tx.py +297 -0
- brawny/scripting.py +251 -0
- brawny/startup.py +76 -0
- brawny/telegram.py +393 -0
- brawny/testing.py +108 -0
- brawny/tx/__init__.py +41 -0
- brawny/tx/executor.py +1071 -0
- brawny/tx/fees.py +50 -0
- brawny/tx/intent.py +423 -0
- brawny/tx/monitor.py +628 -0
- brawny/tx/nonce.py +498 -0
- brawny/tx/replacement.py +456 -0
- brawny/tx/utils.py +26 -0
- brawny/utils.py +205 -0
- brawny/validation.py +69 -0
- brawny-0.1.13.dist-info/METADATA +156 -0
- brawny-0.1.13.dist-info/RECORD +141 -0
- brawny-0.1.13.dist-info/WHEEL +5 -0
- brawny-0.1.13.dist-info/entry_points.txt +2 -0
- brawny-0.1.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,708 @@
"""Job runner for evaluating and executing jobs.

Implements the job evaluation logic from SPEC 5.3:
- Evaluate jobs sequentially by job_id
- Run check() with timeout
- Create intents for triggered jobs
- Schedule intents for worker pickup
"""

from __future__ import annotations

import asyncio
import inspect
import threading
import time
from uuid import UUID
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import TimeoutError as FuturesTimeout
from dataclasses import dataclass, field
from functools import lru_cache
from typing import TYPE_CHECKING

from brawny._context import _current_job, _job_ctx, reset_check_block, set_check_block
from brawny._rpc.context import reset_job_context as reset_rpc_job_context
from brawny._rpc.context import set_job_context as set_rpc_job_context
from brawny.jobs.base import Job  # Runtime import for legacy API detection
from brawny.jobs.kv import DatabaseJobKVStore
from brawny.logging import LogEvents, get_logger
from brawny.metrics import (
    INTENTS_CREATED,
    JOB_BUILD_TIMEOUTS,
    JOB_CHECK_SECONDS,
    JOB_CHECK_TIMEOUTS,
    JOBS_TRIGGERED,
    LAST_INTENT_CREATED_TIMESTAMP,
    get_metrics,
)
from brawny.model.contexts import BlockContext, BuildContext, CheckContext
from brawny.model.types import BlockInfo, Trigger

if TYPE_CHECKING:
    from brawny._rpc.clients import RPCClients
    from brawny._rpc.manager import RPCManager
    from brawny.alerts.contracts import ContractSystem
    from brawny.config import Config
    from brawny.db.base import Database
    from brawny.lifecycle import LifecycleDispatcher
    from brawny.model.types import TxIntent, TxIntentSpec

logger = get_logger(__name__)


@lru_cache(maxsize=1024)
def _accepts_ctx(job_class: type, method_name: str) -> bool:
    """Determine if method can safely receive ctx as a positional argument.

    The question: "Can I legally call this method with one positional arg (ctx)?"

    Returns True only if:
    - Method has *args (can always accept one positional), OR
    - First positional param is named 'ctx'

    This prevents accidentally passing ctx to a method like:
        def check(self, foo, ctx):  # Would break if we pass ctx as foo

    Cached by (job_class, method_name) for stability across decorators.
    """
    method = getattr(job_class, method_name)
    sig = inspect.signature(method)

    params = [p for p in sig.parameters.values() if p.name != "self"]

    # If there's *args, one positional is always safe
    if any(p.kind is inspect.Parameter.VAR_POSITIONAL for p in params):
        return True

    positional = [
        p for p in params
        if p.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
    ]

    if not positional:
        return False

    # Conservative: only pass ctx if the first positional param is named 'ctx'
    return positional[0].name == "ctx"


@dataclass
class JobResult:
    """Result of running a job check."""

    job_id: str
    triggered: bool = False
    trigger: Trigger | None = None
    intent_created: bool = False
    skipped: bool = False
    error: Exception | None = None


@dataclass
class BlockResult:
    """Result of processing a block."""

    block_number: int
    jobs_checked: int = 0
    jobs_triggered: int = 0
    intents_created: int = 0
    errors: list[str] = field(default_factory=list)


class JobRunner:
    """Job runner for evaluating and executing registered jobs.

    Jobs are evaluated sequentially within a block (deterministic order).
    Multiple workers can process intents concurrently.
    """

    def __init__(
        self,
        db: Database,
        rpc: RPCManager,
        config: Config,
        jobs: dict[str, Job],
        on_intent_created: Callable[[str], None] | None = None,
        lifecycle: LifecycleDispatcher | None = None,
        contract_system: ContractSystem | None = None,
        loop: asyncio.AbstractEventLoop | None = None,
        loop_thread_id: int | None = None,
    ) -> None:
        """Initialize job runner.

        Args:
            db: Database connection
            rpc: RPC manager (default group client)
            config: Application configuration
            jobs: Dictionary of job_id -> Job instances
            on_intent_created: Callback when intent is created (for worker scheduling)
            loop: Event loop for async job.check() support
            loop_thread_id: Thread ID that owns the loop (for assertion)
        """
        self._db = db
        self._rpc = rpc
        self._config = config
        self._jobs = jobs
        self._chain_id = config.chain_id
        self._on_intent_created = on_intent_created
        self._lifecycle = lifecycle
        self._contract_system = contract_system
        self._loop = loop
        self._loop_thread_id = loop_thread_id

        # RPC clients cache for per-job read routing
        from brawny._rpc.clients import RPCClients
        self._rpc_clients: RPCClients = RPCClients(config)

        # Thread pool for job check timeouts (used for sync jobs only)
        self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="job_check")
        self._abandoned_executors = 0

    def _recreate_executor_after_timeout(self, operation: str, job_id: str) -> None:
        """Recreate the executor after a timeout to prevent deadlock.

        When a job times out, the worker thread continues running but the future
        is cancelled. With max_workers=1, this blocks all subsequent job operations.
        We recreate the executor to abandon the stuck thread and continue processing.

        Args:
            operation: The operation that timed out ("check" or "build")
            job_id: The job that caused the timeout
        """
        logger.warning(
            "runner.executor_recreated",
            operation=operation,
            job_id=job_id,
            reason="Abandoning stuck thread after timeout",
            abandoned_executors=self._abandoned_executors + 1,
        )
        # Don't wait for the stuck thread - just abandon it
        self._executor.shutdown(wait=False, cancel_futures=True)
        self._abandoned_executors += 1
        self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="job_check")

    def process_block(self, block: BlockInfo) -> BlockResult:
        """Process a block by evaluating all enabled jobs.

        Jobs are evaluated in job_id order (deterministic).

        Args:
            block: Block information

        Returns:
            BlockResult with processing stats
        """
        result = BlockResult(block_number=block.block_number)

        # Warm gas quote cache at start of block (for executor)
        if self._loop is not None:
            try:
                self._loop.run_until_complete(
                    asyncio.wait_for(self._rpc.gas_quote(), timeout=5.0)
                )
            except Exception as e:
                logger.warning("gas.cache_warm_failed", error=str(e))

        # Get enabled jobs sorted by job_id
        enabled_jobs = self._db.get_enabled_jobs()

        for job_config in enabled_jobs:
            job_id = job_config.job_id

            # Get job instance from registry
            job = self._jobs.get(job_id)
            if job is None:
                # Job in DB but not discovered - skip silently
                # (orphaned jobs are warned about once at startup)
                continue

            # Check interval
            last_checked = job_config.last_checked_block_number
            if last_checked is not None and (block.block_number - last_checked) < job.check_interval_blocks:
                logger.debug(
                    LogEvents.JOB_CHECK_SKIP,
                    job_id=job_id,
                    block_number=block.block_number,
                    last_checked=last_checked,
                    interval=job.check_interval_blocks,
                )
                continue
            backoff_until = self._db.get_job_kv(job_id, "backoff_until_block")
            if isinstance(backoff_until, int) and block.block_number <= backoff_until:
                logger.debug(
                    "job.check_backoff",
                    job_id=job_id,
                    block_number=block.block_number,
                    backoff_until=backoff_until,
                )
                continue

            if job.max_in_flight_intents is not None:
                active_count = self._db.get_active_intent_count(
                    job_id,
                    chain_id=self._chain_id,
                )
                if active_count >= job.max_in_flight_intents:
                    logger.warning(
                        "job.check.backpressure",
                        job_id=job_id,
                        block_number=block.block_number,
                        active_intents=active_count,
                        limit=job.max_in_flight_intents,
                    )
                    self._db.update_job_checked(
                        job_id,
                        block.block_number,
                        triggered=False,
                    )
                    continue

            # Run job check
            job_result = self._run_job_check(job, block)
            result.jobs_checked += 1

            if job_result.error:
                if self._config.job_error_backoff_blocks > 0:
                    self._db.set_job_kv(
                        job_id,
                        "backoff_until_block",
                        block.block_number + self._config.job_error_backoff_blocks,
                    )
                result.errors.append(f"{job_id}: {job_result.error}")
                continue

            # Update last checked
            self._db.update_job_checked(
                job_id,
                block.block_number,
                triggered=job_result.triggered,
            )

            if job_result.triggered and job_result.trigger:
                result.jobs_triggered += 1

                # Create intent if tx required
                if job_result.trigger.tx_required:
                    try:
                        intent, is_new = self._create_intent_for_trigger(
                            job, block, job_result.trigger
                        )
                        if is_new:
                            result.intents_created += 1
                            metrics = get_metrics()
                            metrics.counter(INTENTS_CREATED).inc(
                                chain_id=self._chain_id,
                                job_id=job_id,
                            )
                            metrics.gauge(LAST_INTENT_CREATED_TIMESTAMP).set(
                                time.time(),
                                chain_id=self._chain_id,
                            )
                            if self._lifecycle:
                                self._lifecycle.on_triggered(
                                    job,
                                    job_result.trigger,
                                    block,
                                    intent.intent_id,
                                )
                    except Exception as e:
                        logger.error(
                            "intent.creation_failed",
                            job_id=job_id,
                            error=str(e),
                        )
                        if self._config.job_error_backoff_blocks > 0:
                            self._db.set_job_kv(
                                job_id,
                                "backoff_until_block",
                                block.block_number + self._config.job_error_backoff_blocks,
                            )
                        result.errors.append(f"{job_id} intent: {e}")
                else:
                    if self._lifecycle:
                        self._lifecycle.on_triggered(
                            job,
                            job_result.trigger,
                            block,
                            None,
                        )

        return result

    def _run_job_check(self, job: Job, block: BlockInfo) -> JobResult:
        """Run a job's check method with timeout.

        Supports both sync and async check() methods. Async jobs use the
        daemon's event loop; sync jobs use the thread pool executor.

        Args:
            job: Job instance
            block: Block information

        Returns:
            JobResult with check outcome
        """
        logger.debug(
            LogEvents.JOB_CHECK_START,
            job_id=job.job_id,
            block_number=block.block_number,
        )

        metrics = get_metrics()
        start_time = time.perf_counter()

        # Build check context (phase-specific)
        ctx = self._build_check_context(job, block)

        from brawny.scripting import set_job_context

        try:
            # Use async path if loop is available
            if self._loop is not None and self._loop_thread_id is not None:
                # Assert we're on the correct thread (loop owner)
                assert threading.get_ident() == self._loop_thread_id, \
                    "check_job called from wrong thread"

                trigger = self._loop.run_until_complete(
                    asyncio.wait_for(
                        self._run_check_async(job, block, ctx),
                        timeout=job.check_timeout_seconds,
                    )
                )
            else:
                # Fallback to sync executor (for tests or when no loop provided)
                def _call_with_job_context() -> Trigger | None:
                    ctx_token = _job_ctx.set(ctx)
                    job_token = _current_job.set(job)
                    check_block_token = set_check_block(ctx.block.number)
                    thread_rpc_ctx_token = set_rpc_job_context(job.job_id)
                    set_job_context(True)
                    logger.debug(
                        "check.block_pinned",
                        job_id=job.job_id,
                        block_number=ctx.block.number,
                    )
                    try:
                        # Call with or without ctx based on signature
                        if _accepts_ctx(type(job), "check"):
                            return job.check(ctx)
                        else:
                            return job.check()
                    finally:
                        set_job_context(False)
                        reset_rpc_job_context(thread_rpc_ctx_token)
                        reset_check_block(check_block_token)
                        _job_ctx.reset(ctx_token)
                        _current_job.reset(job_token)

                future = self._executor.submit(_call_with_job_context)
                trigger = future.result(timeout=job.check_timeout_seconds)

            if trigger:
                logger.info(
                    LogEvents.JOB_CHECK_TRIGGERED,
                    job_id=job.job_id,
                    block_number=block.block_number,
                    reason=trigger.reason,
                    tx_required=trigger.tx_required,
                )
                metrics.counter(JOBS_TRIGGERED).inc(
                    chain_id=self._chain_id,
                    job_id=job.job_id,
                )
                return JobResult(
                    job_id=job.job_id,
                    triggered=True,
                    trigger=trigger,
                )
            else:
                return JobResult(job_id=job.job_id, triggered=False)

        except (asyncio.TimeoutError, FuturesTimeout):
            logger.error(
                LogEvents.JOB_CHECK_TIMEOUT,
                job_id=job.job_id,
                block_number=block.block_number,
                timeout=job.check_timeout_seconds,
            )
            metrics.counter(JOB_CHECK_TIMEOUTS).inc(
                chain_id=self._chain_id,
                job_id=job.job_id,
            )
            # Recreate executor to prevent stuck thread from blocking future jobs
            if self._loop is None:
                self._recreate_executor_after_timeout("check", job.job_id)
            return JobResult(
                job_id=job.job_id,
                error=TimeoutError(f"Job check timed out after {job.check_timeout_seconds}s"),
            )

        except Exception as e:
            logger.error(
                "job.check.error",
                job_id=job.job_id,
                block_number=block.block_number,
                error=str(e),
            )
            return JobResult(job_id=job.job_id, error=e)
        finally:
            duration = time.perf_counter() - start_time
            metrics.histogram(JOB_CHECK_SECONDS).observe(
                duration,
                chain_id=self._chain_id,
                job_id=job.job_id,
            )

    async def _run_check_async(self, job: Job, block: BlockInfo, ctx: CheckContext) -> Trigger | None:
        """Run job.check(), handling sync or async."""
        from brawny.scripting import set_job_context

        ctx_token = _job_ctx.set(ctx)
        job_token = _current_job.set(job)
        check_block_token = set_check_block(ctx.block.number)
        async_rpc_ctx_token = set_rpc_job_context(job.job_id)
        set_job_context(True)
        logger.debug(
            "check.block_pinned",
            job_id=job.job_id,
            block_number=ctx.block.number,
        )

        try:
            # Call with or without ctx based on signature
            if _accepts_ctx(type(job), "check"):
                result = job.check(ctx)
            else:
                result = job.check()

            if inspect.isawaitable(result):
                return await result
            return result
        finally:
            set_job_context(False)
            reset_rpc_job_context(async_rpc_ctx_token)
            reset_check_block(check_block_token)
            _job_ctx.reset(ctx_token)
            _current_job.reset(job_token)

    def _build_check_context(self, job: Job, block: BlockInfo) -> CheckContext:
        """Build a CheckContext for job check phase.

        Args:
            job: Job instance
            block: Block information

        Returns:
            CheckContext with block-pinned RPC access and read+write KV
        """
        # Get read RPC client for this job's read_group
        from brawny.alerts.contracts import SimpleContractFactory
        from brawny.config.routing import resolve_job_groups

        read_group, _ = resolve_job_groups(self._config, job)
        rpc = self._rpc_clients.get_read_client(read_group)

        # Build BlockContext (immutable snapshot)
        block_ctx = BlockContext(
            number=block.block_number,
            timestamp=block.timestamp,
            hash=block.block_hash,
            base_fee=0,  # TODO: Get from block if available
            chain_id=block.chain_id,
        )

        # Build ContractFactory
        contracts = SimpleContractFactory(self._contract_system) if self._contract_system else None

        return CheckContext(
            block=block_ctx,
            kv=DatabaseJobKVStore(self._db, job.job_id),
            job_id=job.job_id,
            rpc=rpc,
            logger=logger.bind(job_id=job.job_id, chain_id=block.chain_id),
            contracts=contracts,
            _db=self._db,
        )

    def _build_build_context(
        self, job: Job, block: BlockInfo, trigger: Trigger, signer_address: str
    ) -> BuildContext:
        """Build a BuildContext for job build phase.

        Args:
            job: Job instance
            block: Block information
            trigger: The trigger from check()
            signer_address: Resolved signer address

        Returns:
            BuildContext with trigger, signer, and read-only KV
        """
        from brawny.alerts.contracts import SimpleContractFactory
        from brawny.config.routing import resolve_job_groups

        read_group, _ = resolve_job_groups(self._config, job)
        rpc = self._rpc_clients.get_read_client(read_group)

        # Build BlockContext
        block_ctx = BlockContext(
            number=block.block_number,
            timestamp=block.timestamp,
            hash=block.block_hash,
            base_fee=0,
            chain_id=block.chain_id,
        )

        contracts = SimpleContractFactory(self._contract_system) if self._contract_system else None

        return BuildContext(
            block=block_ctx,
            trigger=trigger,
            job_id=job.job_id,
            signer_address=signer_address,
            rpc=rpc,
            logger=logger.bind(job_id=job.job_id, chain_id=block.chain_id),
            contracts=contracts,
            kv=DatabaseJobKVStore(self._db, job.job_id),  # KVReader (read-only access)
        )

    def _create_intent_for_trigger(
        self,
        job: Job,
        block: BlockInfo,
        trigger: Trigger,
    ) -> tuple[TxIntent | None, bool]:
        """Create a transaction intent for a triggered job.

        Args:
            job: Job that triggered
            block: Block information
            trigger: Trigger with intent details
        """
        from brawny.tx.intent import create_intent

        # Resolve signer address for build context
        signer_address = job.signer_address

        # Build context for build_tx (phase-specific)
        ctx = self._build_build_context(job, block, trigger, signer_address)

        from brawny.scripting import set_job_context

        def _call_build_with_job_context() -> TxIntentSpec:
            # Set contextvars for implicit context (inside worker thread)
            ctx_token = _job_ctx.set(ctx)
            job_token = _current_job.set(job)
            rpc_ctx_token = set_rpc_job_context(job.job_id)
            set_job_context(True)
            try:
                # Support legacy build_intent(trigger) API:
                # If job has build_intent but didn't override build_tx, use legacy API
                if hasattr(job, "build_intent") and type(job).build_tx is Job.build_tx:
                    return job.build_intent(ctx.trigger)
                # Call with or without ctx based on signature
                if _accepts_ctx(type(job), "build_tx"):
                    return job.build_tx(ctx)
                else:
                    return job.build_tx()
            finally:
                set_job_context(False)
                reset_rpc_job_context(rpc_ctx_token)
                _job_ctx.reset(ctx_token)
                _current_job.reset(job_token)

        # Call job's build_tx method
        try:
            future = self._executor.submit(_call_build_with_job_context)
            spec = future.result(timeout=job.build_timeout_seconds)
        except FuturesTimeout:
            logger.error(
                "job.build.timeout",
                job_id=job.job_id,
                block_number=block.block_number,
                timeout=job.build_timeout_seconds,
            )
            metrics = get_metrics()
            metrics.counter(JOB_BUILD_TIMEOUTS).inc(
                chain_id=self._chain_id,
                job_id=job.job_id,
            )
            # Recreate executor to prevent stuck thread from blocking future jobs
            self._recreate_executor_after_timeout("build", job.job_id)
            raise TimeoutError(f"build_tx timed out after {job.build_timeout_seconds}s")

        # Compute idempotency parts
        idem_parts = list(trigger.idempotency_parts)
        if not idem_parts:
            # Default: use block number as idempotency part
            idem_parts = [block.block_number]

        # Resolve broadcast group and endpoints once at intent creation
        from brawny._rpc.broadcast import get_broadcast_endpoints
        from brawny.config.routing import resolve_job_groups

        _, broadcast_group = resolve_job_groups(self._config, job)
        broadcast_endpoints = get_broadcast_endpoints(self._config, broadcast_group)

        # Create intent with idempotency and broadcast binding
        # trigger.reason is auto-merged into metadata
        with self._db.transaction():
            inflight = self._db.get_inflight_intents_for_scope(
                self._chain_id,
                job.job_id,
                signer_address,
                spec.to_address,
            )
            if inflight:
                existing = inflight[0]
                existing_id = existing.get("intent_id")
                existing_status = existing.get("status")
                existing_claimed_at = existing.get("claimed_at")
                existing_attempts = 0
                if existing_id:
                    try:
                        existing_attempts = len(
                            self._db.get_attempts_for_intent(UUID(existing_id))
                        )
                    except Exception:
                        existing_attempts = 0
                logger.info(
                    "intent.create.skipped_inflight",
                    job_id=job.job_id,
                    signer=signer_address,
                    to_address=spec.to_address,
                    existing_intent_id=str(existing_id) if existing_id else None,
                    existing_status=existing_status,
                    existing_claimed_at=existing_claimed_at,
                    existing_attempt_count=existing_attempts,
                )
                if len(inflight) > 1:
                    logger.warning(
                        "invariant.multiple_inflight_intents",
                        job_id=job.job_id,
                        signer=signer_address,
                        to_address=spec.to_address,
                        count=len(inflight),
                    )
                return None, False

            intent, is_new = create_intent(
                db=self._db,
                job_id=job.job_id,
                chain_id=self._chain_id,
                spec=spec,
                idem_parts=idem_parts,
                broadcast_group=broadcast_group,
                broadcast_endpoints=broadcast_endpoints,
                trigger=trigger,
            )

        if is_new and self._on_intent_created:
            self._on_intent_created(str(intent.intent_id))

        return intent, is_new

    def close(self) -> None:
        """Shutdown the runner."""
        self._executor.shutdown(wait=True)