abstract-block-dumper 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
abstract_block_dumper/_internal/dal/django_dal.py

@@ -21,13 +21,19 @@ def get_ready_to_retry_attempts() -> QuerySet[abd_models.TaskAttempt]:
 
 
 def executed_block_numbers(executable_path: str, args_json: str, from_block: int, to_block: int) -> set[int]:
-    block_numbers = abd_models.TaskAttempt.objects.filter(
-        executable_path=executable_path,
-        args_json=args_json,
-        block_number__gte=from_block,
-        block_number__lt=to_block,
-        status=abd_models.TaskAttempt.Status.SUCCESS,
-    ).values_list("block_number", flat=True)
+    # Use iterator() to avoid Django's QuerySet caching which causes memory leaks
+    # during long-running backfill operations
+    block_numbers = (
+        abd_models.TaskAttempt.objects.filter(
+            executable_path=executable_path,
+            args_json=args_json,
+            block_number__gte=from_block,
+            block_number__lt=to_block,
+            status=abd_models.TaskAttempt.Status.SUCCESS,
+        )
+        .values_list("block_number", flat=True)
+        .iterator()
+    )
     return set(block_numbers)
 
 
abstract_block_dumper/_internal/dal/memory_registry.py

@@ -23,15 +23,14 @@ class RegistryItem:
         """Check if condition matches for given block and arguments."""
         try:
             return self.condition(block_number, **kwargs)
-        except Exception as e:
-            logger.error(
+        except Exception as exc:
+            logger.exception(
                 "Condition evaluation failed",
                 condition=self.function.__name__,
                 block_number=block_number,
-                exc_info=True,
             )
-            msg = f"Failed to evaluate condition: {e}"
-            raise ConditionEvaluationError(msg) from e
+            msg = "Failed to evaluate condition"
+            raise ConditionEvaluationError(msg) from exc
 
     def get_execution_args(self) -> list[dict[str, Any]]:
         """Get list of argument sets for execution."""
abstract_block_dumper/_internal/services/backfill_scheduler.py

@@ -0,0 +1,446 @@
+"""
+Backfill scheduler for historical block processing.
+
+This module provides a dedicated scheduler for backfilling historical blocks
+with rate limiting and automatic archive network switching.
+"""
+
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import structlog
+
+import abstract_block_dumper._internal.dal.django_dal as abd_dal
+import abstract_block_dumper._internal.services.utils as abd_utils
+from abstract_block_dumper._internal.services.block_processor import BlockProcessor, block_processor_factory
+from abstract_block_dumper._internal.services.metrics import (
+    BlockProcessingTimer,
+    increment_archive_network_usage,
+    increment_blocks_processed,
+    set_backfill_progress,
+    set_block_lag,
+    set_current_block,
+)
+from abstract_block_dumper._internal.services.utils import serialize_args
+
+if TYPE_CHECKING:
+    import bittensor as bt
+
+    from abstract_block_dumper._internal.dal.memory_registry import RegistryItem
+
+logger = structlog.get_logger(__name__)
+
+# Blocks older than this threshold from current head require archive network
+ARCHIVE_BLOCK_THRESHOLD = 300
+
+# Progress logging interval
+PROGRESS_LOG_INTERVAL = 100
+ARCHIVE_NETWORK = "archive"
+
+# Memory cleanup interval (every N blocks)
+MEMORY_CLEANUP_INTERVAL = 1000
+
+
+@dataclass
+class DryRunStats:
+    """Statistics for dry-run mode."""
+
+    total_blocks: int = 0
+    already_processed: int = 0
+    blocks_needing_tasks: int = 0
+    estimated_tasks: int = 0
+
+
+class BackfillScheduler:
+    """Scheduler for backfilling historical blocks with rate limiting."""
+
+    def __init__(
+        self,
+        block_processor: BlockProcessor,
+        network: str,
+        from_block: int,
+        to_block: int,
+        rate_limit: float = 1.0,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Initialize the backfill scheduler.
+
+        Args:
+            block_processor: The block processor to use for task execution.
+            network: The bittensor network name (e.g., 'finney').
+            from_block: Starting block number (inclusive).
+            to_block: Ending block number (inclusive).
+            rate_limit: Seconds to sleep between processing each block.
+            dry_run: If True, preview what would be processed without executing.
+
+        """
+        self.block_processor = block_processor
+        self.network = network
+        self.from_block = from_block
+        self.to_block = to_block
+        self.rate_limit = rate_limit
+        self.dry_run = dry_run
+        self.is_running = False
+        self._subtensor: bt.Subtensor | None = None
+        self._archive_subtensor: bt.Subtensor | None = None
+        self._current_head_cache: int | None = None
+
+    @property
+    def subtensor(self) -> bt.Subtensor:
+        """Get the regular subtensor connection, creating it if needed."""
+        if self._subtensor is None:
+            self._subtensor = abd_utils.get_bittensor_client(self.network)
+        return self._subtensor
+
+    @property
+    def archive_subtensor(self) -> bt.Subtensor:
+        """Get the archive subtensor connection, creating it if needed."""
+        if self._archive_subtensor is None:
+            self._archive_subtensor = abd_utils.get_bittensor_client("archive")
+        return self._archive_subtensor
+
+    def get_subtensor_for_block(self, block_number: int) -> bt.Subtensor:
+        """
+        Get the appropriate subtensor for the given block number.
+
+        Uses archive network for blocks older than ARCHIVE_BLOCK_THRESHOLD
+        from the current head.
+        """
+        if self._current_head_cache is None:
+            self._current_head_cache = self.subtensor.get_current_block()
+
+        blocks_behind = self._current_head_cache - block_number
+
+        if blocks_behind > ARCHIVE_BLOCK_THRESHOLD:
+            logger.debug(
+                "Using archive network for old block",
+                block_number=block_number,
+                blocks_behind=blocks_behind,
+            )
+            return self.archive_subtensor
+        return self.subtensor
+
+    def _get_network_type_for_block(self, block_number: int) -> str:
+        """Get the network type string for a block (for display purposes)."""
+        if self._current_head_cache is None:
+            self._current_head_cache = self.subtensor.get_current_block()
+
+        blocks_behind = self._current_head_cache - block_number
+        return ARCHIVE_NETWORK if blocks_behind > ARCHIVE_BLOCK_THRESHOLD else self.network
+
+    def start(self) -> DryRunStats | None:
+        """
+        Start processing blocks from from_block to to_block.
+
+        Returns:
+            DryRunStats if dry_run is True, None otherwise.
+
+        """
+        self.is_running = True
+
+        # Refresh current head for accurate archive network decisions
+        self._current_head_cache = self.subtensor.get_current_block()
+
+        total_blocks = self.to_block - self.from_block + 1
+        network_type = self._get_network_type_for_block(self.from_block)
+
+        logger.info(
+            "BackfillScheduler starting",
+            from_block=self.from_block,
+            to_block=self.to_block,
+            total_blocks=total_blocks,
+            rate_limit=self.rate_limit,
+            dry_run=self.dry_run,
+            network_type=network_type,
+            current_head=self._current_head_cache,
+        )
+
+        if self.dry_run:
+            return self._run_dry_run()
+
+        self._run_backfill()
+        return None
+
+    def _run_dry_run(self) -> DryRunStats:
+        """
+        Run in dry-run mode to preview what would be processed.
+
+        Optimized to fetch all executed blocks in one query per registry item,
+        instead of querying for each block individually.
+        """
+        stats = DryRunStats(total_blocks=self.to_block - self.from_block + 1)
+
+        registry_items = self.block_processor.registry.get_functions()
+
+        # Pre-fetch all executed blocks for each registry item + args combination
+        # This reduces N queries (one per block) to M queries (one per registry item + args)
+        executed_blocks_cache: dict[tuple[str, str], set[int]] = {}
+
+        logger.info(
+            "Dry run: pre-fetching executed blocks",
+            from_block=self.from_block,
+            to_block=self.to_block,
+            registry_items_count=len(registry_items),
+        )
+
+        for registry_item in registry_items:
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+
+                # Fetch all executed blocks in the range with a single query
+                executed_blocks_cache[cache_key] = abd_dal.executed_block_numbers(
+                    registry_item.executable_path,
+                    args_json,
+                    self.from_block,
+                    self.to_block + 1,
+                )
+
+        logger.info(
+            "Dry run: analyzing blocks",
+            cache_entries=len(executed_blocks_cache),
+        )
+
+        # Track which blocks have at least one task
+        blocks_with_tasks: set[int] = set()
+
+        for registry_item in registry_items:
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+                executed_blocks = executed_blocks_cache[cache_key]
+
+                for block_number in range(self.from_block, self.to_block + 1):
+                    if not self.is_running:
+                        break
+
+                    if block_number in executed_blocks:
+                        continue
+
+                    # Check if condition matches
+                    try:
+                        if registry_item.match_condition(block_number, **args):
+                            stats.estimated_tasks += 1
+                            blocks_with_tasks.add(block_number)
+                    except Exception as exc:
+                        logger.debug(
+                            "Error evaluating match condition during dry run",
+                            function_name=registry_item.function.__name__,
+                            block_number=block_number,
+                            args=args,
+                            error=str(exc),
+                        )
+
+        stats.blocks_needing_tasks = len(blocks_with_tasks)
+        stats.already_processed = stats.total_blocks - stats.blocks_needing_tasks
+
+        return stats
+
+    def _run_backfill(self) -> None:
+        """Run the actual backfill process."""
+        processed_count = 0
+        total_blocks = self.to_block - self.from_block + 1
+
+        # Set initial metrics
+        set_backfill_progress(self.from_block, self.to_block, self.from_block)
+
+        # Pre-fetch all executed blocks to avoid per-block DB queries
+        logger.info(
+            "Pre-fetching executed blocks",
+            from_block=self.from_block,
+            to_block=self.to_block,
+        )
+        executed_blocks_cache = self._prefetch_executed_blocks()
+        logger.info(
+            "Pre-fetch complete",
+            cache_entries=len(executed_blocks_cache),
+        )
+
+        try:
+            for block_number in range(self.from_block, self.to_block + 1):
+                if not self.is_running:
+                    logger.info("BackfillScheduler stopping early", processed_count=processed_count)
+                    break
+
+                try:
+                    with BlockProcessingTimer(mode="backfill"):
+                        self._process_block(block_number, executed_blocks_cache)
+
+                    processed_count += 1
+
+                    # Update metrics
+                    set_current_block("backfill", block_number)
+                    set_backfill_progress(self.from_block, self.to_block, block_number)
+                    increment_blocks_processed("backfill")
+
+                    # Track block lag (distance from chain head)
+                    if self._current_head_cache:
+                        set_block_lag("backfill", self._current_head_cache - block_number)
+
+                    # Log progress periodically
+                    if processed_count % PROGRESS_LOG_INTERVAL == 0:
+                        progress_pct = (processed_count / total_blocks) * 100
+                        logger.info(
+                            "Backfill progress",
+                            processed=processed_count,
+                            total=total_blocks,
+                            progress_percent=f"{progress_pct:.1f}%",
+                            current_block=block_number,
+                        )
+
+                    # Rate limiting between block submissions
+                    if block_number < self.to_block and self.rate_limit > 0:
+                        time.sleep(self.rate_limit)
+
+                except KeyboardInterrupt:
+                    raise
+                except Exception:
+                    logger.exception(
+                        "Error processing block during backfill",
+                        block_number=block_number,
+                    )
+                    # Continue with next block
+                    time.sleep(self.rate_limit)
+
+        except KeyboardInterrupt:
+            logger.info(
+                "BackfillScheduler interrupted",
+                processed_count=processed_count,
+                last_block=self.from_block + processed_count - 1 if processed_count > 0 else self.from_block,
+            )
+            self.stop()
+
+        logger.info(
+            "BackfillScheduler completed",
+            processed_count=processed_count,
+            total_blocks=total_blocks,
+        )
+
+    def _prefetch_executed_blocks(self) -> dict[tuple[str, str], set[int]]:
+        """Pre-fetch all executed blocks for all registry items in the range."""
+        cache: dict[tuple[str, str], set[int]] = {}
+
+        for registry_item in self.block_processor.registry.get_functions():
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+
+                cache[cache_key] = abd_dal.executed_block_numbers(
+                    registry_item.executable_path,
+                    args_json,
+                    self.from_block,
+                    self.to_block + 1,
+                )
+
+        return cache
+
+    def _process_block(
+        self,
+        block_number: int,
+        executed_blocks_cache: dict[tuple[str, str], set[int]],
+    ) -> None:
+        """Process a single block during backfill."""
+        for registry_item in self.block_processor.registry.get_functions():
+            try:
+                self._process_registry_item_for_backfill(
+                    registry_item,
+                    block_number,
+                    executed_blocks_cache,
+                )
+            except Exception:
+                logger.exception(
+                    "Error processing registry item during backfill",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                )
+
+    def _requires_archive_network(self, block_number: int) -> bool:
+        """Check if a block requires archive network based on age."""
+        if self._current_head_cache is None:
+            return False
+        blocks_behind = self._current_head_cache - block_number
+        return blocks_behind > ARCHIVE_BLOCK_THRESHOLD
+
+    def _process_registry_item_for_backfill(
+        self,
+        registry_item: RegistryItem,
+        block_number: int,
+        executed_blocks_cache: dict[tuple[str, str], set[int]],
+    ) -> None:
+        """Process a registry item for backfill - only submits if not already executed."""
+        for args in registry_item.get_execution_args():
+            args_json = serialize_args(args)
+            cache_key = (registry_item.executable_path, args_json)
+
+            # Check if already executed using pre-fetched cache
+            executed_blocks = executed_blocks_cache.get(cache_key, set())
+
+            if block_number in executed_blocks:
+                continue
+
+            # Check condition and execute
+            try:
+                if registry_item.match_condition(block_number, **args):
+                    use_archive = self._requires_archive_network(block_number)
+                    if use_archive:
+                        increment_archive_network_usage()
+                    logger.debug(
+                        "Backfilling block",
+                        function_name=registry_item.function.__name__,
+                        block_number=block_number,
+                        args=args,
+                        use_archive=use_archive,
+                    )
+                    self.block_processor.executor.execute(
+                        registry_item,
+                        block_number,
+                        args,
+                        use_archive=use_archive,
+                    )
+            except Exception:
+                logger.exception(
+                    "Error during backfill task execution",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                    args=args,
+                )
+
+    def stop(self) -> None:
+        """Stop the backfill scheduler."""
+        self.is_running = False
+        logger.info("BackfillScheduler stopped")
+
+
+def backfill_scheduler_factory(
+    from_block: int,
+    to_block: int,
+    network: str = "finney",
+    rate_limit: float = 1.0,
+    dry_run: bool = False,
+) -> BackfillScheduler:
+    """
+    Factory for BackfillScheduler.
+
+    Args:
+        from_block: Starting block number (inclusive).
+        to_block: Ending block number (inclusive).
+        network: Bittensor network name. Defaults to "finney".
+        rate_limit: Seconds to sleep between blocks. Defaults to 1.0.
+        dry_run: If True, preview without executing. Defaults to False.
+
+    Returns:
+        Configured BackfillScheduler instance.
+
+    """
+    return BackfillScheduler(
+        block_processor=block_processor_factory(),
+        network=network,
+        from_block=from_block,
+        to_block=to_block,
+        rate_limit=rate_limit,
+        dry_run=dry_run,
+    )
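A minimal usage sketch of the factory defined above; the block numbers are illustrative:

from abstract_block_dumper._internal.services.backfill_scheduler import backfill_scheduler_factory

# Preview a historical range first; start() returns DryRunStats in dry-run mode.
scheduler = backfill_scheduler_factory(from_block=1_000_000, to_block=1_000_100, dry_run=True)
stats = scheduler.start()
if stats is not None:
    print(stats.blocks_needing_tasks, stats.estimated_tasks)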
abstract_block_dumper/_internal/services/block_processor.py

@@ -1,3 +1,5 @@
+import time
+
 import structlog
 from django.db import transaction
 
@@ -5,7 +7,6 @@ import abstract_block_dumper._internal.dal.django_dal as abd_dal
 from abstract_block_dumper._internal.dal.memory_registry import BaseRegistry, RegistryItem, task_registry
 from abstract_block_dumper._internal.exceptions import ConditionEvaluationError
 from abstract_block_dumper._internal.services.executor import CeleryExecutor
-from abstract_block_dumper._internal.services.utils import serialize_args
 from abstract_block_dumper.models import TaskAttempt
 
 logger = structlog.get_logger(__name__)
@@ -18,16 +19,15 @@ class BlockProcessor:
         self._cleanup_phantom_tasks()
 
     def process_block(self, block_number: int) -> None:
+        """Process a single block - executes registered tasks for this block only."""
         for registry_item in self.registry.get_functions():
            try:
-                self.process_backfill(registry_item, block_number)
                self.process_registry_item(registry_item, block_number)
            except Exception:
-                logger.error(
+                logger.exception(
                    "Error processing registry item",
                    function_name=registry_item.function.__name__,
                    block_number=block_number,
-                    exc_info=True,
                )
 
    def process_registry_item(self, registry_item: RegistryItem, block_number: int) -> None:
@@ -43,64 +43,28 @@ class BlockProcessor:
                )
                # Continue with other tasks
            except Exception:
-                logger.error("Unexpected error processing task", exc_info=True)
-
-    def process_backfill(self, registry_item: RegistryItem, current_block: int) -> None:
-        if not registry_item.backfilling_lookback:
-            return None
-
-        start_block = max(0, current_block - registry_item.backfilling_lookback)
-
-        logger.info(
-            "Processing backfill",
-            function_name=registry_item.function.__name__,
-            start_block=start_block,
-            current_block=current_block,
-            lookback=registry_item.backfilling_lookback,
-        )
-
-        execution_args_list = registry_item.get_execution_args()
-
-        for args in execution_args_list:
-            args_json = serialize_args(args)
-
-            executed_blocks = abd_dal.executed_block_numbers(
-                registry_item.executable_path,
-                args_json,
-                start_block,
-                current_block,
-            )
-
-            for block_number in range(start_block, current_block):
-                if block_number in executed_blocks:
-                    continue
+                logger.exception("Unexpected error processing task")
 
-                try:
-                    if registry_item.match_condition(block_number, **args):
-                        logger.debug(
-                            "Backfilling block",
-                            function_name=registry_item.function.__name__,
-                            block_number=block_number,
-                            args=args,
-                        )
-                        self.executor.execute(registry_item, block_number, args)
-                except Exception:
-                    logger.error(
-                        "Error during backfill",
-                        function_name=registry_item.function.__name__,
-                        block_number=block_number,
-                        args=args,
-                        exc_info=True,
-                    )
-
-    def recover_failed_retries(self) -> None:
+    def recover_failed_retries(self, poll_interval: int, batch_size: int | None = None) -> None:
        """
        Recover failed tasks that are ready to be retried.
 
        This handles tasks that may have been lost due to scheduler restarts.
+
+        Args:
+            poll_interval: Seconds to sleep between processing each retry.
+            batch_size: Maximum number of retries to process. If None, process all.
+
        """
        retry_count = 0
-        for retry_attempt in abd_dal.get_ready_to_retry_attempts():
+        retry_attempts = abd_dal.get_ready_to_retry_attempts()
+
+        # Apply batch size limit if specified
+        if batch_size is not None:
+            retry_attempts = retry_attempts[:batch_size]
+
+        for retry_attempt in retry_attempts:
+            time.sleep(poll_interval)
            try:
                # Find the registry item to get celery_kwargs
                registry_item = self.registry.get_by_executable_path(retry_attempt.executable_path)
@@ -148,10 +112,9 @@ class BlockProcessor:
                    attempt_count=task_attempt.attempt_count,
                )
            except Exception:
-                logger.error(
+                logger.exception(
                    "Failed to recover retry",
                    task_id=retry_attempt.id,
-                    exc_info=True,
                )
                # Reload task to see current state after potential execution failure
                try:
abstract_block_dumper/_internal/services/executor.py

@@ -4,13 +4,21 @@ import structlog
 
 import abstract_block_dumper._internal.dal.django_dal as abd_dal
 from abstract_block_dumper._internal.dal.memory_registry import RegistryItem
+from abstract_block_dumper._internal.services.metrics import increment_tasks_submitted
 from abstract_block_dumper.models import TaskAttempt
 
 logger = structlog.get_logger(__name__)
 
 
 class CeleryExecutor:
-    def execute(self, registry_item: RegistryItem, block_number: int, args: dict[str, Any]) -> None:
+    def execute(
+        self,
+        registry_item: RegistryItem,
+        block_number: int,
+        args: dict[str, Any],
+        *,
+        use_archive: bool = False,
+    ) -> None:
         task_attempt, created = abd_dal.task_create_or_get_pending(
             block_number=block_number,
             executable_path=registry_item.executable_path,
@@ -26,6 +34,7 @@ class CeleryExecutor:
 
         task_kwargs = {
             "block_number": block_number,
+            "_use_archive_network": use_archive,  # Runtime hint, not stored in DB
             **args,
         }
 
@@ -49,6 +58,12 @@ class CeleryExecutor:
             celery_kwargs=apply_async_kwargs,
         )
 
-        celery_task = registry_item.function.apply_async(**apply_async_kwargs)
+        # Don't store AsyncResult to avoid memory accumulation during long runs
+        # The task ID can be retrieved from the task_attempt if needed
+        registry_item.function.apply_async(**apply_async_kwargs)
 
-        logger.debug("Celery task scheduled", task_id=task_attempt.id, celery_task_id=celery_task.id)
+        # Track task submission metric
+        task_name = registry_item.executable_path.split(".")[-1]
+        increment_tasks_submitted(task_name)
+
+        logger.debug("Celery task scheduled", task_id=task_attempt.id)
abstract_block_dumper/_internal/services/metrics.py

@@ -0,0 +1,237 @@
+"""
+Optional Prometheus metrics for block dumper.
+
+Metrics are only available if prometheus_client is installed.
+Install with: pip install abstract-block-dumper[prometheus]
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Self
+
+import structlog
+
+if TYPE_CHECKING:
+    from types import TracebackType
+
+logger = structlog.get_logger(__name__)
+
+# Conditional import - metrics only work if prometheus_client is installed
+try:
+    from prometheus_client import Counter, Gauge, Histogram
+
+    PROMETHEUS_AVAILABLE = True
+except ImportError:
+    PROMETHEUS_AVAILABLE = False
+
+
+# Define no-op placeholders when prometheus is not available
+BLOCKS_PROCESSED = None
+TASKS_SUBMITTED = None
+CURRENT_BLOCK = None
+BACKFILL_PROGRESS = None
+BACKFILL_FROM_BLOCK = None
+BACKFILL_TO_BLOCK = None
+BLOCK_PROCESSING_TIME = None
+# Task-level metrics
+TASK_EXECUTIONS = None
+TASK_EXECUTION_TIME = None
+TASK_RETRIES = None
+# Business/observability metrics
+BLOCK_LAG = None  # How far behind the chain head
+PENDING_TASKS = None  # Current pending tasks count
+REGISTERED_TASKS = None  # Number of registered task types
+ARCHIVE_NETWORK_USAGE = None  # Counter for archive network fallback
+
+if PROMETHEUS_AVAILABLE:
+    BLOCKS_PROCESSED = Counter(  # type: ignore
+        "block_dumper_blocks_processed_total",
+        "Total blocks processed",
+        ["mode"],  # 'realtime' or 'backfill'
+    )
+    TASKS_SUBMITTED = Counter(  # type: ignore
+        "block_dumper_tasks_submitted_total",
+        "Total tasks submitted to Celery",
+        ["task_name"],
+    )
+    CURRENT_BLOCK = Gauge(  # type: ignore
+        "block_dumper_current_block",
+        "Current block being processed",
+        ["mode"],
+    )
+    BACKFILL_PROGRESS = Gauge(  # type: ignore
+        "block_dumper_backfill_progress_percent",
+        "Backfill progress percentage",
+    )
+    BACKFILL_FROM_BLOCK = Gauge(  # type: ignore
+        "block_dumper_backfill_from_block",
+        "Backfill starting block number",
+    )
+    BACKFILL_TO_BLOCK = Gauge(  # type: ignore
+        "block_dumper_backfill_to_block",
+        "Backfill target block number",
+    )
+    BLOCK_PROCESSING_TIME = Histogram(  # type: ignore
+        "block_dumper_block_processing_seconds",
+        "Time spent processing each block",
+        ["mode"],
+    )
+    # Task-level metrics
+    TASK_EXECUTIONS = Counter(  # type: ignore
+        "block_dumper_task_executions_total",
+        "Total task executions by status",
+        ["task_name", "status"],  # status: 'success', 'failed'
+    )
+    TASK_EXECUTION_TIME = Histogram(  # type: ignore
+        "block_dumper_task_execution_seconds",
+        "Time spent executing each task",
+        ["task_name"],
+        buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0),
+    )
+    TASK_RETRIES = Counter(  # type: ignore
+        "block_dumper_task_retries_total",
+        "Total task retry attempts",
+        ["task_name"],
+    )
+    # Business/observability metrics
+    BLOCK_LAG = Gauge(  # type: ignore
+        "block_dumper_block_lag",
+        "Number of blocks behind the chain head",
+        ["mode"],  # 'realtime' or 'backfill'
+    )
+    PENDING_TASKS = Gauge(  # type: ignore
+        "block_dumper_pending_tasks",
+        "Current number of pending tasks in queue",
+    )
+    REGISTERED_TASKS = Gauge(  # type: ignore
+        "block_dumper_registered_tasks",
+        "Number of registered task types",
+    )
+    ARCHIVE_NETWORK_USAGE = Counter(  # type: ignore
+        "block_dumper_archive_network_requests_total",
+        "Total requests using archive network",
+    )
+
+
+def increment_blocks_processed(mode: str) -> None:
+    """Increment the blocks processed counter."""
+    if PROMETHEUS_AVAILABLE and BLOCKS_PROCESSED is not None:
+        BLOCKS_PROCESSED.labels(mode=mode).inc()
+
+
+def increment_tasks_submitted(task_name: str) -> None:
+    """Increment the tasks submitted counter."""
+    if PROMETHEUS_AVAILABLE and TASKS_SUBMITTED is not None:
+        TASKS_SUBMITTED.labels(task_name=task_name).inc()
+
+
+def set_current_block(mode: str, block_number: int) -> None:
+    """Set the current block being processed."""
+    if PROMETHEUS_AVAILABLE and CURRENT_BLOCK is not None:
+        CURRENT_BLOCK.labels(mode=mode).set(block_number)
+
+
+def set_backfill_progress(from_block: int, to_block: int, current_block: int) -> None:
+    """Set backfill progress metrics."""
+    if not PROMETHEUS_AVAILABLE:
+        return
+
+    if BACKFILL_FROM_BLOCK is not None:
+        BACKFILL_FROM_BLOCK.set(from_block)
+    if BACKFILL_TO_BLOCK is not None:
+        BACKFILL_TO_BLOCK.set(to_block)
+
+    if BACKFILL_PROGRESS is not None:
+        total_blocks = to_block - from_block
+        if total_blocks > 0:
+            processed = current_block - from_block
+            progress = (processed / total_blocks) * 100
+            BACKFILL_PROGRESS.set(progress)
+
+
+def set_block_lag(mode: str, lag: int) -> None:
+    """Set the current block lag (distance from chain head)."""
+    if PROMETHEUS_AVAILABLE and BLOCK_LAG is not None:
+        BLOCK_LAG.labels(mode=mode).set(lag)
+
+
+def set_pending_tasks(count: int) -> None:
+    """Set the current number of pending tasks."""
+    if PROMETHEUS_AVAILABLE and PENDING_TASKS is not None:
+        PENDING_TASKS.set(count)
+
+
+def set_registered_tasks(count: int) -> None:
+    """Set the number of registered task types."""
+    if PROMETHEUS_AVAILABLE and REGISTERED_TASKS is not None:
+        REGISTERED_TASKS.set(count)
+
+
+def increment_archive_network_usage() -> None:
+    """Increment the archive network usage counter."""
+    if PROMETHEUS_AVAILABLE and ARCHIVE_NETWORK_USAGE is not None:
+        ARCHIVE_NETWORK_USAGE.inc()
+
+
+def record_task_execution(task_name: str, status: str) -> None:
+    """Record a task execution with status (success/failed)."""
+    if PROMETHEUS_AVAILABLE and TASK_EXECUTIONS is not None:
+        TASK_EXECUTIONS.labels(task_name=task_name, status=status).inc()
+
+
+def record_task_retry(task_name: str) -> None:
+    """Record a task retry attempt."""
+    if PROMETHEUS_AVAILABLE and TASK_RETRIES is not None:
+        TASK_RETRIES.labels(task_name=task_name).inc()
+
+
+def observe_task_execution_time(task_name: str, duration: float) -> None:
+    """Record task execution duration in seconds."""
+    if PROMETHEUS_AVAILABLE and TASK_EXECUTION_TIME is not None:
+        TASK_EXECUTION_TIME.labels(task_name=task_name).observe(duration)
+
+
+class TaskExecutionTimer:
+    """Context manager for timing task execution."""
+
+    def __init__(self, task_name: str) -> None:
+        self.task_name = task_name
+        self._timer: Any = None
+
+    def __enter__(self) -> Self:
+        if PROMETHEUS_AVAILABLE and TASK_EXECUTION_TIME is not None:
+            self._timer = TASK_EXECUTION_TIME.labels(task_name=self.task_name).time()
+            self._timer.__enter__()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self._timer is not None:
+            self._timer.__exit__(exc_type, exc_val, exc_tb)
+
+
+class BlockProcessingTimer:
+    """Context manager for timing block processing."""
+
+    def __init__(self, mode: str) -> None:
+        self.mode = mode
+        self._timer: Any = None
+
+    def __enter__(self) -> Self:
+        if PROMETHEUS_AVAILABLE and BLOCK_PROCESSING_TIME is not None:
+            self._timer = BLOCK_PROCESSING_TIME.labels(mode=self.mode).time()
+            self._timer.__enter__()  # Start the timer
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        if self._timer is not None:
+            self._timer.__exit__(exc_type, exc_val, exc_tb)
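Because every helper above checks PROMETHEUS_AVAILABLE internally, callers can use them unconditionally; a short sketch:

from abstract_block_dumper._internal.services.metrics import (
    BlockProcessingTimer,
    increment_blocks_processed,
)

# Both calls degrade to no-ops when prometheus_client is not installed.
with BlockProcessingTimer(mode="realtime"):
    pass  # ... process one block here ...
increment_blocks_processed("realtime")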
abstract_block_dumper/_internal/services/scheduler.py

@@ -7,6 +7,13 @@ from django.conf import settings
 import abstract_block_dumper._internal.dal.django_dal as abd_dal
 import abstract_block_dumper._internal.services.utils as abd_utils
 from abstract_block_dumper._internal.services.block_processor import BlockProcessor, block_processor_factory
+from abstract_block_dumper._internal.services.metrics import (
+    BlockProcessingTimer,
+    increment_blocks_processed,
+    set_block_lag,
+    set_current_block,
+    set_registered_tasks,
+)
 
 logger = structlog.get_logger(__name__)
 
@@ -20,10 +27,12 @@ class TaskScheduler:
         block_processor: BlockProcessor,
         network: str,
         poll_interval: int,
+        realtime_head_only: bool = False,
     ) -> None:
         self.block_processor = block_processor
         self.network = network
         self.poll_interval = poll_interval
+        self.realtime_head_only = realtime_head_only
         self.last_processed_block = -1
         self.is_running = False
         self._subtensor: bt.Subtensor | None = None
@@ -87,25 +96,49 @@ class TaskScheduler:
 
         self.initialize_last_block()
 
+        registered_tasks_count = len(self.block_processor.registry.get_functions())
+        set_registered_tasks(registered_tasks_count)
+
         logger.info(
             "TaskScheduler started",
             last_processed_block=self.last_processed_block,
-            registry_functions=len(self.block_processor.registry.get_functions()),
+            registry_functions=registered_tasks_count,
         )
 
         while self.is_running:
             try:
-                # Process lost retries first
-                self.block_processor.recover_failed_retries()
+                if self._current_block_cache is not None:
+                    self.subtensor = self.get_subtensor_for_block(self._current_block_cache)
 
                 # Update current block cache for archive network decision
                 self._current_block_cache = self.subtensor.get_current_block()
                 current_block = self._current_block_cache
 
-                for block_number in range(self.last_processed_block + 1, current_block + 1):
-                    self.block_processor.process_block(block_number)
+                if self.realtime_head_only:
+                    # Only process the current head block, skip if already processed
+                    if current_block != self.last_processed_block:
+                        with BlockProcessingTimer(mode="realtime"):
+                            self.block_processor.process_block(current_block)
+
+                        set_current_block("realtime", current_block)
+                        increment_blocks_processed("realtime")
+                        set_block_lag("realtime", 0)  # Head-only mode has no lag
+                        self.last_processed_block = current_block
+
                     time.sleep(self.poll_interval)
-                    self.last_processed_block = block_number
+                else:
+                    # Original behavior: process all blocks from last_processed to current
+                    for block_number in range(self.last_processed_block + 1, current_block + 1):
+                        with BlockProcessingTimer(mode="realtime"):
+                            self.block_processor.process_block(block_number)
+
+                        # Update metrics
+                        set_current_block("realtime", block_number)
+                        increment_blocks_processed("realtime")
+                        set_block_lag("realtime", current_block - block_number)
+
+                        time.sleep(self.poll_interval)
+                        self.last_processed_block = block_number
 
             except KeyboardInterrupt:
                 logger.info("TaskScheduler stopping due to KeyboardInterrupt.")
@@ -158,4 +191,5 @@ def task_scheduler_factory(network: str = "finney") -> TaskScheduler:
         block_processor=block_processor_factory(),
         network=network,
         poll_interval=getattr(settings, "BLOCK_DUMPER_POLL_INTERVAL", 1),
+        realtime_head_only=getattr(settings, "BLOCK_DUMPER_REALTIME_HEAD_ONLY", True),
     )
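The two getattr calls in task_scheduler_factory read plain Django settings; a hedged settings.py sketch (the names come from the getattr() calls above, the values shown match their fallbacks):

# settings.py -- illustrative
BLOCK_DUMPER_POLL_INTERVAL = 1          # seconds between scheduler polls
BLOCK_DUMPER_REALTIME_HEAD_ONLY = True  # process only the chain head each poll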
abstract_block_dumper/_version.py

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.0.7'
-__version_tuple__ = version_tuple = (0, 0, 7)
+__version__ = version = '0.0.9'
+__version_tuple__ = version_tuple = (0, 0, 9)
 
 __commit_id__ = commit_id = None
abstract_block_dumper/management/commands/backfill_blocks_v1.py

@@ -0,0 +1,162 @@
+"""Management command for backfilling historical blocks."""
+
+from django.core.management.base import BaseCommand
+
+from abstract_block_dumper._internal.dal.memory_registry import task_registry
+from abstract_block_dumper._internal.discovery import ensure_modules_loaded
+from abstract_block_dumper._internal.services.backfill_scheduler import (
+    ARCHIVE_BLOCK_THRESHOLD,
+    BackfillScheduler,
+    backfill_scheduler_factory,
+)
+
+
+class Command(BaseCommand):
+    help = "Backfill historical blocks with rate limiting."
+
+    def add_arguments(self, parser) -> None:
+        parser.add_argument(
+            "--from-block",
+            type=int,
+            required=True,
+            help="Starting block number (inclusive)",
+        )
+        parser.add_argument(
+            "--to-block",
+            type=int,
+            required=True,
+            help="Ending block number (inclusive)",
+        )
+        parser.add_argument(
+            "--rate-limit",
+            type=float,
+            default=1.0,
+            help="Seconds to sleep between processing each block (default: 1.0)",
+        )
+        parser.add_argument(
+            "--network",
+            type=str,
+            default="finney",
+            help="Bittensor network name (default: finney)",
+        )
+        parser.add_argument(
+            "--dry-run",
+            action="store_true",
+            help="Preview blocks to backfill without executing tasks",
+        )
+
+    def handle(self, *args, **options) -> None:
+        from_block = options["from_block"]
+        to_block = options["to_block"]
+        rate_limit = options["rate_limit"]
+        network = options["network"]
+        dry_run = options["dry_run"]
+
+        # Validate arguments
+        if from_block > to_block:
+            self.stderr.write(self.style.ERROR(f"--from-block ({from_block}) must be <= --to-block ({to_block})"))
+            return
+
+        if rate_limit < 0:
+            self.stderr.write(self.style.ERROR("--rate-limit must be >= 0"))
+            return
+
+        # Load registered functions
+        self.stdout.write("Syncing decorated functions...")
+        ensure_modules_loaded()
+        functions_counter = len(task_registry.get_functions())
+        self.stdout.write(self.style.SUCCESS(f"Synced {functions_counter} functions"))
+
+        if functions_counter == 0:
+            self.stderr.write(self.style.WARNING("No functions registered. Nothing to backfill."))
+            return
+
+        # Create scheduler
+        scheduler = backfill_scheduler_factory(
+            from_block=from_block,
+            to_block=to_block,
+            network=network,
+            rate_limit=rate_limit,
+            dry_run=dry_run,
+        )
+
+        total_blocks = to_block - from_block + 1
+
+        if dry_run:
+            self._handle_dry_run(scheduler, from_block, to_block, total_blocks, rate_limit)
+        else:
+            self._handle_backfill(scheduler, from_block, to_block, total_blocks, rate_limit)
+
+    def _handle_dry_run(
+        self, scheduler: BackfillScheduler, from_block: int, to_block: int, total_blocks: int, rate_limit: float
+    ) -> None:
+        """Handle dry-run mode output."""
+        self.stdout.write("")
+        self.stdout.write(self.style.WARNING("Dry-run mode: previewing blocks to backfill (no tasks will be executed)"))
+        self.stdout.write("")
+
+        # Get network type
+        scheduler._current_head_cache = scheduler.subtensor.get_current_block()
+        network_type = scheduler._get_network_type_for_block(from_block)
+
+        self.stdout.write(f"Block range: {from_block} -> {to_block} ({total_blocks} blocks)")
+        operator = ">" if network_type == "archive" else "<="
+        self.stdout.write(f"Network: {network_type} (blocks {operator}{ARCHIVE_BLOCK_THRESHOLD} behind head)")
+        self.stdout.write(f"Current head: {scheduler._current_head_cache}")
+        self.stdout.write("")
+
+        # Show registry items
+        self.stdout.write("Registry items:")
+        for registry_item in scheduler.block_processor.registry.get_functions():
+            self.stdout.write(f"  - {registry_item.executable_path}")
+        self.stdout.write("")
+
+        # Run dry-run
+        self.stdout.write("Analyzing blocks...")
+        stats = scheduler.start()
+
+        if stats is None:
+            self.stderr.write(self.style.ERROR("Dry-run failed"))
+            return
+
+        # Output summary
+        self.stdout.write("")
+        self.stdout.write(self.style.SUCCESS("Summary:"))
+        self.stdout.write(f"  Total blocks in range: {stats.total_blocks}")
+        self.stdout.write(f"  Already processed (all tasks done): {stats.already_processed}")
+        self.stdout.write(f"  Blocks needing tasks: {stats.blocks_needing_tasks}")
+        self.stdout.write(f"  Estimated tasks to submit: {stats.estimated_tasks}")
+
+        if rate_limit > 0 and stats.blocks_needing_tasks > 0:
+            estimated_seconds = stats.blocks_needing_tasks * rate_limit
+            if estimated_seconds < 60:
+                time_str = f"~{estimated_seconds:.0f} seconds"
+            elif estimated_seconds < 3600:
+                time_str = f"~{estimated_seconds / 60:.1f} minutes"
+            else:
+                time_str = f"~{estimated_seconds / 3600:.1f} hours"
+            self.stdout.write(f"  Estimated time at {rate_limit}s rate limit: {time_str}")
+
+    def _handle_backfill(self, scheduler, from_block: int, to_block: int, total_blocks: int, rate_limit: float) -> None:
+        """Handle actual backfill execution."""
+        self.stdout.write("")
+        self.stdout.write(f"Starting backfill: {from_block} -> {to_block} ({total_blocks} blocks)")
+        self.stdout.write(f"Rate limit: {rate_limit} seconds between blocks")
+
+        if rate_limit > 0:
+            estimated_seconds = total_blocks * rate_limit
+            if estimated_seconds < 60:
+                time_str = f"~{estimated_seconds:.0f} seconds"
+            elif estimated_seconds < 3600:
+                time_str = f"~{estimated_seconds / 60:.1f} minutes"
+            else:
+                time_str = f"~{estimated_seconds / 3600:.1f} hours"
+            self.stdout.write(f"Estimated max time: {time_str}")
+
+        self.stdout.write("")
+        self.stdout.write("Press Ctrl+C to stop gracefully...")
+        self.stdout.write("")
+
+        scheduler.start()
+
+        self.stdout.write(self.style.SUCCESS("Backfill completed"))
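A usage sketch for the new command, driven through Django's call_command (option names follow the parser above; block numbers are illustrative):

from django.core.management import call_command

# Preview a backfill without submitting any Celery tasks.
call_command("backfill_blocks_v1", from_block=1_000_000, to_block=1_000_100, dry_run=True)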
abstract_block_dumper/v1/decorators.py

@@ -1,3 +1,5 @@
+import inspect
+import time
 from collections.abc import Callable
 from typing import Any, cast
 
@@ -9,6 +11,11 @@ import abstract_block_dumper._internal.dal.django_dal as abd_dal
 import abstract_block_dumper._internal.services.utils as abd_utils
 from abstract_block_dumper._internal.dal.memory_registry import RegistryItem, task_registry
 from abstract_block_dumper._internal.exceptions import CeleryTaskLockedError
+from abstract_block_dumper._internal.services.metrics import (
+    observe_task_execution_time,
+    record_task_execution,
+    record_task_retry,
+)
 from abstract_block_dumper.models import TaskAttempt
 
 logger = structlog.get_logger(__name__)
@@ -61,18 +68,28 @@ def schedule_retry(task_attempt: TaskAttempt) -> None:
         eta=task_attempt.next_retry_at,
     )
 
+    # Record retry metric
+    task_name = task_attempt.executable_path.split(".")[-1]
+    record_task_retry(task_name)
+
 
 def _celery_task_wrapper(
     func: Callable[..., Any], block_number: int, **kwargs: dict[str, Any]
 ) -> dict[str, Any] | None:
     executable_path = abd_utils.get_executable_path(func)
 
+    # Extract runtime hints that shouldn't be stored in DB
+    use_archive_network = kwargs.pop("_use_archive_network", False)
+
+    # Create db_kwargs without runtime hints for DB lookup
+    db_kwargs = {k: v for k, v in kwargs.items() if not k.startswith("_")}
+
     with transaction.atomic():
         try:
             task_attempt = TaskAttempt.objects.select_for_update(nowait=True).get(
                 block_number=block_number,
                 executable_path=executable_path,
-                args_json=abd_utils.serialize_args(kwargs),
+                args_json=abd_utils.serialize_args(db_kwargs),
             )
         except TaskAttempt.DoesNotExist as exc:
             msg = "TaskAttempt not found - task may have been canceled directly"
@@ -95,32 +112,55 @@ def _celery_task_wrapper(
         abd_dal.task_mark_as_started(task_attempt, abd_utils.get_current_celery_task_id())
 
         # Start task execution
+        # Pass _use_archive_network only if the function accepts **kwargs
+        # Otherwise, strip it to avoid TypeError
+        execution_kwargs = {"block_number": block_number, **kwargs}
+
+        # Check if function accepts **kwargs before adding _use_archive_network
+        sig = inspect.signature(func)
+        has_var_keyword = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values())
+        if has_var_keyword:
+            execution_kwargs["_use_archive_network"] = use_archive_network
+
+        task_name = executable_path.split(".")[-1]  # Get short task name
+        start_time = time.perf_counter()
+
         try:
-            execution_kwargs = {"block_number": block_number, **kwargs}
             logger.info(
                 "Starting task execution",
                 task_id=task_attempt.id,
                 block_number=block_number,
                 executable_path=executable_path,
                 celery_task_id=task_attempt.celery_task_id,
-                execution_kwargs=execution_kwargs,
+                use_archive_network=use_archive_network,
            )
 
            result = func(**execution_kwargs)
+            execution_duration = time.perf_counter() - start_time
 
            abd_dal.task_mark_as_success(task_attempt, result)
 
-            logger.info("Task completed successfully", task_id=task_attempt.id)
+            # Record success metrics
+            record_task_execution(task_name, "success")
+            observe_task_execution_time(task_name, execution_duration)
+
+            logger.info("Task completed successfully", task_id=task_attempt.id, duration=execution_duration)
            return {"result": result}
        except Exception as e:
+            execution_duration = time.perf_counter() - start_time
            logger.exception(
                "Task execution failed",
                task_id=task_attempt.id,
                error_type=type(e).__name__,
                error_message=str(e),
+                duration=execution_duration,
            )
            abd_dal.task_mark_as_failed(task_attempt)
 
+            # Record failure metrics
+            record_task_execution(task_name, "failed")
+            observe_task_execution_time(task_name, execution_duration)
+
            # Schedule retry after transaction commits:
            if abd_dal.task_can_retry(task_attempt):
                try:
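Since the wrapper forwards _use_archive_network only to functions that declare **kwargs, a registered task that wants the hint could be shaped like this sketch (the function body is hypothetical, and the registering decorator is omitted because it is not shown in this diff):

# Hypothetical task body -- illustrative only.
def dump_block(block_number: int, **kwargs) -> dict:
    # The wrapper injects this key only because **kwargs is present.
    use_archive = kwargs.get("_use_archive_network", False)
    # ... fetch block data, choosing the archive endpoint when use_archive is True ...
    return {"block_number": block_number, "used_archive": use_archive}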
abstract_block_dumper-{0.0.7 → 0.0.9}.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract-block-dumper
-Version: 0.0.7
+Version: 0.0.9
 Project-URL: Source, https://github.com/bactensor/abstract-block-dumper
 Project-URL: Issue Tracker, https://github.com/bactensor/abstract-block-dumper/issues
 Author-email: Reef Technologies <opensource@reef.pl>
@@ -18,6 +18,8 @@ Requires-Dist: bittensor>=9.10.1
 Requires-Dist: celery>=5.3
 Requires-Dist: django<6.0,>=3.2
 Requires-Dist: structlog>=25.4.0
+Provides-Extra: prometheus
+Requires-Dist: prometheus-client>=0.17.0; extra == 'prometheus'
 Description-Content-Type: text/markdown
 
 # Abstract Block Dumper
abstract_block_dumper-{0.0.7 → 0.0.9}.dist-info/RECORD

@@ -1,5 +1,5 @@
 abstract_block_dumper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-abstract_block_dumper/_version.py,sha256=AV58KqMkBGaCvmPdbd3g9huyNXfIVxjw8QbCMdaeivU,704
+abstract_block_dumper/_version.py,sha256=X2FLFwBoUmgJPsOV-1l-SxIXNSTX1TQ036Kf2j9Mc68,704
 abstract_block_dumper/admin.py,sha256=3J3I_QOKFgfMNpTXW-rTQGO_q5Ls6uNuL0FkPVdIsYg,1654
 abstract_block_dumper/apps.py,sha256=DXATdrjsL3T2IletTbKeD6unr8ScLaxg7wz0nAHTAns,215
 abstract_block_dumper/models.py,sha256=MO9824dmHB6xF3PrFE_RERh7whVjQtS4tt6QA0wSbg0,2022
@@ -8,22 +8,25 @@ abstract_block_dumper/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 abstract_block_dumper/_internal/discovery.py,sha256=sISOL8vq6rC0pOndrCfWKDZjyYwzzZIChG-BH9mteq0,745
 abstract_block_dumper/_internal/exceptions.py,sha256=jVXQ8b3gneno2XYvO0XisJPMlkAWb6H5u10egIpPJ4k,335
 abstract_block_dumper/_internal/dal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-abstract_block_dumper/_internal/dal/django_dal.py,sha256=pBGEFeo_U0ac2Za-dwzJvf04Ng8lP51aR60c_DUrGIw,5426
-abstract_block_dumper/_internal/dal/memory_registry.py,sha256=yMNF7jrvWGF-S1pqyR2zOCNLWwrdsImcvV6cGqu1wYE,2972
+abstract_block_dumper/_internal/dal/django_dal.py,sha256=i9jocanfptjXw5lfE2xBYvx5mo1g98IoMjlS-WjGP88,5623
+abstract_block_dumper/_internal/dal/memory_registry.py,sha256=m9Yms-cuemi9_5q_Kn_zsJnxDPEiuAUkESIAltD60QY,2943
 abstract_block_dumper/_internal/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-abstract_block_dumper/_internal/services/block_processor.py,sha256=wB-zeft3Ys8zmqCdF_v12rXd6umNWvGfy2Ts6XSGkL8,8132
-abstract_block_dumper/_internal/services/executor.py,sha256=ZZmQ9TzoNEoAE4amiU8lHRsTfP7YusUkWXasrArfo2g,1806
-abstract_block_dumper/_internal/services/scheduler.py,sha256=lhkyJ6wXGVtFAijs2Edz4ZVXAT9RP6GAY_Dh_Yg-wd4,6113
+abstract_block_dumper/_internal/services/backfill_scheduler.py,sha256=XgsVYXaz6pR4PBA9giesjhR74x1qLX2281-eQgM5qhs,16311
+abstract_block_dumper/_internal/services/block_processor.py,sha256=NC7p1oD38FpaZb6EbykBolP32uY069abumOvXrjOBV0,6644
+abstract_block_dumper/_internal/services/executor.py,sha256=WhpHhOAi4cI-qdEE8-DSt9xZwooOpSc9_uDMQBBoHUM,2317
+abstract_block_dumper/_internal/services/metrics.py,sha256=Gg-PQYZ98caaS52wm1EqhtPURXlfrVjk2t3-8nccqfo,7821
+abstract_block_dumper/_internal/services/scheduler.py,sha256=B62ZoOxSv8i5mverBWv67BzoyBd14f0wWnNQ329pJds,7770
 abstract_block_dumper/_internal/services/utils.py,sha256=QSs2hBHWOPgNgKPf_ZmADXuqEiqK5mWZp7JblvQgxZQ,1140
 abstract_block_dumper/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 abstract_block_dumper/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+abstract_block_dumper/management/commands/backfill_blocks_v1.py,sha256=EmNUozAZn8uThjCvusZe7poNrw9RYy-MafMg2wu3XeQ,6392
 abstract_block_dumper/management/commands/block_tasks_v1.py,sha256=jSi04ahIKYwlm_dNKCUGL_cmALv1iP-ZjfXrmz0pn-4,880
 abstract_block_dumper/migrations/0001_initial.py,sha256=ImPHC3G6kPkq4Xn_4YVAm4Labh1Xi7PkCRszYRGpTiI,2298
 abstract_block_dumper/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 abstract_block_dumper/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 abstract_block_dumper/v1/celery.py,sha256=X4IqVs5i6ZpyY7fy1SqMZgsZy4SXP-jK2qG-FYnjU38,1722
-abstract_block_dumper/v1/decorators.py,sha256=SBl8XP9qhKyTdsKaRREW870BZGidEe0C_nmxnwh76lo,8156
+abstract_block_dumper/v1/decorators.py,sha256=4FXsBScT_5Wadl8FadRcZZtyLytZUzSTX4V5DI0IuRs,9820
 abstract_block_dumper/v1/tasks.py,sha256=u9iMYdDUqzYT3yPrNwZecHnlweZ3yFipV9BcIWHCbus,2647
-abstract_block_dumper-0.0.7.dist-info/METADATA,sha256=bg7lku8X3hdZI9DBoi_IfVHmP_pAuCKNWHjWenZyI2Q,12902
-abstract_block_dumper-0.0.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-abstract_block_dumper-0.0.7.dist-info/RECORD,,
+abstract_block_dumper-0.0.9.dist-info/METADATA,sha256=xWskTf2HCMNuFGA5M2KSyyejjBLKgQ6OBv2Q5VuL5_I,12993
+abstract_block_dumper-0.0.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+abstract_block_dumper-0.0.9.dist-info/RECORD,,