abstract-block-dumper 0.0.7__tar.gz → 0.0.8__tar.gz
This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/CHANGELOG.md +5 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/PKG-INFO +3 -1
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/pyproject.toml +5 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/dal/django_dal.py +13 -7
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/dal/memory_registry.py +4 -5
- abstract_block_dumper-0.0.8/src/abstract_block_dumper/_internal/services/backfill_scheduler.py +438 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/services/block_processor.py +20 -57
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/services/executor.py +13 -3
- abstract_block_dumper-0.0.8/src/abstract_block_dumper/_internal/services/metrics.py +128 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/services/scheduler.py +32 -5
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_version.py +2 -2
- abstract_block_dumper-0.0.8/src/abstract_block_dumper/management/commands/backfill_blocks_v1.py +162 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/v1/decorators.py +17 -2
- abstract_block_dumper-0.0.8/tests/integration/test_backfill_scheduler.py +232 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/test_registered_celery_tasks.py +19 -12
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/test_scheduler.py +1 -1
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/uv.lock +16 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.cruft.json +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.github/dependabot.yml +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.github/workflows/ci.yml +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.github/workflows/publish.yml +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.gitignore +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.pre-commit-config.yaml +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/.shellcheckrc +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/README.md +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/SECURITY.md +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/docs/3rd_party/cookiecutter-rt-pkg/CHANGELOG.md +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/.dockerignore +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/.gitignore +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/Dockerfile +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/README.md +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/admin.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/apps.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/management/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/management/commands/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/management/commands/create_admin.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/migrations/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/models.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/tasks.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/tests.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/block_explorer/views.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/docker-compose.yml +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/example_project/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/example_project/asgi.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/example_project/celery.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/example_project/settings.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/example_project/urls.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/example_project/wsgi.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/main.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/manage.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/pyproject.toml +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/pytest.ini +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/example_project/uv.lock +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/noxfile.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/dal/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/discovery.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/exceptions.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/services/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/services/utils.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/admin.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/apps.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/management/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/management/commands/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/management/commands/block_tasks_v1.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/migrations/0001_initial.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/migrations/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/models.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/py.typed +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/v1/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/v1/celery.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/v1/tasks.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/conftest.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/django_fixtures.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/fatories.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/__init__.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/test_block_processor.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/test_concurrent_processing.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/test_multi_arguments_tasks.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/integration/test_task_registration.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/settings.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/unit/test_celery_integration.py +0 -0
- {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/tests/unit/test_decorator.py +0 -0
{abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/CHANGELOG.md
@@ -9,6 +9,11 @@ upcoming release can be found in [changelog.d](changelog.d).
 
 <!-- towncrier release notes start -->
 
+## [0.0.8](https://github.com/bactensor/abstract-block-dumper/releases/tag/v0.0.8) - 2025-12-04
+
+No significant changes.
+
+
 ## [0.0.7](https://github.com/bactensor/abstract-block-dumper/releases/tag/v0.0.7) - 2025-12-01
 
 No significant changes.
{abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract-block-dumper
-Version: 0.0.7
+Version: 0.0.8
 Project-URL: Source, https://github.com/bactensor/abstract-block-dumper
 Project-URL: Issue Tracker, https://github.com/bactensor/abstract-block-dumper/issues
 Author-email: Reef Technologies <opensource@reef.pl>
@@ -18,6 +18,8 @@ Requires-Dist: bittensor>=9.10.1
 Requires-Dist: celery>=5.3
 Requires-Dist: django<6.0,>=3.2
 Requires-Dist: structlog>=25.4.0
+Provides-Extra: prometheus
+Requires-Dist: prometheus-client>=0.17.0; extra == 'prometheus'
 Description-Content-Type: text/markdown
 
 # Abstract Block Dumper
{abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/pyproject.toml
@@ -27,6 +27,9 @@ dependencies = [
     "structlog>=25.4.0",
 ]
 
+[project.optional-dependencies]
+prometheus = ["prometheus-client>=0.17.0"]
+
 [project.urls]
 "Source" = "https://github.com/bactensor/abstract-block-dumper"
 "Issue Tracker" = "https://github.com/bactensor/abstract-block-dumper/issues"
@@ -160,6 +163,8 @@ module = [
     "cloudpickle",
     "cloudpickle.*",
     "nox",
+    "prometheus_client",
+    "prometheus_client.*",
     "pytest",
     "tests.*",
 ]
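(The `prometheus` extra added above mirrors the new `Provides-Extra: prometheus` metadata in PKG-INFO. Presumably it is enabled at install time with something like `pip install 'abstract-block-dumper[prometheus]'`, pulling in `prometheus-client>=0.17.0` for the new `metrics.py` module; the exact invocation is an assumption, not part of this diff.)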
{abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/dal/django_dal.py
@@ -21,13 +21,19 @@ def get_ready_to_retry_attempts() -> QuerySet[abd_models.TaskAttempt]:
 
 
 def executed_block_numbers(executable_path: str, args_json: str, from_block: int, to_block: int) -> set[int]:
-
-
-
-
-
-
-
+    # Use iterator() to avoid Django's QuerySet caching which causes memory leaks
+    # during long-running backfill operations
+    block_numbers = (
+        abd_models.TaskAttempt.objects.filter(
+            executable_path=executable_path,
+            args_json=args_json,
+            block_number__gte=from_block,
+            block_number__lt=to_block,
+            status=abd_models.TaskAttempt.Status.SUCCESS,
+        )
+        .values_list("block_number", flat=True)
+        .iterator()
+    )
     return set(block_numbers)
 
 
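The comment in the new `executed_block_numbers` body refers to Django's result caching: a fully evaluated QuerySet stores its rows on the instance, so reusing one queryset object across a long-running loop keeps every row in memory, whereas `.iterator()` streams rows without populating that cache. A minimal sketch of the difference, using a hypothetical `Row` model (not part of this package):

    # Hypothetical model, for illustration only.
    qs = Row.objects.values_list("value", flat=True)
    cached = list(qs)        # evaluates the queryset and caches every row on `qs`
    for v in qs:             # served from the cache; the full result stays in memory
        ...
    for v in qs.iterator():  # streams rows from the database; nothing is cached
        ...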
{abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/dal/memory_registry.py
@@ -23,15 +23,14 @@ class RegistryItem:
         """Check if condition matches for given block and arguments."""
         try:
             return self.condition(block_number, **kwargs)
-        except Exception as
-            logger.error(
+        except Exception as exc:
+            logger.exception(
                 "Condition evaluation failed",
                 condition=self.function.__name__,
                 block_number=block_number,
-                exc_info=True,
             )
-            msg =
-            raise ConditionEvaluationError(msg) from
+            msg = "Failed to evaluate condition"
+            raise ConditionEvaluationError(msg) from exc
 
     def get_execution_args(self) -> list[dict[str, Any]]:
         """Get list of argument sets for execution."""
abstract_block_dumper-0.0.8/src/abstract_block_dumper/_internal/services/backfill_scheduler.py ADDED
@@ -0,0 +1,438 @@
+"""
+Backfill scheduler for historical block processing.
+
+This module provides a dedicated scheduler for backfilling historical blocks
+with rate limiting and automatic archive network switching.
+"""
+
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import structlog
+
+import abstract_block_dumper._internal.dal.django_dal as abd_dal
+import abstract_block_dumper._internal.services.utils as abd_utils
+from abstract_block_dumper._internal.services.block_processor import BlockProcessor, block_processor_factory
+from abstract_block_dumper._internal.services.metrics import (
+    BlockProcessingTimer,
+    increment_blocks_processed,
+    set_backfill_progress,
+    set_current_block,
+)
+from abstract_block_dumper._internal.services.utils import serialize_args
+
+if TYPE_CHECKING:
+    import bittensor as bt
+
+    from abstract_block_dumper._internal.dal.memory_registry import RegistryItem
+
+logger = structlog.get_logger(__name__)
+
+# Blocks older than this threshold from current head require archive network
+ARCHIVE_BLOCK_THRESHOLD = 300
+
+# Progress logging interval
+PROGRESS_LOG_INTERVAL = 100
+ARCHIVE_NETWORK = "archive"
+
+# Memory cleanup interval (every N blocks)
+MEMORY_CLEANUP_INTERVAL = 1000
+
+
+@dataclass
+class DryRunStats:
+    """Statistics for dry-run mode."""
+
+    total_blocks: int = 0
+    already_processed: int = 0
+    blocks_needing_tasks: int = 0
+    estimated_tasks: int = 0
+
+
+class BackfillScheduler:
+    """Scheduler for backfilling historical blocks with rate limiting."""
+
+    def __init__(
+        self,
+        block_processor: BlockProcessor,
+        network: str,
+        from_block: int,
+        to_block: int,
+        rate_limit: float = 1.0,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Initialize the backfill scheduler.
+
+        Args:
+            block_processor: The block processor to use for task execution.
+            network: The bittensor network name (e.g., 'finney').
+            from_block: Starting block number (inclusive).
+            to_block: Ending block number (inclusive).
+            rate_limit: Seconds to sleep between processing each block.
+            dry_run: If True, preview what would be processed without executing.
+
+        """
+        self.block_processor = block_processor
+        self.network = network
+        self.from_block = from_block
+        self.to_block = to_block
+        self.rate_limit = rate_limit
+        self.dry_run = dry_run
+        self.is_running = False
+        self._subtensor: bt.Subtensor | None = None
+        self._archive_subtensor: bt.Subtensor | None = None
+        self._current_head_cache: int | None = None
+
+    @property
+    def subtensor(self) -> bt.Subtensor:
+        """Get the regular subtensor connection, creating it if needed."""
+        if self._subtensor is None:
+            self._subtensor = abd_utils.get_bittensor_client(self.network)
+        return self._subtensor
+
+    @property
+    def archive_subtensor(self) -> bt.Subtensor:
+        """Get the archive subtensor connection, creating it if needed."""
+        if self._archive_subtensor is None:
+            self._archive_subtensor = abd_utils.get_bittensor_client("archive")
+        return self._archive_subtensor
+
+    def get_subtensor_for_block(self, block_number: int) -> bt.Subtensor:
+        """
+        Get the appropriate subtensor for the given block number.
+
+        Uses archive network for blocks older than ARCHIVE_BLOCK_THRESHOLD
+        from the current head.
+        """
+        if self._current_head_cache is None:
+            self._current_head_cache = self.subtensor.get_current_block()
+
+        blocks_behind = self._current_head_cache - block_number
+
+        if blocks_behind > ARCHIVE_BLOCK_THRESHOLD:
+            logger.debug(
+                "Using archive network for old block",
+                block_number=block_number,
+                blocks_behind=blocks_behind,
+            )
+            return self.archive_subtensor
+        return self.subtensor
+
+    def _get_network_type_for_block(self, block_number: int) -> str:
+        """Get the network type string for a block (for display purposes)."""
+        if self._current_head_cache is None:
+            self._current_head_cache = self.subtensor.get_current_block()
+
+        blocks_behind = self._current_head_cache - block_number
+        return ARCHIVE_NETWORK if blocks_behind > ARCHIVE_BLOCK_THRESHOLD else self.network
+
+    def start(self) -> DryRunStats | None:
+        """
+        Start processing blocks from from_block to to_block.
+
+        Returns:
+            DryRunStats if dry_run is True, None otherwise.
+
+        """
+        self.is_running = True
+
+        # Refresh current head for accurate archive network decisions
+        self._current_head_cache = self.subtensor.get_current_block()
+
+        total_blocks = self.to_block - self.from_block + 1
+        network_type = self._get_network_type_for_block(self.from_block)
+
+        logger.info(
+            "BackfillScheduler starting",
+            from_block=self.from_block,
+            to_block=self.to_block,
+            total_blocks=total_blocks,
+            rate_limit=self.rate_limit,
+            dry_run=self.dry_run,
+            network_type=network_type,
+            current_head=self._current_head_cache,
+        )
+
+        if self.dry_run:
+            return self._run_dry_run()
+
+        self._run_backfill()
+        return None
+
+    def _run_dry_run(self) -> DryRunStats:
+        """
+        Run in dry-run mode to preview what would be processed.
+
+        Optimized to fetch all executed blocks in one query per registry item,
+        instead of querying for each block individually.
+        """
+        stats = DryRunStats(total_blocks=self.to_block - self.from_block + 1)
+
+        registry_items = self.block_processor.registry.get_functions()
+
+        # Pre-fetch all executed blocks for each registry item + args combination
+        # This reduces N queries (one per block) to M queries (one per registry item + args)
+        executed_blocks_cache: dict[tuple[str, str], set[int]] = {}
+
+        logger.info(
+            "Dry run: pre-fetching executed blocks",
+            from_block=self.from_block,
+            to_block=self.to_block,
+            registry_items_count=len(registry_items),
+        )
+
+        for registry_item in registry_items:
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+
+                # Fetch all executed blocks in the range with a single query
+                executed_blocks_cache[cache_key] = abd_dal.executed_block_numbers(
+                    registry_item.executable_path,
+                    args_json,
+                    self.from_block,
+                    self.to_block + 1,
+                )
+
+        logger.info(
+            "Dry run: analyzing blocks",
+            cache_entries=len(executed_blocks_cache),
+        )
+
+        # Track which blocks have at least one task
+        blocks_with_tasks: set[int] = set()
+
+        for registry_item in registry_items:
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+                executed_blocks = executed_blocks_cache[cache_key]
+
+                for block_number in range(self.from_block, self.to_block + 1):
+                    if not self.is_running:
+                        break
+
+                    if block_number in executed_blocks:
+                        continue
+
+                    # Check if condition matches
+                    try:
+                        if registry_item.match_condition(block_number, **args):
+                            stats.estimated_tasks += 1
+                            blocks_with_tasks.add(block_number)
+                    except Exception as exc:
+                        logger.debug(
+                            "Error evaluating match condition during dry run",
+                            function_name=registry_item.function.__name__,
+                            block_number=block_number,
+                            args=args,
+                            error=str(exc),
+                        )
+
+        stats.blocks_needing_tasks = len(blocks_with_tasks)
+        stats.already_processed = stats.total_blocks - stats.blocks_needing_tasks
+
+        return stats
+
+    def _run_backfill(self) -> None:
+        """Run the actual backfill process."""
+        processed_count = 0
+        total_blocks = self.to_block - self.from_block + 1
+
+        # Set initial metrics
+        set_backfill_progress(self.from_block, self.to_block, self.from_block)
+
+        # Pre-fetch all executed blocks to avoid per-block DB queries
+        logger.info(
+            "Pre-fetching executed blocks",
+            from_block=self.from_block,
+            to_block=self.to_block,
+        )
+        executed_blocks_cache = self._prefetch_executed_blocks()
+        logger.info(
+            "Pre-fetch complete",
+            cache_entries=len(executed_blocks_cache),
+        )
+
+        try:
+            for block_number in range(self.from_block, self.to_block + 1):
+                if not self.is_running:
+                    logger.info("BackfillScheduler stopping early", processed_count=processed_count)
+                    break
+
+                try:
+                    with BlockProcessingTimer(mode="backfill"):
+                        self._process_block(block_number, executed_blocks_cache)
+
+                    processed_count += 1
+
+                    # Update metrics
+                    set_current_block("backfill", block_number)
+                    set_backfill_progress(self.from_block, self.to_block, block_number)
+                    increment_blocks_processed("backfill")
+
+                    # Log progress periodically
+                    if processed_count % PROGRESS_LOG_INTERVAL == 0:
+                        progress_pct = (processed_count / total_blocks) * 100
+                        logger.info(
+                            "Backfill progress",
+                            processed=processed_count,
+                            total=total_blocks,
+                            progress_percent=f"{progress_pct:.1f}%",
+                            current_block=block_number,
+                        )
+
+                    # Rate limiting between block submissions
+                    if block_number < self.to_block and self.rate_limit > 0:
+                        time.sleep(self.rate_limit)
+
+                except KeyboardInterrupt:
+                    raise
+                except Exception:
+                    logger.exception(
+                        "Error processing block during backfill",
+                        block_number=block_number,
+                    )
+                    # Continue with next block
+                    time.sleep(self.rate_limit)
+
+        except KeyboardInterrupt:
+            logger.info(
+                "BackfillScheduler interrupted",
+                processed_count=processed_count,
+                last_block=self.from_block + processed_count - 1 if processed_count > 0 else self.from_block,
+            )
+            self.stop()
+
+        logger.info(
+            "BackfillScheduler completed",
+            processed_count=processed_count,
+            total_blocks=total_blocks,
+        )
+
+    def _prefetch_executed_blocks(self) -> dict[tuple[str, str], set[int]]:
+        """Pre-fetch all executed blocks for all registry items in the range."""
+        cache: dict[tuple[str, str], set[int]] = {}
+
+        for registry_item in self.block_processor.registry.get_functions():
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+
+                cache[cache_key] = abd_dal.executed_block_numbers(
+                    registry_item.executable_path,
+                    args_json,
+                    self.from_block,
+                    self.to_block + 1,
+                )
+
+        return cache
+
+    def _process_block(
+        self,
+        block_number: int,
+        executed_blocks_cache: dict[tuple[str, str], set[int]],
+    ) -> None:
+        """Process a single block during backfill."""
+        for registry_item in self.block_processor.registry.get_functions():
+            try:
+                self._process_registry_item_for_backfill(
+                    registry_item,
+                    block_number,
+                    executed_blocks_cache,
+                )
+            except Exception:
+                logger.exception(
+                    "Error processing registry item during backfill",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                )
+
+    def _requires_archive_network(self, block_number: int) -> bool:
+        """Check if a block requires archive network based on age."""
+        if self._current_head_cache is None:
+            return False
+        blocks_behind = self._current_head_cache - block_number
+        return blocks_behind > ARCHIVE_BLOCK_THRESHOLD
+
+    def _process_registry_item_for_backfill(
+        self,
+        registry_item: RegistryItem,
+        block_number: int,
+        executed_blocks_cache: dict[tuple[str, str], set[int]],
+    ) -> None:
+        """Process a registry item for backfill - only submits if not already executed."""
+        for args in registry_item.get_execution_args():
+            args_json = serialize_args(args)
+            cache_key = (registry_item.executable_path, args_json)
+
+            # Check if already executed using pre-fetched cache
+            executed_blocks = executed_blocks_cache.get(cache_key, set())
+
+            if block_number in executed_blocks:
+                continue
+
+            # Check condition and execute
+            try:
+                if registry_item.match_condition(block_number, **args):
+                    use_archive = self._requires_archive_network(block_number)
+                    logger.debug(
+                        "Backfilling block",
+                        function_name=registry_item.function.__name__,
+                        block_number=block_number,
+                        args=args,
+                        use_archive=use_archive,
+                    )
+                    self.block_processor.executor.execute(
+                        registry_item,
+                        block_number,
+                        args,
+                        use_archive=use_archive,
+                    )
+            except Exception:
+                logger.exception(
+                    "Error during backfill task execution",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                    args=args,
+                )
+
+    def stop(self) -> None:
+        """Stop the backfill scheduler."""
+        self.is_running = False
+        logger.info("BackfillScheduler stopped")
+
+
+def backfill_scheduler_factory(
+    from_block: int,
+    to_block: int,
+    network: str = "finney",
+    rate_limit: float = 1.0,
+    dry_run: bool = False,
+) -> BackfillScheduler:
+    """
+    Factory for BackfillScheduler.
+
+    Args:
+        from_block: Starting block number (inclusive).
+        to_block: Ending block number (inclusive).
+        network: Bittensor network name. Defaults to "finney".
+        rate_limit: Seconds to sleep between blocks. Defaults to 1.0.
+        dry_run: If True, preview without executing. Defaults to False.
+
+    Returns:
+        Configured BackfillScheduler instance.
+
+    """
+    return BackfillScheduler(
+        block_processor=block_processor_factory(),
+        network=network,
+        from_block=from_block,
+        to_block=to_block,
+        rate_limit=rate_limit,
+        dry_run=dry_run,
+    )
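For orientation, a sketch of driving the new scheduler directly, based only on the signatures above (the block range is made up; in practice the new `backfill_blocks_v1` management command added in this release presumably wraps this):

    from abstract_block_dumper._internal.services.backfill_scheduler import backfill_scheduler_factory

    # Preview first: with dry_run=True, start() returns DryRunStats instead of executing.
    scheduler = backfill_scheduler_factory(
        from_block=1_000_000,  # illustrative block numbers
        to_block=1_000_500,
        network="finney",
        rate_limit=0.5,
        dry_run=True,
    )
    stats = scheduler.start()
    print(stats.blocks_needing_tasks, stats.estimated_tasks)

    # Then run for real; blocks more than ARCHIVE_BLOCK_THRESHOLD (300) behind the
    # current head are fetched through the "archive" network automatically.
    backfill_scheduler_factory(from_block=1_000_000, to_block=1_000_500).start()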
{abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.8}/src/abstract_block_dumper/_internal/services/block_processor.py
@@ -1,3 +1,5 @@
+import time
+
 import structlog
 from django.db import transaction
 
@@ -5,7 +7,6 @@ import abstract_block_dumper._internal.dal.django_dal as abd_dal
 from abstract_block_dumper._internal.dal.memory_registry import BaseRegistry, RegistryItem, task_registry
 from abstract_block_dumper._internal.exceptions import ConditionEvaluationError
 from abstract_block_dumper._internal.services.executor import CeleryExecutor
-from abstract_block_dumper._internal.services.utils import serialize_args
 from abstract_block_dumper.models import TaskAttempt
 
 logger = structlog.get_logger(__name__)
@@ -18,16 +19,15 @@ class BlockProcessor:
         self._cleanup_phantom_tasks()
 
     def process_block(self, block_number: int) -> None:
+        """Process a single block - executes registered tasks for this block only."""
         for registry_item in self.registry.get_functions():
             try:
-                self.process_backfill(registry_item, block_number)
                 self.process_registry_item(registry_item, block_number)
             except Exception:
-                logger.error(
+                logger.exception(
                     "Error processing registry item",
                     function_name=registry_item.function.__name__,
                     block_number=block_number,
-                    exc_info=True,
                 )
 
     def process_registry_item(self, registry_item: RegistryItem, block_number: int) -> None:
@@ -43,64 +43,28 @@ class BlockProcessor:
                 )
                 # Continue with other tasks
             except Exception:
-                logger.
-
-    def process_backfill(self, registry_item: RegistryItem, current_block: int) -> None:
-        if not registry_item.backfilling_lookback:
-            return None
-
-        start_block = max(0, current_block - registry_item.backfilling_lookback)
-
-        logger.info(
-            "Processing backfill",
-            function_name=registry_item.function.__name__,
-            start_block=start_block,
-            current_block=current_block,
-            lookback=registry_item.backfilling_lookback,
-        )
-
-        execution_args_list = registry_item.get_execution_args()
-
-        for args in execution_args_list:
-            args_json = serialize_args(args)
-
-            executed_blocks = abd_dal.executed_block_numbers(
-                registry_item.executable_path,
-                args_json,
-                start_block,
-                current_block,
-            )
-
-            for block_number in range(start_block, current_block):
-                if block_number in executed_blocks:
-                    continue
+                logger.exception("Unexpected error processing task")
 
-
-                if registry_item.match_condition(block_number, **args):
-                    logger.debug(
-                        "Backfilling block",
-                        function_name=registry_item.function.__name__,
-                        block_number=block_number,
-                        args=args,
-                    )
-                    self.executor.execute(registry_item, block_number, args)
-        except Exception:
-            logger.error(
-                "Error during backfill",
-                function_name=registry_item.function.__name__,
-                block_number=block_number,
-                args=args,
-                exc_info=True,
-            )
-
-    def recover_failed_retries(self) -> None:
+    def recover_failed_retries(self, poll_interval: int, batch_size: int | None = None) -> None:
         """
         Recover failed tasks that are ready to be retried.
 
         This handles tasks that may have been lost due to scheduler restarts.
+
+        Args:
+            poll_interval: Seconds to sleep between processing each retry.
+            batch_size: Maximum number of retries to process. If None, process all.
+
         """
         retry_count = 0
-
+        retry_attempts = abd_dal.get_ready_to_retry_attempts()
+
+        # Apply batch size limit if specified
+        if batch_size is not None:
+            retry_attempts = retry_attempts[:batch_size]
+
+        for retry_attempt in retry_attempts:
+            time.sleep(poll_interval)
             try:
                 # Find the registry item to get celery_kwargs
                 registry_item = self.registry.get_by_executable_path(retry_attempt.executable_path)
@@ -148,10 +112,9 @@
                     attempt_count=task_attempt.attempt_count,
                 )
             except Exception:
-                logger.error(
+                logger.exception(
                     "Failed to recover retry",
                     task_id=retry_attempt.id,
-                    exc_info=True,
                 )
                 # Reload task to see current state after potential execution failure
                 try: