abstract-block-dumper 0.0.7__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/CHANGELOG.md +10 -0
  2. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/PKG-INFO +3 -1
  3. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/pyproject.toml +5 -0
  4. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/dal/django_dal.py +13 -7
  5. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/dal/memory_registry.py +4 -5
  6. abstract_block_dumper-0.0.9/src/abstract_block_dumper/_internal/services/backfill_scheduler.py +446 -0
  7. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/services/block_processor.py +20 -57
  8. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/services/executor.py +18 -3
  9. abstract_block_dumper-0.0.9/src/abstract_block_dumper/_internal/services/metrics.py +237 -0
  10. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/services/scheduler.py +40 -6
  11. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_version.py +2 -2
  12. abstract_block_dumper-0.0.9/src/abstract_block_dumper/management/commands/backfill_blocks_v1.py +162 -0
  13. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/v1/decorators.py +44 -4
  14. abstract_block_dumper-0.0.9/tests/integration/test_backfill_scheduler.py +232 -0
  15. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/test_registered_celery_tasks.py +19 -12
  16. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/test_scheduler.py +1 -1
  17. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/uv.lock +16 -0
  18. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.cruft.json +0 -0
  19. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.github/dependabot.yml +0 -0
  20. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.github/workflows/ci.yml +0 -0
  21. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.github/workflows/publish.yml +0 -0
  22. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.gitignore +0 -0
  23. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.pre-commit-config.yaml +0 -0
  24. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/.shellcheckrc +0 -0
  25. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/README.md +0 -0
  26. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/SECURITY.md +0 -0
  27. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/docs/3rd_party/cookiecutter-rt-pkg/CHANGELOG.md +0 -0
  28. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/.dockerignore +0 -0
  29. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/.gitignore +0 -0
  30. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/Dockerfile +0 -0
  31. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/README.md +0 -0
  32. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/__init__.py +0 -0
  33. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/admin.py +0 -0
  34. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/apps.py +0 -0
  35. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/management/__init__.py +0 -0
  36. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/management/commands/__init__.py +0 -0
  37. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/management/commands/create_admin.py +0 -0
  38. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/migrations/__init__.py +0 -0
  39. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/models.py +0 -0
  40. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/tasks.py +0 -0
  41. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/tests.py +0 -0
  42. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/block_explorer/views.py +0 -0
  43. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/docker-compose.yml +0 -0
  44. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/example_project/__init__.py +0 -0
  45. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/example_project/asgi.py +0 -0
  46. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/example_project/celery.py +0 -0
  47. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/example_project/settings.py +0 -0
  48. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/example_project/urls.py +0 -0
  49. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/example_project/wsgi.py +0 -0
  50. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/main.py +0 -0
  51. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/manage.py +0 -0
  52. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/pyproject.toml +0 -0
  53. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/pytest.ini +0 -0
  54. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/example_project/uv.lock +0 -0
  55. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/noxfile.py +0 -0
  56. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/__init__.py +0 -0
  57. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/__init__.py +0 -0
  58. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/dal/__init__.py +0 -0
  59. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/discovery.py +0 -0
  60. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/exceptions.py +0 -0
  61. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/services/__init__.py +0 -0
  62. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/_internal/services/utils.py +0 -0
  63. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/admin.py +0 -0
  64. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/apps.py +0 -0
  65. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/management/__init__.py +0 -0
  66. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/management/commands/__init__.py +0 -0
  67. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/management/commands/block_tasks_v1.py +0 -0
  68. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/migrations/0001_initial.py +0 -0
  69. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/migrations/__init__.py +0 -0
  70. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/models.py +0 -0
  71. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/py.typed +0 -0
  72. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/v1/__init__.py +0 -0
  73. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/v1/celery.py +0 -0
  74. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/src/abstract_block_dumper/v1/tasks.py +0 -0
  75. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/__init__.py +0 -0
  76. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/conftest.py +0 -0
  77. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/django_fixtures.py +0 -0
  78. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/fatories.py +0 -0
  79. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/__init__.py +0 -0
  80. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/test_block_processor.py +0 -0
  81. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/test_concurrent_processing.py +0 -0
  82. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/test_multi_arguments_tasks.py +0 -0
  83. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/integration/test_task_registration.py +0 -0
  84. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/settings.py +0 -0
  85. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/unit/test_celery_integration.py +0 -0
  86. {abstract_block_dumper-0.0.7 → abstract_block_dumper-0.0.9}/tests/unit/test_decorator.py +0 -0
--- abstract_block_dumper-0.0.7/CHANGELOG.md
+++ abstract_block_dumper-0.0.9/CHANGELOG.md
@@ -9,6 +9,16 @@ upcoming release can be found in [changelog.d](changelog.d).
 
 <!-- towncrier release notes start -->
 
+## [0.0.9](https://github.com/bactensor/abstract-block-dumper/releases/tag/v0.0.9) - 2025-12-04
+
+No significant changes.
+
+
+## [0.0.8](https://github.com/bactensor/abstract-block-dumper/releases/tag/v0.0.8) - 2025-12-04
+
+No significant changes.
+
+
 ## [0.0.7](https://github.com/bactensor/abstract-block-dumper/releases/tag/v0.0.7) - 2025-12-01
 
 No significant changes.
--- abstract_block_dumper-0.0.7/PKG-INFO
+++ abstract_block_dumper-0.0.9/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract-block-dumper
-Version: 0.0.7
+Version: 0.0.9
 Project-URL: Source, https://github.com/bactensor/abstract-block-dumper
 Project-URL: Issue Tracker, https://github.com/bactensor/abstract-block-dumper/issues
 Author-email: Reef Technologies <opensource@reef.pl>
@@ -18,6 +18,8 @@ Requires-Dist: bittensor>=9.10.1
 Requires-Dist: celery>=5.3
 Requires-Dist: django<6.0,>=3.2
 Requires-Dist: structlog>=25.4.0
+Provides-Extra: prometheus
+Requires-Dist: prometheus-client>=0.17.0; extra == 'prometheus'
 Description-Content-Type: text/markdown
 
 # Abstract Block Dumper
--- abstract_block_dumper-0.0.7/pyproject.toml
+++ abstract_block_dumper-0.0.9/pyproject.toml
@@ -27,6 +27,9 @@ dependencies = [
     "structlog>=25.4.0",
 ]
 
+[project.optional-dependencies]
+prometheus = ["prometheus-client>=0.17.0"]
+
 [project.urls]
 "Source" = "https://github.com/bactensor/abstract-block-dumper"
 "Issue Tracker" = "https://github.com/bactensor/abstract-block-dumper/issues"
@@ -160,6 +163,8 @@ module = [
     "cloudpickle",
     "cloudpickle.*",
     "nox",
+    "prometheus_client",
+    "prometheus_client.*",
     "pytest",
     "tests.*",
 ]
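The new optional extra keeps `prometheus-client` out of the default install: metrics only activate when the package is installed as `abstract-block-dumper[prometheus]`, which is also why the mypy overrides above ignore the module. A minimal sketch of how such an optional dependency is typically consumed (the `PROMETHEUS_AVAILABLE` flag here is illustrative, not taken from the package):

```python
# Hypothetical guard for an optional extra: import lazily and degrade
# gracefully when the extra was not installed.
try:
    import prometheus_client  # only present after: pip install 'abstract-block-dumper[prometheus]'

    PROMETHEUS_AVAILABLE = True
except ImportError:
    prometheus_client = None
    PROMETHEUS_AVAILABLE = False
```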
--- abstract_block_dumper-0.0.7/src/abstract_block_dumper/_internal/dal/django_dal.py
+++ abstract_block_dumper-0.0.9/src/abstract_block_dumper/_internal/dal/django_dal.py
@@ -21,13 +21,19 @@ def get_ready_to_retry_attempts() -> QuerySet[abd_models.TaskAttempt]:
 
 
 def executed_block_numbers(executable_path: str, args_json: str, from_block: int, to_block: int) -> set[int]:
-    block_numbers = abd_models.TaskAttempt.objects.filter(
-        executable_path=executable_path,
-        args_json=args_json,
-        block_number__gte=from_block,
-        block_number__lt=to_block,
-        status=abd_models.TaskAttempt.Status.SUCCESS,
-    ).values_list("block_number", flat=True)
+    # Use iterator() to avoid Django's QuerySet caching which causes memory leaks
+    # during long-running backfill operations
+    block_numbers = (
+        abd_models.TaskAttempt.objects.filter(
+            executable_path=executable_path,
+            args_json=args_json,
+            block_number__gte=from_block,
+            block_number__lt=to_block,
+            status=abd_models.TaskAttempt.Status.SUCCESS,
+        )
+        .values_list("block_number", flat=True)
+        .iterator()
+    )
     return set(block_numbers)
 
 
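Without `.iterator()`, evaluating the QuerySet also fills its internal result cache, so a long-running backfill briefly holds every row twice: once in the cache and once in the `set` being built. `.iterator()` fetches in chunks and skips that cache. A minimal sketch of the difference, assuming the same `TaskAttempt` model:

```python
# Sketch only: both build the same set, but memory behavior differs.
qs = TaskAttempt.objects.values_list("block_number", flat=True)

blocks_cached = set(qs)               # evaluation also populates qs._result_cache
blocks_streamed = set(qs.iterator())  # chunked fetch; no result cache retained
```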
--- abstract_block_dumper-0.0.7/src/abstract_block_dumper/_internal/dal/memory_registry.py
+++ abstract_block_dumper-0.0.9/src/abstract_block_dumper/_internal/dal/memory_registry.py
@@ -23,15 +23,14 @@ class RegistryItem:
         """Check if condition matches for given block and arguments."""
         try:
             return self.condition(block_number, **kwargs)
-        except Exception as e:
-            logger.error(
+        except Exception as exc:
+            logger.exception(
                 "Condition evaluation failed",
                 condition=self.function.__name__,
                 block_number=block_number,
-                exc_info=True,
             )
-            msg = f"Failed to evaluate condition: {e}"
-            raise ConditionEvaluationError(msg) from e
+            msg = "Failed to evaluate condition"
+            raise ConditionEvaluationError(msg) from exc
 
     def get_execution_args(self) -> list[dict[str, Any]]:
         """Get list of argument sets for execution."""
--- /dev/null
+++ abstract_block_dumper-0.0.9/src/abstract_block_dumper/_internal/services/backfill_scheduler.py
@@ -0,0 +1,446 @@
+"""
+Backfill scheduler for historical block processing.
+
+This module provides a dedicated scheduler for backfilling historical blocks
+with rate limiting and automatic archive network switching.
+"""
+
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import structlog
+
+import abstract_block_dumper._internal.dal.django_dal as abd_dal
+import abstract_block_dumper._internal.services.utils as abd_utils
+from abstract_block_dumper._internal.services.block_processor import BlockProcessor, block_processor_factory
+from abstract_block_dumper._internal.services.metrics import (
+    BlockProcessingTimer,
+    increment_archive_network_usage,
+    increment_blocks_processed,
+    set_backfill_progress,
+    set_block_lag,
+    set_current_block,
+)
+from abstract_block_dumper._internal.services.utils import serialize_args
+
+if TYPE_CHECKING:
+    import bittensor as bt
+
+    from abstract_block_dumper._internal.dal.memory_registry import RegistryItem
+
+logger = structlog.get_logger(__name__)
+
+# Blocks older than this threshold from current head require archive network
+ARCHIVE_BLOCK_THRESHOLD = 300
+
+# Progress logging interval
+PROGRESS_LOG_INTERVAL = 100
+ARCHIVE_NETWORK = "archive"
+
+# Memory cleanup interval (every N blocks)
+MEMORY_CLEANUP_INTERVAL = 1000
+
+
+@dataclass
+class DryRunStats:
+    """Statistics for dry-run mode."""
+
+    total_blocks: int = 0
+    already_processed: int = 0
+    blocks_needing_tasks: int = 0
+    estimated_tasks: int = 0
+
+
+class BackfillScheduler:
+    """Scheduler for backfilling historical blocks with rate limiting."""
+
+    def __init__(
+        self,
+        block_processor: BlockProcessor,
+        network: str,
+        from_block: int,
+        to_block: int,
+        rate_limit: float = 1.0,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Initialize the backfill scheduler.
+
+        Args:
+            block_processor: The block processor to use for task execution.
+            network: The bittensor network name (e.g., 'finney').
+            from_block: Starting block number (inclusive).
+            to_block: Ending block number (inclusive).
+            rate_limit: Seconds to sleep between processing each block.
+            dry_run: If True, preview what would be processed without executing.
+
+        """
+        self.block_processor = block_processor
+        self.network = network
+        self.from_block = from_block
+        self.to_block = to_block
+        self.rate_limit = rate_limit
+        self.dry_run = dry_run
+        self.is_running = False
+        self._subtensor: bt.Subtensor | None = None
+        self._archive_subtensor: bt.Subtensor | None = None
+        self._current_head_cache: int | None = None
+
+    @property
+    def subtensor(self) -> bt.Subtensor:
+        """Get the regular subtensor connection, creating it if needed."""
+        if self._subtensor is None:
+            self._subtensor = abd_utils.get_bittensor_client(self.network)
+        return self._subtensor
+
+    @property
+    def archive_subtensor(self) -> bt.Subtensor:
+        """Get the archive subtensor connection, creating it if needed."""
+        if self._archive_subtensor is None:
+            self._archive_subtensor = abd_utils.get_bittensor_client("archive")
+        return self._archive_subtensor
+
+    def get_subtensor_for_block(self, block_number: int) -> bt.Subtensor:
+        """
+        Get the appropriate subtensor for the given block number.
+
+        Uses archive network for blocks older than ARCHIVE_BLOCK_THRESHOLD
+        from the current head.
+        """
+        if self._current_head_cache is None:
+            self._current_head_cache = self.subtensor.get_current_block()
+
+        blocks_behind = self._current_head_cache - block_number
+
+        if blocks_behind > ARCHIVE_BLOCK_THRESHOLD:
+            logger.debug(
+                "Using archive network for old block",
+                block_number=block_number,
+                blocks_behind=blocks_behind,
+            )
+            return self.archive_subtensor
+        return self.subtensor
+
+    def _get_network_type_for_block(self, block_number: int) -> str:
+        """Get the network type string for a block (for display purposes)."""
+        if self._current_head_cache is None:
+            self._current_head_cache = self.subtensor.get_current_block()
+
+        blocks_behind = self._current_head_cache - block_number
+        return ARCHIVE_NETWORK if blocks_behind > ARCHIVE_BLOCK_THRESHOLD else self.network
+
+    def start(self) -> DryRunStats | None:
+        """
+        Start processing blocks from from_block to to_block.
+
+        Returns:
+            DryRunStats if dry_run is True, None otherwise.
+
+        """
+        self.is_running = True
+
+        # Refresh current head for accurate archive network decisions
+        self._current_head_cache = self.subtensor.get_current_block()
+
+        total_blocks = self.to_block - self.from_block + 1
+        network_type = self._get_network_type_for_block(self.from_block)
+
+        logger.info(
+            "BackfillScheduler starting",
+            from_block=self.from_block,
+            to_block=self.to_block,
+            total_blocks=total_blocks,
+            rate_limit=self.rate_limit,
+            dry_run=self.dry_run,
+            network_type=network_type,
+            current_head=self._current_head_cache,
+        )
+
+        if self.dry_run:
+            return self._run_dry_run()
+
+        self._run_backfill()
+        return None
+
+    def _run_dry_run(self) -> DryRunStats:
+        """
+        Run in dry-run mode to preview what would be processed.
+
+        Optimized to fetch all executed blocks in one query per registry item,
+        instead of querying for each block individually.
+        """
+        stats = DryRunStats(total_blocks=self.to_block - self.from_block + 1)
+
+        registry_items = self.block_processor.registry.get_functions()
+
+        # Pre-fetch all executed blocks for each registry item + args combination
+        # This reduces N queries (one per block) to M queries (one per registry item + args)
+        executed_blocks_cache: dict[tuple[str, str], set[int]] = {}
+
+        logger.info(
+            "Dry run: pre-fetching executed blocks",
+            from_block=self.from_block,
+            to_block=self.to_block,
+            registry_items_count=len(registry_items),
+        )
+
+        for registry_item in registry_items:
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+
+                # Fetch all executed blocks in the range with a single query
+                executed_blocks_cache[cache_key] = abd_dal.executed_block_numbers(
+                    registry_item.executable_path,
+                    args_json,
+                    self.from_block,
+                    self.to_block + 1,
+                )
+
+        logger.info(
+            "Dry run: analyzing blocks",
+            cache_entries=len(executed_blocks_cache),
+        )
+
+        # Track which blocks have at least one task
+        blocks_with_tasks: set[int] = set()
+
+        for registry_item in registry_items:
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+                executed_blocks = executed_blocks_cache[cache_key]
+
+                for block_number in range(self.from_block, self.to_block + 1):
+                    if not self.is_running:
+                        break
+
+                    if block_number in executed_blocks:
+                        continue
+
+                    # Check if condition matches
+                    try:
+                        if registry_item.match_condition(block_number, **args):
+                            stats.estimated_tasks += 1
+                            blocks_with_tasks.add(block_number)
+                    except Exception as exc:
+                        logger.debug(
+                            "Error evaluating match condition during dry run",
+                            function_name=registry_item.function.__name__,
+                            block_number=block_number,
+                            args=args,
+                            error=str(exc),
+                        )
+
+        stats.blocks_needing_tasks = len(blocks_with_tasks)
+        stats.already_processed = stats.total_blocks - stats.blocks_needing_tasks
+
+        return stats
+
+    def _run_backfill(self) -> None:
+        """Run the actual backfill process."""
+        processed_count = 0
+        total_blocks = self.to_block - self.from_block + 1
+
+        # Set initial metrics
+        set_backfill_progress(self.from_block, self.to_block, self.from_block)
+
+        # Pre-fetch all executed blocks to avoid per-block DB queries
+        logger.info(
+            "Pre-fetching executed blocks",
+            from_block=self.from_block,
+            to_block=self.to_block,
+        )
+        executed_blocks_cache = self._prefetch_executed_blocks()
+        logger.info(
+            "Pre-fetch complete",
+            cache_entries=len(executed_blocks_cache),
+        )
+
+        try:
+            for block_number in range(self.from_block, self.to_block + 1):
+                if not self.is_running:
+                    logger.info("BackfillScheduler stopping early", processed_count=processed_count)
+                    break
+
+                try:
+                    with BlockProcessingTimer(mode="backfill"):
+                        self._process_block(block_number, executed_blocks_cache)
+
+                    processed_count += 1
+
+                    # Update metrics
+                    set_current_block("backfill", block_number)
+                    set_backfill_progress(self.from_block, self.to_block, block_number)
+                    increment_blocks_processed("backfill")
+
+                    # Track block lag (distance from chain head)
+                    if self._current_head_cache:
+                        set_block_lag("backfill", self._current_head_cache - block_number)
+
+                    # Log progress periodically
+                    if processed_count % PROGRESS_LOG_INTERVAL == 0:
+                        progress_pct = (processed_count / total_blocks) * 100
+                        logger.info(
+                            "Backfill progress",
+                            processed=processed_count,
+                            total=total_blocks,
+                            progress_percent=f"{progress_pct:.1f}%",
+                            current_block=block_number,
+                        )
+
+                    # Rate limiting between block submissions
+                    if block_number < self.to_block and self.rate_limit > 0:
+                        time.sleep(self.rate_limit)
+
+                except KeyboardInterrupt:
+                    raise
+                except Exception:
+                    logger.exception(
+                        "Error processing block during backfill",
+                        block_number=block_number,
+                    )
+                    # Continue with next block
+                    time.sleep(self.rate_limit)
+
+        except KeyboardInterrupt:
+            logger.info(
+                "BackfillScheduler interrupted",
+                processed_count=processed_count,
+                last_block=self.from_block + processed_count - 1 if processed_count > 0 else self.from_block,
+            )
+            self.stop()
+
+        logger.info(
+            "BackfillScheduler completed",
+            processed_count=processed_count,
+            total_blocks=total_blocks,
+        )
+
+    def _prefetch_executed_blocks(self) -> dict[tuple[str, str], set[int]]:
+        """Pre-fetch all executed blocks for all registry items in the range."""
+        cache: dict[tuple[str, str], set[int]] = {}
+
+        for registry_item in self.block_processor.registry.get_functions():
+            for args in registry_item.get_execution_args():
+                args_json = serialize_args(args)
+                cache_key = (registry_item.executable_path, args_json)
+
+                cache[cache_key] = abd_dal.executed_block_numbers(
+                    registry_item.executable_path,
+                    args_json,
+                    self.from_block,
+                    self.to_block + 1,
+                )
+
+        return cache
+
+    def _process_block(
+        self,
+        block_number: int,
+        executed_blocks_cache: dict[tuple[str, str], set[int]],
+    ) -> None:
+        """Process a single block during backfill."""
+        for registry_item in self.block_processor.registry.get_functions():
+            try:
+                self._process_registry_item_for_backfill(
+                    registry_item,
+                    block_number,
+                    executed_blocks_cache,
+                )
+            except Exception:
+                logger.exception(
+                    "Error processing registry item during backfill",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                )
+
+    def _requires_archive_network(self, block_number: int) -> bool:
+        """Check if a block requires archive network based on age."""
+        if self._current_head_cache is None:
+            return False
+        blocks_behind = self._current_head_cache - block_number
+        return blocks_behind > ARCHIVE_BLOCK_THRESHOLD
+
+    def _process_registry_item_for_backfill(
+        self,
+        registry_item: RegistryItem,
+        block_number: int,
+        executed_blocks_cache: dict[tuple[str, str], set[int]],
+    ) -> None:
+        """Process a registry item for backfill - only submits if not already executed."""
+        for args in registry_item.get_execution_args():
+            args_json = serialize_args(args)
+            cache_key = (registry_item.executable_path, args_json)
+
+            # Check if already executed using pre-fetched cache
+            executed_blocks = executed_blocks_cache.get(cache_key, set())
+
+            if block_number in executed_blocks:
+                continue
+
+            # Check condition and execute
+            try:
+                if registry_item.match_condition(block_number, **args):
+                    use_archive = self._requires_archive_network(block_number)
+                    if use_archive:
+                        increment_archive_network_usage()
+                    logger.debug(
+                        "Backfilling block",
+                        function_name=registry_item.function.__name__,
+                        block_number=block_number,
+                        args=args,
+                        use_archive=use_archive,
+                    )
+                    self.block_processor.executor.execute(
+                        registry_item,
+                        block_number,
+                        args,
+                        use_archive=use_archive,
+                    )
+            except Exception:
+                logger.exception(
+                    "Error during backfill task execution",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                    args=args,
+                )
+
+    def stop(self) -> None:
+        """Stop the backfill scheduler."""
+        self.is_running = False
+        logger.info("BackfillScheduler stopped")
+
+
+def backfill_scheduler_factory(
+    from_block: int,
+    to_block: int,
+    network: str = "finney",
+    rate_limit: float = 1.0,
+    dry_run: bool = False,
+) -> BackfillScheduler:
+    """
+    Factory for BackfillScheduler.
+
+    Args:
+        from_block: Starting block number (inclusive).
+        to_block: Ending block number (inclusive).
+        network: Bittensor network name. Defaults to "finney".
+        rate_limit: Seconds to sleep between blocks. Defaults to 1.0.
+        dry_run: If True, preview without executing. Defaults to False.
+
+    Returns:
+        Configured BackfillScheduler instance.
+
+    """
+    return BackfillScheduler(
+        block_processor=block_processor_factory(),
+        network=network,
+        from_block=from_block,
+        to_block=to_block,
+        rate_limit=rate_limit,
+        dry_run=dry_run,
+    )
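The module walks `[from_block, to_block]` once, pre-fetches already-executed blocks to avoid per-block queries, throttles submissions with `rate_limit`, and switches to the archive network for blocks more than `ARCHIVE_BLOCK_THRESHOLD` (300) behind the head. A minimal usage sketch built on the factory above (block numbers are illustrative; Django settings and task registration are assumed to be configured, and the new `backfill_blocks_v1` management command wraps this same flow):

```python
from abstract_block_dumper._internal.services.backfill_scheduler import backfill_scheduler_factory

scheduler = backfill_scheduler_factory(
    from_block=4_000_000,  # illustrative range
    to_block=4_000_500,
    network="finney",
    rate_limit=0.5,        # seconds between block submissions
    dry_run=True,          # preview: start() returns DryRunStats instead of executing
)

stats = scheduler.start()
if stats is not None:
    print(f"{stats.estimated_tasks} tasks pending across {stats.blocks_needing_tasks} blocks")
```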
--- abstract_block_dumper-0.0.7/src/abstract_block_dumper/_internal/services/block_processor.py
+++ abstract_block_dumper-0.0.9/src/abstract_block_dumper/_internal/services/block_processor.py
@@ -1,3 +1,5 @@
+import time
+
 import structlog
 from django.db import transaction
 
@@ -5,7 +7,6 @@ import abstract_block_dumper._internal.dal.django_dal as abd_dal
 from abstract_block_dumper._internal.dal.memory_registry import BaseRegistry, RegistryItem, task_registry
 from abstract_block_dumper._internal.exceptions import ConditionEvaluationError
 from abstract_block_dumper._internal.services.executor import CeleryExecutor
-from abstract_block_dumper._internal.services.utils import serialize_args
 from abstract_block_dumper.models import TaskAttempt
 
 logger = structlog.get_logger(__name__)
@@ -18,16 +19,15 @@ class BlockProcessor:
         self._cleanup_phantom_tasks()
 
     def process_block(self, block_number: int) -> None:
+        """Process a single block - executes registered tasks for this block only."""
         for registry_item in self.registry.get_functions():
             try:
-                self.process_backfill(registry_item, block_number)
                 self.process_registry_item(registry_item, block_number)
             except Exception:
-                logger.error(
+                logger.exception(
                     "Error processing registry item",
                     function_name=registry_item.function.__name__,
                     block_number=block_number,
-                    exc_info=True,
                 )
 
     def process_registry_item(self, registry_item: RegistryItem, block_number: int) -> None:
@@ -43,64 +43,28 @@ class BlockProcessor:
             )
             # Continue with other tasks
         except Exception:
-            logger.error("Unexpected error processing task", exc_info=True)
-
-    def process_backfill(self, registry_item: RegistryItem, current_block: int) -> None:
-        if not registry_item.backfilling_lookback:
-            return None
-
-        start_block = max(0, current_block - registry_item.backfilling_lookback)
-
-        logger.info(
-            "Processing backfill",
-            function_name=registry_item.function.__name__,
-            start_block=start_block,
-            current_block=current_block,
-            lookback=registry_item.backfilling_lookback,
-        )
-
-        execution_args_list = registry_item.get_execution_args()
-
-        for args in execution_args_list:
-            args_json = serialize_args(args)
-
-            executed_blocks = abd_dal.executed_block_numbers(
-                registry_item.executable_path,
-                args_json,
-                start_block,
-                current_block,
-            )
-
-            for block_number in range(start_block, current_block):
-                if block_number in executed_blocks:
-                    continue
+            logger.exception("Unexpected error processing task")
 
-                try:
-                    if registry_item.match_condition(block_number, **args):
-                        logger.debug(
-                            "Backfilling block",
-                            function_name=registry_item.function.__name__,
-                            block_number=block_number,
-                            args=args,
-                        )
-                        self.executor.execute(registry_item, block_number, args)
-                except Exception:
-                    logger.error(
-                        "Error during backfill",
-                        function_name=registry_item.function.__name__,
-                        block_number=block_number,
-                        args=args,
-                        exc_info=True,
-                    )
-
-    def recover_failed_retries(self) -> None:
+    def recover_failed_retries(self, poll_interval: int, batch_size: int | None = None) -> None:
         """
         Recover failed tasks that are ready to be retried.
 
         This handles tasks that may have been lost due to scheduler restarts.
+
+        Args:
+            poll_interval: Seconds to sleep between processing each retry.
+            batch_size: Maximum number of retries to process. If None, process all.
+
         """
         retry_count = 0
-        for retry_attempt in abd_dal.get_ready_to_retry_attempts():
+        retry_attempts = abd_dal.get_ready_to_retry_attempts()
+
+        # Apply batch size limit if specified
+        if batch_size is not None:
+            retry_attempts = retry_attempts[:batch_size]
+
+        for retry_attempt in retry_attempts:
+            time.sleep(poll_interval)
             try:
                 # Find the registry item to get celery_kwargs
                 registry_item = self.registry.get_by_executable_path(retry_attempt.executable_path)
@@ -148,10 +112,9 @@
                     attempt_count=task_attempt.attempt_count,
                 )
             except Exception:
-                logger.error(
+                logger.exception(
                     "Failed to recover retry",
                     task_id=retry_attempt.id,
-                    exc_info=True,
                 )
                 # Reload task to see current state after potential execution failure
                 try:
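The reworked `recover_failed_retries` paces and bounds recovery: every retry submission is preceded by a `poll_interval` sleep, so a pass over N retries spends at least N × poll_interval seconds sleeping, and `batch_size` caps how many queued retries one pass touches. An illustrative call using the module's own factory:

```python
from abstract_block_dumper._internal.services.block_processor import block_processor_factory

processor = block_processor_factory()
# At most 50 queued retries, paced one per 2 seconds (>= ~100 s of sleep total).
processor.recover_failed_retries(poll_interval=2, batch_size=50)
```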