abstract-block-dumper 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
+# Generated by Django 5.2.6 on 2025-10-06 21:32
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    initial = True
+
+    dependencies = []
+
+    operations = [
+        migrations.CreateModel(
+            name="TaskAttempt",
+            fields=[
+                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                ("block_number", models.PositiveIntegerField(db_index=True)),
+                ("executable_path", models.CharField(max_length=255)),
+                ("args_json", models.TextField(default="{}")),
+                (
+                    "status",
+                    models.CharField(
+                        choices=[
+                            ("pending", "Pending"),
+                            ("running", "Running"),
+                            ("success", "Success"),
+                            ("failed", "Failed"),
+                        ],
+                        default="pending",
+                        max_length=20,
+                    ),
+                ),
+                ("celery_task_id", models.CharField(blank=True, max_length=50, null=True)),
+                ("execution_result", models.JSONField(null=True)),
+                ("last_attempted_at", models.DateTimeField(blank=True, null=True)),
+                ("attempt_count", models.PositiveIntegerField(default=0)),
+                ("next_retry_at", models.DateTimeField(blank=True, null=True)),
+                ("created_at", models.DateTimeField(auto_now_add=True)),
+                ("updated_at", models.DateTimeField(auto_now=True)),
+            ],
+            options={
+                "verbose_name": "Task Attempt",
+                "verbose_name_plural": "Task Attempts",
+                "indexes": [
+                    models.Index(fields=["status", "next_retry_at"], name="abstract_bl_status_c3786f_idx"),
+                    models.Index(fields=["block_number", "executable_path"], name="abstract_bl_block_n_97b116_idx"),
+                ],
+                "constraints": [
+                    models.UniqueConstraint(
+                        fields=("block_number", "executable_path", "args_json"), name="unique_task_attempt"
+                    )
+                ],
+            },
+        ),
+    ]
File without changes
@@ -0,0 +1,59 @@
+import json
+from typing import Any
+
+from django.db import models
+
+import abstract_block_dumper.services.utils as abd_utils
+
+
+class TaskAttempt(models.Model):
+    class Status(models.TextChoices):
+        PENDING = "pending", "Pending"
+        RUNNING = "running", "Running"
+        SUCCESS = "success", "Success"
+        FAILED = "failed", "Failed"
+
+    # Execution
+    block_number = models.PositiveIntegerField(db_index=True)
+    executable_path = models.CharField(max_length=255)
+    args_json = models.TextField(default="{}")
+
+    # Execution state
+    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
+    celery_task_id = models.CharField(max_length=50, blank=True, null=True)
+    execution_result = models.JSONField(null=True)
+
+    # Retry management
+    last_attempted_at = models.DateTimeField(null=True, blank=True)
+    attempt_count = models.PositiveIntegerField(default=0)
+    next_retry_at = models.DateTimeField(null=True, blank=True)
+
+    created_at = models.DateTimeField(auto_now_add=True)
+    updated_at = models.DateTimeField(auto_now=True)
+
+    class Meta:
+        verbose_name = "Task Attempt"
+        verbose_name_plural = "Task Attempts"
+        indexes = [
+            models.Index(fields=["status", "next_retry_at"]),
+            models.Index(fields=["block_number", "executable_path"]),
+        ]
+        constraints = [
+            models.UniqueConstraint(
+                fields=["block_number", "executable_path", "args_json"], name="unique_task_attempt"
+            ),
+        ]
+
+    def __str__(self) -> str:
+        return f"TaskAttempt(block={self.block_number}, path={self.executable_path}, status={self.status})"
+
+    @property
+    def args_dict(self) -> dict[str, Any]:
+        try:
+            return json.loads(self.args_json)
+        except (json.JSONDecodeError, TypeError):
+            return {}
+
+    @args_dict.setter
+    def args_dict(self, value: dict[str, Any]) -> None:
+        self.args_json = abd_utils.serialize_args(value)
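The args_dict setter serializes through serialize_args (defined in services/utils.py below), which sorts keys so that equal argument dicts always produce the same args_json string, the value that also participates in the unique_task_attempt constraint. A minimal round-trip sketch, assuming a configured Django project; the executable path and argument names are illustrative only:

    from abstract_block_dumper.models import TaskAttempt

    attempt = TaskAttempt(block_number=100, executable_path="myapp.tasks.dump_block")  # illustrative path
    attempt.args_dict = {"netuid": 1, "full": True}           # setter serializes with sorted keys
    assert attempt.args_json == '{"full": true, "netuid": 1}'
    assert attempt.args_dict == {"full": True, "netuid": 1}   # getter parses back, or returns {} on bad JSON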
File without changes
File without changes
@@ -0,0 +1,190 @@
+import structlog
+from django.db import transaction
+
+import abstract_block_dumper.dal.django_dal as abd_dal
+from abstract_block_dumper.dal.memory_registry import BaseRegistry, RegistryItem, task_registry
+from abstract_block_dumper.exceptions import ConditionEvaluationError
+from abstract_block_dumper.models import TaskAttempt
+from abstract_block_dumper.services.executor import CeleryExecutor
+from abstract_block_dumper.services.utils import serialize_args
+
+logger = structlog.get_logger(__name__)
+
+
+class BlockProcessor:
+    def __init__(self, executor: CeleryExecutor, registry: BaseRegistry) -> None:
+        self.executor = executor
+        self.registry = registry
+        self._cleanup_phantom_tasks()
+
+    def process_block(self, block_number: int) -> None:
+        for registry_item in self.registry.get_functions():
+            try:
+                self.process_backfill(registry_item, block_number)
+                self.process_registry_item(registry_item, block_number)
+            except Exception:
+                logger.error(
+                    "Error processing registry item",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                    exc_info=True,
+                )
+
+    def process_registry_item(self, registry_item: RegistryItem, block_number: int) -> None:
+        for args in registry_item.get_execution_args():
+            try:
+                if registry_item.match_condition(block_number, **args):
+                    self.executor.execute(registry_item, block_number, args)
+            except ConditionEvaluationError as e:
+                logger.warning(
+                    "Condition evaluation failed, skipping task",
+                    function_name=registry_item.function.__name__,
+                    error=str(e),
+                )
+                # Continue with other tasks
+            except Exception:
+                logger.error("Unexpected error processing task", exc_info=True)
+
+    def process_backfill(self, registry_item: RegistryItem, current_block: int) -> None:
+        if not registry_item.backfilling_lookback:
+            return None
+
+        start_block = max(0, current_block - registry_item.backfilling_lookback)
+
+        logger.info(
+            "Processing backfill",
+            function_name=registry_item.function.__name__,
+            start_block=start_block,
+            current_block=current_block,
+            lookback=registry_item.backfilling_lookback,
+        )
+
+        execution_args_list = registry_item.get_execution_args()
+
+        for args in execution_args_list:
+            args_json = serialize_args(args)
+
+            executed_blocks = abd_dal.executed_block_numbers(
+                registry_item.executable_path,
+                args_json,
+                start_block,
+                current_block,
+            )
+
+            for block_number in range(start_block, current_block):
+                if block_number in executed_blocks:
+                    continue
+
+                try:
+                    if registry_item.match_condition(block_number, **args):
+                        logger.debug(
+                            "Backfilling block",
+                            function_name=registry_item.function.__name__,
+                            block_number=block_number,
+                            args=args,
+                        )
+                        self.executor.execute(registry_item, block_number, args)
+                except Exception:
+                    logger.error(
+                        "Error during backfill",
+                        function_name=registry_item.function.__name__,
+                        block_number=block_number,
+                        args=args,
+                        exc_info=True,
+                    )
+
+    def recover_failed_retries(self) -> None:
+        """
+        Recover failed tasks that are ready to be retried.
+
+        This handles tasks that may have been lost due to scheduler restarts.
+        """
+        retry_count = 0
+        for task_attempt in abd_dal.get_ready_to_retry_attempts():
+            try:
+                # Find the registry item to get celery_kwargs
+                registry_item = self.registry.get_by_executable_path(task_attempt.executable_path)
+                if not registry_item:
+                    logger.warning(
+                        "Registry item not found for failed task, skipping retry recovery",
+                        task_id=task_attempt.id,
+                        executable_path=task_attempt.executable_path,
+                    )
+                    continue
+
+                # Use atomic transaction to prevent race conditions
+                with transaction.atomic():
+                    # Re-fetch with select_for_update to prevent concurrent modifications
+                    task_attempt = TaskAttempt.objects.select_for_update(nowait=True).get(id=task_attempt.id)
+
+                    # Verify task is still in FAILED state and ready for retry
+                    if task_attempt.status == TaskAttempt.Status.SUCCESS:
+                        logger.info(
+                            "Task was already recovered",
+                            task_id=task_attempt.id,
+                            current_status=task_attempt.status,
+                        )
+                        continue
+
+                    if not abd_dal.task_can_retry(task_attempt):
+                        logger.info(
+                            "Task cannot be retried, skipping recovery",
+                            task_id=task_attempt.id,
+                            attempt_count=task_attempt.attempt_count,
+                        )
+                        continue
+
+                    # Reset to PENDING and clear celery_task_id
+                    abd_dal.reset_to_pending(task_attempt)
+
+                # Execute outside of transaction to avoid holding locks too long
+                self.executor.execute(registry_item, task_attempt.block_number, task_attempt.args_dict)
+                retry_count += 1
+
+                logger.info(
+                    "Recovered orphaned retry",
+                    task_id=task_attempt.id,
+                    block_number=task_attempt.block_number,
+                    attempt_count=task_attempt.attempt_count,
+                )
+            except Exception:
+                logger.error(
+                    "Failed to recover retry",
+                    task_id=task_attempt.id,
+                    exc_info=True,
+                )
+                # Reload task to see current state after potential execution failure
+                try:
+                    task_attempt.refresh_from_db()
+                    # If task is still PENDING after error, revert to FAILED
+                    # (execution may have failed before celery task could mark it)
+                    if task_attempt.status == TaskAttempt.Status.PENDING:
+                        abd_dal.revert_to_failed(task_attempt)
+                except TaskAttempt.DoesNotExist:
+                    # Task was deleted during recovery, nothing to revert
+                    pass
+
+        if retry_count > 0:
+            logger.info("Retry recovery completed", recovered_count=retry_count)
+
+    def _cleanup_phantom_tasks(self) -> None:
+        """
+        Clean up tasks marked as SUCCESS but never actually started.
+        Only removes tasks that were created recently (within last hour) to avoid
+        deleting legitimate tasks marked as success by external processes.
+        """
+        recent_phantom_tasks = abd_dal.get_recent_phantom_tasks()
+        count = recent_phantom_tasks.count()
+        if count > 0:
+            recent_phantom_tasks.delete()
+            logger.info("Cleaned up recent phantom tasks on initialization", count=count)
+
+
+def block_processor_factory(
+    executor: CeleryExecutor | None = None,
+    registry: BaseRegistry | None = None,
+) -> BlockProcessor:
+    return BlockProcessor(
+        executor=executor or CeleryExecutor(),
+        registry=registry or task_registry,
+    )
@@ -0,0 +1,54 @@
+from typing import Any
+
+import structlog
+
+import abstract_block_dumper.dal.django_dal as abd_dal
+from abstract_block_dumper.dal.memory_registry import RegistryItem
+from abstract_block_dumper.models import TaskAttempt
+
+logger = structlog.get_logger(__name__)
+
+
+class CeleryExecutor:
+    def execute(self, registry_item: RegistryItem, block_number: int, args: dict[str, Any]) -> None:
+        task_attempt, created = abd_dal.task_create_or_get_pending(
+            block_number=block_number,
+            executable_path=registry_item.executable_path,
+            args=args,
+        )
+        if not created and task_attempt.status != TaskAttempt.Status.PENDING:
+            logger.debug(
+                "Task already exists",
+                task_id=task_attempt.id,
+                status=task_attempt.status,
+            )
+            return
+
+        task_kwargs = {
+            "block_number": block_number,
+            **args,
+        }
+
+        apply_async_kwargs: dict[str, Any] = {"kwargs": task_kwargs}
+
+        if task_attempt.next_retry_at:
+            apply_async_kwargs["eta"] = task_attempt.next_retry_at
+
+        celery_options = {
+            k: v for k, v in (registry_item.celery_kwargs or {}).items() if k not in ("kwargs", "eta", "args")
+        }
+
+        apply_async_kwargs.update(celery_options)
+
+        logger.info(
+            "Scheduling Celery task",
+            task_id=task_attempt.id,
+            block_number=task_attempt.block_number,
+            executable_path=task_attempt.executable_path,
+            args=args,
+            celery_kwargs=apply_async_kwargs,
+        )
+
+        celery_task = registry_item.function.apply_async(**apply_async_kwargs)
+
+        logger.debug("Celery task scheduled", task_id=task_attempt.id, celery_task_id=celery_task.id)
@@ -0,0 +1,92 @@
+import time
+
+import bittensor as bt
+import structlog
+from django.conf import settings
+
+import abstract_block_dumper.dal.django_dal as abd_dal
+import abstract_block_dumper.services.utils as abd_utils
+from abstract_block_dumper.services.block_processor import BlockProcessor, block_processor_factory
+
+logger = structlog.get_logger(__name__)
+
+
+class TaskScheduler:
+    def __init__(
+        self,
+        block_processor: BlockProcessor,
+        subtensor: bt.Subtensor,
+        poll_interval: int,
+    ) -> None:
+        self.block_processor = block_processor
+        self.subtensor = subtensor
+        self.poll_interval = poll_interval
+        self.last_processed_block = -1
+        self.is_running = False
+
+    def start(self) -> None:
+        self.is_running = True
+
+        self.initialize_last_block()
+
+        logger.info(
+            "TaskScheduler started",
+            last_processed_block=self.last_processed_block,
+            registry_functions=len(self.block_processor.registry.get_functions()),
+        )
+
+        while self.is_running:
+            try:
+                # Process lost retries first
+                self.block_processor.recover_failed_retries()
+
+                current_block = self.subtensor.get_current_block()
+
+                for block_number in range(self.last_processed_block + 1, current_block + 1):
+                    self.block_processor.process_block(block_number)
+                    self.last_processed_block = block_number
+
+                time.sleep(self.poll_interval)
+            except KeyboardInterrupt:
+                logger.info("TaskScheduler stopping due to KeyboardInterrupt.")
+                self.stop()
+                break
+            except Exception:
+                logger.error("Fatal scheduler error", exc_info=True)
+                # resume the loop even if this iteration failed
+                time.sleep(self.poll_interval)
+
+    def stop(self) -> None:
+        self.is_running = False
+        logger.info("TaskScheduler stopped.")
+
+    def initialize_last_block(self) -> None:
+        start_from_block_setting = getattr(settings, "BLOCK_DUMPER_START_FROM_BLOCK", None)
+
+        if start_from_block_setting is not None:
+            if start_from_block_setting == "current":
+                self.last_processed_block = self.subtensor.get_current_block()
+                logger.info(f"Starting from current blockchain block {self.last_processed_block}")
+
+            elif isinstance(start_from_block_setting, int):
+                self.last_processed_block = start_from_block_setting
+                logger.info(f"Starting from configured block {self.last_processed_block}")
+            else:
+                raise ValueError(f"Invalid BLOCK_DUMPER_START_FROM_BLOCK value: {start_from_block_setting}")
+        else:
+            # Default behavior - resume from database
+            last_block_number = abd_dal.get_the_latest_executed_block_number()
+
+            self.last_processed_block = last_block_number or self.subtensor.get_current_block()
+            logger.info(
+                "Resuming from the last database block or starting from the current block",
+                last_processed_block=self.last_processed_block,
+            )
+
+
+def task_scheduler_factory() -> TaskScheduler:
+    return TaskScheduler(
+        block_processor=block_processor_factory(),
+        subtensor=abd_utils.get_bittensor_client(),
+        poll_interval=getattr(settings, "BLOCK_DUMPER_POLL_INTERVAL", 1),
+    )
@@ -0,0 +1,51 @@
+import json
+from collections.abc import Callable
+from functools import cache
+
+import bittensor as bt
+import structlog
+from celery import current_task
+from django.conf import settings
+
+logger = structlog.get_logger(__name__)
+
+
+@cache
+def get_bittensor_client() -> bt.Subtensor:
+    """
+    Get a cached bittensor client.
+
+    The client is cached indefinitely since network configuration
+    doesn't change during runtime.
+    """
+    DEFAULT_BITTENSOR_NETWORK = "finney"
+    network = getattr(settings, "BITTENSOR_NETWORK", DEFAULT_BITTENSOR_NETWORK)
+    logger.info(f"Creating new bittensor client for network: {network}")
+    return bt.subtensor(network=network)
+
+
+def get_current_celery_task_id() -> str:
+    """
+    Get the current celery task id.
+    """
+    try:
+        celery_task_id = current_task.id
+    except Exception:
+        celery_task_id = ""
+    return str(celery_task_id)
+
+
+def get_executable_path(func: Callable) -> str:
+    """
+    Get the executable path for the callable `func`.
+    """
+    return ".".join([func.__module__, func.__name__])
+
+
+def get_max_attempt_limit() -> int:
+    default_max_attempts = 3
+    return getattr(settings, "BLOCK_DUMPER_MAX_ATTEMPTS", default_max_attempts)
+
+
+def serialize_args(args: dict) -> str:
+    return json.dumps(args, sort_keys=True)
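The scheduler and the helpers above read their configuration from Django settings: BLOCK_DUMPER_START_FROM_BLOCK ("current", an explicit block number, or unset to resume from the database), BLOCK_DUMPER_POLL_INTERVAL, BLOCK_DUMPER_MAX_ATTEMPTS, and BITTENSOR_NETWORK. A minimal settings.py sketch; only the setting names come from the code above, the values are illustrative:

    # settings.py (illustrative values)
    BLOCK_DUMPER_START_FROM_BLOCK = "current"  # or an int; omit to resume from the database
    BLOCK_DUMPER_POLL_INTERVAL = 1             # seconds between scheduler polls (code defaults to 1)
    BLOCK_DUMPER_MAX_ATTEMPTS = 3              # retry ceiling read by get_max_attempt_limit() (default 3)
    BITTENSOR_NETWORK = "finney"               # passed to bt.subtensor(network=...) (default "finney")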
@@ -0,0 +1,75 @@
+"""
+Maintenance tasks for Abstract Block Dumper.
+
+This module contains utility tasks for maintaining the TaskAttempt database,
+such as cleaning up old completed or failed tasks.
+"""
+
+from datetime import timedelta
+
+from celery import shared_task
+from django.db.models import Q
+from django.utils import timezone
+
+from abstract_block_dumper.models import TaskAttempt
+
+
+@shared_task(name="abstract_block_dumper.cleanup_old_tasks")
+def cleanup_old_tasks(days: int = 7) -> dict[str, int | str]:
+    """
+    Delete all succeeded or unrecoverable failed tasks older than the specified number of days.
+
+    This task helps maintain database performance by removing old task records that are
+    no longer needed. It targets:
+    - Tasks with SUCCESS status
+    - Tasks with FAILED status (which are unrecoverable/exhausted retries)
+
+    Tasks with PENDING or RUNNING status are never deleted to ensure ongoing work is preserved.
+
+    Args:
+        days: Number of days to retain. Tasks older than this will be deleted. Default is 7.
+
+    Returns:
+        A dictionary containing:
+        - deleted_count: Number of task attempts deleted
+        - cutoff_date: ISO formatted datetime string of the cutoff date used
+
+    Example:
+        # Delete tasks older than 7 days (default)
+        cleanup_old_tasks()
+
+        # Delete tasks older than 30 days
+        cleanup_old_tasks(days=30)
+
+    Recommended Usage:
+        Run this task daily via cron or Celery beat to maintain optimal database performance.
+        For production systems with high task volumes, consider running it more frequently.
+
+        Example cron (daily at 2 AM):
+            0 2 * * * python manage.py shell -c \
+                "from abstract_block_dumper.tasks import cleanup_old_tasks; cleanup_old_tasks.delay()"
+
+        Example Celery beat schedule (in settings.py):
+            CELERY_BEAT_SCHEDULE = {
+                'cleanup-old-tasks': {
+                    'task': 'abstract_block_dumper.cleanup_old_tasks',
+                    'schedule': crontab(hour=2, minute=0),  # Daily at 2 AM
+                    'kwargs': {'days': 7},
+                },
+            }
+
+    """
+    cutoff_date = timezone.now() - timedelta(days=days)
+
+    # Query for tasks that are either succeeded or failed (unrecoverable)
+    # We only delete completed work, never pending or running tasks
+    tasks_to_delete = TaskAttempt.objects.filter(
+        Q(status=TaskAttempt.Status.SUCCESS) | Q(status=TaskAttempt.Status.FAILED), updated_at__lt=cutoff_date
+    )
+
+    deleted_count, _ = tasks_to_delete.delete()
+
+    return {
+        "deleted_count": deleted_count,
+        "cutoff_date": cutoff_date.isoformat(),
+    }