abstract-block-dumper 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
+# Generated by Django 5.2.6 on 2025-10-06 21:32
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    initial = True
+
+    dependencies = []
+
+    operations = [
+        migrations.CreateModel(
+            name="TaskAttempt",
+            fields=[
+                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
+                ("block_number", models.PositiveIntegerField(db_index=True)),
+                ("executable_path", models.CharField(max_length=255)),
+                ("args_json", models.TextField(default="{}")),
+                (
+                    "status",
+                    models.CharField(
+                        choices=[
+                            ("pending", "Pending"),
+                            ("running", "Running"),
+                            ("success", "Success"),
+                            ("failed", "Failed"),
+                        ],
+                        default="pending",
+                        max_length=20,
+                    ),
+                ),
+                ("celery_task_id", models.CharField(blank=True, max_length=50, null=True)),
+                ("execution_result", models.JSONField(null=True)),
+                ("last_attempted_at", models.DateTimeField(blank=True, null=True)),
+                ("attempt_count", models.PositiveIntegerField(default=0)),
+                ("next_retry_at", models.DateTimeField(blank=True, null=True)),
+                ("created_at", models.DateTimeField(auto_now_add=True)),
+                ("updated_at", models.DateTimeField(auto_now=True)),
+            ],
+            options={
+                "verbose_name": "Task Attempt",
+                "verbose_name_plural": "Task Attempts",
+                "indexes": [
+                    models.Index(fields=["status", "next_retry_at"], name="abstract_bl_status_c3786f_idx"),
+                    models.Index(fields=["block_number", "executable_path"], name="abstract_bl_block_n_97b116_idx"),
+                ],
+                "constraints": [
+                    models.UniqueConstraint(
+                        fields=("block_number", "executable_path", "args_json"), name="unique_task_attempt"
+                    )
+                ],
+            },
+        ),
+    ]
File without changes
@@ -0,0 +1,59 @@
+import json
+from typing import Any
+
+from django.db import models
+
+import abstract_block_dumper.services.utils as abd_utils
+
+
+class TaskAttempt(models.Model):
+    class Status(models.TextChoices):
+        PENDING = "pending", "Pending"
+        RUNNING = "running", "Running"
+        SUCCESS = "success", "Success"
+        FAILED = "failed", "Failed"
+
+    # Execution
+    block_number = models.PositiveIntegerField(db_index=True)
+    executable_path = models.CharField(max_length=255)
+    args_json = models.TextField(default="{}")
+
+    # Execution state
+    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
+    celery_task_id = models.CharField(max_length=50, blank=True, null=True)
+    execution_result = models.JSONField(null=True)
+
+    # Retry management
+    last_attempted_at = models.DateTimeField(null=True, blank=True)
+    attempt_count = models.PositiveIntegerField(default=0)
+    next_retry_at = models.DateTimeField(null=True, blank=True)
+
+    created_at = models.DateTimeField(auto_now_add=True)
+    updated_at = models.DateTimeField(auto_now=True)
+
+    class Meta:
+        verbose_name = "Task Attempt"
+        verbose_name_plural = "Task Attempts"
+        indexes = [
+            models.Index(fields=["status", "next_retry_at"]),
+            models.Index(fields=["block_number", "executable_path"]),
+        ]
+        constraints = [
+            models.UniqueConstraint(
+                fields=["block_number", "executable_path", "args_json"], name="unique_task_attempt"
+            ),
+        ]
+
+    def __str__(self) -> str:
+        return f"TaskAttempt(block={self.block_number}, path={self.executable_path}, status={self.status})"
+
+    @property
+    def args_dict(self) -> dict[str, Any]:
+        try:
+            return json.loads(self.args_json)
+        except (json.JSONDecodeError, TypeError):
+            return {}
+
+    @args_dict.setter
+    def args_dict(self, value: dict[str, Any]) -> None:
+        self.args_json = abd_utils.serialize_args(value)
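The args_dict setter serializes through serialize_args (defined in services/utils.py below), which sorts keys so that equal argument dicts always produce the same args_json string, the value that also participates in the unique_task_attempt constraint. A minimal round-trip sketch, assuming a configured Django project; the executable path and argument names are illustrative only:

    from abstract_block_dumper.models import TaskAttempt

    attempt = TaskAttempt(block_number=100, executable_path="myapp.tasks.dump_block")  # illustrative path
    attempt.args_dict = {"netuid": 1, "full": True}           # setter serializes with sorted keys
    assert attempt.args_json == '{"full": true, "netuid": 1}'
    assert attempt.args_dict == {"full": True, "netuid": 1}   # getter parses back, or returns {} on bad JSON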
File without changes
File without changes
@@ -0,0 +1,190 @@
+import structlog
+from django.db import transaction
+
+import abstract_block_dumper.dal.django_dal as abd_dal
+from abstract_block_dumper.dal.memory_registry import BaseRegistry, RegistryItem, task_registry
+from abstract_block_dumper.exceptions import ConditionEvaluationError
+from abstract_block_dumper.models import TaskAttempt
+from abstract_block_dumper.services.executor import CeleryExecutor
+from abstract_block_dumper.services.utils import serialize_args
+
+logger = structlog.get_logger(__name__)
+
+
+class BlockProcessor:
+    def __init__(self, executor: CeleryExecutor, registry: BaseRegistry) -> None:
+        self.executor = executor
+        self.registry = registry
+        self._cleanup_phantom_tasks()
+
+    def process_block(self, block_number: int) -> None:
+        for registry_item in self.registry.get_functions():
+            try:
+                self.process_backfill(registry_item, block_number)
+                self.process_registry_item(registry_item, block_number)
+            except Exception:
+                logger.error(
+                    "Error processing registry item",
+                    function_name=registry_item.function.__name__,
+                    block_number=block_number,
+                    exc_info=True,
+                )
+
+    def process_registry_item(self, registry_item: RegistryItem, block_number: int) -> None:
+        for args in registry_item.get_execution_args():
+            try:
+                if registry_item.match_condition(block_number, **args):
+                    self.executor.execute(registry_item, block_number, args)
+            except ConditionEvaluationError as e:
+                logger.warning(
+                    "Condition evaluation failed, skipping task",
+                    function_name=registry_item.function.__name__,
+                    error=str(e),
+                )
+                # Continue with other tasks
+            except Exception:
+                logger.error("Unexpected error processing task", exc_info=True)
+
+    def process_backfill(self, registry_item: RegistryItem, current_block: int) -> None:
+        if not registry_item.backfilling_lookback:
+            return None
+
+        start_block = max(0, current_block - registry_item.backfilling_lookback)
+
+        logger.info(
+            "Processing backfill",
+            function_name=registry_item.function.__name__,
+            start_block=start_block,
+            current_block=current_block,
+            lookback=registry_item.backfilling_lookback,
+        )
+
+        execution_args_list = registry_item.get_execution_args()
+
+        for args in execution_args_list:
+            args_json = serialize_args(args)
+
+            executed_blocks = abd_dal.executed_block_numbers(
+                registry_item.executable_path,
+                args_json,
+                start_block,
+                current_block,
+            )
+
+            for block_number in range(start_block, current_block):
+                if block_number in executed_blocks:
+                    continue
+
+                try:
+                    if registry_item.match_condition(block_number, **args):
+                        logger.debug(
+                            "Backfilling block",
+                            function_name=registry_item.function.__name__,
+                            block_number=block_number,
+                            args=args,
+                        )
+                        self.executor.execute(registry_item, block_number, args)
+                except Exception:
+                    logger.error(
+                        "Error during backfill",
+                        function_name=registry_item.function.__name__,
+                        block_number=block_number,
+                        args=args,
+                        exc_info=True,
+                    )
+
+    def recover_failed_retries(self) -> None:
+        """
+        Recover failed tasks that are ready to be retried.
+
+        This handles tasks that may have been lost due to scheduler restarts.
+        """
+        retry_count = 0
+        for task_attempt in abd_dal.get_ready_to_retry_attempts():
+            try:
+                # Find the registry item to get celery_kwargs
+                registry_item = self.registry.get_by_executable_path(task_attempt.executable_path)
+                if not registry_item:
+                    logger.warning(
+                        "Registry item not found for failed task, skipping retry recovery",
+                        task_id=task_attempt.id,
+                        executable_path=task_attempt.executable_path,
+                    )
+                    continue
+
+                # Use atomic transaction to prevent race conditions
+                with transaction.atomic():
+                    # Re-fetch with select_for_update to prevent concurrent modifications
+                    task_attempt = TaskAttempt.objects.select_for_update(nowait=True).get(id=task_attempt.id)
+
+                    # Verify task is still in FAILED state and ready for retry
+                    if task_attempt.status == TaskAttempt.Status.SUCCESS:
+                        logger.info(
+                            "Task was already recovered",
+                            task_id=task_attempt.id,
+                            current_status=task_attempt.status,
+                        )
+                        continue
+
+                    if not abd_dal.task_can_retry(task_attempt):
+                        logger.info(
+                            "Task cannot be retried, skipping recovery",
+                            task_id=task_attempt.id,
+                            attempt_count=task_attempt.attempt_count,
+                        )
+                        continue
+
+                    # Reset to PENDING and clear celery_task_id
+                    abd_dal.reset_to_pending(task_attempt)
+
+                # Execute outside of transaction to avoid holding locks too long
+                self.executor.execute(registry_item, task_attempt.block_number, task_attempt.args_dict)
+                retry_count += 1
+
+                logger.info(
+                    "Recovered orphaned retry",
+                    task_id=task_attempt.id,
+                    block_number=task_attempt.block_number,
+                    attempt_count=task_attempt.attempt_count,
+                )
+            except Exception:
+                logger.error(
+                    "Failed to recover retry",
+                    task_id=task_attempt.id,
+                    exc_info=True,
+                )
+                # Reload task to see current state after potential execution failure
+                try:
+                    task_attempt.refresh_from_db()
+                    # If task is still PENDING after error, revert to FAILED
+                    # (execution may have failed before celery task could mark it)
+                    if task_attempt.status == TaskAttempt.Status.PENDING:
+                        abd_dal.revert_to_failed(task_attempt)
+                except TaskAttempt.DoesNotExist:
+                    # Task was deleted during recovery, nothing to revert
+                    pass
+
+        if retry_count > 0:
+            logger.info("Retry recovery completed", recovered_count=retry_count)
+
+    def _cleanup_phantom_tasks(self) -> None:
+        """
+        Clean up tasks marked as SUCCESS but never actually started.
+        Only removes tasks that were created recently (within last hour) to avoid
+        deleting legitimate tasks marked as success by external processes.
+        """
+        recent_phantom_tasks = abd_dal.get_recent_phantom_tasks()
+        count = recent_phantom_tasks.count()
+        if count > 0:
+            recent_phantom_tasks.delete()
+            logger.info("Cleaned up recent phantom tasks on initialization", count=count)
+
+
+def block_processor_factory(
+    executor: CeleryExecutor | None = None,
+    registry: BaseRegistry | None = None,
+) -> BlockProcessor:
+    return BlockProcessor(
+        executor=executor or CeleryExecutor(),
+        registry=registry or task_registry,
+    )
@@ -0,0 +1,54 @@
+from typing import Any
+
+import structlog
+
+import abstract_block_dumper.dal.django_dal as abd_dal
+from abstract_block_dumper.dal.memory_registry import RegistryItem
+from abstract_block_dumper.models import TaskAttempt
+
+logger = structlog.get_logger(__name__)
+
+
+class CeleryExecutor:
+    def execute(self, registry_item: RegistryItem, block_number: int, args: dict[str, Any]) -> None:
+        task_attempt, created = abd_dal.task_create_or_get_pending(
+            block_number=block_number,
+            executable_path=registry_item.executable_path,
+            args=args,
+        )
+        if not created and task_attempt.status != TaskAttempt.Status.PENDING:
+            logger.debug(
+                "Task already exists",
+                task_id=task_attempt.id,
+                status=task_attempt.status,
+            )
+            return
+
+        task_kwargs = {
+            "block_number": block_number,
+            **args,
+        }
+
+        apply_async_kwargs: dict[str, Any] = {"kwargs": task_kwargs}
+
+        if task_attempt.next_retry_at:
+            apply_async_kwargs["eta"] = task_attempt.next_retry_at
+
+        celery_options = {
+            k: v for k, v in (registry_item.celery_kwargs or {}).items() if k not in ("kwargs", "eta", "args")
+        }
+
+        apply_async_kwargs.update(celery_options)
+
+        logger.info(
+            "Scheduling Celery task",
+            task_id=task_attempt.id,
+            block_number=task_attempt.block_number,
+            executable_path=task_attempt.executable_path,
+            args=args,
+            celery_kwargs=apply_async_kwargs,
+        )
+
+        celery_task = registry_item.function.apply_async(**apply_async_kwargs)
+
+        logger.debug("Celery task scheduled", task_id=task_attempt.id, celery_task_id=celery_task.id)
@@ -0,0 +1,92 @@
+import time
+
+import bittensor as bt
+import structlog
+from django.conf import settings
+
+import abstract_block_dumper.dal.django_dal as abd_dal
+import abstract_block_dumper.services.utils as abd_utils
+from abstract_block_dumper.services.block_processor import BlockProcessor, block_processor_factory
+
+logger = structlog.get_logger(__name__)
+
+
+class TaskScheduler:
+    def __init__(
+        self,
+        block_processor: BlockProcessor,
+        subtensor: bt.Subtensor,
+        poll_interval: int,
+    ) -> None:
+        self.block_processor = block_processor
+        self.subtensor = subtensor
+        self.poll_interval = poll_interval
+        self.last_processed_block = -1
+        self.is_running = False
+
+    def start(self) -> None:
+        self.is_running = True
+
+        self.initialize_last_block()
+
+        logger.info(
+            "TaskScheduler started",
+            last_processed_block=self.last_processed_block,
+            registry_functions=len(self.block_processor.registry.get_functions()),
+        )
+
+        while self.is_running:
+            try:
+                # Process lost retries first
+                self.block_processor.recover_failed_retries()
+
+                current_block = self.subtensor.get_current_block()
+
+                for block_number in range(self.last_processed_block + 1, current_block + 1):
+                    self.block_processor.process_block(block_number)
+                    self.last_processed_block = block_number
+
+                time.sleep(self.poll_interval)
+            except KeyboardInterrupt:
+                logger.info("TaskScheduler stopping due to KeyboardInterrupt.")
+                self.stop()
+                break
+            except Exception:
+                logger.error("Fatal scheduler error", exc_info=True)
+                # resume the loop even if this iteration failed
+                time.sleep(self.poll_interval)
+
+    def stop(self) -> None:
+        self.is_running = False
+        logger.info("TaskScheduler stopped.")
+
+    def initialize_last_block(self) -> None:
+        start_from_block_setting = getattr(settings, "BLOCK_DUMPER_START_FROM_BLOCK", None)
+
+        if start_from_block_setting is not None:
+            if start_from_block_setting == "current":
+                self.last_processed_block = self.subtensor.get_current_block()
+                logger.info(f"Starting from current blockchain block {self.last_processed_block}")
+
+            elif isinstance(start_from_block_setting, int):
+                self.last_processed_block = start_from_block_setting
+                logger.info(f"Starting from configured block {self.last_processed_block}")
+            else:
+                raise ValueError(f"Invalid BLOCK_DUMPER_START_FROM_BLOCK value: {start_from_block_setting}")
+        else:
+            # Default behavior - resume from database
+            last_block_number = abd_dal.get_the_latest_executed_block_number()
+
+            self.last_processed_block = last_block_number or self.subtensor.get_current_block()
+            logger.info(
+                "Resuming from the last database block or starting from the current block",
+                last_processed_block=self.last_processed_block,
+            )
+
+
+def task_scheduler_factory() -> TaskScheduler:
+    return TaskScheduler(
+        block_processor=block_processor_factory(),
+        subtensor=abd_utils.get_bittensor_client(),
+        poll_interval=getattr(settings, "BLOCK_DUMPER_POLL_INTERVAL", 1),
+    )
@@ -0,0 +1,51 @@
+import json
+from collections.abc import Callable
+from functools import cache
+
+import bittensor as bt
+import structlog
+from celery import current_task
+from django.conf import settings
+
+logger = structlog.get_logger(__name__)
+
+
+@cache
+def get_bittensor_client() -> bt.Subtensor:
+    """
+    Get a cached bittensor client.
+
+    The client is cached indefinitely since network configuration
+    doesn't change during runtime.
+    """
+    DEFAULT_BITTENSOR_NETWORK = "finney"
+    network = getattr(settings, "BITTENSOR_NETWORK", DEFAULT_BITTENSOR_NETWORK)
+    logger.info(f"Creating new bittensor client for network: {network}")
+    return bt.subtensor(network=network)
+
+
+def get_current_celery_task_id() -> str:
+    """
+    Get the current celery task id.
+    """
+    try:
+        celery_task_id = current_task.id
+    except Exception:
+        celery_task_id = ""
+    return str(celery_task_id)
+
+
+def get_executable_path(func: Callable) -> str:
+    """
+    Get the executable path for the callable `func`.
+    """
+    return ".".join([func.__module__, func.__name__])
+
+
+def get_max_attempt_limit() -> int:
+    default_max_attempts = 3
+    return getattr(settings, "BLOCK_DUMPER_MAX_ATTEMPTS", default_max_attempts)
+
+
+def serialize_args(args: dict) -> str:
+    return json.dumps(args, sort_keys=True)
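The scheduler and the helpers above read their configuration from Django settings: BLOCK_DUMPER_START_FROM_BLOCK ("current", an explicit block number, or unset to resume from the database), BLOCK_DUMPER_POLL_INTERVAL, BLOCK_DUMPER_MAX_ATTEMPTS, and BITTENSOR_NETWORK. A minimal settings.py sketch; only the setting names come from the code above, the values are illustrative:

    # settings.py (illustrative values)
    BLOCK_DUMPER_START_FROM_BLOCK = "current"  # or an int; omit to resume from the database
    BLOCK_DUMPER_POLL_INTERVAL = 1             # seconds between scheduler polls (code defaults to 1)
    BLOCK_DUMPER_MAX_ATTEMPTS = 3              # retry ceiling read by get_max_attempt_limit() (default 3)
    BITTENSOR_NETWORK = "finney"               # passed to bt.subtensor(network=...) (default "finney")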
@@ -0,0 +1,75 @@
+"""
+Maintenance tasks for Abstract Block Dumper.
+
+This module contains utility tasks for maintaining the TaskAttempt database,
+such as cleaning up old completed or failed tasks.
+"""
+
+from datetime import timedelta
+
+from celery import shared_task
+from django.db.models import Q
+from django.utils import timezone
+
+from abstract_block_dumper.models import TaskAttempt
+
+
+@shared_task(name="abstract_block_dumper.cleanup_old_tasks")
+def cleanup_old_tasks(days: int = 7) -> dict[str, int | str]:
+    """
+    Delete all succeeded or unrecoverable failed tasks older than the specified number of days.
+
+    This task helps maintain database performance by removing old task records that are
+    no longer needed. It targets:
+    - Tasks with SUCCESS status
+    - Tasks with FAILED status (which are unrecoverable/exhausted retries)
+
+    Tasks with PENDING or RUNNING status are never deleted to ensure ongoing work is preserved.
+
+    Args:
+        days: Number of days to retain. Tasks older than this will be deleted. Default is 7.
+
+    Returns:
+        A dictionary containing:
+        - deleted_count: Number of task attempts deleted
+        - cutoff_date: ISO formatted datetime string of the cutoff date used
+
+    Example:
+        # Delete tasks older than 7 days (default)
+        cleanup_old_tasks()
+
+        # Delete tasks older than 30 days
+        cleanup_old_tasks(days=30)
+
+    Recommended Usage:
+        Run this task daily via cron or Celery beat to maintain optimal database performance.
+        For production systems with high task volumes, consider running it more frequently.
+
+        Example cron (daily at 2 AM):
+            0 2 * * * python manage.py shell -c \
+                "from abstract_block_dumper.tasks import cleanup_old_tasks; cleanup_old_tasks.delay()"
+
+        Example Celery beat schedule (in settings.py):
+            CELERY_BEAT_SCHEDULE = {
+                'cleanup-old-tasks': {
+                    'task': 'abstract_block_dumper.cleanup_old_tasks',
+                    'schedule': crontab(hour=2, minute=0),  # Daily at 2 AM
+                    'kwargs': {'days': 7},
+                },
+            }
+
+    """
+    cutoff_date = timezone.now() - timedelta(days=days)
+
+    # Query for tasks that are either succeeded or failed (unrecoverable)
+    # We only delete completed work, never pending or running tasks
+    tasks_to_delete = TaskAttempt.objects.filter(
+        Q(status=TaskAttempt.Status.SUCCESS) | Q(status=TaskAttempt.Status.FAILED), updated_at__lt=cutoff_date
+    )
+
+    deleted_count, _ = tasks_to_delete.delete()
+
+    return {
+        "deleted_count": deleted_count,
+        "cutoff_date": cutoff_date.isoformat(),
+    }