abstract-block-dumper 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,34 @@
1
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Plain False constant (instead of typing.TYPE_CHECKING) so the typing
# import below is skipped entirely at runtime.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.0.1'
__version_tuple__ = version_tuple = (0, 0, 1)

# No VCS commit id was recorded for this build.
__commit_id__ = commit_id = None
@@ -0,0 +1,73 @@
1
+ from django.contrib import admin
2
+
3
+ from abstract_block_dumper.models import TaskAttempt
4
+
5
+
6
@admin.register(TaskAttempt)
class TaskAttemptAdmin(admin.ModelAdmin):
    """Admin page for inspecting TaskAttempt rows."""

    list_display = ["executable_path", "block_number", "status"]
    list_filter = ["status", "executable_path"]
    search_fields = ["celery_task_id", "block_number"]

    # Every field shown is read-only: attempt rows are driven by the
    # scheduler/workers, not edited by hand in the admin.
    readonly_fields = [
        "block_number",
        "executable_path",
        "args_json",
        "status",
        # Execution fields
        "celery_task_id",
        "execution_result",
        # Attempts & Retry fields
        "last_attempted_at",
        "attempt_count",
        "next_retry_at",
        "created_at",
        "updated_at",
    ]

    fieldsets = (
        (None, {"fields": ("block_number", "executable_path", "args_json")}),
        ("Task Execution", {"fields": ("status", "celery_task_id", "execution_result")}),
        ("Retry Information", {"fields": ("last_attempted_at", "attempt_count", "next_retry_at")}),
        ("Timestamps", {"fields": ("created_at", "updated_at")}),
    )
@@ -0,0 +1,7 @@
1
+ from django.apps import AppConfig
2
+
3
+
4
class AbstractBlockDumperConfig(AppConfig):
    """Django app configuration for the abstract_block_dumper package."""

    default_auto_field = "django.db.models.BigAutoField"
    name = "abstract_block_dumper"
    verbose_name = "Abstract Block Dumper"
File without changes
@@ -0,0 +1,150 @@
1
+ from datetime import timedelta
2
+ from typing import Any
3
+
4
+ from django.conf import settings
5
+ from django.db import transaction
6
+ from django.db.models.query import QuerySet
7
+ from django.utils import timezone
8
+
9
+ import abstract_block_dumper.models as abd_models
10
+ import abstract_block_dumper.services.utils as abd_utils
11
+
12
+
13
def get_ready_to_retry_attempts() -> QuerySet[abd_models.TaskAttempt]:
    """Attempts whose retry time has arrived and that still have attempts left."""
    candidates = abd_models.TaskAttempt.objects.filter(
        next_retry_at__isnull=False,
        next_retry_at__lte=timezone.now(),
        attempt_count__lt=abd_utils.get_max_attempt_limit(),
    )
    # Successful attempts never need a retry, whatever their retry fields say.
    return candidates.exclude(status=abd_models.TaskAttempt.Status.SUCCESS)
21
+
22
+
23
def executed_block_numbers(executable_path: str, args_json: str, from_block: int, to_block: int) -> set[int]:
    """Block numbers in [from_block, to_block) already dumped successfully for this task/args."""
    successful = abd_models.TaskAttempt.objects.filter(
        executable_path=executable_path,
        args_json=args_json,
        status=abd_models.TaskAttempt.Status.SUCCESS,
        block_number__gte=from_block,
        block_number__lt=to_block,
    )
    return set(successful.values_list("block_number", flat=True))
32
+
33
+
34
def reset_to_pending(task: abd_models.TaskAttempt) -> None:
    """Detach *task* from its celery task and return it to PENDING."""
    task.status = abd_models.TaskAttempt.Status.PENDING
    task.celery_task_id = None
    task.save()
38
+
39
+
40
def revert_to_failed(task: abd_models.TaskAttempt) -> None:
    """Force *task* back into FAILED state and persist it."""
    task.status = abd_models.TaskAttempt.Status.FAILED
    task.save()
43
+
44
+
45
def get_recent_phantom_tasks() -> QuerySet[abd_models.TaskAttempt]:
    """
    Get tasks marked as SUCCESS but never actually started.

    Only clean up recent phantom tasks to avoid deleting legitimate external successes
    """
    one_hour_ago = timezone.now() - timedelta(hours=1)
    return abd_models.TaskAttempt.objects.filter(
        status=abd_models.TaskAttempt.Status.SUCCESS,
        last_attempted_at__isnull=True,
        celery_task_id__isnull=True,  # extra safety: never handed to a worker
        created_at__gte=one_hour_ago,  # restrict the cleanup window
    )
57
+
58
+
59
def task_can_retry(task: abd_models.TaskAttempt) -> bool:
    """True when *task* is neither succeeded nor running and has attempts left."""
    if task.status in (task.Status.SUCCESS, task.Status.RUNNING):
        return False
    return task.attempt_count < abd_utils.get_max_attempt_limit()
62
+
63
+
64
def task_mark_as_started(task: abd_models.TaskAttempt, celery_task_id: str) -> None:
    """Record that a worker picked up *task* under *celery_task_id*."""
    task.last_attempted_at = timezone.now()
    task.status = abd_models.TaskAttempt.Status.RUNNING
    task.celery_task_id = celery_task_id
    task.save()
69
+
70
+
71
def task_mark_as_success(task: abd_models.TaskAttempt, result_data: dict) -> None:
    """Mark *task* as SUCCESS, store its result, and drop any scheduled retry."""
    task.next_retry_at = None
    task.last_attempted_at = timezone.now()
    task.execution_result = result_data
    task.status = task.Status.SUCCESS
    task.save()
77
+
78
+
79
def task_mark_as_failed(task: abd_models.TaskAttempt) -> None:
    """
    Mark *task* as FAILED and schedule the next retry if attempts remain.

    The retry delay is ``base ** attempt_count`` minutes (exponential
    backoff), capped at BLOCK_TASK_MAX_RETRY_DELAY_MINUTES.

    NOTE(review): with the default base of 1 the delay is a constant
    1 minute — confirm the intended default backoff base.
    """
    DEFAULT_BLOCK_TASK_RETRY_BACKOFF = 1
    MAX_RETRY_DELAY_MINUTES = 1440  # 24 hours max delay

    task.status = task.Status.FAILED
    task.last_attempted_at = timezone.now()
    task.attempt_count += 1

    if task_can_retry(task):
        base_retry_backoff = getattr(settings, "BLOCK_TASK_RETRY_BACKOFF", DEFAULT_BLOCK_TASK_RETRY_BACKOFF)
        max_delay_minutes = getattr(settings, "BLOCK_TASK_MAX_RETRY_DELAY_MINUTES", MAX_RETRY_DELAY_MINUTES)

        # Exponential backoff, bounded so a long failure streak cannot push
        # the retry arbitrarily far into the future.
        backoff_minutes = min(base_retry_backoff**task.attempt_count, max_delay_minutes)
        task.next_retry_at = timezone.now() + timedelta(minutes=backoff_minutes)
    else:
        # Retries exhausted (or state forbids retrying): no retry scheduled.
        task.next_retry_at = None
    task.save()
99
+
100
+
101
def task_schedule_to_retry(task: abd_models.TaskAttempt) -> None:
    """Return *task* to PENDING so the scheduler can dispatch it again."""
    task.status = abd_models.TaskAttempt.Status.PENDING
    task.save()
104
+
105
+
106
def task_create_or_get_pending(
    block_number: int,
    executable_path: str,
    args: dict[str, Any] | None = None,
) -> tuple[abd_models.TaskAttempt, bool]:
    """
    Create or get a pending task attempt.
    Returns (task, created) where created indicates if a new task was created.

    For failed tasks that can retry:
    - If next_retry_at is in the future, leave task as FAILED (will be picked up by scheduler)
    - If next_retry_at is in the past or None, reset to PENDING for immediate execution
    """
    if args is None:
        args = {}

    # Serialized args are part of the uniqueness key: identical arguments
    # must map to the same TaskAttempt row.
    args_json = abd_utils.serialize_args(args)

    with transaction.atomic():
        task, created = abd_models.TaskAttempt.objects.get_or_create(
            block_number=block_number,
            executable_path=executable_path,
            args_json=args_json,
            defaults={"status": abd_models.TaskAttempt.Status.PENDING},
        )

        # Don't modify tasks that are already in a terminal or active state
        active_state = {abd_models.TaskAttempt.Status.SUCCESS, abd_models.TaskAttempt.Status.RUNNING}
        if created or task.status in active_state:
            return task, created

        # For failed tasks that can retry, only reset to PENDING if retry time has passed
        if task.status == abd_models.TaskAttempt.Status.FAILED and task_can_retry(task):
            now = timezone.now()
            if task.next_retry_at is None or task.next_retry_at <= now:
                task.status = abd_models.TaskAttempt.Status.PENDING
                task.save()
        return task, created
144
+
145
+
146
def get_the_latest_executed_block_number() -> int | None:
    """Highest block_number among all attempts, or None when the table is empty."""
    latest = abd_models.TaskAttempt.objects.order_by("-block_number").first()
    return latest.block_number if latest else None
@@ -0,0 +1,105 @@
1
+ import abc
2
+ from collections.abc import Callable
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+ import structlog
7
+ from celery import Task
8
+
9
+ from abstract_block_dumper.exceptions import ConditionEvaluationError
10
+
11
+ logger = structlog.getLogger(__name__)
12
+
13
+
14
@dataclass
class RegistryItem:
    """A registered block task: its trigger condition, celery task and options."""

    condition: Callable[..., bool]
    function: Task
    args: list[dict[str, Any]] | None = None
    backfilling_lookback: int | None = None
    celery_kwargs: dict[str, Any] = field(default_factory=dict)

    def match_condition(self, block_number: int, **kwargs) -> bool:
        """
        Check if condition matches for given block and arguments
        """
        try:
            matched = self.condition(block_number, **kwargs)
        except Exception as e:
            logger.error(
                "Condition evaluation failed",
                condition=self.function.__name__,
                block_number=block_number,
                exc_info=True,
            )
            raise ConditionEvaluationError(f"Failed to evaluate condition: {e}") from e
        return matched

    def get_execution_args(self) -> list[dict[str, Any]]:
        """
        Get list of argument sets for execution
        """
        if self.args:
            return self.args
        return [{}]

    @property
    def executable_path(self) -> str:
        """
        Get the importable path to the function.
        """
        # Celery tasks carry their registered name; fall back to module.qualname.
        registered_name = getattr(self.function, "name", None)
        if registered_name is not None:
            return registered_name

        return ".".join([self.function.__module__, self.function.__name__])

    def requires_backfilling(self) -> bool:
        """
        Check if this item requires backfilling.
        """
        return self.backfilling_lookback is not None
58
+
59
+
60
class BaseRegistry(abc.ABC):
    """Abstract interface for storing and looking up registered block tasks."""

    @abc.abstractmethod
    def register_item(self, item: RegistryItem) -> None:
        """Add *item* to the registry."""
        pass

    @abc.abstractmethod
    def get_functions(self) -> list[RegistryItem]:
        """Return every registered item."""
        pass

    @abc.abstractmethod
    def clear(self) -> None:
        """Remove all registered items."""
        pass

    @abc.abstractmethod
    def get_by_executable_path(self, executable_path: str) -> RegistryItem | None:
        """Return the item whose executable path matches, or None."""
        pass
76
+
77
+
78
class MemoryRegistry(BaseRegistry):
    """
    Process-local registry backed by a plain list.

    Each instance owns its own list: the previous class-level attribute was
    shared across instances and was silently shadowed by an instance
    attribute after the first clear() call.
    """

    def __init__(self) -> None:
        self._functions: list[RegistryItem] = []

    def register_item(self, item: RegistryItem) -> None:
        """Store *item* and log the registration."""
        self._functions.append(item)
        logger.info(
            "Registered function",
            function_name=item.function.__name__,
            executable_path=item.executable_path,
            args=item.args,
            backfilling_lookback=item.backfilling_lookback,
        )

    def get_functions(self) -> list[RegistryItem]:
        """Return all registered items."""
        return self._functions

    def clear(self) -> None:
        """Drop every registered item."""
        self._functions = []

    def get_by_executable_path(self, executable_path: str) -> RegistryItem | None:
        """
        Return the registered item matching *executable_path*.

        Returns None when nothing matches, as declared by BaseRegistry;
        callers (e.g. schedule_retry) test the result for falsiness, so the
        previous bare ``raise Exception`` made that guard unreachable.
        """
        for registry_item in self.get_functions():
            if registry_item.executable_path == executable_path:
                return registry_item
        return None


task_registry = MemoryRegistry()
@@ -0,0 +1,211 @@
1
+ from collections.abc import Callable
2
+ from typing import Any, cast
3
+
4
+ import structlog
5
+ from celery import Task, shared_task
6
+ from django.db import OperationalError, transaction
7
+
8
+ import abstract_block_dumper.dal.django_dal as abd_dal
9
+ import abstract_block_dumper.services.utils as abd_utils
10
+ from abstract_block_dumper.dal.memory_registry import RegistryItem, task_registry
11
+ from abstract_block_dumper.exceptions import CeleryTaskLocked
12
+ from abstract_block_dumper.models import TaskAttempt
13
+
14
+ logger = structlog.get_logger(__name__)
15
+
16
+
17
def schedule_retry(task_attempt: TaskAttempt) -> None:
    """
    Schedule a retry for a failed task by calling the decorated Celery task directly.

    Task must already be in FAILED state with next_retry_at set by mark_failed()
    """

    if not task_attempt.next_retry_at:
        logger.error(
            "Cannot schedule retry without next_retry_at",
            task_id=task_attempt.id,
            block_number=task_attempt.block_number,
            executable_path=task_attempt.executable_path,
        )
        # Bug fix: previously fell through and enqueued the task with
        # eta=None (i.e. immediately), contradicting the error logged above.
        return

    if task_attempt.status != TaskAttempt.Status.FAILED:
        logger.warning(
            "Attempted to schedule retry for non-failed task",
            task_id=task_attempt.id,
            status=task_attempt.status,
        )
        return

    logger.info(
        "Scheduling retry",
        task_id=task_attempt.id,
        attempt_count=task_attempt.attempt_count,
        next_retry_at=task_attempt.next_retry_at,
    )

    # Move the row back to PENDING so the worker-side wrapper will accept it.
    abd_dal.task_schedule_to_retry(task_attempt)

    celery_task = task_registry.get_by_executable_path(task_attempt.executable_path)
    if not celery_task:
        logger.error(
            "Cannot schedule retry - task not found in registry",
            executable_path=task_attempt.executable_path,
        )
        return

    # eta delays execution until the computed retry time.
    celery_task.function.apply_async(
        kwargs={
            "block_number": task_attempt.block_number,
            **task_attempt.args_dict,
        },
        eta=task_attempt.next_retry_at,
    )
64
+
65
+
66
def _celery_task_wrapper(func, block_number: int, **kwargs) -> dict[str, Any] | None:
    """
    Run *func* for *block_number* under its matching TaskAttempt row.

    The row is locked with select_for_update(nowait=True) so only one worker
    can execute a given (block_number, executable_path, args) attempt at a
    time; a concurrent worker hits OperationalError and backs off via
    CeleryTaskLocked. Returns {"result": ...} on success, None when the
    attempt was skipped or failed.
    """
    executable_path = abd_utils.get_executable_path(func)

    # NOTE(review): the execution below runs inside this transaction, so the
    # row lock is held for the whole run; the "after transaction commits"
    # comment further down does not match — confirm whether
    # transaction.on_commit was intended for scheduling the retry.
    with transaction.atomic():
        try:
            task_attempt = TaskAttempt.objects.select_for_update(nowait=True).get(
                block_number=block_number,
                executable_path=executable_path,
                args_json=abd_utils.serialize_args(kwargs),
            )
        except TaskAttempt.DoesNotExist:
            logger.warning(
                "TaskAttempt not found - task may have been canceled directly",
                block_number=block_number,
                executable_path=executable_path,
            )
            raise CeleryTaskLocked("TaskAttempt not found - task may have been canceled directly")
        except OperationalError as e:
            # nowait=True surfaces a held row lock as OperationalError.
            logger.info(
                "Task already being processed by another worker",
                block_number=block_number,
                executable_path=executable_path,
                operational_error=str(e),
            )
            raise CeleryTaskLocked("Task already being processed by another worker")

        # Another worker (or an operator) already moved this attempt past
        # PENDING — nothing to do.
        if task_attempt.status != TaskAttempt.Status.PENDING:
            logger.info(
                "Task already processed",
                task_id=task_attempt.id,
                status=task_attempt.status,
            )
            return None

        abd_dal.task_mark_as_started(task_attempt, abd_utils.get_current_celery_task_id())

        # Start task execution
        try:
            execution_kwargs = {"block_number": block_number, **kwargs}
            logger.info(
                "Starting task execution",
                task_id=task_attempt.id,
                block_number=block_number,
                executable_path=executable_path,
                celery_task_id=task_attempt.celery_task_id,
                execution_kwargs=execution_kwargs,
            )

            result = func(**execution_kwargs)

            abd_dal.task_mark_as_success(task_attempt, result)

            logger.info("Task completed successfully", task_id=task_attempt.id)
            return {"result": result}
        except Exception as e:
            logger.error(
                "Task execution failed",
                task_id=task_attempt.id,
                error_type=type(e).__name__,
                exc_info=True,
            )
            abd_dal.task_mark_as_failed(task_attempt)

            # Schedule retry after transaction commits:
            if abd_dal.task_can_retry(task_attempt):
                try:
                    schedule_retry(task_attempt)
                except Exception:
                    # Best effort: a failed scheduling attempt must not mask
                    # the original task failure.
                    logger.error(
                        "Failed to schedule retry",
                        task_id=task_attempt.id,
                        exc_info=True,
                    )
            return None
140
+
141
+
142
def block_task(
    condition: Callable[..., bool],
    args: list[dict[str, Any]] | None = None,
    backfilling_lookback: int | None = None,
    celery_kwargs: dict[str, Any] | None = None,
) -> Callable[..., Any]:
    """
    Decorator for registering block tasks.

    Args:
        condition: Lambda function that determines when to execute
        args: List of argument dictionaries for multi-execution
        backfilling_lookback: Number of blocks to backfill
        celery_kwargs: Additional Celery task parameters

    Examples:
        @block_task(
            condition=lambda bn: bn % 100 == 0
        )
        def simple_task(block_number: int):
            pass

        @block_task(
            condition=lambda bn, netuid: (bn + netuid) % 100 == 0,
            args=[{"netuid": 3}, {"netuid": 22}],
            backfilling_lookback=300,
            celery_kwargs={"queue": "high-priority"}
        )
        def multi_netuid_task(block_number: int, netuid: int):
            pass

    """

    def decorator(func: Callable[..., Any]) -> Any:
        if not callable(condition):
            raise ValueError("condition must be a callable.")

        # Celery task wrapper
        def shared_celery_task(block_number: int, **kwargs) -> None | Any:
            """
            Wrapper that handles TaskAttempt tracking and executes the original
            function.

            This entire wrapper becomes a Celery task.
            """
            return _celery_task_wrapper(func, block_number, **kwargs)

        # Wrap with celery shared_task; the task is named after the original
        # function's import path so retries can look it up in the registry.
        celery_task = shared_task(
            name=abd_utils.get_executable_path(func),
            bind=False,
            **celery_kwargs or {},
        )(shared_celery_task)

        # Store original function reference for introspection
        celery_task._original_func = func

        # Register the Celery task
        task_registry.register_item(
            RegistryItem(
                condition=condition,
                function=cast(Task, celery_task),
                args=args,
                backfilling_lookback=backfilling_lookback,
                celery_kwargs=celery_kwargs or {},
            )
        )
        return celery_task

    return decorator
@@ -0,0 +1,24 @@
1
+ import importlib
2
+
3
+ import structlog
4
+
5
+ logger = structlog.get_logger(__name__)
6
+
7
+
8
def ensure_modules_loaded() -> None:
    """
    Ensure common tasks modules are imported to trigger @block_task registration.

    @block_task must be loaded, otherwise it won't be registered.
    """
    from django.apps import apps

    candidate_suffixes = ("tasks", "block_tasks")
    for app_config in apps.get_app_configs():
        for module_suffix in candidate_suffixes:
            try:
                importlib.import_module(f"{app_config.name}.{module_suffix}")
            except ModuleNotFoundError:
                # App simply has no such module — nothing to register.
                continue
            except ImportError as e:
                # Module exists but failed to import; surface it and move on.
                logger.warning(f"Failed to import {app_config.name}.{module_suffix}: {e}")
                continue
@@ -0,0 +1,16 @@
1
+ class AbstractBlockDumperError(Exception):
2
+ """Base exception for all Abstract Block Dumper errors."""
3
+
4
+ pass
5
+
6
+
7
+ class ConditionEvaluationError(AbstractBlockDumperError):
8
+ """Condition failed to evaluate."""
9
+
10
+ pass
11
+
12
+
13
+ class CeleryTaskLocked(Exception):
14
+ """Celery task execution is locked"""
15
+
16
+ pass
File without changes
@@ -0,0 +1,19 @@
1
+ from django.core.management.base import BaseCommand
2
+
3
+ from abstract_block_dumper.dal.memory_registry import task_registry
4
+ from abstract_block_dumper.discovery import ensure_modules_loaded
5
+ from abstract_block_dumper.services.scheduler import task_scheduler_factory
6
+
7
+
8
class Command(BaseCommand):
    """Management command that discovers block tasks and runs the scheduler."""

    help = "Run the block scheduler daemon."

    def handle(self, *args, **options) -> None:
        # Import every app's tasks/block_tasks modules so @block_task
        # decorators execute and populate task_registry before scheduling.
        self.stdout.write("Syncing decorated functions...")
        ensure_modules_loaded()
        functions_counter = len(task_registry.get_functions())
        self.stdout.write(self.style.SUCCESS(f"Synced {functions_counter} functions"))

        scheduler = task_scheduler_factory()
        self.stdout.write("Starting block scheduler...")
        # NOTE(review): presumably start() runs the scheduling loop and does
        # not return under normal operation — confirm in task_scheduler_factory.
        scheduler.start()