horsies-0.1.0a1-py3-none-any.whl
This diff represents the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- horsies/__init__.py +115 -0
- horsies/core/__init__.py +0 -0
- horsies/core/app.py +552 -0
- horsies/core/banner.py +144 -0
- horsies/core/brokers/__init__.py +5 -0
- horsies/core/brokers/listener.py +444 -0
- horsies/core/brokers/postgres.py +864 -0
- horsies/core/cli.py +624 -0
- horsies/core/codec/serde.py +575 -0
- horsies/core/errors.py +535 -0
- horsies/core/logging.py +90 -0
- horsies/core/models/__init__.py +0 -0
- horsies/core/models/app.py +268 -0
- horsies/core/models/broker.py +79 -0
- horsies/core/models/queues.py +23 -0
- horsies/core/models/recovery.py +101 -0
- horsies/core/models/schedule.py +229 -0
- horsies/core/models/task_pg.py +307 -0
- horsies/core/models/tasks.py +332 -0
- horsies/core/models/workflow.py +1988 -0
- horsies/core/models/workflow_pg.py +245 -0
- horsies/core/registry/tasks.py +101 -0
- horsies/core/scheduler/__init__.py +26 -0
- horsies/core/scheduler/calculator.py +267 -0
- horsies/core/scheduler/service.py +569 -0
- horsies/core/scheduler/state.py +260 -0
- horsies/core/task_decorator.py +615 -0
- horsies/core/types/status.py +38 -0
- horsies/core/utils/imports.py +203 -0
- horsies/core/utils/loop_runner.py +44 -0
- horsies/core/worker/current.py +17 -0
- horsies/core/worker/worker.py +1967 -0
- horsies/core/workflows/__init__.py +23 -0
- horsies/core/workflows/engine.py +2344 -0
- horsies/core/workflows/recovery.py +501 -0
- horsies/core/workflows/registry.py +97 -0
- horsies/py.typed +0 -0
- horsies-0.1.0a1.dist-info/METADATA +31 -0
- horsies-0.1.0a1.dist-info/RECORD +42 -0
- horsies-0.1.0a1.dist-info/WHEEL +5 -0
- horsies-0.1.0a1.dist-info/entry_points.txt +2 -0
- horsies-0.1.0a1.dist-info/top_level.txt +1 -0
horsies/core/models/workflow_pg.py
@@ -0,0 +1,245 @@
+"""SQLAlchemy models for workflow persistence."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Optional
+
+from sqlalchemy import String, Text, Integer, DateTime, ForeignKey, UniqueConstraint
+from sqlalchemy.dialects.postgresql import ARRAY, JSONB
+from sqlalchemy.orm import Mapped, mapped_column
+
+from horsies.core.models.task_pg import Base
+
+
+class WorkflowModel(Base):
+    """
+    SQLAlchemy model for workflow instances.
+
+    Tracks the overall state of a workflow execution, including:
+    - Current status (PENDING, RUNNING, COMPLETED, FAILED, PAUSED, CANCELLED)
+    - Error handling policy (fail or pause on task error)
+    - Explicit output task (if specified)
+    - Final result and any errors
+    - Parent workflow relationship (for nested/subworkflows)
+    """
+
+    __tablename__ = 'horsies_workflows'
+
+    # Primary key
+    id: Mapped[str] = mapped_column(
+        String(36), primary_key=True
+    )  # UUID stored as string for consistency with tasks
+
+    # Workflow metadata
+    name: Mapped[str] = mapped_column(String(255), nullable=False)
+    status: Mapped[str] = mapped_column(
+        String(50), nullable=False, default='PENDING', index=True
+    )
+    on_error: Mapped[str] = mapped_column(String(50), nullable=False, default='fail')
+
+    # Output task configuration
+    output_task_index: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
+
+    # Success policy (serialized as JSONB with task indices)
+    # Format: {"cases": [{"required_indices": [0, 2]}], "optional_indices": [1]}
+    success_policy: Mapped[Optional[dict[str, list[int]]]] = mapped_column(
+        JSONB, nullable=True
+    )
+
+    # Results and errors
+    result: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+
+    # -------------------------------------------------------------------------
+    # Workflow definition identity (for import-based recovery/conditions)
+    # -------------------------------------------------------------------------
+
+    workflow_def_module: Mapped[Optional[str]] = mapped_column(
+        String(512), nullable=True
+    )
+    workflow_def_qualname: Mapped[Optional[str]] = mapped_column(
+        String(512), nullable=True
+    )
+
+    # -------------------------------------------------------------------------
+    # Subworkflow support: parent-child relationship
+    # -------------------------------------------------------------------------
+
+    # Parent workflow (if this is a subworkflow)
+    parent_workflow_id: Mapped[Optional[str]] = mapped_column(
+        String(36),
+        ForeignKey('horsies_workflows.id', ondelete='CASCADE'),
+        nullable=True,
+        index=True,
+    )
+
+    # Index of the SubWorkflowNode in the parent workflow
+    parent_task_index: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
+
+    # Nesting depth (0 = root, 1 = child, 2 = grandchild, etc.)
+    depth: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+
+    # Root workflow ID for efficient queries across nesting levels
+    root_workflow_id: Mapped[Optional[str]] = mapped_column(
+        String(36), nullable=True, index=True
+    )
+
+    # -------------------------------------------------------------------------
+    # Timestamps
+    # -------------------------------------------------------------------------
+
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc)
+    )
+    started_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+    completed_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        nullable=False,
+        default=lambda: datetime.now(timezone.utc),
+        onupdate=lambda: datetime.now(timezone.utc),
+        index=True,
+    )
+
+
+class WorkflowTaskModel(Base):
+    """
+    SQLAlchemy model for workflow task nodes.
+
+    Represents a single task within a workflow DAG, including:
+    - Task specification (name, args, kwargs, queue, priority)
+    - Dependencies (array of task indices this task waits for)
+    - Data flow configuration (args_from mapping, workflow_ctx_from)
+    - Execution state and result
+    - Link to actual task in tasks table once enqueued
+    """
+
+    __tablename__ = 'horsies_workflow_tasks'
+
+    # Primary key
+    id: Mapped[str] = mapped_column(String(36), primary_key=True)
+
+    # Workflow reference
+    workflow_id: Mapped[str] = mapped_column(
+        String(36),
+        ForeignKey('horsies_workflows.id', ondelete='CASCADE'),
+        nullable=False,
+        index=True,
+    )
+
+    # Position in workflow
+    task_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    node_id: Mapped[Optional[str]] = mapped_column(
+        String(128), nullable=True, index=True
+    )
+
+    # Task specification
+    task_name: Mapped[str] = mapped_column(String(255), nullable=False)
+    task_args: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    task_kwargs: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    queue_name: Mapped[str] = mapped_column(
+        String(100), nullable=False, default='default'
+    )
+    priority: Mapped[int] = mapped_column(Integer, nullable=False, default=100)
+
+    # DAG structure: indices of tasks this task waits for
+    dependencies: Mapped[list[int]] = mapped_column(
+        ARRAY(Integer), nullable=False, default=list
+    )
+
+    # Data flow: {"kwarg_name": task_index, ...}
+    args_from: Mapped[Optional[dict[str, int]]] = mapped_column(JSONB, nullable=True)
+
+    # Context injection: node_ids to include in WorkflowContext
+    workflow_ctx_from: Mapped[Optional[list[str]]] = mapped_column(
+        ARRAY(String), nullable=True
+    )
+
+    # If True, task runs even if dependencies failed (receives failed TaskResults)
+    allow_failed_deps: Mapped[bool] = mapped_column(default=False, nullable=False)
+
+    # Join semantics: "all" (default), "any", or "quorum"
+    join_type: Mapped[str] = mapped_column(String(10), nullable=False, default='all')
+
+    # For join_type="quorum": minimum number of dependencies that must succeed
+    min_success: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
+
+    # Task options (retry policy, auto_retry_for, etc.) - serialized JSON
+    task_options: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+
+    # Execution state
+    status: Mapped[str] = mapped_column(
+        String(50), nullable=False, default='PENDING', index=True
+    )
+
+    # Link to actual task once enqueued (for TaskNode only)
+    task_id: Mapped[Optional[str]] = mapped_column(
+        String(36), nullable=True, index=True
+    )
+
+    # -------------------------------------------------------------------------
+    # SubWorkflowNode support
+    # -------------------------------------------------------------------------
+
+    # True if this node is a SubWorkflowNode (not a TaskNode)
+    is_subworkflow: Mapped[bool] = mapped_column(default=False, nullable=False)
+
+    # Link to child workflow (for SubWorkflowNode)
+    sub_workflow_id: Mapped[Optional[str]] = mapped_column(
+        String(36),
+        ForeignKey('horsies_workflows.id', ondelete='SET NULL'),
+        nullable=True,
+        index=True,
+    )
+
+    # Child workflow definition name (for SubWorkflowNode)
+    sub_workflow_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
+
+    # Import path for subworkflow definition (fallback if registry not loaded)
+    sub_workflow_module: Mapped[Optional[str]] = mapped_column(
+        String(512), nullable=True
+    )
+    sub_workflow_qualname: Mapped[Optional[str]] = mapped_column(
+        String(512), nullable=True
+    )
+
+    # Retry mode for subworkflow (rerun_failed_only, rerun_all, no_rerun)
+    sub_workflow_retry_mode: Mapped[Optional[str]] = mapped_column(
+        String(50), nullable=True
+    )
+
+    # Summary of subworkflow execution (serialized SubWorkflowSummary)
+    sub_workflow_summary: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+
+    # -------------------------------------------------------------------------
+    # Results and errors
+    # -------------------------------------------------------------------------
+
+    result: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    error: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+
+    # Timestamps
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc)
+    )
+    started_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+    completed_at: Mapped[Optional[datetime]] = mapped_column(
+        DateTime(timezone=True), nullable=True
+    )
+
+    # Unique constraint: one task per index per workflow
+    __table_args__ = (
+        UniqueConstraint('workflow_id', 'task_index', name='uq_horsies_workflow_task_index'),
+    )
+
+
+# Note: The following index should be created via raw SQL for optimal performance:
+# CREATE INDEX IF NOT EXISTS idx_horsies_workflow_tasks_deps ON horsies_workflow_tasks USING GIN(dependencies);
+# This is handled in the broker's schema initialization.
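
To make the schema concrete, here is a hedged sketch of how a two-task DAG could be persisted with these models. The model and column names come from the file above; the engine URL, workflow/task names, kwarg name, and IDs are placeholder assumptions, and the GIN index is created by hand here only because the note above says the package does it during the broker's schema initialization, which this sketch does not use.

```python
# Illustrative sketch only -- not the package's actual persistence path.
import uuid

from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session

from horsies.core.models.task_pg import Base
from horsies.core.models.workflow_pg import WorkflowModel, WorkflowTaskModel

engine = create_engine('postgresql+psycopg://localhost/horsies')  # placeholder URL
Base.metadata.create_all(engine)

# The GIN index from the note above, applied manually for this sketch
with engine.begin() as conn:
    conn.execute(text(
        'CREATE INDEX IF NOT EXISTS idx_horsies_workflow_tasks_deps '
        'ON horsies_workflow_tasks USING GIN(dependencies)'
    ))

wf_id = str(uuid.uuid4())
with Session(engine) as session:
    session.add(WorkflowModel(id=wf_id, name='etl', status='PENDING'))
    # Node 0 has no dependencies; node 1 waits on it and receives its
    # result as the "extracted" kwarg via args_from.
    session.add(WorkflowTaskModel(
        id=str(uuid.uuid4()), workflow_id=wf_id, task_index=0,
        task_name='extract', dependencies=[],
    ))
    session.add(WorkflowTaskModel(
        id=str(uuid.uuid4()), workflow_id=wf_id, task_index=1,
        task_name='load', dependencies=[0], args_from={'extracted': 0},
    ))
    session.commit()
```
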
horsies/core/registry/tasks.py
@@ -0,0 +1,101 @@
+# horsies/core/registry/tasks.py
+from __future__ import annotations
+from typing import Dict, Iterator, MutableMapping, Generic, TypeVar
+from horsies.core.errors import RegistryError, ErrorCode
+
+T = TypeVar('T')
+
+
+class NotRegistered(RegistryError):
+    """Raised when a task name is not present in the registry."""
+
+    def __init__(self, task_name: str) -> None:
+        super().__init__(
+            message=f"task '{task_name}' not registered",
+            code=ErrorCode.TASK_NOT_REGISTERED,
+            notes=[f"requested task: '{task_name}'"],
+            help_text='ensure the task is defined with @app.task() before use\nor make sure the task is discovered by the app',
+        )
+        self.task_name = task_name
+
+
+class DuplicateTaskNameError(RegistryError):
+    """Raised when a task name is registered more than once within the same app."""
+
+    def __init__(self, task_name: str, context: str = '') -> None:
+        super().__init__(
+            message=f"duplicate task name '{task_name}'",
+            code=ErrorCode.TASK_DUPLICATE_NAME,
+            notes=[context] if context else [],
+            help_text='each task name must be unique within a horsies instance',
+        )
+        self.task_name = task_name
+
+
+class TaskRegistry(MutableMapping[str, T], Generic[T]):
+    """Registry mapping task name -> task object.
+
+    Tracks source locations to detect duplicate registrations:
+    - Same name + same source: silently skip (re-import scenario)
+    - Same name + different source: raise DuplicateTaskNameError
+    """
+
+    def __init__(self, initial: Dict[str, T] | None = None) -> None:
+        self._data: Dict[str, T] = dict(initial or {})
+        self._sources: Dict[str, str] = {}  # task_name -> "file:lineno"
+
+    def __getitem__(self, key: str) -> T:
+        try:
+            return self._data[key]
+        except KeyError:
+            raise NotRegistered(key)
+
+    def __setitem__(self, key: str, value: T) -> None:
+        """Discourage direct assignment; enforce uniqueness like register()."""
+        if key in self._data:
+            raise DuplicateTaskNameError(key, 'detected via direct assignment')
+        self._data[key] = value
+
+    def __delitem__(self, key: str) -> None:
+        del self._data[key]
+        self._sources.pop(key, None)
+
+    def __iter__(self) -> Iterator[str]:
+        return iter(self._data)
+
+    def __len__(self) -> int:
+        return len(self._data)
+
+    # --- convenience ---
+    def register(self, task: T, *, name: str, source: str | None = None) -> T:
+        """Insert a task under `name`, enforcing uniqueness per app.
+
+        Args:
+            task: The task object to register.
+            name: The unique name for the task.
+            source: Optional source location string (e.g., "file.py:42").
+                Used to detect re-imports vs. true duplicates.
+
+        Returns:
+            The registered task (existing if re-import, new otherwise).
+
+        Raises:
+            DuplicateTaskNameError: If same name registered from different source.
+        """
+        if name in self._data:
+            existing_source = self._sources.get(name)
+            if existing_source and source and existing_source == source:
+                # Same source location - this is a re-import, skip silently
+                return self._data[name]
+            raise DuplicateTaskNameError(name, 'task with this name already exists')
+        self._data[name] = task
+        if source:
+            self._sources[name] = source
+        return task
+
+    def unregister(self, name: str) -> None:
+        self._data.pop(name, None)
+        self._sources.pop(name, None)
+
+    def keys_list(self) -> list[str]:
+        return list(self._data.keys())
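
The registry's contract is easiest to see in use. A minimal sketch, using plain strings as stand-in task objects (TaskRegistry is generic, so any T works); everything called here is defined in the file above.

```python
from horsies.core.registry.tasks import (
    TaskRegistry, DuplicateTaskNameError, NotRegistered,
)

registry: TaskRegistry[str] = TaskRegistry()

# First registration records the task and its source location.
registry.register('email-task', name='send_email', source='tasks/email.py:12')

# Same name, same source: treated as a re-import and skipped silently.
assert registry.register(
    'email-task', name='send_email', source='tasks/email.py:12'
) == 'email-task'

# Same name, different source: a true duplicate.
try:
    registry.register('other-task', name='send_email', source='tasks/other.py:3')
except DuplicateTaskNameError as exc:
    print(exc.task_name)  # -> send_email

# Unknown lookups raise NotRegistered (a RegistryError), not a bare KeyError.
try:
    registry['missing']
except NotRegistered as exc:
    print(exc.task_name)  # -> missing
```
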
horsies/core/scheduler/__init__.py
@@ -0,0 +1,26 @@
+# horsies/core/scheduler/__init__.py
+"""
+Scheduler module for executing scheduled tasks.
+
+Main components:
+- Scheduler: Main service for running scheduled tasks
+- ScheduleStateManager: Database state management
+- calculate_next_run: Next run time calculation
+
+Example usage:
+    from horsies.core.scheduler import Scheduler
+
+    scheduler = Scheduler(app)
+    await scheduler.run_forever()
+"""
+
+from horsies.core.scheduler.service import Scheduler
+from horsies.core.scheduler.state import ScheduleStateManager
+from horsies.core.scheduler.calculator import calculate_next_run, should_run_now
+
+__all__ = [
+    'Scheduler',
+    'ScheduleStateManager',
+    'calculate_next_run',
+    'should_run_now',
+]
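
Expanding the docstring's example into a runnable shape: run_forever() is a coroutine, so a dedicated scheduler process would drive it with an event loop. Only Scheduler(app) and run_forever() come from this module; how the app object is constructed is not shown in this diff, so it is left as a parameter here.

```python
import asyncio

from horsies.core.scheduler import Scheduler

async def run_scheduler(app) -> None:
    # `app` is the configured horsies application (construction not shown
    # in this diff). run_forever() blocks this coroutine for the process lifetime.
    scheduler = Scheduler(app)
    await scheduler.run_forever()

# e.g. asyncio.run(run_scheduler(app)) in a dedicated scheduler process
```
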
horsies/core/scheduler/calculator.py
@@ -0,0 +1,267 @@
+# horsies/core/scheduler/calculator.py
+from __future__ import annotations
+from datetime import datetime, timedelta, timezone
+from zoneinfo import ZoneInfo
+from typing import Optional
+from horsies.core.models.schedule import (
+    SchedulePattern,
+    IntervalSchedule,
+    HourlySchedule,
+    DailySchedule,
+    WeeklySchedule,
+    MonthlySchedule,
+    Weekday,
+)
+
+
+def calculate_next_run(
+    pattern: SchedulePattern, from_time: datetime, tz_str: str = 'UTC'
+) -> datetime:
+    """
+    Calculate the next run time for a schedule pattern.
+
+    Args:
+        pattern: Schedule pattern (interval, hourly, daily, weekly, monthly)
+        from_time: Calculate next run after this time (must be timezone-aware)
+        tz_str: Timezone for schedule evaluation (e.g., "UTC", "America/New_York")
+
+    Returns:
+        Next run time as UTC-aware datetime
+
+    Raises:
+        ValueError: If timezone is invalid or pattern type is unknown
+    """
+    # Ensure from_time is timezone-aware
+    if from_time.tzinfo is None:
+        raise ValueError('from_time must be timezone-aware')
+
+    # Validate timezone string
+    try:
+        tz = ZoneInfo(tz_str)
+    except Exception as e:
+        raise ValueError(f"Invalid timezone '{tz_str}': {e}")
+
+    # Convert from_time to target timezone for schedule calculations
+    local_time = from_time.astimezone(tz)
+
+    # Calculate next run based on pattern type (exhaustive match-case)
+    match pattern:
+        case IntervalSchedule():
+            next_run = _calculate_interval(pattern, from_time)
+        case HourlySchedule():
+            next_run = _calculate_hourly(pattern, local_time, tz)
+        case DailySchedule():
+            next_run = _calculate_daily(pattern, local_time, tz)
+        case WeeklySchedule():
+            next_run = _calculate_weekly(pattern, local_time, tz)
+        case MonthlySchedule():
+            next_run = _calculate_monthly(pattern, local_time, tz)
+
+    # Ensure result is UTC-aware
+    if next_run.tzinfo is None:
+        raise RuntimeError('Calculated next_run is not timezone-aware')
+
+    return next_run.astimezone(timezone.utc)
+
+
+def _calculate_interval(pattern: IntervalSchedule, from_time: datetime) -> datetime:
+    """Calculate next run for interval-based schedule."""
+    total_seconds = pattern.total_seconds()
+    next_run = from_time + timedelta(seconds=total_seconds)
+    return next_run
+
+
+def _calculate_hourly(
+    pattern: HourlySchedule, local_time: datetime, tz: ZoneInfo
+) -> datetime:
+    """Calculate next run for hourly schedule."""
+    # Start with current hour at the target minute/second
+    candidate = local_time.replace(
+        minute=pattern.minute, second=pattern.second, microsecond=0
+    )
+
+    # If we've already passed this time in the current hour, move to next hour
+    if candidate <= local_time:
+        candidate = candidate + timedelta(hours=1)
+
+    return candidate
+
+
+def _calculate_daily(
+    pattern: DailySchedule, local_time: datetime, tz: ZoneInfo
+) -> datetime:
+    """Calculate next run for daily schedule, tolerating DST transitions."""
+    for day_offset in (0, 1, 2):  # try today, tomorrow, day after (in case of DST gaps)
+        candidate_date = local_time + timedelta(days=day_offset)
+        try:
+            candidate = candidate_date.replace(
+                hour=pattern.time.hour,
+                minute=pattern.time.minute,
+                second=pattern.time.second,
+                microsecond=0,
+            )
+        except Exception:
+            continue  # invalid local time (e.g., DST gap), try next day
+        if candidate <= local_time:
+            continue
+        return candidate
+    # Fallback: roll forward one more day if all attempts failed
+    return (local_time + timedelta(days=1)).replace(
+        hour=pattern.time.hour,
+        minute=pattern.time.minute,
+        second=pattern.time.second,
+        microsecond=0,
+    )
+
+
+def _calculate_weekly(
+    pattern: WeeklySchedule, local_time: datetime, tz: ZoneInfo
+) -> datetime:
+    """Calculate next run for weekly schedule, tolerating DST transitions."""
+    # Map Weekday enum to Python weekday() values (0=Monday, 6=Sunday)
+    weekday_map = {
+        Weekday.MONDAY: 0,
+        Weekday.TUESDAY: 1,
+        Weekday.WEDNESDAY: 2,
+        Weekday.THURSDAY: 3,
+        Weekday.FRIDAY: 4,
+        Weekday.SATURDAY: 5,
+        Weekday.SUNDAY: 6,
+    }
+
+    target_weekdays = sorted([weekday_map[d] for d in pattern.days])
+    current_weekday = local_time.weekday()
+
+    # Start with today at the target time
+    try:
+        candidate = local_time.replace(
+            hour=pattern.time.hour,
+            minute=pattern.time.minute,
+            second=pattern.time.second,
+            microsecond=0,
+        )
+    except Exception:
+        candidate = local_time
+
+    # Find next matching weekday
+    if current_weekday in target_weekdays and candidate > local_time:
+        # Today matches and time hasn't passed yet
+        return candidate
+
+    # Find next target weekday
+    days_ahead = None
+    for target_day in target_weekdays:
+        if target_day > current_weekday:
+            days_ahead = target_day - current_weekday
+            break
+
+    # If no future day this week, wrap to first day next week
+    if days_ahead is None:
+        days_ahead = (7 - current_weekday) + target_weekdays[0]
+
+    candidate = candidate + timedelta(days=days_ahead)
+
+    # If the target time is invalid/ambiguous (DST), retry on the computed day by rebuilding datetime
+    for _ in range(2):
+        try:
+            adjusted = candidate.replace(
+                hour=pattern.time.hour,
+                minute=pattern.time.minute,
+                second=pattern.time.second,
+                microsecond=0,
+            )
+            return adjusted
+        except Exception:
+            candidate = candidate + timedelta(days=1)
+    return candidate
+
+
+def _calculate_monthly(
+    pattern: MonthlySchedule, local_time: datetime, tz: ZoneInfo
+) -> datetime:
+    """Calculate next run for monthly schedule, tolerating missing days and DST."""
+    # Start with current month at the target day and time
+    try:
+        candidate = local_time.replace(
+            day=pattern.day,
+            hour=pattern.time.hour,
+            minute=pattern.time.minute,
+            second=pattern.time.second,
+            microsecond=0,
+        )
+    except ValueError:
+        # Day doesn't exist in current month (e.g., day=31 in February)
+        # Skip to next month
+        candidate = _next_valid_monthly_date(local_time, pattern, tz)
+        return candidate
+    except Exception:
+        # DST-related invalid time: rebuild on the same date ignoring current time component
+        candidate = _next_valid_monthly_date(
+            local_time - timedelta(days=1), pattern, tz
+        )
+        return candidate
+
+    # If we've already passed this time this month, move to next month
+    if candidate <= local_time:
+        candidate = _next_valid_monthly_date(local_time, pattern, tz)
+
+    return candidate
+
+
+def _next_valid_monthly_date(
+    local_time: datetime, pattern: MonthlySchedule, tz: ZoneInfo
+) -> datetime:
+    """Find next valid monthly date, skipping months where day doesn't exist."""
+    # Start with next month
+    if local_time.month == 12:
+        candidate_year = local_time.year + 1
+        candidate_month = 1
+    else:
+        candidate_year = local_time.year
+        candidate_month = local_time.month + 1
+
+    # Try up to 12 months ahead to find valid date
+    for _ in range(12):
+        try:
+            candidate = datetime(
+                year=candidate_year,
+                month=candidate_month,
+                day=pattern.day,
+                hour=pattern.time.hour,
+                minute=pattern.time.minute,
+                second=pattern.time.second,
+                microsecond=0,
+                tzinfo=tz,
+            )
+            return candidate
+        except ValueError:
+            # Day doesn't exist in this month, try next month
+            if candidate_month == 12:
+                candidate_year += 1
+                candidate_month = 1
+            else:
+                candidate_month += 1
+
+    # Should never reach here unless pattern.day is invalid (>31)
+    raise ValueError(
+        f'Could not find valid date for day={pattern.day} within 12 months'
+    )
+
+
+def should_run_now(next_run_at: Optional[datetime], check_time: datetime) -> bool:
+    """
+    Determine if a schedule should run at the current check time.
+
+    Args:
+        next_run_at: Scheduled next run time (UTC-aware)
+        check_time: Current time to check against (UTC-aware)
+
+    Returns:
+        True if schedule should run now
+    """
+    if next_run_at is None:
+        # First run - should execute
+        return True
+
+    # Run if next_run_at is at or before current time
+    return next_run_at <= check_time
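
A short sketch of the calculator's intended call pattern. calculate_next_run and should_run_now are used exactly as defined above; the IntervalSchedule(minutes=5) constructor is an assumption for illustration (this file only shows that the pattern exposes total_seconds()).

```python
from datetime import datetime, timezone

from horsies.core.models.schedule import IntervalSchedule
from horsies.core.scheduler.calculator import calculate_next_run, should_run_now

now = datetime.now(timezone.utc)  # from_time must be timezone-aware

pattern = IntervalSchedule(minutes=5)  # assumed field name, for illustration

next_run = calculate_next_run(pattern, from_time=now)  # tz_str defaults to 'UTC'
assert next_run.tzinfo is timezone.utc  # results are always returned in UTC

print(should_run_now(None, now))       # True: no prior run recorded, fire now
print(should_run_now(next_run, now))   # False: next run is 5 minutes away
```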