smartify-ai 0.1.0__py3-none-any.whl
- smartify/__init__.py +3 -0
- smartify/agents/__init__.py +0 -0
- smartify/agents/adapters/__init__.py +13 -0
- smartify/agents/adapters/anthropic.py +253 -0
- smartify/agents/adapters/openai.py +289 -0
- smartify/api/__init__.py +26 -0
- smartify/api/auth.py +352 -0
- smartify/api/errors.py +380 -0
- smartify/api/events.py +345 -0
- smartify/api/server.py +992 -0
- smartify/cli/__init__.py +1 -0
- smartify/cli/main.py +430 -0
- smartify/engine/__init__.py +64 -0
- smartify/engine/approval.py +479 -0
- smartify/engine/orchestrator.py +1365 -0
- smartify/engine/scheduler.py +380 -0
- smartify/engine/spark.py +294 -0
- smartify/guardrails/__init__.py +22 -0
- smartify/guardrails/breakers.py +409 -0
- smartify/models/__init__.py +61 -0
- smartify/models/grid.py +625 -0
- smartify/notifications/__init__.py +22 -0
- smartify/notifications/webhook.py +556 -0
- smartify/state/__init__.py +46 -0
- smartify/state/checkpoint.py +558 -0
- smartify/state/resume.py +301 -0
- smartify/state/store.py +370 -0
- smartify/tools/__init__.py +17 -0
- smartify/tools/base.py +196 -0
- smartify/tools/builtin/__init__.py +79 -0
- smartify/tools/builtin/file.py +464 -0
- smartify/tools/builtin/http.py +195 -0
- smartify/tools/builtin/shell.py +137 -0
- smartify/tools/mcp/__init__.py +33 -0
- smartify/tools/mcp/adapter.py +157 -0
- smartify/tools/mcp/client.py +334 -0
- smartify/tools/mcp/registry.py +130 -0
- smartify/validator/__init__.py +0 -0
- smartify/validator/validate.py +271 -0
- smartify/workspace/__init__.py +5 -0
- smartify/workspace/manager.py +248 -0
- smartify_ai-0.1.0.dist-info/METADATA +201 -0
- smartify_ai-0.1.0.dist-info/RECORD +46 -0
- smartify_ai-0.1.0.dist-info/WHEEL +4 -0
- smartify_ai-0.1.0.dist-info/entry_points.txt +2 -0
- smartify_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
smartify/state/resume.py
ADDED
@@ -0,0 +1,301 @@
"""Resume manager for recovering incomplete grid runs.

Handles:
1. Scanning for incomplete runs on startup
2. Reconstructing GridRun from checkpoint
3. Resuming execution from last checkpoint
4. Background worker for webhook retries
"""

import asyncio
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional, TYPE_CHECKING

import yaml
import httpx

from smartify.state.checkpoint import (
    CheckpointStore,
    Checkpoint,
    CheckpointStatus,
    WebhookRetryJob,
    WebhookDeliveryStatus,
    get_checkpoint_store,
)

if TYPE_CHECKING:
    from smartify.engine.orchestrator import Orchestrator, GridRun

logger = logging.getLogger(__name__)


class ResumeManager:
    """Manages run recovery and webhook retry.

    Usage:
        manager = ResumeManager(orchestrator)

        # On startup, resume any incomplete runs
        await manager.recover_incomplete_runs()

        # Start background webhook retry worker
        await manager.start_webhook_worker()
    """

    def __init__(
        self,
        orchestrator: "Orchestrator",
        checkpoint_store: Optional[CheckpointStore] = None,
        webhook_retry_interval: float = 30.0,
        max_resume_attempts: int = 3,
    ):
        self.orchestrator = orchestrator
        self.store = checkpoint_store or get_checkpoint_store()
        self.webhook_retry_interval = webhook_retry_interval
        self.max_resume_attempts = max_resume_attempts

        self._webhook_worker_task: Optional[asyncio.Task] = None
        self._shutdown = False

    async def recover_incomplete_runs(self) -> List[str]:
        """Scan for and resume incomplete runs.

        Returns list of run IDs that were resumed.
        """
        resumable = self.store.get_resumable_runs()

        if not resumable:
            logger.info("No incomplete runs to resume")
            return []

        logger.info(f"Found {len(resumable)} incomplete run(s) to resume")

        resumed = []
        for checkpoint in resumable:
            try:
                if checkpoint.resume_count >= self.max_resume_attempts:
                    logger.warning(
                        f"Run {checkpoint.run_id} exceeded max resume attempts "
                        f"({checkpoint.resume_count}), marking as failed"
                    )
                    self.store.mark_failed(
                        checkpoint.run_id,
                        f"Exceeded max resume attempts ({self.max_resume_attempts})"
                    )
                    continue

                await self._resume_run(checkpoint)
                resumed.append(checkpoint.run_id)

            except Exception as e:
                logger.error(f"Failed to resume run {checkpoint.run_id}: {e}")
                self.store.mark_failed(checkpoint.run_id, str(e))

        return resumed

    async def _resume_run(self, checkpoint: Checkpoint) -> None:
        """Resume a single run from checkpoint."""
        logger.info(
            f"Resuming run {checkpoint.run_id} "
            f"(attempt {checkpoint.resume_count + 1}, "
            f"{len(checkpoint.completed_nodes)} nodes completed)"
        )

        # Increment resume count
        self.store.increment_resume_count(checkpoint.run_id)

        # Parse grid YAML
        grid_dict = yaml.safe_load(checkpoint.grid_yaml)

        # Load grid through orchestrator
        run = await self.orchestrator.load_grid(
            source=grid_dict,
            inputs=checkpoint.inputs,
        )

        # Restore context state
        run.context.outputs = checkpoint.outputs
        run.context.total_tokens = checkpoint.total_tokens
        run.context.total_cost = checkpoint.total_cost

        # Mark completed nodes in scheduler
        for node_id in checkpoint.completed_nodes:
            if node_id in run.scheduler.nodes:
                output = checkpoint.outputs.get(node_id, {})
                run.scheduler.mark_completed(node_id, output)

        # Mark failed nodes
        for node_id in checkpoint.failed_nodes:
            if node_id in run.scheduler.nodes:
                run.scheduler.mark_failed(node_id, "Failed in previous run")

        # Energize and continue execution
        await self.orchestrator.energize(run)

        logger.info(f"Resuming execution for run {checkpoint.run_id}")

        # Execute (this will continue from where we left off)
        try:
            await self.orchestrator.execute(run)
            self.store.mark_completed(checkpoint.run_id)
        except Exception as e:
            self.store.mark_failed(checkpoint.run_id, str(e))
            raise

    async def start_webhook_worker(self) -> None:
        """Start the background webhook retry worker."""
        if self._webhook_worker_task is not None:
            logger.warning("Webhook worker already running")
            return

        self._shutdown = False
        self._webhook_worker_task = asyncio.create_task(self._webhook_worker_loop())
        logger.info("Webhook retry worker started")

    async def stop_webhook_worker(self) -> None:
        """Stop the webhook retry worker."""
        self._shutdown = True

        if self._webhook_worker_task:
            self._webhook_worker_task.cancel()
            try:
                await self._webhook_worker_task
            except asyncio.CancelledError:
                pass
            self._webhook_worker_task = None

        logger.info("Webhook retry worker stopped")

    async def _webhook_worker_loop(self) -> None:
        """Background loop for retrying failed webhooks."""
        while not self._shutdown:
            try:
                await self._process_webhook_retries()
            except Exception as e:
                logger.error(f"Webhook worker error: {e}")

            await asyncio.sleep(self.webhook_retry_interval)

    async def _process_webhook_retries(self) -> int:
        """Process pending webhook retries. Returns count processed."""
        jobs = self.store.get_pending_webhook_jobs(limit=50)

        if not jobs:
            return 0

        logger.debug(f"Processing {len(jobs)} webhook retry job(s)")

        processed = 0
        for job in jobs:
            try:
                success = await self._deliver_webhook(job)

                if success:
                    self.store.mark_webhook_delivered(job.job_id)
                    logger.info(f"Webhook job {job.job_id} delivered successfully")
                else:
                    self.store.mark_webhook_failed(
                        job.job_id,
                        "Delivery failed",
                        retry_delay_seconds=60.0,
                    )

                processed += 1

            except Exception as e:
                logger.error(f"Error processing webhook job {job.job_id}: {e}")
                self.store.mark_webhook_failed(job.job_id, str(e))

        return processed

    async def _deliver_webhook(self, job: WebhookRetryJob) -> bool:
        """Attempt to deliver a webhook."""
        import hashlib
        import hmac
        import json

        body = json.dumps(job.payload)

        headers = {
            "Content-Type": "application/json",
            "User-Agent": "Smartify-Webhook/1.0",
            "X-Smartify-Event": job.event_type,
            "X-Smartify-Retry-Attempt": str(job.attempts + 1),
            **job.headers,
        }

        # Add signature if secret configured
        if job.secret:
            signature = hmac.new(
                job.secret.encode('utf-8'),
                body.encode('utf-8'),
                hashlib.sha256,
            ).hexdigest()
            headers["X-Smartify-Signature"] = f"sha256={signature}"

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    job.webhook_url,
                    content=body,
                    headers=headers,
                    timeout=30.0,
                )

            return 200 <= response.status_code < 300

        except Exception as e:
            logger.warning(f"Webhook delivery failed: {e}")
            return False

    def queue_failed_webhook(
        self,
        event_type: str,
        grid_id: str,
        webhook_url: str,
        payload: Dict[str, Any],
        headers: Dict[str, str],
        secret: Optional[str] = None,
        max_attempts: int = 3,
    ) -> None:
        """Queue a failed webhook for retry."""
        self.store.queue_webhook_retry(
            event_type=event_type,
            grid_id=grid_id,
            webhook_url=webhook_url,
            payload=payload,
            headers=headers,
            secret=secret,
            max_attempts=max_attempts,
        )

    def get_queue_stats(self) -> Dict[str, int]:
        """Get webhook queue statistics."""
        return self.store.get_queue_stats()

    async def cleanup(self, days: int = 7) -> Dict[str, int]:
        """Clean up old data."""
        webhook_deleted = self.store.cleanup_old_jobs(days=days)

        return {
            "webhook_jobs_deleted": webhook_deleted,
        }


# Integration helpers

def create_checkpointed_orchestrator(
    db_path: str = "smartify_state.db",
) -> tuple["Orchestrator", "ResumeManager", CheckpointStore]:
    """Create an orchestrator with checkpoint support.

    Returns (orchestrator, resume_manager, checkpoint_store) tuple.
    """
    from smartify.engine.orchestrator import Orchestrator

    store = CheckpointStore(db_path)
    orchestrator = Orchestrator()
    manager = ResumeManager(orchestrator, store)

    return orchestrator, manager, store
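Two usage notes on the module above. First, the ResumeManager docstring and the create_checkpointed_orchestrator helper suggest a service startup sequence along the lines of this sketch; the main() wrapper and the serve-requests placeholder are illustrative, not part of the package:

import asyncio

from smartify.state.resume import create_checkpointed_orchestrator


async def main() -> None:
    orchestrator, manager, store = create_checkpointed_orchestrator("smartify_state.db")

    # Pick up any runs interrupted by a previous crash or restart,
    # then start retrying failed webhook deliveries in the background.
    resumed = await manager.recover_incomplete_runs()
    await manager.start_webhook_worker()
    try:
        ...  # serve requests / execute new grids here
    finally:
        await manager.stop_webhook_worker()


asyncio.run(main())

Second, _deliver_webhook signs the JSON body with HMAC-SHA256 whenever a secret is configured and sends the hex digest as "X-Smartify-Signature: sha256=<digest>", so a receiving endpoint can authenticate payloads. A minimal verification sketch (the function name is hypothetical; the header format and algorithm come from the code above):

import hashlib
import hmac


def verify_smartify_signature(secret: str, body: bytes, header_value: str) -> bool:
    # _deliver_webhook signs the exact request body: HMAC-SHA256(secret, body),
    # formatted as "sha256=<hexdigest>". Compare in constant time.
    expected = hmac.new(secret.encode("utf-8"), body, hashlib.sha256).hexdigest()
    return hmac.compare_digest(f"sha256={expected}", header_value)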
smartify/state/store.py
ADDED
@@ -0,0 +1,370 @@
"""State persistence for Smartify runs.

Provides storage backends for:
- Run state (grid + execution state)
- Node outputs
- Event logs
"""

import json
import logging
import sqlite3
from abc import ABC, abstractmethod
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from enum import Enum

logger = logging.getLogger(__name__)


class RunStatus(str, Enum):
    """Status of a grid run."""
    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    STOPPED = "stopped"


@dataclass
class RunRecord:
    """Record of a grid run."""
    run_id: str
    grid_id: str
    grid_name: str
    status: RunStatus
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    total_tokens: int = 0
    total_cost: float = 0.0
    error: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        d = asdict(self)
        d['status'] = self.status.value
        d['created_at'] = self.created_at.isoformat() if self.created_at else None
        d['started_at'] = self.started_at.isoformat() if self.started_at else None
        d['completed_at'] = self.completed_at.isoformat() if self.completed_at else None
        return d

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "RunRecord":
        return cls(
            run_id=d['run_id'],
            grid_id=d['grid_id'],
            grid_name=d['grid_name'],
            status=RunStatus(d['status']),
            created_at=datetime.fromisoformat(d['created_at']) if d['created_at'] else datetime.now(),
            started_at=datetime.fromisoformat(d['started_at']) if d.get('started_at') else None,
            completed_at=datetime.fromisoformat(d['completed_at']) if d.get('completed_at') else None,
            total_tokens=d.get('total_tokens', 0),
            total_cost=d.get('total_cost', 0.0),
            error=d.get('error'),
            metadata=d.get('metadata'),
        )


@dataclass
class NodeOutput:
    """Output from a node execution."""
    run_id: str
    node_id: str
    success: bool
    output: Optional[Dict[str, Any]]
    error: Optional[str]
    started_at: datetime
    completed_at: datetime
    tokens_used: int = 0


class StateStore(ABC):
    """Abstract base class for state storage."""

    @abstractmethod
    def save_run(self, run: RunRecord) -> None:
        """Save or update a run record."""
        pass

    @abstractmethod
    def get_run(self, run_id: str) -> Optional[RunRecord]:
        """Get a run by ID."""
        pass

    @abstractmethod
    def list_runs(
        self,
        status: Optional[RunStatus] = None,
        grid_id: Optional[str] = None,
        limit: int = 100,
    ) -> List[RunRecord]:
        """List runs with optional filters."""
        pass

    @abstractmethod
    def delete_run(self, run_id: str) -> bool:
        """Delete a run and its data."""
        pass

    @abstractmethod
    def save_node_output(self, output: NodeOutput) -> None:
        """Save node output."""
        pass

    @abstractmethod
    def get_node_outputs(self, run_id: str) -> List[NodeOutput]:
        """Get all node outputs for a run."""
        pass


class InMemoryStore(StateStore):
    """In-memory state store for testing and development."""

    def __init__(self):
        self._runs: Dict[str, RunRecord] = {}
        self._outputs: Dict[str, List[NodeOutput]] = {}

    def save_run(self, run: RunRecord) -> None:
        self._runs[run.run_id] = run

    def get_run(self, run_id: str) -> Optional[RunRecord]:
        return self._runs.get(run_id)

    def list_runs(
        self,
        status: Optional[RunStatus] = None,
        grid_id: Optional[str] = None,
        limit: int = 100,
    ) -> List[RunRecord]:
        runs = list(self._runs.values())

        if status:
            runs = [r for r in runs if r.status == status]
        if grid_id:
            runs = [r for r in runs if r.grid_id == grid_id]

        # Sort by created_at descending
        runs.sort(key=lambda r: r.created_at, reverse=True)

        return runs[:limit]

    def delete_run(self, run_id: str) -> bool:
        if run_id in self._runs:
            del self._runs[run_id]
            self._outputs.pop(run_id, None)
            return True
        return False

    def save_node_output(self, output: NodeOutput) -> None:
        if output.run_id not in self._outputs:
            self._outputs[output.run_id] = []
        self._outputs[output.run_id].append(output)

    def get_node_outputs(self, run_id: str) -> List[NodeOutput]:
        return self._outputs.get(run_id, [])


class SQLiteStore(StateStore):
    """SQLite-based persistent state store."""

    def __init__(self, db_path: str = "smartify_runs.db"):
        self.db_path = db_path
        self._init_db()

    def _init_db(self) -> None:
        """Initialize database schema."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS runs (
                    run_id TEXT PRIMARY KEY,
                    grid_id TEXT NOT NULL,
                    grid_name TEXT NOT NULL,
                    status TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    started_at TEXT,
                    completed_at TEXT,
                    total_tokens INTEGER DEFAULT 0,
                    total_cost REAL DEFAULT 0.0,
                    error TEXT,
                    metadata TEXT
                )
            """)

            conn.execute("""
                CREATE TABLE IF NOT EXISTS node_outputs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    run_id TEXT NOT NULL,
                    node_id TEXT NOT NULL,
                    success INTEGER NOT NULL,
                    output TEXT,
                    error TEXT,
                    started_at TEXT NOT NULL,
                    completed_at TEXT NOT NULL,
                    tokens_used INTEGER DEFAULT 0,
                    FOREIGN KEY (run_id) REFERENCES runs(run_id)
                )
            """)

            conn.execute("CREATE INDEX IF NOT EXISTS idx_runs_status ON runs(status)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_runs_grid ON runs(grid_id)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_outputs_run ON node_outputs(run_id)")

            conn.commit()

    def save_run(self, run: RunRecord) -> None:
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO runs
                (run_id, grid_id, grid_name, status, created_at, started_at,
                 completed_at, total_tokens, total_cost, error, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                run.run_id,
                run.grid_id,
                run.grid_name,
                run.status.value,
                run.created_at.isoformat(),
                run.started_at.isoformat() if run.started_at else None,
                run.completed_at.isoformat() if run.completed_at else None,
                run.total_tokens,
                run.total_cost,
                run.error,
                json.dumps(run.metadata) if run.metadata else None,
            ))
            conn.commit()

    def get_run(self, run_id: str) -> Optional[RunRecord]:
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                "SELECT * FROM runs WHERE run_id = ?", (run_id,)
            )
            row = cursor.fetchone()

            if not row:
                return None

            return RunRecord(
                run_id=row['run_id'],
                grid_id=row['grid_id'],
                grid_name=row['grid_name'],
                status=RunStatus(row['status']),
                created_at=datetime.fromisoformat(row['created_at']),
                started_at=datetime.fromisoformat(row['started_at']) if row['started_at'] else None,
                completed_at=datetime.fromisoformat(row['completed_at']) if row['completed_at'] else None,
                total_tokens=row['total_tokens'],
                total_cost=row['total_cost'],
                error=row['error'],
                metadata=json.loads(row['metadata']) if row['metadata'] else None,
            )

    def list_runs(
        self,
        status: Optional[RunStatus] = None,
        grid_id: Optional[str] = None,
        limit: int = 100,
    ) -> List[RunRecord]:
        query = "SELECT * FROM runs WHERE 1=1"
        params = []

        if status:
            query += " AND status = ?"
            params.append(status.value)
        if grid_id:
            query += " AND grid_id = ?"
            params.append(grid_id)

        query += " ORDER BY created_at DESC LIMIT ?"
        params.append(limit)

        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(query, params)

            runs = []
            for row in cursor:
                runs.append(RunRecord(
                    run_id=row['run_id'],
                    grid_id=row['grid_id'],
                    grid_name=row['grid_name'],
                    status=RunStatus(row['status']),
                    created_at=datetime.fromisoformat(row['created_at']),
                    started_at=datetime.fromisoformat(row['started_at']) if row['started_at'] else None,
                    completed_at=datetime.fromisoformat(row['completed_at']) if row['completed_at'] else None,
                    total_tokens=row['total_tokens'],
                    total_cost=row['total_cost'],
                    error=row['error'],
                    metadata=json.loads(row['metadata']) if row['metadata'] else None,
                ))

            return runs

    def delete_run(self, run_id: str) -> bool:
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.execute("DELETE FROM runs WHERE run_id = ?", (run_id,))
            conn.execute("DELETE FROM node_outputs WHERE run_id = ?", (run_id,))
            conn.commit()
            return cursor.rowcount > 0

    def save_node_output(self, output: NodeOutput) -> None:
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT INTO node_outputs
                (run_id, node_id, success, output, error, started_at, completed_at, tokens_used)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                output.run_id,
                output.node_id,
                1 if output.success else 0,
                json.dumps(output.output) if output.output else None,
                output.error,
                output.started_at.isoformat(),
                output.completed_at.isoformat(),
                output.tokens_used,
            ))
            conn.commit()

    def get_node_outputs(self, run_id: str) -> List[NodeOutput]:
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                "SELECT * FROM node_outputs WHERE run_id = ? ORDER BY started_at",
                (run_id,)
            )

            outputs = []
            for row in cursor:
                outputs.append(NodeOutput(
                    run_id=row['run_id'],
                    node_id=row['node_id'],
                    success=bool(row['success']),
                    output=json.loads(row['output']) if row['output'] else None,
                    error=row['error'],
                    started_at=datetime.fromisoformat(row['started_at']),
                    completed_at=datetime.fromisoformat(row['completed_at']),
                    tokens_used=row['tokens_used'],
                ))

            return outputs


# Default store factory
_default_store: Optional[StateStore] = None


def get_default_store() -> StateStore:
    """Get or create the default state store."""
    global _default_store
    if _default_store is None:
        _default_store = InMemoryStore()
    return _default_store


def set_default_store(store: StateStore) -> None:
    """Set the default state store."""
    global _default_store
    _default_store = store
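Note the serialization choices above: datetimes are stored as ISO-8601 TEXT, statuses as the enum's string value, and metadata/output as JSON TEXT, so rows stay readable from a plain sqlite3 shell. A minimal sketch of recording a run with the SQLite backend follows; the IDs are made up, and whether the orchestrator drives these calls itself is not shown in this file:

from datetime import datetime

from smartify.state.store import RunRecord, RunStatus, SQLiteStore, set_default_store

store = SQLiteStore("smartify_runs.db")
set_default_store(store)  # replace the in-memory default for this process

record = RunRecord(
    run_id="run-0001",  # hypothetical IDs for illustration
    grid_id="grid-demo",
    grid_name="demo",
    status=RunStatus.RUNNING,
    created_at=datetime.now(),
    started_at=datetime.now(),
)
store.save_run(record)

# save_run uses INSERT OR REPLACE, so updating is a second save with the same run_id.
record.status = RunStatus.COMPLETED
record.completed_at = datetime.now()
store.save_run(record)

still_running = store.list_runs(status=RunStatus.RUNNING, limit=10)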
smartify/tools/__init__.py
ADDED
@@ -0,0 +1,17 @@
"""Smartify tools - mechanism for LLM nodes to interact with the world."""

from smartify.tools.base import (
    Tool,
    ToolResult,
    ToolRegistry,
    get_default_registry,
    register_tool,
)

__all__ = [
    "Tool",
    "ToolResult",
    "ToolRegistry",
    "get_default_registry",
    "register_tool",
]
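Only re-exports are visible in this file; the actual interfaces live in smartify/tools/base.py (196 lines per the listing above). Judging purely from the exported names, usage presumably follows a default-registry pattern along the lines of the sketch below; the Tool subclass shape, attribute names, and register_tool signature are guesses, not taken from the package:

from smartify.tools import Tool, ToolResult, get_default_registry, register_tool


# Hypothetical tool definition: the real base-class contract is defined in
# smartify/tools/base.py and may differ from the names and signature used here.
class EchoTool(Tool):
    name = "echo"  # guessed attribute names
    description = "Return the input text unchanged"

    async def run(self, text: str) -> ToolResult:  # guessed method signature
        ...


register_tool(EchoTool())  # assumed to add the tool to the default registry
registry = get_default_registry()  # process-wide registry the builtins likely use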