auto-workflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
+ """Public API surface for auto_workflow (MVP scaffolding)."""
+
+ try: # pragma: no cover - best-effort version exposure
+     from importlib.metadata import version as _pkg_version
+
+     __version__ = _pkg_version("auto-workflow")
+ except Exception: # pragma: no cover
+     __version__ = "0"
+
+ from .context import get_context
+ from .events import subscribe
+ from .fanout import fan_out
+ from .flow import Flow, flow
+ from .scheduler import FailurePolicy
+ from .task import TaskDefinition, task
+
+ # Enable structured pretty logging by default unless explicitly disabled via env
+ try: # pragma: no cover - import side-effect
+     import os
+
+     if os.environ.get("AUTO_WORKFLOW_DISABLE_STRUCTURED_LOGS", "0") not in ("1", "true", "True"):
+         from .logging_middleware import enable_pretty_logging, register_structured_logging
+
+         register_structured_logging()
+         # Always attach the pretty handler by default
+         enable_pretty_logging(os.environ.get("AUTO_WORKFLOW_LOG_LEVEL", "INFO"))
+ except Exception:
+     # Never fail import due to logging setup
+     pass
+
+ __all__ = [
+     "task",
+     "TaskDefinition",
+     "flow",
+     "Flow",
+     "get_context",
+     "fan_out",
+     "FailurePolicy",
+     "subscribe",
+ ]
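
The import-time logging hook above is driven entirely by environment variables, so opting out has to happen before the package is imported. A minimal sketch of that, using the exact variable names read in the module above:

import os

# Must be set before `import auto_workflow`; the check runs at import time.
os.environ["AUTO_WORKFLOW_DISABLE_STRUCTURED_LOGS"] = "1"   # "1", "true" or "True" disables the hook
os.environ["AUTO_WORKFLOW_LOG_LEVEL"] = "DEBUG"             # only consulted when the hook stays enabled

import auto_workflow

print(auto_workflow.__version__)
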
@@ -0,0 +1,9 @@
+ """Module entry point for `python -m auto_workflow`.
+
+ Delegates to the package CLI defined in `auto_workflow.cli`.
+ """
+
+ from .cli import main
+
+ if __name__ == "__main__": # pragma: no cover
+     raise SystemExit(main())
@@ -0,0 +1,119 @@
+ """Artifact storage abstraction (MVP in-memory).
+
+ Note: Pickle is only safe in trusted environments. A JSON serializer option is
+ available via config `artifact_serializer=json` for JSON-serializable values.
+ """
+
+ from __future__ import annotations
+
+ import uuid
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ from .config import load_config
+
+
+ @dataclass(slots=True)
+ class ArtifactRef:
+     key: str
+
+
+ class InMemoryArtifactStore:
+     def __init__(self) -> None:
+         self._store: dict[str, Any] = {}
+
+     def put(self, value: Any) -> ArtifactRef:
+         key = str(uuid.uuid4())
+         self._store[key] = value
+         return ArtifactRef(key)
+
+     def get(self, ref: ArtifactRef) -> Any:
+         return self._store[ref.key]
+
+
+ _STORE = InMemoryArtifactStore()
+
+
+ def get_store() -> InMemoryArtifactStore:
+     cfg = load_config()
+     backend = cfg.get("artifact_store", "memory")
+     if backend == "memory":
+         return _STORE # type: ignore
+     if backend == "filesystem":
+         root = Path(cfg.get("artifact_store_path", ".aw_artifacts"))
+         root.mkdir(parents=True, exist_ok=True)
+         return FileSystemArtifactStore(root) # type: ignore
+     return _STORE # fallback
+
+
+ class FileSystemArtifactStore(InMemoryArtifactStore): # simple extension
+     def __init__(self, root: Path) -> None:
+         super().__init__()
+         self.root = root
+         # serializer: "pickle" (default) or "json" for JSON-serializable values
+         self.serializer = load_config().get("artifact_serializer", "pickle")
+
+     def put(self, value: Any) -> ArtifactRef: # type: ignore[override]
+         ref = super().put(value)
+         path = self.root / ref.key
+         with path.open("wb") as f:
+             # best-effort file lock (POSIX); on macOS this is fine, Windows would need msvcrt
+             try:
+                 import fcntl
+
+                 fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+             except Exception:
+                 pass
+             if self.serializer == "json":
+                 import json
+
+                 data = json.dumps(value).encode()
+                 f.write(data)
+             else:
+                 import pickle
+
+                 pickle.dump(value, f)
+             try:
+                 import fcntl
+
+                 fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+             except Exception:
+                 pass
+         return ref
+
+     def get(self, ref: ArtifactRef) -> Any: # type: ignore[override]
+         path = self.root / ref.key
+         if path.exists():
+             with path.open("rb") as f:
+                 try:
+                     import fcntl
+
+                     fcntl.flock(f.fileno(), fcntl.LOCK_SH)
+                 except Exception:
+                     pass
+                 if self.serializer == "json":
+                     import json
+
+                     data = f.read()
+                     try:
+                         import fcntl
+
+                         fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+                     except Exception:
+                         pass
+                     return json.loads(data.decode())
+                 else:
+                     import pickle
+
+                     try:
+                         obj = pickle.load(f)
+                     finally:
+                         try:
+                             import fcntl
+
+                             fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+                         except Exception:
+                             pass
+                     return obj
+         return super().get(ref)
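
The docstring above points at two knobs, `artifact_store` and `artifact_serializer`, both resolved through `load_config()`. A minimal sketch of switching to the filesystem backend with the JSON serializer via environment overrides; the module path `auto_workflow.artifacts` is an assumption (this file's name is not shown in the diff), and the env var names follow the `AUTO_WORKFLOW_<KEY>` convention from `config.py`:

import os

# Overrides must be in place before the first load_config() call (it is lru_cached).
os.environ["AUTO_WORKFLOW_ARTIFACT_STORE"] = "filesystem"
os.environ["AUTO_WORKFLOW_ARTIFACT_STORE_PATH"] = "/tmp/aw_artifacts"
os.environ["AUTO_WORKFLOW_ARTIFACT_SERIALIZER"] = "json"    # only for JSON-serializable values

from auto_workflow.artifacts import get_store  # hypothetical module name for the file above

store = get_store()
ref = store.put({"rows": [1, 2, 3]})            # written under /tmp/aw_artifacts/<uuid>
assert store.get(ref) == {"rows": [1, 2, 3]}
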
auto_workflow/build.py ADDED
@@ -0,0 +1,158 @@
+ """Graph build structures: TaskInvocation & BuildContext."""
+
+ from __future__ import annotations
+
+ import itertools
+ from collections.abc import Iterator
+ from contextvars import ContextVar
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ _build_ctx: ContextVar[BuildContext | None] = ContextVar("aw_build_ctx", default=None)
+
+
+ @dataclass(slots=True)
+ class TaskInvocation:
+     name: str
+     task_name: str
+     fn: Any
+     args: tuple[Any, ...]
+     kwargs: dict[str, Any]
+     definition: Any # TaskDefinition (forward ref avoided)
+     upstream: set[str] = field(default_factory=set)
+
+     def __repr__(self) -> str: # pragma: no cover
+         return f"<TaskInvocation {self.name} ({self.task_name})>"
+
+     def __hash__(self) -> int: # allow usage inside sets during build structures
+         return hash(self.name)
+
+
+ class BuildContext:
+     def __init__(self) -> None:
+         self.invocations: dict[str, TaskInvocation] = {}
+         self._counters: dict[str, itertools.count] = {}
+         self.dynamic_fanouts: list[Any] = [] # populated by fan_out for root placeholders
+
+     def _next_id(self, task_name: str) -> str:
+         if task_name not in self._counters:
+             self._counters[task_name] = itertools.count(1)
+         idx = next(self._counters[task_name])
+         return f"{task_name}:{idx}"
+
+     def register(
+         self,
+         task_name: str,
+         fn: Any,
+         args: tuple[Any, ...],
+         kwargs: dict[str, Any],
+         definition: Any,
+     ) -> TaskInvocation:
+         name = self._next_id(task_name)
+         inv = TaskInvocation(
+             name=name, task_name=task_name, fn=fn, args=args, kwargs=kwargs, definition=definition
+         )
+         # Determine upstream dependencies by scanning args/kwargs
+         for dep in iter_invocations((args, kwargs)):
+             inv.upstream.add(dep.name)
+         # Dynamic fan-out placeholder detection
+         try: # local import to avoid circular
+             from .fanout import DynamicFanOut # type: ignore
+
+             def _scan(obj):
+                 if isinstance(obj, DynamicFanOut):
+                     inv.upstream.add(obj._source.name)
+                 elif isinstance(obj, (list, tuple, set)):
+                     for i in obj:
+                         _scan(i)
+                 elif isinstance(obj, dict):
+                     for v in obj.values():
+                         _scan(v)
+
+             _scan(args)
+             _scan(kwargs)
+         except Exception: # pragma: no cover
+             pass
+         self.invocations[name] = inv
+         return inv
+
+     def __enter__(self) -> BuildContext:
+         _build_ctx.set(self)
+         return self
+
+     def __exit__(self, exc_type, exc, tb) -> None: # pragma: no cover
+         _build_ctx.set(None)
+
+
+ def current_build_context() -> BuildContext | None:
+     return _build_ctx.get()
+
+
+ def iter_invocations(obj: Any) -> Iterator[TaskInvocation]:
+     # TaskInvocation
+     if isinstance(obj, TaskInvocation):
+         yield obj
+         for item in obj.args:
+             yield from iter_invocations(item)
+         for item in obj.kwargs.values():
+             yield from iter_invocations(item)
+         return
+     # Dynamic fan-out placeholder
+     try:
+         from .fanout import DynamicFanOut # type: ignore
+
+         if isinstance(obj, DynamicFanOut):
+             yield from iter_invocations(obj._source)
+             for child in obj:
+                 yield from iter_invocations(child)
+             return
+     except Exception: # pragma: no cover
+         pass
+     # Collections
+     if isinstance(obj, (list, tuple, set, frozenset)):
+         for item in obj:
+             yield from iter_invocations(item)
+         return
+     if isinstance(obj, dict):
+         for k, v in obj.items():
+             yield from iter_invocations(k)
+             yield from iter_invocations(v)
+         return
+     if isinstance(obj, tuple) and len(obj) == 2 and hasattr(obj, "_fields"): # namedtuple—approx
+         for item in obj: # pragma: no cover
+             yield from iter_invocations(item)
+         return
+     if isinstance(obj, (bytes, str, int, float, type(None))): # primitives
+         return
+     # generic container attribute iteration omitted
+     return
+
+
+ def replace_invocations(struct: Any, results: dict[str, Any]) -> Any:
+     if isinstance(struct, TaskInvocation):
+         return results[struct.name]
+     if isinstance(struct, list):
+         return [replace_invocations(s, results) for s in struct]
+     if isinstance(struct, tuple):
+         return tuple(replace_invocations(s, results) for s in struct)
+     if isinstance(struct, set):
+         return {replace_invocations(s, results) for s in struct}
+     if isinstance(struct, dict):
+         return {
+             replace_invocations(k, results): replace_invocations(v, results)
+             for k, v in struct.items()
+         }
+     return struct
+
+
+ def collect_invocations(struct: Any) -> list[TaskInvocation]:
+     seen: dict[str, TaskInvocation] = {}
+     for inv in iter_invocations(struct):
+         seen[inv.name] = inv
+     return list(seen.values())
+
+
+ def _inject_cycle(a: TaskInvocation, b: TaskInvocation) -> None: # pragma: no cover - test utility
+     """Force a cycle between two invocations for testing cycle detection."""
+     a.upstream.add(b.name)
+     b.upstream.add(a.name)
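
An illustrative sketch (not part of the package) of how `BuildContext.register` derives edges: passing one `TaskInvocation` into another invocation's arguments is what populates `upstream`, via `iter_invocations` scanning the args/kwargs structure.

from auto_workflow.build import BuildContext

def extract():            # stand-in callables; real flows would use task-decorated functions
    ...

def transform(data):
    ...

with BuildContext() as ctx:
    a = ctx.register("extract", extract, (), {}, definition=None)
    b = ctx.register("transform", transform, (a,), {}, definition=None)

assert a.name == "extract:1"
assert b.upstream == {"extract:1"}          # edge derived from the positional argument
assert set(ctx.invocations) == {"extract:1", "transform:1"}
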
auto_workflow/cache.py ADDED
@@ -0,0 +1,111 @@
+ """Result cache abstraction with pluggable backends."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import pickle
+ import time
+ from collections import OrderedDict
+ from pathlib import Path
+ from typing import Any, Protocol
+
+ from .config import load_config
+
+
+ class ResultCache(Protocol): # pragma: no cover - interface
+     def get(self, key: str, ttl: int | None) -> Any | None: ...
+     def set(self, key: str, value: Any) -> None: ...
+
+
+ class InMemoryResultCache:
+     def __init__(self) -> None:
+         # Use OrderedDict for LRU semantics when bounding entries
+         self._store: OrderedDict[str, tuple[float, Any]] = OrderedDict()
+
+     def get(self, key: str, ttl: int | None) -> Any | None:
+         if ttl is None:
+             return None
+         item = self._store.get(key)
+         if not item:
+             return None
+         ts, value = item
+         if time.time() - ts <= ttl:
+             # mark as recently used for LRU
+             from contextlib import suppress
+
+             with suppress(Exception):
+                 self._store.move_to_end(key)
+             return value
+         return None
+
+     def set(self, key: str, value: Any) -> None:
+         self._store[key] = (time.time(), value)
+         # mark as recently used
+         from contextlib import suppress
+
+         with suppress(Exception):
+             self._store.move_to_end(key)
+         # enforce optional LRU bound
+         cfg = load_config()
+         max_entries = cfg.get("result_cache_max_entries")
+         if isinstance(max_entries, str) and max_entries.isdigit():
+             try:
+                 max_entries = int(max_entries)
+             except Exception:
+                 max_entries = None
+         if isinstance(max_entries, int) and max_entries > 0:
+             while len(self._store) > max_entries:
+                 try:
+                     self._store.popitem(last=False)
+                 except Exception:
+                     break
+
+
+ class FileSystemResultCache(InMemoryResultCache):
+     def __init__(self, root: Path) -> None:
+         super().__init__()
+         self.root = root
+         self.root.mkdir(parents=True, exist_ok=True)
+
+     def _path(self, key: str) -> Path:
+         # use sha256 to create a filesystem-safe path; shard into 2-level dirs
+         h = hashlib.sha256(key.encode()).hexdigest()
+         shard1, shard2 = h[:2], h[2:4]
+         p = self.root / shard1 / shard2
+         p.mkdir(parents=True, exist_ok=True)
+         return p / h
+
+     def get(self, key: str, ttl: int | None) -> Any | None: # type: ignore[override]
+         p = self._path(key)
+         if p.exists():
+             try:
+                 with p.open("rb") as f:
+                     ts, value = pickle.load(f)
+                 if ttl is not None and time.time() - ts <= ttl:
+                     return value
+             except Exception: # pragma: no cover
+                 pass
+         return super().get(key, ttl)
+
+     def set(self, key: str, value: Any) -> None: # type: ignore[override]
+         super().set(key, value)
+         p = self._path(key)
+         try:
+             with p.open("wb") as f:
+                 pickle.dump((time.time(), value), f)
+         except Exception: # pragma: no cover
+             pass
+
+
+ _memory_cache = InMemoryResultCache()
+
+
+ def get_result_cache() -> ResultCache:
+     cfg = load_config()
+     backend = cfg.get("result_cache", "memory")
+     if backend == "filesystem":
+         from pathlib import Path
+
+         root = Path(cfg.get("result_cache_path", ".aw_cache"))
+         return FileSystemResultCache(root)
+     return _memory_cache
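
A sketch of the in-memory backend's behaviour, combining the TTL check in `get` with the optional LRU bound enforced in `set`. It assumes the `AUTO_WORKFLOW_RESULT_CACHE_MAX_ENTRIES` override is set before the config is first loaded, since `load_config` is cached.

import os

os.environ["AUTO_WORKFLOW_RESULT_CACHE_MAX_ENTRIES"] = "2"   # string; set() coerces it via isdigit()/int()

from auto_workflow.cache import get_result_cache

cache = get_result_cache()        # default "memory" backend
cache.set("a", 1)
cache.set("b", 2)
cache.set("c", 3)                 # exceeds the bound, evicts "a" (least recently used)

assert cache.get("a", ttl=60) is None
assert cache.get("c", ttl=60) == 3
assert cache.get("c", ttl=None) is None   # a None TTL always misses
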
auto_workflow/cli.py ADDED
@@ -0,0 +1,78 @@
+ """CLI entry points."""
+
+ from __future__ import annotations
+
+ import argparse
+ import importlib
+ import json
+
+
+ def load_flow(dotted: str):
+     if ":" not in dotted:
+         raise SystemExit("Flow path must be module:object")
+     mod_name, attr = dotted.split(":", 1)
+     mod = importlib.import_module(mod_name)
+     flow_obj = getattr(mod, attr)
+     return flow_obj
+
+
+ def main(argv: list[str] | None = None) -> int:
+     parser = argparse.ArgumentParser("auto-workflow")
+     sub = parser.add_subparsers(dest="cmd", required=True)
+     run_p = sub.add_parser("run", help="Run a flow")
+     run_p.add_argument("flow", help="module:flow_object path")
+     run_p.add_argument("--failure-policy", default="fail_fast")
+     run_p.add_argument("--max-concurrency", type=int, default=None)
+     run_p.add_argument("--params", help="JSON params dict", default=None)
+     run_p.add_argument("--structured-logs", action="store_true")
+
+     desc_p = sub.add_parser("describe", help="Describe a flow DAG")
+     desc_p.add_argument("flow", help="module:flow_object path")
+     desc_p.add_argument("--params", help="JSON params dict", default=None)
+
+     list_p = sub.add_parser("list", help="List flows in a module")
+     list_p.add_argument("module", help="Python module to scan for Flow objects")
+
+     ns = parser.parse_args(argv)
+     if ns.cmd == "run":
+         if ns.structured_logs:
+             from .logging_middleware import register_structured_logging
+
+             register_structured_logging()
+         params = json.loads(ns.params) if ns.params else None
+         flow_obj = load_flow(ns.flow)
+         result = flow_obj.run(
+             failure_policy=ns.failure_policy,
+             max_concurrency=ns.max_concurrency,
+             params=params,
+         )
+         print(result)
+         # Best-effort graceful shutdown
+         try:
+             from .lifecycle import shutdown
+
+             shutdown()
+         except Exception:
+             pass
+         return 0
+     if ns.cmd == "describe":
+         flow_obj = load_flow(ns.flow)
+         params = json.loads(ns.params) if ns.params else None
+         desc = flow_obj.describe(params=params) if params else flow_obj.describe()
+         print(json.dumps(desc, indent=2))
+         return 0
+     if ns.cmd == "list":
+         mod = importlib.import_module(ns.module)
+         out = {}
+         for name, obj in vars(mod).items():
+             from auto_workflow.flow import Flow
+
+             if isinstance(obj, Flow):
+                 out[name] = obj.describe()["count"]
+         print(json.dumps(out, indent=2))
+         return 0
+     return 1
+
+
+ if __name__ == "__main__": # pragma: no cover
+     raise SystemExit(main())
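
The same entry point backs `python -m auto_workflow`, so the CLI can also be driven in-process. A sketch; `examples.flows:etl` is a hypothetical `module:object` path.

from auto_workflow.cli import main

main(["describe", "examples.flows:etl"])                     # prints the flow DAG as JSON
main([
    "run", "examples.flows:etl",
    "--max-concurrency", "4",
    "--params", '{"date": "2024-01-01"}',
])
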
@@ -0,0 +1,76 @@
+ """Configuration loading from pyproject.toml (best-effort)."""
+
+ from __future__ import annotations
+
+ import tomllib
+ from functools import lru_cache
+ from pathlib import Path
+ from typing import Any
+
+ DEFAULTS: dict[str, Any] = {
+     "log_level": "INFO",
+     "max_dynamic_tasks": 2048,
+     "artifact_store": "memory",
+     "artifact_store_path": ".aw_artifacts",
+     "artifact_serializer": "pickle", # or "json"
+     "result_cache": "memory",
+     "result_cache_path": ".aw_cache",
+     "result_cache_max_entries": None, # int or None
+     "process_pool_max_workers": None, # int or None
+ }
+
+
+ @lru_cache(maxsize=1)
+ def load_config() -> dict[str, Any]:
+     root = Path(__file__).resolve().parent.parent
+     pyproject = root / "pyproject.toml"
+     data: dict[str, Any] = {}
+     if pyproject.exists():
+         try:
+             with pyproject.open("rb") as f:
+                 parsed = tomllib.load(f)
+             tool_cfg = parsed.get("tool", {}).get("auto_workflow", {})
+             if isinstance(tool_cfg, dict):
+                 data.update(tool_cfg)
+         except Exception: # pragma: no cover
+             pass
+     merged = {**DEFAULTS, **data}
+     # env overrides
+     import os
+
+     for k in list(merged.keys()):
+         env_key = f"AUTO_WORKFLOW_{k.upper()}"
+         if env_key in os.environ:
+             merged[k] = os.environ[env_key]
+
+     # normalize types for known keys
+     def _to_int(val):
+         if isinstance(val, int):
+             return val
+         if isinstance(val, str) and val.isdigit():
+             try:
+                 return int(val)
+             except Exception:
+                 return None
+         return None
+
+     # coerce integers
+     for key in ("max_dynamic_tasks", "process_pool_max_workers", "result_cache_max_entries"):
+         v = merged.get(key)
+         # Preserve strings from env; Flow or call sites will coerce/ignore as needed
+         if isinstance(v, str):
+             continue
+         iv = _to_int(v)
+         if iv is not None and iv > 0:
+             merged[key] = iv
+         elif v is not None:
+             merged[key] = None
+     # constrain artifact_serializer
+     if merged.get("artifact_serializer") not in ("pickle", "json"):
+         merged["artifact_serializer"] = "pickle"
+     return merged
+
+
+ def reload_config() -> dict[str, Any]: # pragma: no cover - used in tests
+     load_config.cache_clear() # type: ignore
+     return load_config()
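
Resolution order is: `DEFAULTS`, then `[tool.auto_workflow]` in `pyproject.toml`, then `AUTO_WORKFLOW_<KEY>` environment variables, with the result cached by `lru_cache`. A minimal sketch of an environment override together with `reload_config()`:

import os

from auto_workflow.config import load_config, reload_config

print(load_config()["artifact_serializer"])     # "pickle" unless pyproject or env says otherwise

os.environ["AUTO_WORKFLOW_ARTIFACT_SERIALIZER"] = "json"
print(reload_config()["artifact_serializer"])   # "json" once the cached config is rebuilt
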
@@ -0,0 +1,32 @@
+ """Execution context handling."""
+
+ from __future__ import annotations
+
+ import logging
+ import time
+ from contextvars import ContextVar
+ from dataclasses import dataclass, field
+ from typing import Any
+
+
+ @dataclass(slots=True)
+ class RunContext:
+     run_id: str
+     flow_name: str
+     start_time: float = field(default_factory=time.time)
+     params: dict[str, Any] = field(default_factory=dict)
+     logger: logging.Logger = field(default_factory=lambda: logging.getLogger("auto_workflow"))
+
+
+ _current_context: ContextVar[RunContext | None] = ContextVar("auto_workflow_run_ctx", default=None)
+
+
+ def set_context(ctx: RunContext) -> None:
+     _current_context.set(ctx)
+
+
+ def get_context() -> RunContext:
+     ctx = _current_context.get()
+     if ctx is None:
+         raise RuntimeError("No active RunContext; are you inside a flow execution?")
+     return ctx
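
`get_context()` only succeeds while a flow run has installed a `RunContext`; in task code that is normally done by the scheduler. A self-contained sketch using just this module:

import uuid

from auto_workflow.context import RunContext, get_context, set_context

set_context(RunContext(run_id=str(uuid.uuid4()), flow_name="demo", params={"limit": 10}))

ctx = get_context()
ctx.logger.info("flow %s started with params %s", ctx.flow_name, ctx.params)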