auto-workflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
+ """Public API surface for auto_workflow (MVP scaffolding)."""
+
+ try: # pragma: no cover - best-effort version exposure
+     from importlib.metadata import version as _pkg_version
+
+     __version__ = _pkg_version("auto-workflow")
+ except Exception: # pragma: no cover
+     __version__ = "0"
+
+ from .context import get_context
+ from .events import subscribe
+ from .fanout import fan_out
+ from .flow import Flow, flow
+ from .scheduler import FailurePolicy
+ from .task import TaskDefinition, task
+
+ # Enable structured pretty logging by default unless explicitly disabled via env
+ try: # pragma: no cover - import side-effect
+     import os
+
+     if os.environ.get("AUTO_WORKFLOW_DISABLE_STRUCTURED_LOGS", "0") not in ("1", "true", "True"):
+         from .logging_middleware import enable_pretty_logging, register_structured_logging
+
+         register_structured_logging()
+         # Always attach the pretty handler by default
+         enable_pretty_logging(os.environ.get("AUTO_WORKFLOW_LOG_LEVEL", "INFO"))
+ except Exception:
+     # Never fail import due to logging setup
+     pass
+
+ __all__ = [
+     "task",
+     "TaskDefinition",
+     "flow",
+     "Flow",
+     "get_context",
+     "fan_out",
+     "FailurePolicy",
+     "subscribe",
+ ]
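
The import-time logging hook above is driven entirely by environment variables, so opting out has to happen before the package is imported. A minimal sketch of that, using the exact variable names read in the module above:

import os

# Must be set before `import auto_workflow`; the check runs at import time.
os.environ["AUTO_WORKFLOW_DISABLE_STRUCTURED_LOGS"] = "1"   # "1", "true" or "True" disables the hook
os.environ["AUTO_WORKFLOW_LOG_LEVEL"] = "DEBUG"             # only consulted when the hook stays enabled

import auto_workflow

print(auto_workflow.__version__)
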
@@ -0,0 +1,9 @@
+ """Module entry point for `python -m auto_workflow`.
+
+ Delegates to the package CLI defined in `auto_workflow.cli`.
+ """
+
+ from .cli import main
+
+ if __name__ == "__main__": # pragma: no cover
+     raise SystemExit(main())
@@ -0,0 +1,119 @@
+ """Artifact storage abstraction (MVP in-memory).
+
+ Note: Pickle is only safe in trusted environments. A JSON serializer option is
+ available via config `artifact_serializer=json` for JSON-serializable values.
+ """
+
+ from __future__ import annotations
+
+ import uuid
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ from .config import load_config
+
+
+ @dataclass(slots=True)
+ class ArtifactRef:
+     key: str
+
+
+ class InMemoryArtifactStore:
+     def __init__(self) -> None:
+         self._store: dict[str, Any] = {}
+
+     def put(self, value: Any) -> ArtifactRef:
+         key = str(uuid.uuid4())
+         self._store[key] = value
+         return ArtifactRef(key)
+
+     def get(self, ref: ArtifactRef) -> Any:
+         return self._store[ref.key]
+
+
+ _STORE = InMemoryArtifactStore()
+
+
+ def get_store() -> InMemoryArtifactStore:
+     cfg = load_config()
+     backend = cfg.get("artifact_store", "memory")
+     if backend == "memory":
+         return _STORE # type: ignore
+     if backend == "filesystem":
+         root = Path(cfg.get("artifact_store_path", ".aw_artifacts"))
+         root.mkdir(parents=True, exist_ok=True)
+         return FileSystemArtifactStore(root) # type: ignore
+     return _STORE # fallback
+
+
+ class FileSystemArtifactStore(InMemoryArtifactStore): # simple extension
+     def __init__(self, root: Path) -> None:
+         super().__init__()
+         self.root = root
+         # serializer: "pickle" (default) or "json" for JSON-serializable values
+         self.serializer = load_config().get("artifact_serializer", "pickle")
+
+     def put(self, value: Any) -> ArtifactRef: # type: ignore[override]
+         ref = super().put(value)
+         path = self.root / ref.key
+         with path.open("wb") as f:
+             # best-effort file lock (POSIX); on macOS this is fine, Windows would need msvcrt
+             try:
+                 import fcntl
+
+                 fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+             except Exception:
+                 pass
+             if self.serializer == "json":
+                 import json
+
+                 data = json.dumps(value).encode()
+                 f.write(data)
+             else:
+                 import pickle
+
+                 pickle.dump(value, f)
+             try:
+                 import fcntl
+
+                 fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+             except Exception:
+                 pass
+         return ref
+
+     def get(self, ref: ArtifactRef) -> Any: # type: ignore[override]
+         path = self.root / ref.key
+         if path.exists():
+             with path.open("rb") as f:
+                 try:
+                     import fcntl
+
+                     fcntl.flock(f.fileno(), fcntl.LOCK_SH)
+                 except Exception:
+                     pass
+                 if self.serializer == "json":
+                     import json
+
+                     data = f.read()
+                     try:
+                         import fcntl
+
+                         fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+                     except Exception:
+                         pass
+                     return json.loads(data.decode())
+                 else:
+                     import pickle
+
+                     try:
+                         obj = pickle.load(f)
+                     finally:
+                         try:
+                             import fcntl
+
+                             fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+                         except Exception:
+                             pass
+                     return obj
+         return super().get(ref)
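
The docstring above points at two knobs, `artifact_store` and `artifact_serializer`, both resolved through `load_config()`. A minimal sketch of switching to the filesystem backend with the JSON serializer via environment overrides; the module path `auto_workflow.artifacts` is an assumption (this file's name is not shown in the diff), and the env var names follow the `AUTO_WORKFLOW_<KEY>` convention from `config.py`:

import os

# Overrides must be in place before the first load_config() call (it is lru_cached).
os.environ["AUTO_WORKFLOW_ARTIFACT_STORE"] = "filesystem"
os.environ["AUTO_WORKFLOW_ARTIFACT_STORE_PATH"] = "/tmp/aw_artifacts"
os.environ["AUTO_WORKFLOW_ARTIFACT_SERIALIZER"] = "json"    # only for JSON-serializable values

from auto_workflow.artifacts import get_store  # hypothetical module name for the file above

store = get_store()
ref = store.put({"rows": [1, 2, 3]})            # written under /tmp/aw_artifacts/<uuid>
assert store.get(ref) == {"rows": [1, 2, 3]}
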
auto_workflow/build.py ADDED
@@ -0,0 +1,158 @@
+ """Graph build structures: TaskInvocation & BuildContext."""
+
+ from __future__ import annotations
+
+ import itertools
+ from collections.abc import Iterator
+ from contextvars import ContextVar
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ _build_ctx: ContextVar[BuildContext | None] = ContextVar("aw_build_ctx", default=None)
+
+
+ @dataclass(slots=True)
+ class TaskInvocation:
+     name: str
+     task_name: str
+     fn: Any
+     args: tuple[Any, ...]
+     kwargs: dict[str, Any]
+     definition: Any # TaskDefinition (forward ref avoided)
+     upstream: set[str] = field(default_factory=set)
+
+     def __repr__(self) -> str: # pragma: no cover
+         return f"<TaskInvocation {self.name} ({self.task_name})>"
+
+     def __hash__(self) -> int: # allow usage inside sets during build structures
+         return hash(self.name)
+
+
+ class BuildContext:
+     def __init__(self) -> None:
+         self.invocations: dict[str, TaskInvocation] = {}
+         self._counters: dict[str, itertools.count] = {}
+         self.dynamic_fanouts: list[Any] = [] # populated by fan_out for root placeholders
+
+     def _next_id(self, task_name: str) -> str:
+         if task_name not in self._counters:
+             self._counters[task_name] = itertools.count(1)
+         idx = next(self._counters[task_name])
+         return f"{task_name}:{idx}"
+
+     def register(
+         self,
+         task_name: str,
+         fn: Any,
+         args: tuple[Any, ...],
+         kwargs: dict[str, Any],
+         definition: Any,
+     ) -> TaskInvocation:
+         name = self._next_id(task_name)
+         inv = TaskInvocation(
+             name=name, task_name=task_name, fn=fn, args=args, kwargs=kwargs, definition=definition
+         )
+         # Determine upstream dependencies by scanning args/kwargs
+         for dep in iter_invocations((args, kwargs)):
+             inv.upstream.add(dep.name)
+         # Dynamic fan-out placeholder detection
+         try: # local import to avoid circular
+             from .fanout import DynamicFanOut # type: ignore
+
+             def _scan(obj):
+                 if isinstance(obj, DynamicFanOut):
+                     inv.upstream.add(obj._source.name)
+                 elif isinstance(obj, (list, tuple, set)):
+                     for i in obj:
+                         _scan(i)
+                 elif isinstance(obj, dict):
+                     for v in obj.values():
+                         _scan(v)
+
+             _scan(args)
+             _scan(kwargs)
+         except Exception: # pragma: no cover
+             pass
+         self.invocations[name] = inv
+         return inv
+
+     def __enter__(self) -> BuildContext:
+         _build_ctx.set(self)
+         return self
+
+     def __exit__(self, exc_type, exc, tb) -> None: # pragma: no cover
+         _build_ctx.set(None)
+
+
+ def current_build_context() -> BuildContext | None:
+     return _build_ctx.get()
+
+
+ def iter_invocations(obj: Any) -> Iterator[TaskInvocation]:
+     # TaskInvocation
+     if isinstance(obj, TaskInvocation):
+         yield obj
+         for item in obj.args:
+             yield from iter_invocations(item)
+         for item in obj.kwargs.values():
+             yield from iter_invocations(item)
+         return
+     # Dynamic fan-out placeholder
+     try:
+         from .fanout import DynamicFanOut # type: ignore
+
+         if isinstance(obj, DynamicFanOut):
+             yield from iter_invocations(obj._source)
+             for child in obj:
+                 yield from iter_invocations(child)
+             return
+     except Exception: # pragma: no cover
+         pass
+     # Collections
+     if isinstance(obj, (list, tuple, set, frozenset)):
+         for item in obj:
+             yield from iter_invocations(item)
+         return
+     if isinstance(obj, dict):
+         for k, v in obj.items():
+             yield from iter_invocations(k)
+             yield from iter_invocations(v)
+         return
+     if isinstance(obj, tuple) and len(obj) == 2 and hasattr(obj, "_fields"): # namedtuple—approx
+         for item in obj: # pragma: no cover
+             yield from iter_invocations(item)
+         return
+     if isinstance(obj, (bytes, str, int, float, type(None))): # primitives
+         return
+     # generic container attribute iteration omitted
+     return
+
+
+ def replace_invocations(struct: Any, results: dict[str, Any]) -> Any:
+     if isinstance(struct, TaskInvocation):
+         return results[struct.name]
+     if isinstance(struct, list):
+         return [replace_invocations(s, results) for s in struct]
+     if isinstance(struct, tuple):
+         return tuple(replace_invocations(s, results) for s in struct)
+     if isinstance(struct, set):
+         return {replace_invocations(s, results) for s in struct}
+     if isinstance(struct, dict):
+         return {
+             replace_invocations(k, results): replace_invocations(v, results)
+             for k, v in struct.items()
+         }
+     return struct
+
+
+ def collect_invocations(struct: Any) -> list[TaskInvocation]:
+     seen: dict[str, TaskInvocation] = {}
+     for inv in iter_invocations(struct):
+         seen[inv.name] = inv
+     return list(seen.values())
+
+
+ def _inject_cycle(a: TaskInvocation, b: TaskInvocation) -> None: # pragma: no cover - test utility
+     """Force a cycle between two invocations for testing cycle detection."""
+     a.upstream.add(b.name)
+     b.upstream.add(a.name)
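
An illustrative sketch (not part of the package) of how `BuildContext.register` derives edges: passing one `TaskInvocation` into another invocation's arguments is what populates `upstream`, via `iter_invocations` scanning the args/kwargs structure.

from auto_workflow.build import BuildContext

def extract():            # stand-in callables; real flows would use task-decorated functions
    ...

def transform(data):
    ...

with BuildContext() as ctx:
    a = ctx.register("extract", extract, (), {}, definition=None)
    b = ctx.register("transform", transform, (a,), {}, definition=None)

assert a.name == "extract:1"
assert b.upstream == {"extract:1"}          # edge derived from the positional argument
assert set(ctx.invocations) == {"extract:1", "transform:1"}
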
auto_workflow/cache.py ADDED
@@ -0,0 +1,111 @@
+ """Result cache abstraction with pluggable backends."""
+
+ from __future__ import annotations
+
+ import hashlib
+ import pickle
+ import time
+ from collections import OrderedDict
+ from pathlib import Path
+ from typing import Any, Protocol
+
+ from .config import load_config
+
+
+ class ResultCache(Protocol): # pragma: no cover - interface
+     def get(self, key: str, ttl: int | None) -> Any | None: ...
+     def set(self, key: str, value: Any) -> None: ...
+
+
+ class InMemoryResultCache:
+     def __init__(self) -> None:
+         # Use OrderedDict for LRU semantics when bounding entries
+         self._store: OrderedDict[str, tuple[float, Any]] = OrderedDict()
+
+     def get(self, key: str, ttl: int | None) -> Any | None:
+         if ttl is None:
+             return None
+         item = self._store.get(key)
+         if not item:
+             return None
+         ts, value = item
+         if time.time() - ts <= ttl:
+             # mark as recently used for LRU
+             from contextlib import suppress
+
+             with suppress(Exception):
+                 self._store.move_to_end(key)
+             return value
+         return None
+
+     def set(self, key: str, value: Any) -> None:
+         self._store[key] = (time.time(), value)
+         # mark as recently used
+         from contextlib import suppress
+
+         with suppress(Exception):
+             self._store.move_to_end(key)
+         # enforce optional LRU bound
+         cfg = load_config()
+         max_entries = cfg.get("result_cache_max_entries")
+         if isinstance(max_entries, str) and max_entries.isdigit():
+             try:
+                 max_entries = int(max_entries)
+             except Exception:
+                 max_entries = None
+         if isinstance(max_entries, int) and max_entries > 0:
+             while len(self._store) > max_entries:
+                 try:
+                     self._store.popitem(last=False)
+                 except Exception:
+                     break
+
+
+ class FileSystemResultCache(InMemoryResultCache):
+     def __init__(self, root: Path) -> None:
+         super().__init__()
+         self.root = root
+         self.root.mkdir(parents=True, exist_ok=True)
+
+     def _path(self, key: str) -> Path:
+         # use sha256 to create a filesystem-safe path; shard into 2-level dirs
+         h = hashlib.sha256(key.encode()).hexdigest()
+         shard1, shard2 = h[:2], h[2:4]
+         p = self.root / shard1 / shard2
+         p.mkdir(parents=True, exist_ok=True)
+         return p / h
+
+     def get(self, key: str, ttl: int | None) -> Any | None: # type: ignore[override]
+         p = self._path(key)
+         if p.exists():
+             try:
+                 with p.open("rb") as f:
+                     ts, value = pickle.load(f)
+                 if ttl is not None and time.time() - ts <= ttl:
+                     return value
+             except Exception: # pragma: no cover
+                 pass
+         return super().get(key, ttl)
+
+     def set(self, key: str, value: Any) -> None: # type: ignore[override]
+         super().set(key, value)
+         p = self._path(key)
+         try:
+             with p.open("wb") as f:
+                 pickle.dump((time.time(), value), f)
+         except Exception: # pragma: no cover
+             pass
+
+
+ _memory_cache = InMemoryResultCache()
+
+
+ def get_result_cache() -> ResultCache:
+     cfg = load_config()
+     backend = cfg.get("result_cache", "memory")
+     if backend == "filesystem":
+         from pathlib import Path
+
+         root = Path(cfg.get("result_cache_path", ".aw_cache"))
+         return FileSystemResultCache(root)
+     return _memory_cache
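
A sketch of the in-memory backend's behaviour, combining the TTL check in `get` with the optional LRU bound enforced in `set`. It assumes the `AUTO_WORKFLOW_RESULT_CACHE_MAX_ENTRIES` override is set before the config is first loaded, since `load_config` is cached.

import os

os.environ["AUTO_WORKFLOW_RESULT_CACHE_MAX_ENTRIES"] = "2"   # string; set() coerces it via isdigit()/int()

from auto_workflow.cache import get_result_cache

cache = get_result_cache()        # default "memory" backend
cache.set("a", 1)
cache.set("b", 2)
cache.set("c", 3)                 # exceeds the bound, evicts "a" (least recently used)

assert cache.get("a", ttl=60) is None
assert cache.get("c", ttl=60) == 3
assert cache.get("c", ttl=None) is None   # a None TTL always misses
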
auto_workflow/cli.py ADDED
@@ -0,0 +1,78 @@
+ """CLI entry points."""
+
+ from __future__ import annotations
+
+ import argparse
+ import importlib
+ import json
+
+
+ def load_flow(dotted: str):
+     if ":" not in dotted:
+         raise SystemExit("Flow path must be module:object")
+     mod_name, attr = dotted.split(":", 1)
+     mod = importlib.import_module(mod_name)
+     flow_obj = getattr(mod, attr)
+     return flow_obj
+
+
+ def main(argv: list[str] | None = None) -> int:
+     parser = argparse.ArgumentParser("auto-workflow")
+     sub = parser.add_subparsers(dest="cmd", required=True)
+     run_p = sub.add_parser("run", help="Run a flow")
+     run_p.add_argument("flow", help="module:flow_object path")
+     run_p.add_argument("--failure-policy", default="fail_fast")
+     run_p.add_argument("--max-concurrency", type=int, default=None)
+     run_p.add_argument("--params", help="JSON params dict", default=None)
+     run_p.add_argument("--structured-logs", action="store_true")
+
+     desc_p = sub.add_parser("describe", help="Describe a flow DAG")
+     desc_p.add_argument("flow", help="module:flow_object path")
+     desc_p.add_argument("--params", help="JSON params dict", default=None)
+
+     list_p = sub.add_parser("list", help="List flows in a module")
+     list_p.add_argument("module", help="Python module to scan for Flow objects")
+
+     ns = parser.parse_args(argv)
+     if ns.cmd == "run":
+         if ns.structured_logs:
+             from .logging_middleware import register_structured_logging
+
+             register_structured_logging()
+         params = json.loads(ns.params) if ns.params else None
+         flow_obj = load_flow(ns.flow)
+         result = flow_obj.run(
+             failure_policy=ns.failure_policy,
+             max_concurrency=ns.max_concurrency,
+             params=params,
+         )
+         print(result)
+         # Best-effort graceful shutdown
+         try:
+             from .lifecycle import shutdown
+
+             shutdown()
+         except Exception:
+             pass
+         return 0
+     if ns.cmd == "describe":
+         flow_obj = load_flow(ns.flow)
+         params = json.loads(ns.params) if ns.params else None
+         desc = flow_obj.describe(params=params) if params else flow_obj.describe()
+         print(json.dumps(desc, indent=2))
+         return 0
+     if ns.cmd == "list":
+         mod = importlib.import_module(ns.module)
+         out = {}
+         for name, obj in vars(mod).items():
+             from auto_workflow.flow import Flow
+
+             if isinstance(obj, Flow):
+                 out[name] = obj.describe()["count"]
+         print(json.dumps(out, indent=2))
+         return 0
+     return 1
+
+
+ if __name__ == "__main__": # pragma: no cover
+     raise SystemExit(main())
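
The same entry point backs `python -m auto_workflow`, so the CLI can also be driven in-process. A sketch; `examples.flows:etl` is a hypothetical `module:object` path.

from auto_workflow.cli import main

main(["describe", "examples.flows:etl"])                     # prints the flow DAG as JSON
main([
    "run", "examples.flows:etl",
    "--max-concurrency", "4",
    "--params", '{"date": "2024-01-01"}',
])
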
@@ -0,0 +1,76 @@
+ """Configuration loading from pyproject.toml (best-effort)."""
+
+ from __future__ import annotations
+
+ import tomllib
+ from functools import lru_cache
+ from pathlib import Path
+ from typing import Any
+
+ DEFAULTS: dict[str, Any] = {
+     "log_level": "INFO",
+     "max_dynamic_tasks": 2048,
+     "artifact_store": "memory",
+     "artifact_store_path": ".aw_artifacts",
+     "artifact_serializer": "pickle", # or "json"
+     "result_cache": "memory",
+     "result_cache_path": ".aw_cache",
+     "result_cache_max_entries": None, # int or None
+     "process_pool_max_workers": None, # int or None
+ }
+
+
+ @lru_cache(maxsize=1)
+ def load_config() -> dict[str, Any]:
+     root = Path(__file__).resolve().parent.parent
+     pyproject = root / "pyproject.toml"
+     data: dict[str, Any] = {}
+     if pyproject.exists():
+         try:
+             with pyproject.open("rb") as f:
+                 parsed = tomllib.load(f)
+             tool_cfg = parsed.get("tool", {}).get("auto_workflow", {})
+             if isinstance(tool_cfg, dict):
+                 data.update(tool_cfg)
+         except Exception: # pragma: no cover
+             pass
+     merged = {**DEFAULTS, **data}
+     # env overrides
+     import os
+
+     for k in list(merged.keys()):
+         env_key = f"AUTO_WORKFLOW_{k.upper()}"
+         if env_key in os.environ:
+             merged[k] = os.environ[env_key]
+
+     # normalize types for known keys
+     def _to_int(val):
+         if isinstance(val, int):
+             return val
+         if isinstance(val, str) and val.isdigit():
+             try:
+                 return int(val)
+             except Exception:
+                 return None
+         return None
+
+     # coerce integers
+     for key in ("max_dynamic_tasks", "process_pool_max_workers", "result_cache_max_entries"):
+         v = merged.get(key)
+         # Preserve strings from env; Flow or call sites will coerce/ignore as needed
+         if isinstance(v, str):
+             continue
+         iv = _to_int(v)
+         if iv is not None and iv > 0:
+             merged[key] = iv
+         elif v is not None:
+             merged[key] = None
+     # constrain artifact_serializer
+     if merged.get("artifact_serializer") not in ("pickle", "json"):
+         merged["artifact_serializer"] = "pickle"
+     return merged
+
+
+ def reload_config() -> dict[str, Any]: # pragma: no cover - used in tests
+     load_config.cache_clear() # type: ignore
+     return load_config()
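
Resolution order is: `DEFAULTS`, then `[tool.auto_workflow]` in `pyproject.toml`, then `AUTO_WORKFLOW_<KEY>` environment variables, with the result cached by `lru_cache`. A minimal sketch of an environment override together with `reload_config()`:

import os

from auto_workflow.config import load_config, reload_config

print(load_config()["artifact_serializer"])     # "pickle" unless pyproject or env says otherwise

os.environ["AUTO_WORKFLOW_ARTIFACT_SERIALIZER"] = "json"
print(reload_config()["artifact_serializer"])   # "json" once the cached config is rebuilt
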
@@ -0,0 +1,32 @@
+ """Execution context handling."""
+
+ from __future__ import annotations
+
+ import logging
+ import time
+ from contextvars import ContextVar
+ from dataclasses import dataclass, field
+ from typing import Any
+
+
+ @dataclass(slots=True)
+ class RunContext:
+     run_id: str
+     flow_name: str
+     start_time: float = field(default_factory=time.time)
+     params: dict[str, Any] = field(default_factory=dict)
+     logger: logging.Logger = field(default_factory=lambda: logging.getLogger("auto_workflow"))
+
+
+ _current_context: ContextVar[RunContext | None] = ContextVar("auto_workflow_run_ctx", default=None)
+
+
+ def set_context(ctx: RunContext) -> None:
+     _current_context.set(ctx)
+
+
+ def get_context() -> RunContext:
+     ctx = _current_context.get()
+     if ctx is None:
+         raise RuntimeError("No active RunContext; are you inside a flow execution?")
+     return ctx
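
`get_context()` only succeeds while a flow run has installed a `RunContext`; in task code that is normally done by the scheduler. A self-contained sketch using just this module:

import uuid

from auto_workflow.context import RunContext, get_context, set_context

set_context(RunContext(run_id=str(uuid.uuid4()), flow_name="demo", params={"limit": 10}))

ctx = get_context()
ctx.logger.info("flow %s started with params %s", ctx.flow_name, ctx.params)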