pyoco 0.1.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyoco/core/models.py CHANGED
@@ -1,8 +1,18 @@
1
- from typing import Any, Callable, Dict, List, Optional, Set, Union, ForwardRef
1
+ from typing import Any, Callable, Dict, List, Optional, Set, Union
2
2
  from dataclasses import dataclass, field
3
+ from enum import Enum
4
+ import time
5
+ import uuid
6
+ import json
3
7
 
4
8
  @dataclass
5
9
  class Task:
10
+ """
11
+ Represents a single unit of work in the workflow.
12
+
13
+ Designed to be lightweight and serializable.
14
+ Contains metadata about the task, its dependencies, and execution policies.
15
+ """
6
16
  func: Callable
7
17
  name: str
8
18
  dependencies: Set['Task'] = field(default_factory=set)
@@ -32,13 +42,137 @@ class Task:
32
42
  def __repr__(self):
33
43
  return f"<Task {self.name}>"
34
44
 
45
class TaskState(Enum):
    """State labels for a single task.

    Each member's value is its own name as a string.  ``TaskRecord.state``
    defaults to ``PENDING``.
    """

    PENDING = "PENDING"
    RUNNING = "RUNNING"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
    CANCELLED = "CANCELLED"
51
+
52
class RunStatus(Enum):
    """Status labels for a whole run (the type of ``RunContext.status``).

    Each member's value is its own name as a string.  Unlike ``TaskState``
    there is a separate ``CANCELLING`` member next to ``CANCELLED``.
    """

    PENDING = "PENDING"
    RUNNING = "RUNNING"
    COMPLETED = "COMPLETED"
    FAILED = "FAILED"
    CANCELLING = "CANCELLING"  # presumably "cancel requested, not finished" — confirm with the engine
    CANCELLED = "CANCELLED"
59
+
60
@dataclass
class TaskRecord:
    """Mutable per-task record: state, timing, error details and data.

    Every field has a default, so ``TaskRecord()`` is a ``PENDING`` record
    with no timestamps, no error and empty ``inputs``/``artifacts`` dicts.
    """

    state: TaskState = TaskState.PENDING
    # Timestamps are plain floats; presumably time.time() values — confirm
    # against the code that fills them in.
    started_at: Optional[float] = None
    ended_at: Optional[float] = None
    duration_ms: Optional[float] = None
    # Error message and formatted traceback as strings, None when unset.
    error: Optional[str] = None
    traceback: Optional[str] = None
    # default_factory gives every record its own dict instead of a shared one.
    inputs: Dict[str, Any] = field(default_factory=dict)
    output: Any = None
    artifacts: Dict[str, Any] = field(default_factory=dict)
71
+
72
+
73
@dataclass
class RunContext:
    """
    Holds the state of a single workflow execution.

    Apart from plain bookkeeping fields the context owns one ``TaskRecord``
    per task name and a log list with a separate "pending" buffer that
    ``drain_logs`` hands out and empties.
    """
    run_id: str = field(default_factory=lambda: str(uuid.uuid4()))  # fresh UUID4 string
    flow_name: str = "main"
    params: Dict[str, Any] = field(default_factory=dict)
    status: RunStatus = RunStatus.RUNNING  # a new context starts as RUNNING
    tasks: Dict[str, TaskState] = field(default_factory=dict)
    task_records: Dict[str, TaskRecord] = field(default_factory=dict)
    start_time: float = field(default_factory=time.time)  # time.time() at creation
    end_time: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    logs: List[Dict[str, Any]] = field(default_factory=list)
    _pending_logs: List[Dict[str, Any]] = field(default_factory=list, repr=False)
    _log_seq: int = field(default=0, repr=False)
    log_bytes: Dict[str, int] = field(default_factory=dict)
    metrics_recorded_tasks: Set[str] = field(default_factory=set, repr=False)
    metrics_run_observed: bool = field(default=False, repr=False)
    webhook_notified_status: Optional[str] = field(default=None, repr=False)

    def ensure_task_record(self, task_name: str) -> TaskRecord:
        """Return the record stored under ``task_name``, creating it if absent."""
        records = self.task_records
        if task_name in records:
            return records[task_name]
        created = TaskRecord()
        records[task_name] = created
        return created

    def append_log(self, task_name: str, stream: str, payload: str):
        """Store one log entry in both ``logs`` and the pending buffer.

        Empty payloads are ignored and do not consume a sequence number.
        """
        if payload:
            entry = dict(
                seq=self._log_seq,
                task=task_name,
                stream=stream,
                text=payload,
                timestamp=time.time(),
            )
            self._log_seq += 1
            # The same dict object goes into both lists.
            for sink in (self.logs, self._pending_logs):
                sink.append(entry)

    def drain_logs(self) -> List[Dict[str, Any]]:
        """Return a copy of the pending entries and empty the buffer in place."""
        drained = self._pending_logs[:]
        del self._pending_logs[:]
        return drained

    def serialize_task_records(self) -> Dict[str, Any]:
        """Convert every task record into a plain dict keyed by task name.

        ``inputs`` values and ``output`` are passed through ``_safe_value``.
        """
        safe = self._safe_value

        def as_dict(record: TaskRecord) -> Dict[str, Any]:
            return {
                # Falls back to the raw value if state is not an Enum member.
                "state": getattr(record.state, "value", record.state),
                "started_at": record.started_at,
                "ended_at": record.ended_at,
                "duration_ms": record.duration_ms,
                "error": record.error,
                "traceback": record.traceback,
                "inputs": {key: safe(val) for key, val in record.inputs.items()},
                "output": safe(record.output),
                # NOTE(review): artifacts is returned as-is (same dict object,
                # no _safe_value pass) unlike inputs/output — confirm intended.
                "artifacts": record.artifacts,
            }

        return {name: as_dict(record) for name, record in self.task_records.items()}

    def _safe_value(self, value: Any) -> Any:
        """Return ``value`` if ``json.dumps`` accepts it, else ``repr(value)``."""
        if value is None or isinstance(value, (str, int, float, bool)):
            return value
        try:
            json.dumps(value)
        except Exception:
            return repr(value)
        return value
143
+
35
144
  @dataclass
36
145
  class Flow:
146
+ """
147
+ Represents a Directed Acyclic Graph (DAG) of tasks.
148
+
149
+ Manages the collection of tasks and their dependencies.
150
+ Optimized for single-machine execution without complex scheduling overhead.
151
+ """
37
152
  name: str = "main"
38
153
  tasks: Set[Task] = field(default_factory=set)
39
154
  _tail: Set[Task] = field(default_factory=set)
155
+ _definition: List[Any] = field(default_factory=list, repr=False)
156
+ _has_control_flow: bool = False
40
157
 
41
158
  def __rshift__(self, other):
159
+ from ..dsl.syntax import TaskWrapper, FlowFragment, ensure_fragment
160
+
161
+ if isinstance(other, TaskWrapper):
162
+ fragment = other
163
+ self._record_fragment(fragment)
164
+ self._append_task(fragment.task)
165
+ return self
166
+
167
+ if hasattr(other, "to_subflow"):
168
+ fragment = other if isinstance(other, FlowFragment) else ensure_fragment(other)
169
+ self._record_fragment(fragment)
170
+ if not self._has_control_flow and not fragment.has_control_flow():
171
+ self._append_linear_fragment(fragment)
172
+ else:
173
+ self._has_control_flow = True
174
+ return self
175
+
42
176
  # Flow >> Task/List/Branch
43
177
  new_tasks = []
44
178
  is_branch = False
@@ -114,3 +248,39 @@ class Flow:
114
248
  def add_task(self, task: Task):
115
249
  self.tasks.add(task)
116
250
 
251
def has_control_flow(self) -> bool:
    """Return the ``_has_control_flow`` flag of this flow.

    ``_record_fragment`` sets the flag once a recorded step is not a
    ``TaskNode``.
    """
    return self._has_control_flow
253
+
254
def build_program(self):
    """Wrap a copy of the recorded definition steps in a ``SubFlowNode``."""
    # Imported lazily, as in the rest of this class's DSL interactions.
    from ..dsl.nodes import SubFlowNode

    steps = list(self._definition)  # copy, so the node does not share the list
    return SubFlowNode(steps)
257
+
258
def _record_fragment(self, fragment):
    """Record a fragment's steps and tasks on this flow.

    The fragment's sub-flow steps are appended to ``_definition``, every
    task reported by ``fragment.task_nodes()`` is added to the flow, and
    the control-flow flag is raised if any step is not a ``TaskNode``.
    """
    from ..dsl.nodes import TaskNode

    steps = fragment.to_subflow().steps
    self._definition.extend(steps)

    for task in fragment.task_nodes():
        self.add_task(task)

    only_task_nodes = all(isinstance(step, TaskNode) for step in steps)
    if not only_task_nodes:
        self._has_control_flow = True
266
+
267
+ def _append_linear_fragment(self, fragment):
268
+ subflow = fragment.to_subflow()
269
+ for step in subflow.steps:
270
+ if hasattr(step, "task"):
271
+ self._append_task(step.task)
272
+
273
+ def _append_task(self, task: Task):
274
+ self.add_task(task)
275
+ if self._has_control_flow:
276
+ self._tail = {task}
277
+ return
278
+
279
+ if not self._tail:
280
+ self._tail = {task}
281
+ return
282
+
283
+ for tail_task in self._tail:
284
+ tail_task.dependents.add(task)
285
+ task.dependencies.add(tail_task)
286
+ self._tail = {task}
pyoco/discovery/loader.py CHANGED
@@ -1,9 +1,10 @@
1
1
  import importlib
2
2
  import pkgutil
3
3
  import sys
4
- from typing import Dict, List, Any
4
+ from typing import Dict, List, Any, Set
5
5
  from ..core.models import Task
6
6
  from ..dsl.syntax import TaskWrapper
7
+ from .plugins import PluginRegistry, iter_entry_points
7
8
 
8
9
  class TaskLoader:
9
10
  def __init__(self, config: Any, strict: bool = False):
@@ -11,6 +12,7 @@ class TaskLoader:
11
12
  self.strict = strict
12
13
  self.tasks: Dict[str, Task] = {}
13
14
  self._explicit_tasks: Set[str] = set()
15
+ self.plugin_reports: List[Dict[str, Any]] = []
14
16
 
15
17
  def load(self):
16
18
  # Load explicitly defined tasks in config FIRST (Higher priority)
@@ -31,6 +33,8 @@ class TaskLoader:
31
33
  for pattern in self.config.discovery.glob_modules:
32
34
  self._load_glob_modules(pattern)
33
35
 
36
+ self._load_entry_point_plugins()
37
+
34
38
  def _register_task(self, name: str, task: Task):
35
39
  if name in self.tasks:
36
40
  if name in self._explicit_tasks:
@@ -45,7 +49,7 @@ class TaskLoader:
45
49
  print(f"Warning: {msg} Overwriting.")
46
50
 
47
51
  # Apply config overlay if exists
48
- if name in self.config.tasks:
52
+ if self.config and name in self.config.tasks:
49
53
  conf = self.config.tasks[name]
50
54
  if not conf.callable:
51
55
  if conf.inputs:
@@ -97,6 +101,29 @@ class TaskLoader:
97
101
  module_name = rel_path.replace(os.sep, ".")[:-3] # strip .py
98
102
  self._load_module(module_name)
99
103
 
104
def _load_entry_point_plugins(self):
    """Load every entry-point plugin and record one report per plugin.

    Each entry point returned by ``iter_entry_points()`` is loaded and
    called with a ``PluginRegistry`` bound to this loader.  A report dict
    (``name``, ``value``, ``module``, ``tasks`` and, on failure, ``error``)
    is appended to ``self.plugin_reports`` for every entry point.

    Raises:
        Exception: whatever the plugin raised, re-raised unchanged when
            ``self.strict`` is true.  Otherwise a warning is printed and
            loading continues with the next plugin.
    """
    for ep in iter_entry_points():
        info = {
            "name": ep.name,
            "value": ep.value,
            "module": getattr(ep, "module", ""),
            "tasks": [],
        }
        registry = PluginRegistry(self, ep.name)
        try:
            hook = ep.load()
            if not callable(hook):
                raise TypeError("Entry point must be callable")
            hook(registry)
        except Exception as exc:
            info["error"] = str(exc)
            if self.strict:
                raise
            print(f"Warning: Plugin '{ep.name}' failed to load: {exc}")
        finally:
            # Runs on success, on a swallowed failure and on a strict
            # re-raise, so the report is never lost.  Tasks a hook registered
            # before failing are already in the loader and are listed too.
            info["tasks"] = list(registry.registered_names)
            self.plugin_reports.append(info)
126
+
100
127
  def _scan_module(self, module: Any):
101
128
  for name, obj in vars(module).items():
102
129
  if isinstance(obj, TaskWrapper):
@@ -124,7 +151,6 @@ class TaskLoader:
124
151
  # Create a Task wrapper
125
152
  t = Task(func=real_func, name=name)
126
153
  t.inputs = conf.inputs
127
- t.inputs = conf.inputs
128
154
  t.outputs = conf.outputs
129
155
  self.tasks[name] = t
130
156
  except (ImportError, AttributeError) as e:
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib import metadata as importlib_metadata
4
+ from typing import Any, Callable, Dict, List, Optional
5
+
6
+ from ..core.models import Task
7
+ from ..dsl.syntax import TaskWrapper
8
+
9
+
10
def iter_entry_points(group: str = "pyoco.tasks"):
    """Return the installed entry points of ``group`` as a list.

    Uses ``select(group=...)`` when the object returned by
    ``importlib.metadata.entry_points()`` offers it, and falls back to a
    mapping-style ``get(group, [])`` lookup otherwise.
    """
    discovered = importlib_metadata.entry_points()
    selector = getattr(discovered, "select", None)
    if selector is None:
        # Mapping-style result keyed by group name.
        return list(discovered.get(group, []))
    return list(selector(group=group))
15
+
16
+
17
def list_available_plugins() -> List[Dict[str, Any]]:
    """Describe each entry point of the default group without loading it.

    Returns one dict per entry point with its ``name``, ``module`` (empty
    string when the entry point has no such attribute) and ``value``.
    """
    return [
        {
            "name": entry.name,
            "module": getattr(entry, "module", ""),
            "value": entry.value,
        }
        for entry in iter_entry_points()
    ]
28
+
29
+
30
class PluginRegistry:
    """Registration facade handed to a plugin hook.

    Everything registered is forwarded to ``loader._register_task`` and the
    name used is remembered in ``registered_names``.
    """

    def __init__(self, loader: Any, provider_name: str) -> None:
        self.loader = loader
        self.provider_name = provider_name
        self.registered_names: List[str] = []

    def task(
        self,
        func: Optional[Callable] = None,
        *,
        name: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
        outputs: Optional[List[str]] = None,
    ):
        """Register a callable, usable as ``@registry.task`` or ``@registry.task(...)``.

        Returns the undecorated callable (direct form) or a decorator that
        registers its argument and returns it unchanged.
        """

        def decorator(inner: Callable):
            self.register_callable(
                inner,
                name=name,
                inputs=inputs or {},
                outputs=outputs or [],
            )
            return inner

        if func is None:
            return decorator
        # Direct form: register now and hand the function back.
        return decorator(func)

    def register_callable(
        self,
        func: Callable,
        *,
        name: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
        outputs: Optional[List[str]] = None,
    ) -> Task:
        """Wrap ``func`` in a ``Task``, register it with the loader and return it.

        The task name is ``name`` if truthy, else ``func.__name__``, else
        ``"<provider_name>_task"``.
        """
        if name:
            task_name = name
        else:
            task_name = getattr(func, "__name__", f"{self.provider_name}_task")

        task = Task(func=func, name=task_name)
        if inputs:
            task.inputs.update(inputs)
        if outputs:
            task.outputs.extend(outputs)

        self.loader._register_task(task_name, task)
        self.registered_names.append(task_name)
        return task

    def add(self, obj: Any, *, name: Optional[str] = None) -> None:
        """Register a ``TaskWrapper``, a ``Task`` or a plain callable.

        Raises:
            TypeError: if ``obj`` is none of the supported kinds.
        """
        if isinstance(obj, TaskWrapper):
            task = obj.task
        elif isinstance(obj, Task):
            task = obj
        elif callable(obj):
            self.register_callable(obj, name=name)
            return
        else:
            raise TypeError(f"Unsupported task object: {obj!r}")

        # An explicit name wins over the task's own name.
        registered_as = name or task.name
        self.loader._register_task(registered_as, task)
        self.registered_names.append(registered_as)
@@ -0,0 +1,160 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import re
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Callable, Dict, Mapping, Optional, Tuple, Union
7
+
8
+
9
class ExpressionSyntaxError(ValueError):
    """Raised when an expression cannot be translated or fails validation."""
11
+
12
+
13
class ExpressionEvaluationError(RuntimeError):
    """Raised when evaluating an expression or resolving a path fails."""
15
+
16
+
17
+ DOT_PATH_RE = re.compile(r"^[A-Za-z_][\w.]*$")
18
+
19
+
20
@dataclass(frozen=True)
class Expression:
    """Immutable expression that is translated and compiled on construction.

    ``source`` is the user-facing text.  ``_python`` holds the result of
    ``translate(source)`` and ``_code`` the result of ``compile_safely``.
    """

    source: str
    _python: str = field(init=False, repr=False)
    _code: object = field(init=False, repr=False)

    def __post_init__(self):
        if not isinstance(self.source, str):
            raise TypeError("Expression source must be a string.")
        translated = translate(self.source)
        compiled = compile_safely(translated)
        # The dataclass is frozen, so the derived fields are written through
        # object.__setattr__.
        for attr, derived in (("_python", translated), ("_code", compiled)):
            object.__setattr__(self, attr, derived)

    def evaluate(
        self,
        ctx: Optional[Mapping[str, Any]] = None,
        env: Optional[Mapping[str, Any]] = None,
        extras: Optional[Mapping[str, Any]] = None,
    ) -> Any:
        """Evaluate the compiled expression and return its value.

        Missing ``ctx``/``env``/``extras`` are treated as empty mappings.

        Raises:
            ExpressionEvaluationError: wrapping any exception raised during
                evaluation (the original is chained as ``__cause__``).
        """
        scope = build_eval_scope(ctx or {}, env or {}, extras or {})
        try:
            # Builtins are emptied; only the names in `scope` are reachable.
            result = eval(self._code, {"__builtins__": {}}, scope)  # noqa: S307
        except Exception as exc:
            raise ExpressionEvaluationError(
                f"Failed to evaluate expression '{self.source}': {exc}"
            ) from exc
        else:
            return result
46
+
47
+
48
def ensure_expression(value: Union[str, Expression]) -> Expression:
    """Coerce ``value`` to an ``Expression``.

    An ``Expression`` is returned unchanged.  A string is stripped of
    surrounding whitespace and wrapped.

    Raises:
        TypeError: for any other type.
    """
    if isinstance(value, Expression):
        return value
    if not isinstance(value, str):
        raise TypeError(f"Unsupported expression value: {value!r}")
    return Expression(value.strip())
54
+
55
+
56
# A bare ``_ctx`` / ``_env`` identifier: not preceded by a word character or a
# dot, so path segments such as ``user_env`` or ``._ctx`` are not matched.
_RESERVED_NAME_RE = re.compile(r"(?<![\w.])_(?:ctx|env)\b")

# A ``$ctx.<path>`` / ``$env.<path>`` reference.
_REFERENCE_RE = re.compile(r"\$(?:ctx|env)\.[A-Za-z_][\w.]*")


def translate(expr: str) -> str:
    """Rewrite ``$ctx.a.b`` / ``$env.x`` references into accessor calls.

    ``$ctx.a.b`` becomes ``_ctx('a.b')`` and ``$env.x`` becomes ``_env('x')``.
    The rest of the (stripped) expression is returned untouched.

    Raises:
        ExpressionSyntaxError: if the expression uses the internal
            ``_ctx``/``_env`` names directly, or still contains a ``$``
            after all references have been rewritten.
    """
    # Only standalone identifiers are rejected.  A plain substring test would
    # also refuse valid references such as ``$ctx.user_env``.
    if _RESERVED_NAME_RE.search(expr):
        raise ExpressionSyntaxError("Use $ctx/$env references instead of _ctx/_env.")

    def replace_token(match: "re.Match[str]") -> str:
        # "$ctx.a.b" -> root "ctx", path "a.b".  The pattern guarantees the
        # root is ctx or env and the path holds only word characters and dots.
        root, _, path = match.group(0)[1:].partition(".")
        return f"_{root}('{path}')"

    translated = _REFERENCE_RE.sub(replace_token, expr.strip())
    if "$" in translated:
        raise ExpressionSyntaxError("All references must use $ctx.xxx or $env.xxx form.")
    return translated
74
+
75
+
76
# Whitelist of AST node types an expression may contain.  Anything else
# (attributes, subscripts, lambdas, comprehensions, keywords, ...) is rejected
# by compile_safely.
# NOTE(review): ast.Pow is allowed, so a constant expression like
# ``9 ** 9 ** 9`` can be very expensive to evaluate — consider removing or
# bounding it if expressions can come from untrusted sources.
ALLOWED_NODES = {
    ast.Expression,
    ast.BoolOp,
    ast.BinOp,
    ast.UnaryOp,
    ast.Compare,
    ast.And,
    ast.Or,
    ast.Not,
    ast.Eq,
    ast.NotEq,
    ast.Gt,
    ast.GtE,
    ast.Lt,
    ast.LtE,
    ast.In,
    ast.NotIn,
    ast.Add,
    ast.Sub,
    ast.Mult,
    ast.Div,
    ast.Mod,
    ast.Pow,
    ast.USub,
    ast.UAdd,
    ast.Constant,
    ast.Name,
    ast.Load,
    ast.Call,
}


def compile_safely(python_expr: str):
    """Parse, validate and compile a translated expression.

    Every AST node must be of a type listed in ``ALLOWED_NODES``, the only
    identifiers allowed are ``_ctx`` and ``_env``, and every call must be
    ``_ctx("<str>")`` or ``_env("<str>")`` with exactly one string constant.

    Returns:
        A code object suitable for ``eval``.

    Raises:
        ExpressionSyntaxError: if the text does not parse or violates any of
            the rules above.
    """
    try:
        tree = ast.parse(python_expr, mode="eval")
    except (SyntaxError, ValueError) as exc:
        # Older Python versions report e.g. null bytes as ValueError.
        raise ExpressionSyntaxError(str(exc)) from exc

    allowed_types = tuple(ALLOWED_NODES)  # built once, not once per node
    accessors = {"_ctx", "_env"}

    for node in ast.walk(tree):
        if not isinstance(node, allowed_types):
            raise ExpressionSyntaxError(f"Unsupported syntax: {type(node).__name__}")
        if isinstance(node, ast.Name) and node.id not in accessors:
            raise ExpressionSyntaxError(f"Unknown identifier '{node.id}' in expression.")
        if isinstance(node, ast.Call):
            if not isinstance(node.func, ast.Name) or node.func.id not in accessors:
                raise ExpressionSyntaxError("Only $ctx/$env references are allowed.")
            # The argument must be a string constant, as the error message
            # says.  Other constants would only fail later, during evaluation.
            if (
                len(node.args) != 1
                or not isinstance(node.args[0], ast.Constant)
                or not isinstance(node.args[0].value, str)
            ):
                raise ExpressionSyntaxError("Context references must be constant strings.")
    return compile(tree, "<expression>", "eval")
125
+
126
+
127
def build_eval_scope(
    ctx: Mapping[str, Any], env: Mapping[str, Any], extras: Mapping[str, Any]
) -> Dict[str, Callable[[str], Any]]:
    """Build the name scope used when evaluating a compiled expression.

    The scope maps ``_ctx`` and ``_env`` to accessors that resolve a dotted
    path against ``ctx`` and ``env``, then adds every entry of ``extras``.
    An extra with the same key replaces the accessor.
    """

    def ctx_lookup(path):
        return resolve_path(ctx, path, "$ctx")

    def env_lookup(path):
        return resolve_path(env, path, "$env")

    # extras is unpacked last so its entries win on key clashes.
    return {"_ctx": ctx_lookup, "_env": env_lookup, **extras}
136
+
137
+
138
def resolve_path(data: Mapping[str, Any], path: str, root: str):
    """Walk a dotted ``path`` through ``data`` and return the final value.

    At each step a mapping is indexed by key and any other object is read
    by attribute.  ``root`` (e.g. ``"$ctx"``) is used only in error messages.

    Raises:
        ExpressionEvaluationError: if ``path`` does not match ``DOT_PATH_RE``
            or a segment is missing.
    """
    if DOT_PATH_RE.match(path) is None:
        raise ExpressionEvaluationError(f"Invalid path '{path}' for {root}.")

    node: Any = data
    for segment in path.split("."):
        is_mapping = isinstance(node, Mapping)
        # NOTE(review): the attribute branch reads any attribute name the
        # path pattern allows, including underscore-prefixed ones — confirm
        # this is acceptable for the objects placed in ctx/env.
        present = (segment in node) if is_mapping else hasattr(node, segment)
        if not present:
            raise ExpressionEvaluationError(f"{root}.{path} not found.")
        node = node[segment] if is_mapping else getattr(node, segment)
    return node
153
+
154
+
155
+ __all__ = [
156
+ "Expression",
157
+ "ensure_expression",
158
+ "ExpressionSyntaxError",
159
+ "ExpressionEvaluationError",
160
+ ]
pyoco/dsl/nodes.py ADDED
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional, Sequence as TypingSequence, Union
5
+
6
+ from ..core.models import Task
7
+ from .expressions import Expression, ensure_expression
8
+
9
+
10
+ DEFAULT_CASE_VALUE = "__default__"
11
+
12
+
13
class DSLNode:
    """Base class for all DSL AST nodes.

    Declares no fields or methods itself; the concrete node dataclasses in
    this module derive from it.
    """
15
+
16
+
17
@dataclass
class TaskNode(DSLNode):
    """Leaf node that wraps a single ``Task``."""

    task: Task
20
+
21
+
22
@dataclass
class SubFlowNode(DSLNode):
    """Container node holding an ordered list of child nodes."""

    # default_factory gives every instance its own list.
    steps: List[DSLNode] = field(default_factory=list)
25
+
26
+
27
@dataclass
class RepeatNode(DSLNode):
    """Node pairing a body sub-flow with a repeat count.

    How the count is applied is decided by the code that consumes the node,
    which is not part of this module.
    """

    body: SubFlowNode
    count: Union[int, Expression]  # literal int or an Expression
31
+
32
+
33
@dataclass
class ForEachNode(DSLNode):
    """Node pairing a body sub-flow with a source expression and an optional alias."""

    body: SubFlowNode
    source: Expression
    # presumably the name each item is exposed under while the body runs —
    # confirm against the code that consumes this node
    alias: Optional[str] = None
38
+
39
+
40
@dataclass
class UntilNode(DSLNode):
    """Node pairing a body sub-flow with a condition and an optional iteration cap."""

    body: SubFlowNode
    condition: Expression
    # None means no cap is recorded on the node; how a consumer treats that
    # is not visible here.
    max_iter: Optional[int] = None
45
+
46
+
47
@dataclass
class CaseNode(DSLNode):
    """One case of a ``SwitchNode``: a scalar value and its target sub-flow."""

    # presumably DEFAULT_CASE_VALUE marks the fallback case — confirm with
    # the code that builds and consumes cases
    value: Union[str, int, float, bool]
    target: SubFlowNode
51
+
52
+
53
@dataclass
class SwitchNode(DSLNode):
    """Node holding a switch expression and its list of ``CaseNode`` entries."""

    expression: Expression
    # default_factory gives every instance its own list.
    cases: List[CaseNode] = field(default_factory=list)