processes 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- processes/__init__.py +24 -0
- processes/html_logging.py +201 -0
- processes/process.py +417 -0
- processes/task.py +302 -0
- processes-1.0.2.dist-info/METADATA +155 -0
- processes-1.0.2.dist-info/RECORD +8 -0
- processes-1.0.2.dist-info/WHEEL +4 -0
- processes-1.0.2.dist-info/licenses/LICENSE +21 -0
processes/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError as _pnfe
|
|
2
|
+
from importlib.metadata import version as _v
|
|
3
|
+
|
|
4
|
+
from .html_logging import HTMLSMTPHandler as HTMLSMTPHandler
|
|
5
|
+
from .process import (
|
|
6
|
+
CircularDependencyError as CircularDependencyError,
|
|
7
|
+
)
|
|
8
|
+
from .process import (
|
|
9
|
+
DependencyNotFoundError as DependencyNotFoundError,
|
|
10
|
+
)
|
|
11
|
+
from .process import (
|
|
12
|
+
Process as Process,
|
|
13
|
+
)
|
|
14
|
+
from .process import (
|
|
15
|
+
TaskNotFoundError as TaskNotFoundError,
|
|
16
|
+
)
|
|
17
|
+
from .task import Task as Task
|
|
18
|
+
from .task import TaskDependency as TaskDependency
|
|
19
|
+
from .task import TaskResult as TaskResult
|
|
20
|
+
|
|
21
|
+
# Expose the installed distribution's version as ``__version__``.
# ``_v`` is ``importlib.metadata.version`` and ``_pnfe`` is
# ``importlib.metadata.PackageNotFoundError`` (aliased in the imports above).
# When no metadata for the "processes" distribution can be found, fall back
# to a placeholder string instead of failing at import time.
try:
    __version__ = _v("processes")
except _pnfe:
    __version__ = "0.0.0-unknown"
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import logging.handlers
|
|
3
|
+
import smtplib
|
|
4
|
+
import ssl
|
|
5
|
+
import traceback
|
|
6
|
+
from email.mime.text import MIMEText
|
|
7
|
+
from email.utils import formatdate
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class HTMLSMTPHandler(logging.handlers.SMTPHandler):
|
|
11
|
+
"""
|
|
12
|
+
A logging handler that sends log records via SMTP as HTML formatted emails.
|
|
13
|
+
|
|
14
|
+
Extends the standard SMTPHandler to support HTML-formatted email messages,
|
|
15
|
+
enabling richer formatting and styling in error notifications.
|
|
16
|
+
|
|
17
|
+
Attributes
|
|
18
|
+
----------
|
|
19
|
+
mailhost : tuple[str, int]
|
|
20
|
+
A tuple of (host, port) for the SMTP server.
|
|
21
|
+
fromaddr : str
|
|
22
|
+
The email address to send messages from.
|
|
23
|
+
toaddrs : list[str]
|
|
24
|
+
List of email addresses to send messages to.
|
|
25
|
+
credentials : tuple[str, str] | None
|
|
26
|
+
A tuple of (username, password) for SMTP authentication. Defaults to None.
|
|
27
|
+
secure : tuple | tuple[str, str] | tuple[str, str, ssl.SSLContext] | None
|
|
28
|
+
Security configuration for SMTP connection. Can be an empty tuple for no security,
|
|
29
|
+
a tuple of (certfile, keyfile), or (certfile, keyfile, SSLContext).
|
|
30
|
+
Defaults to None.
|
|
31
|
+
timeout : int
|
|
32
|
+
Connection timeout in seconds. Defaults to 5.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
def __init__(
|
|
36
|
+
self,
|
|
37
|
+
mailhost: tuple[str, int],
|
|
38
|
+
fromaddr: str,
|
|
39
|
+
toaddrs: list[str],
|
|
40
|
+
credentials: tuple[str, str] | None = None,
|
|
41
|
+
secure: tuple[()]
|
|
42
|
+
| tuple[str]
|
|
43
|
+
| tuple[str, str]
|
|
44
|
+
| tuple[str, str, ssl.SSLContext]
|
|
45
|
+
| None = None,
|
|
46
|
+
timeout: int = 5,
|
|
47
|
+
):
|
|
48
|
+
self._crd = credentials
|
|
49
|
+
self._sec = secure
|
|
50
|
+
self._to = timeout
|
|
51
|
+
|
|
52
|
+
super().__init__(
|
|
53
|
+
mailhost,
|
|
54
|
+
fromaddr,
|
|
55
|
+
toaddrs,
|
|
56
|
+
"",
|
|
57
|
+
credentials=credentials,
|
|
58
|
+
secure=secure, # type: ignore[arg-type]
|
|
59
|
+
timeout=timeout,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
def copy(self) -> "HTMLSMTPHandler":
|
|
63
|
+
"""Create a shallow copy of this handler.
|
|
64
|
+
|
|
65
|
+
Returns
|
|
66
|
+
-------
|
|
67
|
+
HTMLSMTPHandler
|
|
68
|
+
A new HTMLSMTPHandler instance with the same configuration.
|
|
69
|
+
"""
|
|
70
|
+
return HTMLSMTPHandler(
|
|
71
|
+
self.mailhost, # type: ignore[arg-type]
|
|
72
|
+
self.fromaddr,
|
|
73
|
+
self.toaddrs,
|
|
74
|
+
credentials=self._crd,
|
|
75
|
+
secure=self._sec,
|
|
76
|
+
timeout=self._to,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
def __copy__(self) -> "HTMLSMTPHandler":
|
|
80
|
+
"""Support for copy.copy() method.
|
|
81
|
+
|
|
82
|
+
Returns
|
|
83
|
+
-------
|
|
84
|
+
HTMLSMTPHandler
|
|
85
|
+
A shallow copy of this handler.
|
|
86
|
+
"""
|
|
87
|
+
return self.copy()
|
|
88
|
+
|
|
89
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
90
|
+
"""Send a log record via email as HTML formatted message.
|
|
91
|
+
|
|
92
|
+
Formats the log record using the handler's formatter and sends it
|
|
93
|
+
as an HTML-formatted email. Errors during sending are handled gracefully.
|
|
94
|
+
|
|
95
|
+
Parameters
|
|
96
|
+
----------
|
|
97
|
+
record : logging.LogRecord
|
|
98
|
+
The log record to send.
|
|
99
|
+
"""
|
|
100
|
+
try:
|
|
101
|
+
port = self.mailport
|
|
102
|
+
if not port:
|
|
103
|
+
port = smtplib.SMTP_PORT
|
|
104
|
+
smtp = smtplib.SMTP(self.mailhost, port)
|
|
105
|
+
msg = self.format(record)
|
|
106
|
+
|
|
107
|
+
# Create MIMEText object with HTML content
|
|
108
|
+
mime_msg = MIMEText(msg, "html")
|
|
109
|
+
mime_msg["From"] = self.fromaddr
|
|
110
|
+
mime_msg["To"] = ",".join(self.toaddrs)
|
|
111
|
+
mime_msg["Subject"] = self.getSubject(record)
|
|
112
|
+
mime_msg["Date"] = formatdate()
|
|
113
|
+
|
|
114
|
+
if self.username:
|
|
115
|
+
if self.secure is not None:
|
|
116
|
+
smtp.starttls(*self.secure)
|
|
117
|
+
smtp.login(self.username, self.password)
|
|
118
|
+
smtp.sendmail(self.fromaddr, self.toaddrs, mime_msg.as_string())
|
|
119
|
+
smtp.quit()
|
|
120
|
+
except Exception:
|
|
121
|
+
self.handleError(record)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class ExceptionHTMLFormatter(logging.Formatter):
    """
    A logging formatter that converts exception records to HTML format.

    Renders the exception message and traceback inside a styled HTML document
    and supports custom post-traceback content, producing exception reports
    suitable for email delivery.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record as HTML, with special handling for exceptions.

        When the record carries ``exc_info``, the exception message and the
        traceback are taken from it; otherwise the record's message is shown
        and the traceback section says none is available. Exception text and
        traceback are HTML-escaped. The record's optional
        `post_traceback_html_body` attribute is appended verbatim, so it is
        expected to already be valid HTML.

        Parameters
        ----------
        record : logging.LogRecord
            The log record to format.

        Returns
        -------
        str
            HTML-formatted string containing exception details, traceback,
            and styling.
        """
        import html  # local import: keeps this block self-contained

        if record.exc_info:
            exception = str(record.exc_info[1])
            # Format the exception attached to the record, not whatever
            # exception happens to be active when the handler runs.
            tb_str = "".join(traceback.format_exception(*record.exc_info))
        else:
            exception = record.getMessage()
            tb_str = "No traceback available"

        post_traceback_html_body = getattr(record, "post_traceback_html_body", "")

        # Escape before inserting markup: otherwise frames such as "<module>"
        # are swallowed as unknown tags and messages could inject HTML.
        exception = html.escape(exception, quote=False)
        tb_str = html.escape(tb_str, quote=False).replace("\n", "<br>")

        body = f"""
        <html>
        <head>
            <style>
                body {{
                    font-family: Arial, sans-serif;
                    margin: 20px;
                    color: #333;
                }}
                h2 {{
                    color: #d9534f;
                }}
                .exception {{
                    font-weight: bold;
                    color: #d9534f;
                }}
                .traceback {{
                    background-color: #f9f2f4;
                    border: 1px solid #d9534f;
                    padding: 10px;
                    font-family: 'Courier New', Courier, monospace;
                    white-space: pre-wrap;
                    color: #333;
                    border-radius: 4px;
                }}
            </style>
        </head>
        <body>
            <h2>Exception Details</h2>
            <p class="exception">Exception: {exception}</p>
            <p><strong>Traceback:</strong></p>
            <div class="traceback">{tb_str}</div>
            <br>
            {post_traceback_html_body}
        </body>
        </html>
        """
        return body
|
processes/process.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
from types import TracebackType
|
|
3
|
+
from typing import Literal, Self
|
|
4
|
+
|
|
5
|
+
from .task import Task, TaskResult
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DependencyNotFoundError(Exception):
    """Raised when a task depends on a non-existent task."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TaskNotFoundError(Exception):
    """Raised when attempting to retrieve a task that does not exist in the process."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CircularDependencyError(Exception):
    """Raised when circular dependencies are detected among tasks."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ProcessResult:
    """
    Outcome of one process execution.

    A plain value holder that keeps the results of the tasks that succeeded
    apart from the names of the tasks that failed.

    Attributes
    ----------
    passed_tasks_results : dict[str, TaskResult]
        TaskResult of every successfully executed task, keyed by task name.
    failed_tasks : set[str]
        Names of the tasks that failed during execution.
    """

    def __init__(self, passed_tasks_results: dict[str, TaskResult], failed_tasks: set[str]):
        # Both containers are stored as given (no copy is made).
        self.passed_tasks_results, self.failed_tasks = passed_tasks_results, failed_tasks
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Process:
|
|
47
|
+
"""
|
|
48
|
+
Manages and executes a collection of interdependent tasks.
|
|
49
|
+
|
|
50
|
+
A Process orchestrates the execution of multiple tasks, handling dependency
|
|
51
|
+
resolution, task ordering. Task execution can be performed in parallel or sequentially. It
|
|
52
|
+
provides logging management and error propagation for dependent tasks. If a task fails,
|
|
53
|
+
all tasks depending on it are marked as failed without execution, but non-dependent tasks
|
|
54
|
+
continue to run.
|
|
55
|
+
|
|
56
|
+
Attributes
|
|
57
|
+
----------
|
|
58
|
+
tasks : list[Task]
|
|
59
|
+
List of tasks to be executed, automatically sorted by dependencies.
|
|
60
|
+
runner : ProcessRunner
|
|
61
|
+
The runner responsible for executing the tasks.
|
|
62
|
+
|
|
63
|
+
Raises
|
|
64
|
+
------
|
|
65
|
+
TypeError
|
|
66
|
+
If tasks is not a list or contains non-Task elements.
|
|
67
|
+
ValueError
|
|
68
|
+
If duplicate task names are found.
|
|
69
|
+
DependencyNotFoundError
|
|
70
|
+
If a task depends on a non-existent task.
|
|
71
|
+
CircularDependencyError
|
|
72
|
+
If circular dependencies are detected among tasks.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
def __init__(self, tasks: list[Task]):
|
|
76
|
+
self.tasks = tasks
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
self._check_input_types()
|
|
80
|
+
self._check_duplicate_names()
|
|
81
|
+
self._check_dependencies_exist()
|
|
82
|
+
self._topological_sort()
|
|
83
|
+
except Exception as e:
|
|
84
|
+
self.close_loggers()
|
|
85
|
+
raise e
|
|
86
|
+
self.runner = ProcessRunner(self)
|
|
87
|
+
|
|
88
|
+
def __enter__(self) -> Self:
|
|
89
|
+
"""Called when entering the 'with' block."""
|
|
90
|
+
return self
|
|
91
|
+
|
|
92
|
+
def __exit__(
|
|
93
|
+
self,
|
|
94
|
+
exc_type: type[BaseException] | None,
|
|
95
|
+
exc_value: BaseException | None,
|
|
96
|
+
traceback: TracebackType | None,
|
|
97
|
+
) -> Literal[False]:
|
|
98
|
+
"""Called when exiting the 'with' block, even if an error occurred."""
|
|
99
|
+
self.close_loggers()
|
|
100
|
+
return False
|
|
101
|
+
|
|
102
|
+
def _check_input_types(self) -> None:
|
|
103
|
+
"""Validate that tasks is a list containing only Task objects.
|
|
104
|
+
|
|
105
|
+
Raises
|
|
106
|
+
------
|
|
107
|
+
TypeError
|
|
108
|
+
If tasks is not a list or contains non-Task elements.
|
|
109
|
+
"""
|
|
110
|
+
if not isinstance(self.tasks, list):
|
|
111
|
+
raise TypeError(f"tasks must be list. Got {type(self.tasks)}")
|
|
112
|
+
for task in self.tasks:
|
|
113
|
+
if not isinstance(task, Task):
|
|
114
|
+
raise TypeError(f"task must be Task. Got {type(task)}")
|
|
115
|
+
|
|
116
|
+
def _check_duplicate_names(self) -> None:
|
|
117
|
+
"""Verify that all task names are unique.
|
|
118
|
+
|
|
119
|
+
Raises
|
|
120
|
+
------
|
|
121
|
+
ValueError
|
|
122
|
+
If duplicate task names are found.
|
|
123
|
+
"""
|
|
124
|
+
names = set()
|
|
125
|
+
for task in self.tasks:
|
|
126
|
+
if task.name in names:
|
|
127
|
+
raise ValueError(f"Duplicate task name: {task.name}")
|
|
128
|
+
names.add(task.name)
|
|
129
|
+
|
|
130
|
+
def _check_dependencies_exist(self) -> None:
|
|
131
|
+
"""Verify that all task dependencies refer to existing tasks.
|
|
132
|
+
|
|
133
|
+
Raises
|
|
134
|
+
------
|
|
135
|
+
DependencyNotFoundError
|
|
136
|
+
If a task depends on a non-existent task.
|
|
137
|
+
"""
|
|
138
|
+
names = {t.name for t in self.tasks}
|
|
139
|
+
for task in self.tasks:
|
|
140
|
+
for dep in task.get_dependencies_names():
|
|
141
|
+
if dep not in names:
|
|
142
|
+
raise DependencyNotFoundError(
|
|
143
|
+
f"Task {task.name} depends on missing task: {dep}"
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
def _topological_sort(self) -> None:
|
|
147
|
+
"""Sort tasks based on dependencies using Kahn's Algorithm in O(V+E) time.
|
|
148
|
+
|
|
149
|
+
Reorders the task list so that dependencies are always executed before
|
|
150
|
+
tasks that depend on them.
|
|
151
|
+
|
|
152
|
+
Raises
|
|
153
|
+
------
|
|
154
|
+
CircularDependencyError
|
|
155
|
+
If circular dependencies are detected among tasks.
|
|
156
|
+
"""
|
|
157
|
+
in_degree = {t.name: 0 for t in self.tasks}
|
|
158
|
+
graph: dict[str, list[str]] = {t.name: [] for t in self.tasks}
|
|
159
|
+
task_map = {t.name: t for t in self.tasks}
|
|
160
|
+
|
|
161
|
+
for task in self.tasks:
|
|
162
|
+
for dep in task.dependencies:
|
|
163
|
+
graph[dep.task_name].append(task.name)
|
|
164
|
+
in_degree[task.name] += 1
|
|
165
|
+
|
|
166
|
+
queue = [name for name, deg in in_degree.items() if deg == 0]
|
|
167
|
+
sorted_tasks = []
|
|
168
|
+
|
|
169
|
+
while queue:
|
|
170
|
+
u = queue.pop(0)
|
|
171
|
+
sorted_tasks.append(task_map[u])
|
|
172
|
+
for v in graph[u]:
|
|
173
|
+
in_degree[v] -= 1
|
|
174
|
+
if in_degree[v] == 0:
|
|
175
|
+
queue.append(v)
|
|
176
|
+
|
|
177
|
+
if len(sorted_tasks) != len(self.tasks):
|
|
178
|
+
raise CircularDependencyError("Circular dependency detected.")
|
|
179
|
+
self.tasks = sorted_tasks
|
|
180
|
+
|
|
181
|
+
def get_task(self, task_name: str) -> Task:
|
|
182
|
+
"""Retrieve a task by name.
|
|
183
|
+
|
|
184
|
+
Parameters
|
|
185
|
+
----------
|
|
186
|
+
task_name : str
|
|
187
|
+
The name of the task to retrieve.
|
|
188
|
+
|
|
189
|
+
Returns
|
|
190
|
+
-------
|
|
191
|
+
Task
|
|
192
|
+
The task with the specified name.
|
|
193
|
+
|
|
194
|
+
Raises
|
|
195
|
+
------
|
|
196
|
+
TaskNotFoundError
|
|
197
|
+
If no task with the given name exists.
|
|
198
|
+
"""
|
|
199
|
+
for task in self.tasks:
|
|
200
|
+
if task.name == task_name:
|
|
201
|
+
return task
|
|
202
|
+
raise TaskNotFoundError(f"Task not found: {task_name}")
|
|
203
|
+
|
|
204
|
+
def run(self, parallel: bool | None = None, max_workers: int = 4) -> ProcessResult:
|
|
205
|
+
"""Execute all tasks in the process.
|
|
206
|
+
|
|
207
|
+
Runs tasks sequentially or in parallel while respecting dependencies.
|
|
208
|
+
Dependencies are always resolved before dependent tasks are executed.
|
|
209
|
+
|
|
210
|
+
Parameters
|
|
211
|
+
----------
|
|
212
|
+
parallel : bool, optional
|
|
213
|
+
Whether to run tasks in parallel while respecting dependencies.
|
|
214
|
+
If None, automatically set to True for processes with 10 or more tasks,
|
|
215
|
+
False otherwise. Defaults to None.
|
|
216
|
+
max_workers : int, optional
|
|
217
|
+
Maximum number of worker threads for parallel execution. Defaults to 4.
|
|
218
|
+
Only used when parallel=True. If set to 1, falls back to sequential execution.
|
|
219
|
+
|
|
220
|
+
Returns
|
|
221
|
+
-------
|
|
222
|
+
ProcessResult
|
|
223
|
+
Contains passed_tasks_results (dict mapping task names to TaskResult)
|
|
224
|
+
and failed_tasks (set of task names that failed).
|
|
225
|
+
"""
|
|
226
|
+
if parallel is None:
|
|
227
|
+
parallel = len(self.tasks) >= 10
|
|
228
|
+
|
|
229
|
+
max_workers = max(1, max_workers)
|
|
230
|
+
if parallel:
|
|
231
|
+
if max_workers == 1:
|
|
232
|
+
parallel = False # Fallback to sequential if only one worker
|
|
233
|
+
process_result = self.runner.run(parallel, max_workers)
|
|
234
|
+
return process_result
|
|
235
|
+
|
|
236
|
+
def get_dependant_tasks(self, task_name: str) -> list[Task]:
|
|
237
|
+
"""Retrieve all tasks that directly or indirectly depend on a given task.
|
|
238
|
+
|
|
239
|
+
Parameters
|
|
240
|
+
----------
|
|
241
|
+
task_name : str
|
|
242
|
+
The name of the task to find dependants for.
|
|
243
|
+
|
|
244
|
+
Returns
|
|
245
|
+
-------
|
|
246
|
+
list[Task]
|
|
247
|
+
List of all tasks that depend on the specified task, including
|
|
248
|
+
transitive dependencies (tasks that depend on tasks that depend
|
|
249
|
+
on the specified task).
|
|
250
|
+
"""
|
|
251
|
+
found = []
|
|
252
|
+
|
|
253
|
+
def find(name: str) -> None:
|
|
254
|
+
for t in self.tasks:
|
|
255
|
+
if name in t.get_dependencies_names() and t not in found:
|
|
256
|
+
found.append(t)
|
|
257
|
+
find(t.name)
|
|
258
|
+
|
|
259
|
+
find(task_name)
|
|
260
|
+
return found
|
|
261
|
+
|
|
262
|
+
def close_loggers(self) -> None:
|
|
263
|
+
"""Close and clean up all logger handlers for all tasks.
|
|
264
|
+
|
|
265
|
+
Should be called when the process is done to ensure proper resource cleanup.
|
|
266
|
+
"""
|
|
267
|
+
for task in self.tasks:
|
|
268
|
+
for handler in task.logger.handlers:
|
|
269
|
+
handler.close()
|
|
270
|
+
task.logger.removeHandler(handler)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class ProcessRunner:
    """
    Executes tasks in a Process, handling both sequential and parallel execution.

    Manages task execution state, tracks passed and failed tasks, and coordinates
    dependencies during execution.

    Attributes
    ----------
    process : Process
        Reference to the parent Process being executed.
    passed_results : dict[str, TaskResult]
        Results from successfully executed tasks.
    failed_tasks : set[str]
        Names of tasks that failed during execution, including tasks that were
        skipped because one of their dependencies failed.
    submitted_tasks : set[str]
        Names of tasks that have been submitted for execution (parallel mode).
    """

    def __init__(self, process_ref: Process):
        self.process = process_ref
        self.passed_results: dict[str, TaskResult] = {}
        self.failed_tasks: set[str] = set()
        self.submitted_tasks: set[str] = set()

    def run(self, parallel: bool, max_workers: int) -> ProcessResult:
        """Execute all tasks in the process using the specified execution mode.

        Every call starts from a clean state, so running the same process
        again re-executes all tasks instead of reusing the previous outcome.

        Parameters
        ----------
        parallel : bool
            If True, execute tasks in parallel; otherwise execute sequentially.
        max_workers : int
            Maximum number of worker threads for parallel execution.

        Returns
        -------
        ProcessResult
            The combined results of all task executions.
        """
        # Rebind to fresh containers (rather than clearing in place) so that a
        # ProcessResult returned by an earlier run is left untouched.
        self.passed_results = {}
        self.failed_tasks = set()
        self.submitted_tasks = set()

        if parallel:
            self._run_parallel(max_workers)
        else:
            self._run_sequential()
        return ProcessResult(self.passed_results, self.failed_tasks)

    def _is_unrunnable(self, task: Task) -> bool:
        """Check if a task cannot be run due to failed dependencies.

        Parameters
        ----------
        task : Task
            The task to check.

        Returns
        -------
        bool
            True if any of the task's dependencies have failed, False otherwise.
            If True, the task is also marked as failed.
        """
        if any(d.task_name in self.failed_tasks for d in task.dependencies):
            self.failed_tasks.add(task.name)  # Propagate failure
            return True
        return False

    def _all_deps_met(self, task: Task) -> bool:
        """Check if all dependencies of a task have been successfully executed.

        Parameters
        ----------
        task : Task
            The task to check.

        Returns
        -------
        bool
            True if all dependencies have passed, False otherwise.
        """
        return all(d.task_name in self.passed_results for d in task.dependencies)

    def _run_sequential(self) -> None:
        """Execute all tasks sequentially in dependency order."""
        for task in self.process.tasks:
            if self._is_unrunnable(task):
                continue
            if self._all_deps_met(task):
                res = task.run(self.process)
                if res.worked:
                    self.passed_results[task.name] = res
                else:
                    self.failed_tasks.add(task.name)

    def _run_parallel(self, max_workers: int) -> None:
        """Execute tasks in parallel using a thread pool while respecting dependencies.

        Parameters
        ----------
        max_workers : int
            Maximum number of worker threads to use.

        Raises
        ------
        RuntimeError
            If execution stalls: unfinished tasks remain, but none is ready to
            start and none is running.
        """
        total = len(self.process.tasks)

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            fut_to_name = {}
            while len(self.passed_results) + len(self.failed_tasks) < total:
                # Look for candidates to execute now. _is_unrunnable() has a
                # side effect: it marks tasks with a failed dependency as failed.
                candidates = [
                    t
                    for t in self.process.tasks
                    if t.name not in self.submitted_tasks
                    and t.name not in self.failed_tasks
                    and not self._is_unrunnable(t)
                    and self._all_deps_met(t)
                ]

                # Send tasks for execution and register each task as submitted
                for task in candidates:
                    fut = executor.submit(task.run, self.process)
                    fut_to_name[fut] = task.name
                    self.submitted_tasks.add(task.name)

                if not fut_to_name:
                    # The candidate scan may have just failed the last pending
                    # tasks through dependency propagation; that is a normal
                    # end of the run, not a stall.
                    if len(self.passed_results) + len(self.failed_tasks) >= total:
                        break
                    # No candidates and no running tasks - likely a deadlock or logic error
                    raise RuntimeError(
                        "Parallel execution stalled: no candidates found and no tasks running"
                    )

                # Wait until at least one running task completes, then record
                # it as passed or failed and drop it from the pending futures.
                done, _ = concurrent.futures.wait(
                    list(fut_to_name), return_when=concurrent.futures.FIRST_COMPLETED
                )
                for fut in done:
                    name = fut_to_name.pop(fut)
                    try:
                        res = fut.result()
                    except Exception:
                        # An exception escaping task.run counts as a failure.
                        self.failed_tasks.add(name)
                    else:
                        if res.worked:
                            self.passed_results[name] = res
                        else:
                            self.failed_tasks.add(name)
|
processes/task.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from .process import Process
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from .html_logging import ExceptionHTMLFormatter, HTMLSMTPHandler
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TaskResult:
    """
    Outcome of a single task execution.

    A plain value holder recording whether the task succeeded, what it
    returned, and which exception (if any) it raised.

    Attributes
    ----------
    worked : bool
        True if the task executed successfully, False if an exception occurred.
    result : Any
        The return value of the task's function if execution succeeded, None if failed.
    exception : Exception | None
        The exception object if execution failed, None if successful.
    """

    def __init__(self, worked: bool, result: Any, exception: Exception | None):
        # Values are stored exactly as given; no validation is performed.
        self.worked, self.result, self.exception = worked, result, exception
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TaskDependency:
    """
    Represents a dependency relationship between tasks.

    Defines how a task depends on another task, including how the result
    of the dependency should be passed to the dependent task (as additional
    positional arguments, keyword arguments, or both).

    Attributes
    ----------
    task_name : str
        The name of the task this dependency refers to.
    use_result_as_additional_args : bool
        If True, the result of the dependency task will be passed as an
        additional positional argument as the last argument. Defaults to False.
    use_result_as_additional_kwargs : bool
        If True, the result of the dependency task will be passed as a
        keyword argument. Defaults to False.
    additional_kwarg_name : str
        The name of the keyword argument to use if use_result_as_additional_kwargs
        is True. Must be a non-empty string in that case; it is not validated
        otherwise. Defaults to an empty string.

    Raises
    ------
    TypeError
        If any parameter type is invalid or if use_result_as_additional_kwargs
        is True but additional_kwarg_name is not a non-empty string.
    """

    def __init__(
        self,
        task_name: str,
        use_result_as_additional_args: bool = False,
        use_result_as_additional_kwargs: bool = False,
        additional_kwarg_name: str = "",
    ):
        self.task_name = task_name
        self.use_result_as_additional_args = use_result_as_additional_args
        self.use_result_as_additional_kwargs = use_result_as_additional_kwargs
        self.additional_kwarg_name = additional_kwarg_name

        if not isinstance(self.task_name, str):
            raise TypeError(f"task_name must be of type str. Got {type(self.task_name)}")
        if not isinstance(self.use_result_as_additional_args, bool):
            raise TypeError(
                f"use_result_as_additional_args must be of type bool. "
                f"Got {type(self.use_result_as_additional_args)}"
            )
        if not isinstance(self.use_result_as_additional_kwargs, bool):
            raise TypeError(
                f"use_result_as_additional_kwargs must be of type bool. "
                f"Got {type(self.use_result_as_additional_kwargs)}"
            )

        # The keyword name is only needed when the result is injected as a
        # kwarg. Reject non-strings (e.g. None) as well as "", since a bare
        # equality check against "" lets them through.
        if self.use_result_as_additional_kwargs and (
            not isinstance(self.additional_kwarg_name, str) or self.additional_kwarg_name == ""
        ):
            raise TypeError(
                "If use_result_as_additional_kwargs is True, additional_kwarg_name"
                " must be a non-empty string."
            )

    def __hash__(self) -> int:
        """
        Return hash of the dependency based on task name.

        Returns
        -------
        int
            Hash value based on the task_name attribute.
        """
        return hash(self.task_name)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class Task:
|
|
111
|
+
"""
|
|
112
|
+
A Task represents a unit of work to be executed within a Process.
|
|
113
|
+
|
|
114
|
+
A Task encapsulates a callable function with its arguments, dependencies on other tasks,
|
|
115
|
+
and logging configuration. Tasks can be executed, by the Process class, sequentially
|
|
116
|
+
or in parallel, with automatic dependency resolution and result passing between dependent tasks.
|
|
117
|
+
|
|
118
|
+
Attributes
|
|
119
|
+
----------
|
|
120
|
+
name : str
|
|
121
|
+
Unique name for the task (cannot contain spaces).
|
|
122
|
+
log_path : str
|
|
123
|
+
File path where task logs will be written.
|
|
124
|
+
func : Callable
|
|
125
|
+
The function to execute when the task runs.
|
|
126
|
+
args : tuple
|
|
127
|
+
Positional arguments to pass to the function. Defaults to empty tuple.
|
|
128
|
+
kwargs : dict
|
|
129
|
+
Keyword arguments to pass to the function. Defaults to empty dict.
|
|
130
|
+
dependencies : list[TaskDependency]
|
|
131
|
+
List of tasks this task depends on. Defaults to empty list.
|
|
132
|
+
html_mail_handler : HTMLSMTPHandler, optional
|
|
133
|
+
Handler for sending error logs via email in HTML format. Defaults to None.
|
|
134
|
+
logger : logging.Logger
|
|
135
|
+
Logger instance for this task, automatically configured.
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
kwargs: dict[str, Any]
|
|
139
|
+
dependencies: list[TaskDependency]
|
|
140
|
+
|
|
141
|
+
def __init__(
|
|
142
|
+
self,
|
|
143
|
+
name: str,
|
|
144
|
+
log_path: str,
|
|
145
|
+
func: Callable[..., Any],
|
|
146
|
+
args: tuple[Any, ...] = (),
|
|
147
|
+
kwargs: dict[str, Any] | None = None,
|
|
148
|
+
dependencies: list[TaskDependency] | None = None,
|
|
149
|
+
html_mail_handler: HTMLSMTPHandler | None = None,
|
|
150
|
+
):
|
|
151
|
+
self.name = name
|
|
152
|
+
self.log_path = log_path
|
|
153
|
+
self.func = func
|
|
154
|
+
self.args = args
|
|
155
|
+
self.html_mail_handler = html_mail_handler
|
|
156
|
+
|
|
157
|
+
if kwargs is None:
|
|
158
|
+
self.kwargs = {}
|
|
159
|
+
else:
|
|
160
|
+
self.kwargs = kwargs
|
|
161
|
+
if dependencies is None:
|
|
162
|
+
self.dependencies = []
|
|
163
|
+
else:
|
|
164
|
+
self.dependencies = dependencies
|
|
165
|
+
|
|
166
|
+
self._check_input_types()
|
|
167
|
+
if " " in self.name:
|
|
168
|
+
raise ValueError(f"Task name cannot contain spaces. Got {self.name}")
|
|
169
|
+
|
|
170
|
+
depedencies_names = []
|
|
171
|
+
for dependency in self.dependencies:
|
|
172
|
+
if dependency.task_name in depedencies_names:
|
|
173
|
+
raise ValueError(f"Duplicate dependency name: {dependency.task_name}")
|
|
174
|
+
depedencies_names.append(dependency.task_name)
|
|
175
|
+
if dependency.task_name == self.name:
|
|
176
|
+
raise ValueError(
|
|
177
|
+
f"Got dependency with same name as Task. "
|
|
178
|
+
f"Task: {self.name}. Dependency: {dependency.task_name}"
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
logger = logging.getLogger(self.name)
|
|
182
|
+
logger.setLevel(logging.DEBUG)
|
|
183
|
+
if logger.hasHandlers():
|
|
184
|
+
logger.handlers.clear()
|
|
185
|
+
|
|
186
|
+
file_handler = logging.FileHandler(self.log_path)
|
|
187
|
+
file_handler.setLevel(logging.INFO)
|
|
188
|
+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
189
|
+
file_handler.setFormatter(formatter)
|
|
190
|
+
logger.addHandler(file_handler)
|
|
191
|
+
|
|
192
|
+
if self.html_mail_handler is not None:
|
|
193
|
+
_html_mail_handler = self.html_mail_handler.copy()
|
|
194
|
+
_html_mail_handler.setFormatter(ExceptionHTMLFormatter())
|
|
195
|
+
_html_mail_handler.setLevel(logging.ERROR)
|
|
196
|
+
_html_mail_handler.subject = f"Error in task {self.name}"
|
|
197
|
+
logger.addHandler(_html_mail_handler)
|
|
198
|
+
|
|
199
|
+
self.logger = logger
|
|
200
|
+
|
|
201
|
+
def _check_input_types(self) -> None:
|
|
202
|
+
"""
|
|
203
|
+
Validates all input parameter types.
|
|
204
|
+
|
|
205
|
+
Raises
|
|
206
|
+
------
|
|
207
|
+
TypeError
|
|
208
|
+
If any parameter is not of the expected type.
|
|
209
|
+
"""
|
|
210
|
+
if not callable(self.func):
|
|
211
|
+
raise TypeError(f"func must be callable. Got {type(self.func)}")
|
|
212
|
+
|
|
213
|
+
if not isinstance(self.args, tuple):
|
|
214
|
+
raise TypeError(f"args must be tuple. Got {type(self.args)}")
|
|
215
|
+
|
|
216
|
+
if not isinstance(self.kwargs, dict):
|
|
217
|
+
raise TypeError(f"kwargs must be dict. Got {type(self.kwargs)}")
|
|
218
|
+
|
|
219
|
+
if self.html_mail_handler is not None and not isinstance(
|
|
220
|
+
self.html_mail_handler, HTMLSMTPHandler
|
|
221
|
+
):
|
|
222
|
+
raise TypeError(
|
|
223
|
+
f"mail_cfg must be of type HTMLSMTPHandler. Got {type(self.html_mail_handler)}"
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
if not isinstance(self.dependencies, list):
|
|
227
|
+
raise TypeError(f"dependencies must be list. Got {type(self.dependencies)}")
|
|
228
|
+
|
|
229
|
+
for dependency in self.dependencies:
|
|
230
|
+
if not isinstance(dependency, TaskDependency):
|
|
231
|
+
raise TypeError(
|
|
232
|
+
f"dependency must be of type TaskDependency. Got {type(dependency)}"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
def get_dependencies_names(self) -> set[str]:
    """
    Collect the ``task_name`` of every entry in ``self.dependencies``.

    Returns
    -------
    set[str]
        The distinct names of the tasks this task depends on. Empty when
        the task has no dependencies.
    """
    names: set[str] = set()
    for dep in self.dependencies:
        names.add(dep.task_name)
    return names
|
|
245
|
+
|
|
246
|
+
def run(self, executing_process: Process | None = None) -> TaskResult:
    """
    Execute the task's function with its arguments and dependency results.

    Positional and keyword arguments start as copies of ``self.args`` and
    ``self.kwargs``. When ``executing_process`` is given, the result of each
    dependency is read from ``executing_process.runner.passed_results`` and
    appended as a positional argument and/or stored under
    ``dep.additional_kwarg_name``, as the dependency requests.

    Parameters
    ----------
    executing_process : Process, optional
        The parent Process executing this task. Used to fetch dependency
        results and, on failure, the tasks that depend on this one.
        Defaults to None.

    Returns
    -------
    TaskResult
        ``TaskResult(True, result, None)`` when the function returns, or
        ``TaskResult(False, None, exc)`` when it raises an ``Exception``.
        In the failure case the exception is logged through ``self.logger``
        with an HTML report passed as ``extra["post_traceback_html_body"]``.

    Notes
    -----
    Dependency results are looked up before the ``try`` block, so a missing
    entry in ``passed_results`` propagates to the caller instead of being
    converted into a failed TaskResult.
    """
    from html import escape  # local import: only needed to build the HTML report

    final_args = list(self.args)  # Start with original positional args
    final_kwargs = self.kwargs.copy()  # Start with original keyword args

    if executing_process is not None:
        for dep in self.dependencies:
            dep_result = executing_process.runner.passed_results[dep.task_name].result
            if dep.use_result_as_additional_args:
                final_args.append(dep_result)
            if dep.use_result_as_additional_kwargs:
                final_kwargs[dep.additional_kwarg_name] = dep_result

    try:
        self.logger.info(f"Starting {self.name}.")
        result = self.func(*final_args, **final_kwargs)
        self.logger.info(f"Finished {self.name}.")
        return TaskResult(True, result, None)
    except Exception as e:
        report = ""
        if executing_process is not None:
            dependant_names = [
                d.name for d in executing_process.get_dependant_tasks(self.name)
            ]
            if dependant_names:
                # Escape names so characters such as "<" or "&" cannot break
                # the HTML list.
                items = "".join(f"<li>{escape(str(n))}</li>" for n in dependant_names)
                report = (
                    "<h3>Downstream Impact</h3><p>The following tasks will be skipped:</p><ul>"
                    f"{items}</ul>"
                )

        # Callables such as functools.partial objects or instances defining
        # __call__ have no __name__; fall back to repr() so building the
        # report can never raise inside this handler.
        func_name = getattr(self.func, "__name__", None) or repr(self.func)
        # reprs frequently contain "<...>" (e.g. "<Foo object at 0x...>"),
        # which an HTML renderer would treat as a tag and drop; escape them.
        report += f"<p><b>Context:</b><br>Function: {escape(str(func_name))}"
        report += (
            f"<br>Args: {escape(repr(self.args))}"
            f"<br>Kwargs: {escape(repr(self.kwargs))}</p>"
        )
        self.logger.exception(e, extra={"post_traceback_html_body": report})
        return TaskResult(False, None, e)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: processes
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: A Python library for managing and executing dependent tasks in parallel or sequential order with automatic dependency resolution and topological sorting
|
|
5
|
+
Author-email: Oliver Mohr Bonometti <oliver.mohr.b@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: dag,dependencies,etl,parallel,process,tasks,topological-sort,workflow
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
<div align="center">
|
|
21
|
+
<img src="assets/banner.svg" width="100%" alt="Processes - Smart Task Orchestration">
|
|
22
|
+
</div>
|
|
23
|
+
|
|
24
|
+
# 🚀 Processes: Smart Task Orchestration
|
|
25
|
+
|
|
26
|
+
[](https://www.python.org/)
|
|
27
|
+
[](https://github.com/oliverm91/processes/actions/workflows/tests.yml)
|
|
28
|
+

|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
[](https://opensource.org/licenses/MIT)
|
|
32
|
+
[](https://oliverm91.github.io/processes/)
|
|
33
|
+
|
|
34
|
+
[](https://github.com/oliverm91/processes/actions/workflows/lint.yml)
|
|
35
|
+
[](https://github.com/oliverm91/processes/actions/workflows/mypy.yml)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
**Processes** is a lightweight, high-performance Python library designed to execute complex task graphs. It manages **dependencies**, handles **parallel execution**, and ensures system resilience without any external libraries.
|
|
42
|
+
|
|
43
|
+
File logging and **email notifications** are supported.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## 📑 Table of Contents
|
|
48
|
+
* [✨ Features](#-features)
|
|
49
|
+
* [⚙️ Core Concepts](#️-core-concepts)
|
|
50
|
+
* [🛠️ Use Cases](#️-use-cases)
|
|
51
|
+
* [💻 Quick Start](#-quick-start)
|
|
52
|
+
* [🛡️ Fault Tolerance & Logs](#️-fault-tolerance--logs)
|
|
53
|
+
* [📦 Installation](#-installation)
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## ✨ Features
|
|
58
|
+
|
|
59
|
+
* **🐍 Pure Python:** Zero external dependencies. Built entirely on the **Python Standard Library**.
|
|
60
|
+
* **⚡ Parallel Execution:** Built-in support for parallelization to maximize throughput.
|
|
61
|
+
* **🔗 Dependency Resolution:** Automatically sorts and executes tasks based on their requirements, regardless of input order.
|
|
62
|
+
* **📝 Shared Logging:** Multiple tasks can write to the same logfile or maintain separate ones seamlessly.
|
|
63
|
+
* **📧 Email Notifications:** Integrated SMTP support (including HTML) to alert you the moment an exception occurs.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## ⚙️ Core Concepts
|
|
68
|
+
|
|
69
|
+
The library operates on two main primitives:
|
|
70
|
+
|
|
71
|
+
1. **Task**: The atomic unit of work. It encapsulates a function, its parameters, its specific logfile, and its relationship with other tasks.
|
|
72
|
+
2. **Process**: The orchestrator. It builds the execution graph, validates dependencies, and manages the lifecycle of the entire workflow.
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## 🛠️ Use Cases
|
|
78
|
+
- **ETL Pipelines:** Fetch data from an API, transform it, and load it into a database as separate, dependent tasks.
|
|
79
|
+
|
|
80
|
+
- **System Maintenance:** Run parallel cleanup scripts, check server health, and receive email alerts if a specific check fails.
|
|
81
|
+
|
|
82
|
+
- **Automated Reporting:** Generate multiple data parts in parallel, aggregate them into a final report, and distribute via SMTP.
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## 💻 Quick Start
|
|
88
|
+
Define your tasks and their dependencies. **Processes** will handle the execution order and data injection between tasks.
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from datetime import date
|
|
92
|
+
|
|
93
|
+
from processes import Process, Task, TaskDependency, HTMLSMTPHandler
|
|
94
|
+
|
|
95
|
+
# 1. Setup Email Alerts (Optional)
|
|
96
|
+
smtp_handler = HTMLSMTPHandler(
|
|
97
|
+
('smtp_server', 587), 'sender@example.com', ['admin@example.com', 'user@example.com'],
|
|
98
|
+
use_tls=True, credentials=('user', 'pass')
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
# 2. If necessary, create wrappers for your Tasks.
|
|
102
|
+
def get_previous_working_day():
|
|
103
|
+
return date(2025, 12, 30)
|
|
104
|
+
def indep_task():
|
|
105
|
+
return "foo"
|
|
106
|
+
def search_and_sum_csv(t: date):
|
|
107
|
+
return 10
|
|
108
|
+
def sum_data_from_csv_and_x(x, a=1, b=2):
|
|
109
|
+
return x + a + b
|
|
110
|
+
|
|
111
|
+
# 3. Create the Task Graph (order is irrelevant, that is handled by Process)
|
|
112
|
+
tasks = [
|
|
113
|
+
Task("t-1", "etl.log", get_previous_working_day),
|
|
114
|
+
Task("independent", "indep.log", indep_task, html_mail_handler=smtp_handler), # This task will send an email on failure
|
|
115
|
+
Task("sum_csv", "etl.log", search_and_sum_csv,
|
|
116
|
+
dependencies= [
|
|
117
|
+
TaskDependency("t-1",
|
|
118
|
+
use_result_as_additional_args=True) # Passes the result of the t-1 task to search_and_sum_csv as an additional positional argument
|
|
119
|
+
]
|
|
120
|
+
),
|
|
121
|
+
Task("sum_x_and_csv", "etl.log", sum_data_from_csv_and_x,
|
|
122
|
+
args = (10,), kwargs = {"b": 100},
|
|
123
|
+
dependencies=[
|
|
124
|
+
TaskDependency("sum_csv",
|
|
125
|
+
use_result_as_additional_kwargs=True,
|
|
126
|
+
additional_kwarg_name="a")
|
|
127
|
+
]
|
|
128
|
+
)
|
|
129
|
+
]
|
|
130
|
+
|
|
131
|
+
# 4. Run the Process
|
|
132
|
+
with Process(tasks) as process: # Context Manager ensures correct disposal of loggers
|
|
133
|
+
process_result = process.run() # To enable parallelization use .run(parallel=True)
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## 🛡️ Fault Tolerance & Logs
|
|
140
|
+
### Resilience by Design
|
|
141
|
+
If a `Task` raises an exception, the `Process` **does not stop**. It intelligently skips any tasks that depend on the failed one but continues to execute all other independent branches of your workflow.
|
|
142
|
+
|
|
143
|
+
### Advanced Logging
|
|
144
|
+
All tasks record their execution flow to their assigned logfiles. You can share a single logfile across the whole process or isolate specific tasks for easier debugging.
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## 📦 Installation
|
|
150
|
+
|
|
151
|
+
Since it's a pure Python library, you can install it directly from the repository using `pip`:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pip install git+https://github.com/oliverm91/processes.git
|
|
155
|
+
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
processes/__init__.py,sha256=ZSr2xIsReGtYp4-s5ajRz-_VJcuN6XTbcBEUAgBROtY,661
|
|
2
|
+
processes/html_logging.py,sha256=qYtVrIZtdJ2f1eYa9PGvW1nZAMYiVyfdNMjuryuwpRc,6394
|
|
3
|
+
processes/process.py,sha256=kLoPKa5uywPZcO-lzuMN5zNgNooTalIwbDGY55yHJhA,14134
|
|
4
|
+
processes/task.py,sha256=gHKyN_vG4qTmxXV9KUDcSO8aVyb0aYeMdvCagfvffOQ,11364
|
|
5
|
+
processes-1.0.2.dist-info/METADATA,sha256=lqrH-dK8oE4dFb02wJ-nc9dS_-paBjKbOpomY0eTTVg,6260
|
|
6
|
+
processes-1.0.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
processes-1.0.2.dist-info/licenses/LICENSE,sha256=dVzfhK9bNx3A2yfwXvyvl6mw1N_vmp6elZROHNVfoLQ,1068
|
|
8
|
+
processes-1.0.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Oliver Mohr
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|