processes 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
processes/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ from importlib.metadata import PackageNotFoundError as _pnfe
2
+ from importlib.metadata import version as _v
3
+
4
+ from .html_logging import HTMLSMTPHandler as HTMLSMTPHandler
5
+ from .process import (
6
+ CircularDependencyError as CircularDependencyError,
7
+ )
8
+ from .process import (
9
+ DependencyNotFoundError as DependencyNotFoundError,
10
+ )
11
+ from .process import (
12
+ Process as Process,
13
+ )
14
+ from .process import (
15
+ TaskNotFoundError as TaskNotFoundError,
16
+ )
17
+ from .task import Task as Task
18
+ from .task import TaskDependency as TaskDependency
19
+ from .task import TaskResult as TaskResult
20
+
21
# Start from the placeholder and overwrite it only when the installed
# distribution metadata for "processes" can be resolved.
__version__ = "0.0.0-unknown"
try:
    __version__ = _v("processes")
except _pnfe:
    # Distribution metadata not found: keep the placeholder version.
    pass
@@ -0,0 +1,201 @@
1
+ import logging
2
+ import logging.handlers
3
+ import smtplib
4
+ import ssl
5
+ import traceback
6
+ from email.mime.text import MIMEText
7
+ from email.utils import formatdate
8
+
9
+
10
+ class HTMLSMTPHandler(logging.handlers.SMTPHandler):
11
+ """
12
+ A logging handler that sends log records via SMTP as HTML formatted emails.
13
+
14
+ Extends the standard SMTPHandler to support HTML-formatted email messages,
15
+ enabling richer formatting and styling in error notifications.
16
+
17
+ Attributes
18
+ ----------
19
+ mailhost : tuple[str, int]
20
+ A tuple of (host, port) for the SMTP server.
21
+ fromaddr : str
22
+ The email address to send messages from.
23
+ toaddrs : list[str]
24
+ List of email addresses to send messages to.
25
+ credentials : tuple[str, str] | None
26
+ A tuple of (username, password) for SMTP authentication. Defaults to None.
27
+ secure : tuple | tuple[str, str] | tuple[str, str, ssl.SSLContext] | None
28
+ Security configuration for SMTP connection. Can be an empty tuple for no security,
29
+ a tuple of (certfile, keyfile), or (certfile, keyfile, SSLContext).
30
+ Defaults to None.
31
+ timeout : int
32
+ Connection timeout in seconds. Defaults to 5.
33
+ """
34
+
35
+ def __init__(
36
+ self,
37
+ mailhost: tuple[str, int],
38
+ fromaddr: str,
39
+ toaddrs: list[str],
40
+ credentials: tuple[str, str] | None = None,
41
+ secure: tuple[()]
42
+ | tuple[str]
43
+ | tuple[str, str]
44
+ | tuple[str, str, ssl.SSLContext]
45
+ | None = None,
46
+ timeout: int = 5,
47
+ ):
48
+ self._crd = credentials
49
+ self._sec = secure
50
+ self._to = timeout
51
+
52
+ super().__init__(
53
+ mailhost,
54
+ fromaddr,
55
+ toaddrs,
56
+ "",
57
+ credentials=credentials,
58
+ secure=secure, # type: ignore[arg-type]
59
+ timeout=timeout,
60
+ )
61
+
62
+ def copy(self) -> "HTMLSMTPHandler":
63
+ """Create a shallow copy of this handler.
64
+
65
+ Returns
66
+ -------
67
+ HTMLSMTPHandler
68
+ A new HTMLSMTPHandler instance with the same configuration.
69
+ """
70
+ return HTMLSMTPHandler(
71
+ self.mailhost, # type: ignore[arg-type]
72
+ self.fromaddr,
73
+ self.toaddrs,
74
+ credentials=self._crd,
75
+ secure=self._sec,
76
+ timeout=self._to,
77
+ )
78
+
79
+ def __copy__(self) -> "HTMLSMTPHandler":
80
+ """Support for copy.copy() method.
81
+
82
+ Returns
83
+ -------
84
+ HTMLSMTPHandler
85
+ A shallow copy of this handler.
86
+ """
87
+ return self.copy()
88
+
89
+ def emit(self, record: logging.LogRecord) -> None:
90
+ """Send a log record via email as HTML formatted message.
91
+
92
+ Formats the log record using the handler's formatter and sends it
93
+ as an HTML-formatted email. Errors during sending are handled gracefully.
94
+
95
+ Parameters
96
+ ----------
97
+ record : logging.LogRecord
98
+ The log record to send.
99
+ """
100
+ try:
101
+ port = self.mailport
102
+ if not port:
103
+ port = smtplib.SMTP_PORT
104
+ smtp = smtplib.SMTP(self.mailhost, port)
105
+ msg = self.format(record)
106
+
107
+ # Create MIMEText object with HTML content
108
+ mime_msg = MIMEText(msg, "html")
109
+ mime_msg["From"] = self.fromaddr
110
+ mime_msg["To"] = ",".join(self.toaddrs)
111
+ mime_msg["Subject"] = self.getSubject(record)
112
+ mime_msg["Date"] = formatdate()
113
+
114
+ if self.username:
115
+ if self.secure is not None:
116
+ smtp.starttls(*self.secure)
117
+ smtp.login(self.username, self.password)
118
+ smtp.sendmail(self.fromaddr, self.toaddrs, mime_msg.as_string())
119
+ smtp.quit()
120
+ except Exception:
121
+ self.handleError(record)
122
+
123
+
124
class ExceptionHTMLFormatter(logging.Formatter):
    """
    A logging formatter that renders a log record as an HTML error report.

    The report contains the exception message, its traceback and an optional
    HTML fragment taken from the record's ``post_traceback_html_body``
    attribute.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record as HTML, with special handling for exceptions.

        Parameters
        ----------
        record : logging.LogRecord
            The log record to format. If it carries ``exc_info``, the
            exception and traceback are taken from it; otherwise the record
            message is used and no traceback is shown. An optional
            ``post_traceback_html_body`` attribute is inserted verbatim (it is
            expected to already be HTML).

        Returns
        -------
        str
            HTML document containing the exception details, the traceback
            and the post-traceback fragment.
        """
        import html  # local import: only needed by this formatter

        exc_info = record.exc_info
        if exc_info and exc_info[0] is not None:
            exception = str(exc_info[1])
            # Use the exception stored on the record. traceback.format_exc()
            # reads the exception *currently being handled*, which is wrong
            # (or absent) when the record is formatted outside the original
            # ``except`` block.
            tb_str = "".join(traceback.format_exception(*exc_info))
        else:
            exception = record.getMessage()
            tb_str = "No traceback available"

        post_traceback_html_body = getattr(record, "post_traceback_html_body", "")

        # Escape free text before embedding it: tracebacks routinely contain
        # "<module>" / "<lambda>" and messages may contain "<", ">" or "&".
        exception = html.escape(exception)
        tb_str = html.escape(tb_str).replace("\n", "<br>")

        body = f"""
        <html>
        <head>
        <style>
        body {{
        font-family: Arial, sans-serif;
        margin: 20px;
        color: #333;
        }}
        h2 {{
        color: #d9534f;
        }}
        .exception {{
        font-weight: bold;
        color: #d9534f;
        }}
        .traceback {{
        background-color: #f9f2f4;
        border: 1px solid #d9534f;
        padding: 10px;
        font-family: 'Courier New', Courier, monospace;
        white-space: pre-wrap;
        color: #333;
        border-radius: 4px;
        }}
        </style>
        </head>
        <body>
        <h2>Exception Details</h2>
        <p class="exception">Exception: {exception}</p>
        <p><strong>Traceback:</strong></p>
        <div class="traceback">{tb_str}</div>
        <br>
        {post_traceback_html_body}
        </body>
        </html>
        """
        return body
processes/process.py ADDED
@@ -0,0 +1,417 @@
1
+ import concurrent.futures
2
+ from types import TracebackType
3
+ from typing import Literal, Self
4
+
5
+ from .task import Task, TaskResult
6
+
7
+
8
class DependencyNotFoundError(Exception):
    """Signals that a task names a dependency that is not among the process tasks."""
12
+
13
+
14
class TaskNotFoundError(Exception):
    """Signals that a task requested by name does not exist in the process."""
18
+
19
+
20
class CircularDependencyError(Exception):
    """Signals that the task dependency graph contains a cycle."""
24
+
25
+
26
class ProcessResult:
    """
    Outcome of one process execution.

    Groups the per-task outcomes into the results of the tasks that passed
    and the names of the tasks that failed.

    Attributes
    ----------
    passed_tasks_results : dict[str, TaskResult]
        TaskResult of every task that executed successfully, keyed by task name.
    failed_tasks : set[str]
        Names of every task that failed during execution.
    """

    def __init__(self, passed_tasks_results: dict[str, TaskResult], failed_tasks: set[str]):
        # Both containers are stored as given (no copy is made).
        self.passed_tasks_results, self.failed_tasks = passed_tasks_results, failed_tasks
44
+
45
+
46
+ class Process:
47
+ """
48
+ Manages and executes a collection of interdependent tasks.
49
+
50
+ A Process orchestrates the execution of multiple tasks, handling dependency
51
+ resolution, task ordering. Task execution can be performed in parallel or sequentially. It
52
+ provides logging management and error propagation for dependent tasks. If a task fails,
53
+ all tasks depending on it are marked as failed without execution, but non-dependent tasks
54
+ continue to run.
55
+
56
+ Attributes
57
+ ----------
58
+ tasks : list[Task]
59
+ List of tasks to be executed, automatically sorted by dependencies.
60
+ runner : ProcessRunner
61
+ The runner responsible for executing the tasks.
62
+
63
+ Raises
64
+ ------
65
+ TypeError
66
+ If tasks is not a list or contains non-Task elements.
67
+ ValueError
68
+ If duplicate task names are found.
69
+ DependencyNotFoundError
70
+ If a task depends on a non-existent task.
71
+ CircularDependencyError
72
+ If circular dependencies are detected among tasks.
73
+ """
74
+
75
+ def __init__(self, tasks: list[Task]):
76
+ self.tasks = tasks
77
+
78
+ try:
79
+ self._check_input_types()
80
+ self._check_duplicate_names()
81
+ self._check_dependencies_exist()
82
+ self._topological_sort()
83
+ except Exception as e:
84
+ self.close_loggers()
85
+ raise e
86
+ self.runner = ProcessRunner(self)
87
+
88
+ def __enter__(self) -> Self:
89
+ """Called when entering the 'with' block."""
90
+ return self
91
+
92
+ def __exit__(
93
+ self,
94
+ exc_type: type[BaseException] | None,
95
+ exc_value: BaseException | None,
96
+ traceback: TracebackType | None,
97
+ ) -> Literal[False]:
98
+ """Called when exiting the 'with' block, even if an error occurred."""
99
+ self.close_loggers()
100
+ return False
101
+
102
+ def _check_input_types(self) -> None:
103
+ """Validate that tasks is a list containing only Task objects.
104
+
105
+ Raises
106
+ ------
107
+ TypeError
108
+ If tasks is not a list or contains non-Task elements.
109
+ """
110
+ if not isinstance(self.tasks, list):
111
+ raise TypeError(f"tasks must be list. Got {type(self.tasks)}")
112
+ for task in self.tasks:
113
+ if not isinstance(task, Task):
114
+ raise TypeError(f"task must be Task. Got {type(task)}")
115
+
116
+ def _check_duplicate_names(self) -> None:
117
+ """Verify that all task names are unique.
118
+
119
+ Raises
120
+ ------
121
+ ValueError
122
+ If duplicate task names are found.
123
+ """
124
+ names = set()
125
+ for task in self.tasks:
126
+ if task.name in names:
127
+ raise ValueError(f"Duplicate task name: {task.name}")
128
+ names.add(task.name)
129
+
130
+ def _check_dependencies_exist(self) -> None:
131
+ """Verify that all task dependencies refer to existing tasks.
132
+
133
+ Raises
134
+ ------
135
+ DependencyNotFoundError
136
+ If a task depends on a non-existent task.
137
+ """
138
+ names = {t.name for t in self.tasks}
139
+ for task in self.tasks:
140
+ for dep in task.get_dependencies_names():
141
+ if dep not in names:
142
+ raise DependencyNotFoundError(
143
+ f"Task {task.name} depends on missing task: {dep}"
144
+ )
145
+
146
+ def _topological_sort(self) -> None:
147
+ """Sort tasks based on dependencies using Kahn's Algorithm in O(V+E) time.
148
+
149
+ Reorders the task list so that dependencies are always executed before
150
+ tasks that depend on them.
151
+
152
+ Raises
153
+ ------
154
+ CircularDependencyError
155
+ If circular dependencies are detected among tasks.
156
+ """
157
+ in_degree = {t.name: 0 for t in self.tasks}
158
+ graph: dict[str, list[str]] = {t.name: [] for t in self.tasks}
159
+ task_map = {t.name: t for t in self.tasks}
160
+
161
+ for task in self.tasks:
162
+ for dep in task.dependencies:
163
+ graph[dep.task_name].append(task.name)
164
+ in_degree[task.name] += 1
165
+
166
+ queue = [name for name, deg in in_degree.items() if deg == 0]
167
+ sorted_tasks = []
168
+
169
+ while queue:
170
+ u = queue.pop(0)
171
+ sorted_tasks.append(task_map[u])
172
+ for v in graph[u]:
173
+ in_degree[v] -= 1
174
+ if in_degree[v] == 0:
175
+ queue.append(v)
176
+
177
+ if len(sorted_tasks) != len(self.tasks):
178
+ raise CircularDependencyError("Circular dependency detected.")
179
+ self.tasks = sorted_tasks
180
+
181
+ def get_task(self, task_name: str) -> Task:
182
+ """Retrieve a task by name.
183
+
184
+ Parameters
185
+ ----------
186
+ task_name : str
187
+ The name of the task to retrieve.
188
+
189
+ Returns
190
+ -------
191
+ Task
192
+ The task with the specified name.
193
+
194
+ Raises
195
+ ------
196
+ TaskNotFoundError
197
+ If no task with the given name exists.
198
+ """
199
+ for task in self.tasks:
200
+ if task.name == task_name:
201
+ return task
202
+ raise TaskNotFoundError(f"Task not found: {task_name}")
203
+
204
+ def run(self, parallel: bool | None = None, max_workers: int = 4) -> ProcessResult:
205
+ """Execute all tasks in the process.
206
+
207
+ Runs tasks sequentially or in parallel while respecting dependencies.
208
+ Dependencies are always resolved before dependent tasks are executed.
209
+
210
+ Parameters
211
+ ----------
212
+ parallel : bool, optional
213
+ Whether to run tasks in parallel while respecting dependencies.
214
+ If None, automatically set to True for processes with 10 or more tasks,
215
+ False otherwise. Defaults to None.
216
+ max_workers : int, optional
217
+ Maximum number of worker threads for parallel execution. Defaults to 4.
218
+ Only used when parallel=True. If set to 1, falls back to sequential execution.
219
+
220
+ Returns
221
+ -------
222
+ ProcessResult
223
+ Contains passed_tasks_results (dict mapping task names to TaskResult)
224
+ and failed_tasks (set of task names that failed).
225
+ """
226
+ if parallel is None:
227
+ parallel = len(self.tasks) >= 10
228
+
229
+ max_workers = max(1, max_workers)
230
+ if parallel:
231
+ if max_workers == 1:
232
+ parallel = False # Fallback to sequential if only one worker
233
+ process_result = self.runner.run(parallel, max_workers)
234
+ return process_result
235
+
236
+ def get_dependant_tasks(self, task_name: str) -> list[Task]:
237
+ """Retrieve all tasks that directly or indirectly depend on a given task.
238
+
239
+ Parameters
240
+ ----------
241
+ task_name : str
242
+ The name of the task to find dependants for.
243
+
244
+ Returns
245
+ -------
246
+ list[Task]
247
+ List of all tasks that depend on the specified task, including
248
+ transitive dependencies (tasks that depend on tasks that depend
249
+ on the specified task).
250
+ """
251
+ found = []
252
+
253
+ def find(name: str) -> None:
254
+ for t in self.tasks:
255
+ if name in t.get_dependencies_names() and t not in found:
256
+ found.append(t)
257
+ find(t.name)
258
+
259
+ find(task_name)
260
+ return found
261
+
262
+ def close_loggers(self) -> None:
263
+ """Close and clean up all logger handlers for all tasks.
264
+
265
+ Should be called when the process is done to ensure proper resource cleanup.
266
+ """
267
+ for task in self.tasks:
268
+ for handler in task.logger.handlers:
269
+ handler.close()
270
+ task.logger.removeHandler(handler)
271
+
272
+
273
class ProcessRunner:
    """
    Executes the tasks of a Process, sequentially or in a thread pool.

    Tracks which tasks passed, failed or were submitted, and propagates a
    failure to every task that depends on a failed task.

    Attributes
    ----------
    process : Process
        Reference to the parent Process being executed.
    passed_results : dict[str, TaskResult]
        Results from successfully executed tasks.
    failed_tasks : set[str]
        Names of tasks that failed during execution or were skipped because a
        dependency failed.
    submitted_tasks : set[str]
        Names of tasks that have been submitted to the thread pool.
    """

    def __init__(self, process_ref: Process):
        self.process = process_ref
        self.passed_results: dict[str, TaskResult] = {}
        self.failed_tasks: set[str] = set()
        self.submitted_tasks: set[str] = set()

    def run(self, parallel: bool, max_workers: int) -> ProcessResult:
        """Execute all tasks in the process using the specified execution mode.

        Parameters
        ----------
        parallel : bool
            If True, execute tasks in parallel; otherwise execute sequentially.
        max_workers : int
            Maximum number of worker threads for parallel execution.

        Returns
        -------
        ProcessResult
            The combined results of all task executions.
        """
        # Start every run from a clean slate. Fresh containers are bound (not
        # cleared in place) so a ProcessResult returned by an earlier run is
        # left untouched, and a second run actually re-executes the tasks.
        self.passed_results = {}
        self.failed_tasks = set()
        self.submitted_tasks = set()

        if parallel:
            self._run_parallel(max_workers)
        else:
            self._run_sequential()
        return ProcessResult(self.passed_results, self.failed_tasks)

    def _is_unrunnable(self, task: Task) -> bool:
        """Check if a task cannot be run due to failed dependencies.

        Parameters
        ----------
        task : Task
            The task to check.

        Returns
        -------
        bool
            True if any of the task's dependencies have failed, False otherwise.
            As a side effect, a task found unrunnable is recorded as failed.
        """
        if any(d.task_name in self.failed_tasks for d in task.dependencies):
            self.failed_tasks.add(task.name)  # Propagate failure
            return True
        return False

    def _all_deps_met(self, task: Task) -> bool:
        """Check if all dependencies of a task have been successfully executed.

        Parameters
        ----------
        task : Task
            The task to check.

        Returns
        -------
        bool
            True if all dependencies have passed, False otherwise.
        """
        return all(d.task_name in self.passed_results for d in task.dependencies)

    def _run_sequential(self) -> None:
        """Execute all tasks one by one, in the process's (dependency) order."""
        for task in self.process.tasks:
            if self._is_unrunnable(task):
                continue
            if self._all_deps_met(task):
                res = task.run(self.process)
                if res.worked:
                    self.passed_results[task.name] = res
                else:
                    self.failed_tasks.add(task.name)

    def _run_parallel(self, max_workers: int) -> None:
        """Execute tasks in a thread pool while respecting dependencies.

        Parameters
        ----------
        max_workers : int
            Maximum number of worker threads to use.

        Raises
        ------
        RuntimeError
            If unfinished tasks remain but none can be submitted and none is
            running.
        """
        total = len(self.process.tasks)
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            fut_to_name: dict[concurrent.futures.Future, str] = {}
            while len(self.passed_results) + len(self.failed_tasks) < total:
                # Tasks ready to start now. Note that _is_unrunnable() also
                # marks tasks with a failed dependency as failed.
                candidates = [
                    t
                    for t in self.process.tasks
                    if t.name not in self.submitted_tasks
                    and t.name not in self.failed_tasks
                    and not self._is_unrunnable(t)
                    and self._all_deps_met(t)
                ]

                for task in candidates:
                    fut = executor.submit(task.run, self.process)
                    fut_to_name[fut] = task.name
                    self.submitted_tasks.add(task.name)

                if not fut_to_name:
                    # Nothing is running. If the candidate scan above just
                    # marked the remaining tasks as failed (their dependency
                    # failed), every task is accounted for and we are done.
                    if len(self.passed_results) + len(self.failed_tasks) >= total:
                        break
                    # Otherwise nothing can ever make progress.
                    raise RuntimeError(
                        "Parallel execution stalled: no candidates found and no tasks running"
                    )

                # Wait for at least one running task, then record its outcome.
                done, _ = concurrent.futures.wait(
                    fut_to_name.keys(), return_when=concurrent.futures.FIRST_COMPLETED
                )
                for fut in done:
                    name = fut_to_name.pop(fut)
                    try:
                        res = fut.result()
                    except Exception:
                        self.failed_tasks.add(name)
                        continue
                    if res.worked:
                        self.passed_results[name] = res
                    else:
                        self.failed_tasks.add(name)
processes/task.py ADDED
@@ -0,0 +1,302 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ if TYPE_CHECKING:
7
+ from .process import Process
8
+
9
+ import logging
10
+
11
+ from .html_logging import ExceptionHTMLFormatter, HTMLSMTPHandler
12
+
13
+
14
+ class TaskResult:
15
+ """
16
+ Container for the result of a task execution.
17
+
18
+ Holds the outcome of running a task, including whether it succeeded,
19
+ its return value, and any exception that occurred.
20
+
21
+ Attributes
22
+ ----------
23
+ worked : bool
24
+ True if the task executed successfully, False if an exception occurred.
25
+ result : Any
26
+ The return value of the task's function if execution succeeded, None if failed.
27
+ exception : Exception | None
28
+ The exception object if execution failed, None if successful.
29
+ """
30
+
31
+ def __init__(self, worked: bool, result: Any, exception: Exception | None):
32
+ self.worked = worked
33
+ self.result = result
34
+ self.exception = exception
35
+
36
+
37
class TaskDependency:
    """
    Represents a dependency relationship between tasks.

    Defines which task another task depends on and how the dependency's
    result is handed to the dependent task (as an extra positional argument,
    as a keyword argument, or both).

    Attributes
    ----------
    task_name : str
        The name of the task this dependency refers to.
    use_result_as_additional_args : bool
        If True, the result of the dependency task is appended as the last
        positional argument. Defaults to False.
    use_result_as_additional_kwargs : bool
        If True, the result of the dependency task is passed as a keyword
        argument. Defaults to False.
    additional_kwarg_name : str
        Name of the keyword argument used when use_result_as_additional_kwargs
        is True; must then be a non-empty string. Defaults to "".

    Raises
    ------
    TypeError
        If task_name is not a str, if either flag is not a bool, or if
        use_result_as_additional_kwargs is True but additional_kwarg_name is
        not a non-empty string.
    """

    def __init__(
        self,
        task_name: str,
        use_result_as_additional_args: bool = False,
        use_result_as_additional_kwargs: bool = False,
        additional_kwarg_name: str = "",
    ):
        self.task_name = task_name
        self.use_result_as_additional_args = use_result_as_additional_args
        self.use_result_as_additional_kwargs = use_result_as_additional_kwargs
        self.additional_kwarg_name = additional_kwarg_name

        if not isinstance(self.task_name, str):
            raise TypeError(f"task_name must be of type str. Got {type(self.task_name)}")
        if not isinstance(self.use_result_as_additional_args, bool):
            raise TypeError(
                f"use_result_as_additional_args must be of type bool. "
                f"Got {type(self.use_result_as_additional_args)}"
            )
        if not isinstance(self.use_result_as_additional_kwargs, bool):
            raise TypeError(
                f"use_result_as_additional_kwargs must be of type bool. "
                f"Got {type(self.use_result_as_additional_kwargs)}"
            )

        # The keyword name is only validated when it will actually be used, so
        # callers that leave it unset (or pass None) without the kwargs flag
        # keep working. With the flag set, a non-string name such as None
        # would otherwise only fail later, when the dependent task is called.
        if self.use_result_as_additional_kwargs and (
            not isinstance(self.additional_kwarg_name, str) or self.additional_kwarg_name == ""
        ):
            raise TypeError(
                "If use_result_as_additional_kwargs is True, additional_kwarg_name"
                " must be a non-empty string."
            )

    def __hash__(self) -> int:
        """
        Return hash of the dependency based on task name.

        Equality is not overridden, so two distinct instances still compare
        unequal even when they share a task name.

        Returns
        -------
        int
            Hash value based on the task_name attribute.
        """
        return hash(self.task_name)
108
+
109
+
110
+ class Task:
111
+ """
112
+ A Task represents a unit of work to be executed within a Process.
113
+
114
+ A Task encapsulates a callable function with its arguments, dependencies on other tasks,
115
+ and logging configuration. Tasks can be executed, by the Process class, sequentially
116
+ or in parallel, with automatic dependency resolution and result passing between dependent tasks.
117
+
118
+ Attributes
119
+ ----------
120
+ name : str
121
+ Unique name for the task (cannot contain spaces).
122
+ log_path : str
123
+ File path where task logs will be written.
124
+ func : Callable
125
+ The function to execute when the task runs.
126
+ args : tuple
127
+ Positional arguments to pass to the function. Defaults to empty tuple.
128
+ kwargs : dict
129
+ Keyword arguments to pass to the function. Defaults to empty dict.
130
+ dependencies : list[TaskDependency]
131
+ List of tasks this task depends on. Defaults to empty list.
132
+ html_mail_handler : HTMLSMTPHandler, optional
133
+ Handler for sending error logs via email in HTML format. Defaults to None.
134
+ logger : logging.Logger
135
+ Logger instance for this task, automatically configured.
136
+ """
137
+
138
+ kwargs: dict[str, Any]
139
+ dependencies: list[TaskDependency]
140
+
141
+ def __init__(
142
+ self,
143
+ name: str,
144
+ log_path: str,
145
+ func: Callable[..., Any],
146
+ args: tuple[Any, ...] = (),
147
+ kwargs: dict[str, Any] | None = None,
148
+ dependencies: list[TaskDependency] | None = None,
149
+ html_mail_handler: HTMLSMTPHandler | None = None,
150
+ ):
151
+ self.name = name
152
+ self.log_path = log_path
153
+ self.func = func
154
+ self.args = args
155
+ self.html_mail_handler = html_mail_handler
156
+
157
+ if kwargs is None:
158
+ self.kwargs = {}
159
+ else:
160
+ self.kwargs = kwargs
161
+ if dependencies is None:
162
+ self.dependencies = []
163
+ else:
164
+ self.dependencies = dependencies
165
+
166
+ self._check_input_types()
167
+ if " " in self.name:
168
+ raise ValueError(f"Task name cannot contain spaces. Got {self.name}")
169
+
170
+ depedencies_names = []
171
+ for dependency in self.dependencies:
172
+ if dependency.task_name in depedencies_names:
173
+ raise ValueError(f"Duplicate dependency name: {dependency.task_name}")
174
+ depedencies_names.append(dependency.task_name)
175
+ if dependency.task_name == self.name:
176
+ raise ValueError(
177
+ f"Got dependency with same name as Task. "
178
+ f"Task: {self.name}. Dependency: {dependency.task_name}"
179
+ )
180
+
181
+ logger = logging.getLogger(self.name)
182
+ logger.setLevel(logging.DEBUG)
183
+ if logger.hasHandlers():
184
+ logger.handlers.clear()
185
+
186
+ file_handler = logging.FileHandler(self.log_path)
187
+ file_handler.setLevel(logging.INFO)
188
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
189
+ file_handler.setFormatter(formatter)
190
+ logger.addHandler(file_handler)
191
+
192
+ if self.html_mail_handler is not None:
193
+ _html_mail_handler = self.html_mail_handler.copy()
194
+ _html_mail_handler.setFormatter(ExceptionHTMLFormatter())
195
+ _html_mail_handler.setLevel(logging.ERROR)
196
+ _html_mail_handler.subject = f"Error in task {self.name}"
197
+ logger.addHandler(_html_mail_handler)
198
+
199
+ self.logger = logger
200
+
201
+ def _check_input_types(self) -> None:
202
+ """
203
+ Validates all input parameter types.
204
+
205
+ Raises
206
+ ------
207
+ TypeError
208
+ If any parameter is not of the expected type.
209
+ """
210
+ if not callable(self.func):
211
+ raise TypeError(f"func must be callable. Got {type(self.func)}")
212
+
213
+ if not isinstance(self.args, tuple):
214
+ raise TypeError(f"args must be tuple. Got {type(self.args)}")
215
+
216
+ if not isinstance(self.kwargs, dict):
217
+ raise TypeError(f"kwargs must be dict. Got {type(self.kwargs)}")
218
+
219
+ if self.html_mail_handler is not None and not isinstance(
220
+ self.html_mail_handler, HTMLSMTPHandler
221
+ ):
222
+ raise TypeError(
223
+ f"mail_cfg must be of type HTMLSMTPHandler. Got {type(self.html_mail_handler)}"
224
+ )
225
+
226
+ if not isinstance(self.dependencies, list):
227
+ raise TypeError(f"dependencies must be list. Got {type(self.dependencies)}")
228
+
229
+ for dependency in self.dependencies:
230
+ if not isinstance(dependency, TaskDependency):
231
+ raise TypeError(
232
+ f"dependency must be of type TaskDependency. Got {type(dependency)}"
233
+ )
234
+
235
+ def get_dependencies_names(self) -> set[str]:
236
+ """
237
+ Get the names of all tasks this task depends on.
238
+
239
+ Returns
240
+ -------
241
+ set[str]
242
+ Set of dependency task names.
243
+ """
244
+ return {dependency.task_name for dependency in self.dependencies}
245
+
246
+ def run(self, executing_process: Process | None = None) -> TaskResult:
247
+ """
248
+ Execute the task's function with its arguments and dependencies.
249
+
250
+ This method runs the task's function, automatically injecting results from
251
+ dependent tasks as specified in the dependency configuration. Logs the task
252
+ execution and captures any exceptions.
253
+
254
+ Parameters
255
+ ----------
256
+ executing_process : Process, optional
257
+ The parent Process executing this task. Used to retrieve results from
258
+ dependent tasks. Defaults to None.
259
+
260
+ Returns
261
+ -------
262
+ TaskResult
263
+ Object containing:
264
+ - worked (bool): True if execution succeeded, False otherwise.
265
+ - result: The return value of the function if successful, None if failed.
266
+ - exception (Exception | None): The exception raised if execution failed,
267
+ None if successful.
268
+ """
269
+ final_args = list(self.args) # Start with original positional args
270
+ final_kwargs = self.kwargs.copy() # Start with original keyword args
271
+
272
+ if executing_process is not None:
273
+ for dep in self.dependencies:
274
+ dep_result = executing_process.runner.passed_results[dep.task_name].result
275
+ if dep.use_result_as_additional_args:
276
+ final_args.append(dep_result)
277
+ if dep.use_result_as_additional_kwargs:
278
+ final_kwargs[dep.additional_kwarg_name] = dep_result
279
+
280
+ try:
281
+ self.logger.info(f"Starting {self.name}.")
282
+ result = self.func(*final_args, **final_kwargs)
283
+ self.logger.info(f"Finished {self.name}.")
284
+ return TaskResult(True, result, None)
285
+ except Exception as e:
286
+ report = ""
287
+ if executing_process is not None:
288
+ dependencies_names = [
289
+ d.name for d in executing_process.get_dependant_tasks(self.name)
290
+ ]
291
+ if dependencies_names:
292
+ report = (
293
+ "<h3>Downstream Impact</h3><p>The following tasks will be skipped:</p><ul>"
294
+ )
295
+ report += "".join(
296
+ f"<li>{dependency_name}</li>" for dependency_name in dependencies_names
297
+ )
298
+ report += "</ul>"
299
+ report += f"<p><b>Context:</b><br>Function: {self.func.__name__}"
300
+ report += f"<br>Args: {self.args}<br>Kwargs: {self.kwargs}</p>"
301
+ self.logger.exception(e, extra={"post_traceback_html_body": report})
302
+ return TaskResult(False, None, e)
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: processes
3
+ Version: 1.0.2
4
+ Summary: A Python library for managing and executing dependent tasks in parallel or sequential order with automatic dependency resolution and topological sorting
5
+ Author-email: Oliver Mohr Bonometti <oliver.mohr.b@gmail.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: dag,dependencies,etl,parallel,process,tasks,topological-sort,workflow
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+
20
+ <div align="center">
21
+ <img src="assets/banner.svg" width="100%" alt="Processes - Smart Task Orchestration">
22
+ </div>
23
+
24
+ # 🚀 Processes: Smart Task Orchestration
25
+
26
+ [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
27
+ [![Python Tests Status](https://github.com/oliverm91/processes/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/oliverm91/processes/actions/workflows/tests.yml)
28
+ ![Fast & Lightweight](https://img.shields.io/badge/Library-Pure%20Python-green.svg)
29
+
30
+
31
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
32
+ [![Documentation](https://img.shields.io/badge/docs-GitHub%20Pages-blue.svg)](https://oliverm91.github.io/processes/)
33
+
34
+ [![Ruff Lint Status](https://github.com/oliverm91/processes/actions/workflows/lint.yml/badge.svg?branch=main)](https://github.com/oliverm91/processes/actions/workflows/lint.yml)
35
+ [![mypy-check](https://github.com/oliverm91/processes/actions/workflows/mypy.yml/badge.svg)](https://github.com/oliverm91/processes/actions/workflows/mypy.yml)
36
+
37
+
38
+
39
+
40
+
41
+ **Processes** is a lightweight, high-performance Python library designed to execute complex task graphs. It manages **dependencies**, handles **parallel execution**, and ensures system resilience without any external libraries.
42
+
43
+ File logging and **email notifications** are supported.
44
+
45
+ ---
46
+
47
+ ## 📑 Table of Contents
48
+ * [✨ Features](#-features)
49
+ * [⚙️ Core Concepts](#️-core-concepts)
50
+ * [🛠️ Use Cases](#️-use-cases)
51
+ * [💻 Quick Start](#-quick-start)
52
+ * [🛡️ Fault Tolerance & Logs](#️-fault-tolerance--logs)
53
+ * [📦 Installation](#-installation)
54
+
55
+ ---
56
+
57
+ ## ✨ Features
58
+
59
+ * **🐍 Pure Python:** Zero external dependencies. Built entirely on the **Python Standard Library**.
60
+ * **⚡ Parallel Execution:** Built-in support for parallelization to maximize throughput.
61
+ * **🔗 Dependency Resolution:** Automatically sorts and executes tasks based on their requirements, regardless of input order.
62
+ * **📝 Shared Logging:** Multiple tasks can write to the same logfile or maintain separate ones seamlessly.
63
+ * **📧 Email Notifications:** Integrated SMTP support (including HTML) to alert you the moment an exception occurs.
64
+
65
+ ---
66
+
67
+ ## ⚙️ Core Concepts
68
+
69
+ The library operates on two main primitives:
70
+
71
+ 1. **Task**: The atomic unit of work. It encapsulates a function, its parameters, its specific logfile, and its relationship with other tasks.
72
+ 2. **Process**: The orchestrator. It builds the execution graph, validates dependencies, and manages the lifecycle of the entire workflow.
73
+
74
+
75
+ ---
76
+
77
+ ## 🛠️ Use Cases
78
+ - **ETL Pipelines:** Fetch data from an API, transform it, and load it into a database as separate, dependent tasks.
79
+
80
+ - **System Maintenance:** Run parallel cleanup scripts, check server health, and receive email alerts if a specific check fails.
81
+
82
+ - **Automated Reporting:** Generate multiple data parts in parallel, aggregate them into a final report, and distribute via SMTP.
83
+
84
+
85
+ ---
86
+
87
+ ## 💻 Quick Start
88
+ Define your tasks and their dependencies. **Processes** will handle the execution order and data injection between tasks.
89
+
90
+ ```python
91
+ from datetime import date
92
+
93
+ from processes import Process, Task, TaskDependency, HTMLSMTPHandler
94
+
95
+ # 1. Setup Email Alerts (Optional)
96
+ smtp_handler = HTMLSMTPHandler(
97
+ ('smtp_server', 587), 'sender@example.com', ['admin@example.com', 'user@example.com'],
98
+ use_tls=True, credentials=('user', 'pass')
99
+ )
100
+
101
+ # 2. If necessary, create wrappers for your Tasks.
102
+ def get_previous_working_day():
103
+ return date(2025, 12, 30)
104
+ def indep_task():
105
+ return "foo"
106
+ def search_and_sum_csv(t: date):
107
+ return 10
108
+ def sum_data_from_csv_and_x(x, a=1, b=2):
109
+ return x + a + b
110
+
111
+ # 3. Create the Task Graph (list order is irrelevant; Process resolves the execution order)
112
+ tasks = [
113
+ Task("t-1", "etl.log", get_previous_working_day),
114
+ Task("independent", "indep.log", indep_task, html_mail_handler=smtp_handler), # This task will send an email on failure
115
+ Task("sum_csv", "etl.log", search_and_sum_csv,
116
+ dependencies= [
117
+ TaskDependency("t-1",
118
+ use_result_as_additional_args=True) # Passes the result of the t-1 task to search_and_sum_csv as an additional positional argument
119
+ ]
120
+ ),
121
+ Task("sum_x_and_csv", "etl.log", sum_data_from_csv_and_x,
122
+ args = (10,), kwargs = {"b": 100},
123
+ dependencies=[
124
+ TaskDependency("sum_csv",
125
+ use_result_as_additional_kwargs=True,
126
+ additional_kwarg_name="a")
127
+ ]
128
+ )
129
+ ]
130
+
131
+ # 4. Run the Process
132
+ with Process(tasks) as process: # Context Manager ensures correct disposal of loggers
133
+ process_result = process.run() # To enable parallelization use .run(parallel=True)
134
+
135
+ ```
136
+
137
+ ---
138
+
139
+ ## 🛡️ Fault Tolerance & Logs
140
+ ### Resilience by Design
141
+ If a `Task` raises an exception, the `Process` **does not stop**. It intelligently skips any tasks that depend on the failed one but continues to execute all other independent branches of your workflow.
142
+
143
+ ### Advanced Logging
144
+ All tasks record their execution flow to their assigned logfiles. You can share a single logfile across the whole process or isolate specific tasks for easier debugging.
145
+
146
+
147
+ ---
148
+
149
+ ## 📦 Installation
150
+
151
+ Since it's a pure Python library, you can install it directly from the repository using `pip`:
152
+
153
+ ```bash
154
+ pip install git+https://github.com/oliverm91/processes.git
155
+ ```
@@ -0,0 +1,8 @@
1
+ processes/__init__.py,sha256=ZSr2xIsReGtYp4-s5ajRz-_VJcuN6XTbcBEUAgBROtY,661
2
+ processes/html_logging.py,sha256=qYtVrIZtdJ2f1eYa9PGvW1nZAMYiVyfdNMjuryuwpRc,6394
3
+ processes/process.py,sha256=kLoPKa5uywPZcO-lzuMN5zNgNooTalIwbDGY55yHJhA,14134
4
+ processes/task.py,sha256=gHKyN_vG4qTmxXV9KUDcSO8aVyb0aYeMdvCagfvffOQ,11364
5
+ processes-1.0.2.dist-info/METADATA,sha256=lqrH-dK8oE4dFb02wJ-nc9dS_-paBjKbOpomY0eTTVg,6260
6
+ processes-1.0.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
7
+ processes-1.0.2.dist-info/licenses/LICENSE,sha256=dVzfhK9bNx3A2yfwXvyvl6mw1N_vmp6elZROHNVfoLQ,1068
8
+ processes-1.0.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Oliver Mohr
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.