pysfi 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,13 @@
 This module provides a comprehensive workflow engine for managing
 complex task dependencies with support for I/O tasks, CPU-intensive tasks,
 serial tasks, and parallel task execution.
+
+The engine supports:
+- Dependency management with cycle detection
+- Topological sorting for execution order
+- Concurrent execution with configurable limits
+- Error handling and timeout management
+- Execution monitoring and reporting
 """
 
 from __future__ import annotations
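
The new docstring bullets describe level-based scheduling: dependencies are topologically sorted into execution levels, and cyclic graphs are rejected. That machinery is outside this hunk, so purely as a hedged illustration, here is a minimal sketch of Kahn's algorithm producing levels with cycle detection; the `topological_levels` name and the `deps` mapping are hypothetical, not this package's API:

```python
# Sketch of level-based topological sorting with cycle detection (Kahn's
# algorithm). The engine's own implementation is not part of this diff;
# `deps` here is a hypothetical {task_id: [dependency_ids]} mapping.
from collections import deque


def topological_levels(deps: dict[str, list[str]]) -> list[list[str]]:
    # in_degree counts unmet dependencies; dependents is the reverse edge map
    in_degree = {t: len(d) for t, d in deps.items()}
    dependents: dict[str, list[str]] = {t: [] for t in deps}
    for t, d in deps.items():
        for dep in d:
            dependents[dep].append(t)

    # Start with tasks that have no dependencies at all
    current = deque(t for t, n in in_degree.items() if n == 0)
    levels, seen = [], 0
    while current:
        level = list(current)
        levels.append(level)
        seen += len(level)
        next_level = deque()
        for t in level:
            for dep in dependents[t]:
                in_degree[dep] -= 1
                if in_degree[dep] == 0:
                    next_level.append(dep)
        current = next_level

    if seen != len(deps):  # any leftover task sits on a cycle
        raise ValueError("Dependency cycle detected")
    return levels


# Example: c depends on a and b, which are independent of each other
print(topological_levels({"a": [], "b": [], "c": ["a", "b"]}))
# -> [['a', 'b'], ['c']]
```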
@@ -49,7 +56,13 @@ class TaskResult:
 class Task(ABC):
     """Task abstract base class"""
 
-    def __init__(self, task_id: str, task_type: TaskType, dependencies: list[str] | None = None, timeout: float = 30.0):
+    def __init__(
+        self,
+        task_id: str,
+        task_type: TaskType,
+        dependencies: list[str] | None = None,
+        timeout: float = 30.0,
+    ):
         self.task_id = task_id
         self.task_type = task_type
         self.dependencies = dependencies or []
@@ -82,18 +95,68 @@ class Task(ABC):
             return self.end_time - self.start_time
         return 0.0
 
+    async def _execute_with_error_handling(
+        self, execution_func, context: dict[str, TaskResult]
+    ):
+        """Common execution wrapper with error handling"""
+        self.start_time = time.time()
+        self.update_status(TaskStatus.RUNNING)
+
+        try:
+            data = await asyncio.wait_for(execution_func(context), timeout=self.timeout)
+            self.end_time = time.time()
+            self.result = TaskResult(
+                task_id=self.task_id,
+                success=True,
+                data=data,
+                execution_time=self.get_execution_time(),
+            )
+            self.update_status(TaskStatus.COMPLETED)
+            return self.result
+        except asyncio.TimeoutError as e:
+            self.end_time = time.time()
+            self.result = TaskResult(
+                task_id=self.task_id,
+                success=False,
+                data=None,
+                execution_time=self.get_execution_time(),
+                error=e,
+            )
+            self.update_status(TaskStatus.FAILED)
+            raise
+        except Exception as e:
+            self.end_time = time.time()
+            self.result = TaskResult(
+                task_id=self.task_id,
+                success=False,
+                data=None,
+                execution_time=self.get_execution_time(),
+                error=e,
+            )
+            self.update_status(TaskStatus.FAILED)
+            raise
+
 
 class IOTask(Task):
     """I/O-intensive task"""
 
-    def __init__(self, task_id: str, duration: float, dependencies: list[str] | None = None, timeout: float = 30.0):
+    def __init__(
+        self,
+        task_id: str,
+        duration: float,
+        dependencies: list[str] | None = None,
+        timeout: float = 30.0,
+    ):
         super().__init__(task_id, TaskType.ASYNC, dependencies, timeout)
         self.duration = duration
 
     async def execute(self, context: dict[str, TaskResult]) -> Any:
         """Simulate I/O operation"""
-        print(f"[IO] Starting task {self.task_id}, estimated duration: {self.duration}s")
-        return await asyncio.wait_for(self._execute_io(context), timeout=self.timeout)
+        print(
+            f"[IO] Starting task {self.task_id}, estimated duration: {self.duration}s"
+        )
+        result = await self._execute_with_error_handling(self._execute_io, context)
+        return result.data
 
     async def _execute_io(self, context: dict[str, TaskResult]) -> Any:
         """Internal I/O execution method"""
@@ -104,25 +167,34 @@ class IOTask(Task):
 class CPUTask(Task):
     """CPU-intensive task"""
 
-    def __init__(self, task_id: str, iterations: int, dependencies: list[str] | None = None, timeout: float = 30.0):
+    def __init__(
+        self,
+        task_id: str,
+        iterations: int,
+        dependencies: list[str] | None = None,
+        timeout: float = 30.0,
+    ):
         super().__init__(task_id, TaskType.CPU, dependencies, timeout)
         self.iterations = iterations
 
     async def execute(self, context: dict[str, TaskResult]) -> Any:
         """CPU-intensive computation task"""
         print(f"[CPU] Starting task {self.task_id}, iterations: {self.iterations}")
+        result = await self._execute_with_error_handling(self._execute_cpu, context)
+        return result.data
+
+    async def _execute_cpu(self, context: dict[str, TaskResult]) -> Any:
+        """Execute CPU-intensive work in thread pool"""
 
-        # Move CPU-intensive task to thread pool to avoid blocking event loop
         def cpu_intensive_work():
             result = 0
             for i in range(self.iterations):
                 result += i * i
             return result
 
-        # Use asyncio.wait_for with timeout to prevent infinite hangs
-        # Use run_in_executor for Python 3.8 compatibility (asyncio.to_thread is Python 3.9+)
+        # Use run_in_executor to avoid blocking event loop
         loop = asyncio.get_event_loop()
-        result = await asyncio.wait_for(loop.run_in_executor(None, cpu_intensive_work), timeout=self.timeout)
+        result = await loop.run_in_executor(None, cpu_intensive_work)
         return f"CPU task {self.task_id} completed, result: {result}"
 
 
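In the CPU task, the per-call `asyncio.wait_for` is dropped (the timeout now lives in `_execute_with_error_handling`) while the blocking loop still runs in a worker thread via `run_in_executor`. A minimal sketch of that hand-off, using `asyncio.to_thread`, the Python 3.9+ equivalent of `loop.run_in_executor(None, ...)`:

```python
# Minimal sketch of offloading blocking CPU work so the event loop stays
# responsive while the computation runs in a worker thread.
import asyncio


def crunch(n: int) -> int:
    return sum(i * i for i in range(n))


async def main():
    # The heartbeat can keep printing while crunch() runs in a worker
    # thread, showing that the loop is not blocked.
    async def heartbeat():
        for _ in range(3):
            print("loop is alive")
            await asyncio.sleep(0.05)

    result, _ = await asyncio.gather(asyncio.to_thread(crunch, 2_000_000), heartbeat())
    print("result:", result)


asyncio.run(main())
```

Note that a thread keeps the event loop responsive, but pure-Python work still contends for the GIL; a process pool would be needed for true CPU parallelism.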
@@ -130,7 +202,11 @@ class SerialTask(Task):
     """Serial task (stateful, must execute sequentially)"""
 
     def __init__(
-        self, task_id: str, process_func: Callable, dependencies: list[str] | None = None, timeout: float = 30.0
+        self,
+        task_id: str,
+        process_func: Callable,
+        dependencies: list[str] | None = None,
+        timeout: float = 30.0,
     ):
         super().__init__(task_id, TaskType.SERIAL, dependencies, timeout)
         self.process_func = process_func
@@ -139,7 +215,11 @@ class SerialTask(Task):
     async def execute(self, context: dict[str, TaskResult]) -> Any:
         """Execute serial task"""
         print(f"[Serial] Starting serial task {self.task_id}")
+        result = await self._execute_with_error_handling(self._execute_serial, context)
+        return result.data
 
+    async def _execute_serial(self, context: dict[str, TaskResult]) -> Any:
+        """Execute serial task logic"""
         # Collect results from dependent tasks
         inputs = {dep_id: context[dep_id].data for dep_id in self.dependencies}
 
@@ -172,61 +252,71 @@ class ParallelTask(Task):
 
     async def execute(self, context: dict[str, TaskResult]) -> Any:
         """Execute subtasks in parallel"""
-        print(f"[Parallel] Starting parallel task {self.task_id}, contains {len(self.subtasks)} subtasks")
-
+        print(
+            f"[Parallel] Starting parallel task {self.task_id}, contains {len(self.subtasks)} subtasks"
+        )
+        result = await self._execute_with_error_handling(
+            self._execute_parallel, context
+        )
+        return result.data
+
+    async def _execute_parallel(self, context: dict[str, TaskResult]) -> Any:
+        """Execute subtasks in parallel with controlled concurrency"""
         # Create semaphore to control concurrency
         semaphore = asyncio.Semaphore(self.max_concurrent)
 
-        async def execute_subtask(subtask: Task, sem: asyncio.Semaphore):
-            async with sem:
-                subtask.start_time = time.time()
-                subtask.update_status(TaskStatus.RUNNING)
-
+        async def execute_subtask_with_semaphore(subtask: Task):
+            async with semaphore:
+                # Execute subtask with its own context
                 try:
-                    data = await asyncio.wait_for(subtask.execute(context), timeout=subtask.timeout)
+                    data = await asyncio.wait_for(
+                        subtask.execute(context), timeout=subtask.timeout
+                    )
                     subtask.result = TaskResult(
                         task_id=subtask.task_id,
                         success=True,
                         data=data,
-                        execution_time=time.time() - subtask.start_time,
+                        execution_time=time.time() - subtask.start_time
+                        if subtask.start_time
+                        else 0,
                     )
                     subtask.update_status(TaskStatus.COMPLETED)
                     return subtask.result
-                except asyncio.TimeoutError as e:
-                    error = TimeoutError(f"Task {subtask.task_id} execution timeout")
-                    subtask.result = TaskResult(
-                        task_id=subtask.task_id,
-                        success=False,
-                        data=None,
-                        execution_time=time.time() - subtask.start_time,
-                        error=error,
-                    )
-                    subtask.update_status(TaskStatus.FAILED)
-                    raise error from e
                 except Exception as e:
+                    subtask.end_time = time.time()
                     subtask.result = TaskResult(
                         task_id=subtask.task_id,
                         success=False,
                         data=None,
-                        execution_time=time.time() - subtask.start_time,
+                        execution_time=time.time() - subtask.start_time
+                        if subtask.start_time
+                        else 0,
                         error=e,
                     )
                     subtask.update_status(TaskStatus.FAILED)
-                    raise e
+                    return subtask.result
 
         # Execute all subtasks in parallel
-        tasks = [execute_subtask(subtask, semaphore) for subtask in self.subtasks]
-        results = await asyncio.gather(*tasks, return_exceptions=True)
+        results = await asyncio.gather(
+            *[execute_subtask_with_semaphore(subtask) for subtask in self.subtasks],
+            return_exceptions=True,
+        )
 
-        # Process results using zip for better readability
+        # Process results
         successful_results = []
         failed_results = []
 
-        for subtask, result in zip(self.subtasks, results):
+        for i, result in enumerate(results):
+            subtask = self.subtasks[i]
             if isinstance(result, Exception):
                 failed_results.append(f"Subtask {subtask.task_id} failed: {result}")
             elif isinstance(result, TaskResult):
-                successful_results.append(result.data)
+                if result.success:
+                    successful_results.append(result.data)
+                else:
+                    failed_results.append(
+                        f"Subtask {subtask.task_id} failed: {result.error}"
+                    )
 
         if failed_results:
             return f"Parallel task {self.task_id} partially failed: {failed_results}"
@@ -336,86 +426,8 @@ class WorkflowEngine:
         for level_index, level in enumerate(execution_order, 1):
             print(f"\n{'=' * 20} Phase {level_index} ({len(level)} tasks) {'=' * 20}")
 
-            # Filter executable tasks in this level
-            ready_tasks = []
-            for task_id in level:
-                task = self.tasks[task_id]
-                if task.can_execute(completed_tasks):
-                    task.update_status(TaskStatus.READY)
-                    ready_tasks.append(task)
-
-            if not ready_tasks:
-                continue
-
-            # Create semaphore for this level to control concurrency
-            semaphore = asyncio.Semaphore(self.max_concurrent)
-
-            async def execute_single_task(task: Task, sem: asyncio.Semaphore):
-                async with sem:
-                    task.start_time = time.time()
-                    task.update_status(TaskStatus.RUNNING)
-
-                    try:
-                        # Collect results from dependent tasks
-                        dependency_results = {dep_id: self.results[dep_id] for dep_id in task.get_dependencies()}
-
-                        # Execute task
-                        data = await asyncio.wait_for(task.execute(dependency_results), timeout=task.timeout)
-
-                        task.end_time = time.time()
-                        task.result = TaskResult(
-                            task_id=task.task_id, success=True, data=data, execution_time=task.get_execution_time()
-                        )
-                        task.update_status(TaskStatus.COMPLETED)
-
-                        # Store result
-                        self.results[task.task_id] = task.result
-                        completed_tasks.add(task.task_id)
-
-                        print(f"[OK] Task {task.task_id} completed, duration: {task.get_execution_time():.2f}s")
-
-                        return task.result
-
-                    except asyncio.TimeoutError as e:
-                        task.end_time = time.time()
-                        task.result = TaskResult(
-                            task_id=task.task_id,
-                            success=False,
-                            data=None,
-                            execution_time=task.get_execution_time(),
-                            error=e,
-                        )
-                        task.update_status(TaskStatus.FAILED)
-
-                        # Store result and mark as completed (even if failed)
-                        self.results[task.task_id] = task.result
-                        completed_tasks.add(task.task_id)
-
-                        print(f"[FAIL] Task {task.task_id} timeout")
-                        raise e from e
-                    except Exception as e:
-                        task.end_time = time.time()
-                        task.result = TaskResult(
-                            task_id=task.task_id,
-                            success=False,
-                            data=None,
-                            execution_time=task.get_execution_time(),
-                            error=e,
-                        )
-                        task.update_status(TaskStatus.FAILED)
-
-                        # Store result and mark as completed (even if failed)
-                        self.results[task.task_id] = task.result
-                        completed_tasks.add(task.task_id)
-
-                        print(f"[FAIL] Task {task.task_id} failed: {e}")
-                        raise e
-
-            # Execute all ready tasks in this level in parallel
-            tasks_to_execute = [execute_single_task(task, semaphore) for task in ready_tasks]
-
-            # Use return_exceptions=True to ensure all tasks complete even if some fail
-            await asyncio.gather(*tasks_to_execute, return_exceptions=True)
+            # Execute the current level
+            await self._execute_level(level, completed_tasks)
 
         print(f"\n{'=' * 50}")
         print("Workflow execution completed")
@@ -423,22 +435,113 @@ class WorkflowEngine:
 
         return self.results
 
+    async def _execute_level(self, level: list[str], completed_tasks: set[str]):
+        """Execute a single level of tasks with controlled concurrency."""
+        # Filter executable tasks in this level
+        ready_tasks = []
+        for task_id in level:
+            task = self.tasks[task_id]
+            if task.can_execute(completed_tasks):
+                task.update_status(TaskStatus.READY)
+                ready_tasks.append(task)
+
+        if not ready_tasks:
+            return
+
+        # Use a shared semaphore for this level to control concurrency
+        semaphore = asyncio.Semaphore(self.max_concurrent)
+
+        # Execute all ready tasks in this level in parallel
+        tasks_to_execute = [
+            self._execute_single_task_with_semaphore(task, semaphore, completed_tasks)
+            for task in ready_tasks
+        ]
+
+        # Use return_exceptions=True to ensure all tasks complete even if some fail
+        await asyncio.gather(*tasks_to_execute, return_exceptions=True)
+
+    async def _execute_single_task_with_semaphore(
+        self, task: Task, semaphore: asyncio.Semaphore, completed_tasks: set[str]
+    ):
+        """Execute a single task with semaphore control for concurrency."""
+        async with semaphore:
+            return await self._execute_single_task(task, completed_tasks)
+
+    async def _execute_single_task(self, task: Task, completed_tasks: set[str]):
+        """Execute a single task with error handling."""
+        task.start_time = time.time()
+        task.update_status(TaskStatus.RUNNING)
+
+        # Collect results from dependent tasks
+        dependency_results = {
+            dep_id: self.results[dep_id] for dep_id in task.get_dependencies()
+        }
+
+        try:
+            # Execute task using the common error handling wrapper
+            result_data = await task._execute_with_error_handling(
+                lambda ctx: task.execute(ctx), dependency_results
+            )
+            # Update task with result
+            task.result = result_data
+        except Exception:
+            # Result is already stored in task.result by _execute_with_error_handling
+            pass
+
+        # Store result and update completed tasks (even if failed)
+        self.results[task.task_id] = task.result
+        completed_tasks.add(task.task_id)
+
+        # Print appropriate message based on result
+        if task.result.success:
+            print(
+                f"[OK] Task {task.task_id} completed, duration: {task.get_execution_time():.2f}s"
+            )
+        else:
+            error_msg = (
+                "timeout"
+                if isinstance(task.result.error, asyncio.TimeoutError)
+                else str(task.result.error)
+            )
+            print(f"[FAIL] Task {task.task_id} failed: {error_msg}")
+
+        return task.result
+
     def get_execution_summary(self) -> dict[str, Any]:
         """Get execution summary"""
         total_tasks = len(self.tasks)
-        completed = sum(1 for task in self.tasks.values() if task.status == TaskStatus.COMPLETED)
-        failed = sum(1 for task in self.tasks.values() if task.status == TaskStatus.FAILED)
-
+        if total_tasks == 0:
+            return {
+                "total_tasks": 0,
+                "completed": 0,
+                "failed": 0,
+                "pending": 0,
+                "total_execution_time": 0.0,
+                "success_rate": 0.0,
+            }
+
+        completed = 0
+        failed = 0
         total_time = 0.0
+
+        # Single pass through tasks to calculate all metrics
         for task in self.tasks.values():
+            if task.status == TaskStatus.COMPLETED:
+                completed += 1
+            elif task.status == TaskStatus.FAILED:
+                failed += 1
+
             if task.result:
                 total_time += task.result.execution_time
 
+        pending = total_tasks - completed - failed
+        success_rate = completed / total_tasks if total_tasks > 0 else 0
+
         return {
             "total_tasks": total_tasks,
             "completed": completed,
             "failed": failed,
-            "pending": total_tasks - completed - failed,
+            "pending": pending,
             "total_execution_time": total_time,
-            "success_rate": completed / total_tasks if total_tasks > 0 else 0,
+            "success_rate": success_rate,
         }
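
Taken together, the refactor routes engine-level execution through the same wrapper the task classes use, and failed tasks are still recorded and marked complete so later levels can proceed. As a usage illustration only, here is a hedged sketch driven by the constructors and `execute()` signatures visible in this diff; the import path is hypothetical, since the module name is not shown here:

```python
# Hedged usage sketch built only from the signatures visible in this diff.
# The import path and the engine's public entry point are NOT shown in the
# diff, so the import below is hypothetical.
import asyncio

from pysfi.workflow import IOTask  # hypothetical module path


async def main():
    fetch = IOTask("fetch", duration=0.2)
    parse = IOTask("parse", duration=0.1, dependencies=["fetch"], timeout=5.0)

    # Each execute() call now routes through _execute_with_error_handling,
    # which stamps start/end times and stores a TaskResult on the task.
    await fetch.execute({})
    context = {"fetch": fetch.result}

    await parse.execute(context)
    print(parse.result.success, f"{parse.result.execution_time:.2f}s")


asyncio.run(main())
```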