pysfi 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/METADATA +1 -1
- {pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/RECORD +35 -35
- {pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/entry_points.txt +2 -0
- sfi/__init__.py +20 -5
- sfi/alarmclock/__init__.py +3 -3
- sfi/bumpversion/__init__.py +5 -5
- sfi/bumpversion/bumpversion.py +64 -15
- sfi/cleanbuild/__init__.py +3 -3
- sfi/cleanbuild/cleanbuild.py +5 -1
- sfi/cli.py +13 -2
- sfi/condasetup/__init__.py +1 -1
- sfi/condasetup/condasetup.py +91 -76
- sfi/docdiff/__init__.py +1 -1
- sfi/docdiff/docdiff.py +3 -2
- sfi/docscan/__init__.py +3 -3
- sfi/docscan/docscan.py +78 -23
- sfi/docscan/docscan_gui.py +5 -5
- sfi/filedate/filedate.py +12 -5
- sfi/img2pdf/img2pdf.py +5 -5
- sfi/llmquantize/llmquantize.py +44 -33
- sfi/llmserver/__init__.py +1 -1
- sfi/makepython/makepython.py +880 -319
- sfi/pdfcrypt/__init__.py +30 -0
- sfi/pdfcrypt/pdfcrypt.py +435 -0
- sfi/pdfsplit/pdfsplit.py +45 -12
- sfi/pyarchive/__init__.py +1 -1
- sfi/pyarchive/pyarchive.py +1 -1
- sfi/pyembedinstall/pyembedinstall.py +1 -1
- sfi/pylibpack/pylibpack.py +5 -13
- sfi/pyloadergen/pyloadergen.py +6 -3
- sfi/pypack/pypack.py +131 -105
- sfi/pyprojectparse/pyprojectparse.py +19 -44
- sfi/pysourcepack/__init__.py +1 -1
- sfi/pysourcepack/pysourcepack.py +11 -14
- sfi/workflowengine/__init__.py +0 -0
- sfi/workflowengine/workflowengine.py +0 -547
- {pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/WHEEL +0 -0
sfi/pysourcepack/pysourcepack.py
CHANGED
@@ -118,7 +118,6 @@ class ProjectPacker:
 
     parent: PySourcePacker
     project: Project
-    project_name: str
     include_patterns: set[str]
     exclude_patterns: set[str]
 
@@ -130,16 +129,13 @@ class ProjectPacker:
     @cached_property
     def project_path(self) -> Path:
         """Get project directory path."""
-
-
-        if self.is_single_project
-        else self.parent.root_dir / self.project_name
-        )
+        # Use the actual directory from toml_path instead of project name
+        return self.project.toml_path.parent
 
     @cached_property
     def output_dir(self) -> Path:
         """Get output directory path."""
-        return self.parent.root_dir / "dist" / "src" / self.
+        return self.parent.root_dir / "dist" / "src" / self.project.normalized_name
 
     def validate_project_path(self) -> bool:
         """Validate project path exists and is a directory."""
@@ -159,14 +155,14 @@ class ProjectPacker:
         Returns:
             True if packing succeeded, False otherwise
         """
-        logger.debug(f"Start packing project: {self.
+        logger.debug(f"Start packing project: {self.project.normalized_name}")
 
-        if not self.
+        if not self.project.normalized_name:
             logger.error("Project name cannot be empty")
             return False
 
         logger.debug(
-            f"Project path: {self.project_path}, project_name: {self.
+            f"Project path: {self.project_path}, project_name: {self.project.normalized_name}"
         )
         if not self.validate_project_path():
             return False
@@ -177,7 +173,9 @@
         copied_files = 0
         copied_dirs = 0
 
-        logger.info(
+        logger.info(
+            f"Packing project '{self.project.normalized_name}' to {self.output_dir}"
+        )
 
         try:
             for item in self.project_path.iterdir():
@@ -202,12 +200,12 @@
                     copied_dirs += 1
 
             logger.info(
-                f"Successfully packed {self.
+                f"Successfully packed {self.project.normalized_name}: {copied_files} files, {copied_dirs} directories"
             )
             return True
 
         except Exception as e:
-            logger.error(f"Error packing project {self.
+            logger.error(f"Error packing project {self.project.normalized_name}: {e}")
            return False
 
 
@@ -248,7 +246,6 @@ class PySourcePacker:
         packer = ProjectPacker(
             parent=self,
             project=project,
-            project_name=project_name,
             include_patterns=self.include_patterns,
             exclude_patterns=self.exclude_patterns,
         )
sfi/workflowengine/__init__.py
DELETED
File without changes

sfi/workflowengine/workflowengine.py
DELETED
@@ -1,547 +0,0 @@
-"""Workflow Engine - A flexible async task orchestration system.
-
-This module provides a comprehensive workflow engine for managing
-complex task dependencies with support for I/O tasks, CPU-intensive tasks,
-serial tasks, and parallel task execution.
-
-The engine supports:
-- Dependency management with cycle detection
-- Topological sorting for execution order
-- Concurrent execution with configurable limits
-- Error handling and timeout management
-- Execution monitoring and reporting
-"""
-
-from __future__ import annotations
-
-import asyncio
-import time
-from abc import ABC, abstractmethod
-from collections import defaultdict, deque
-from dataclasses import dataclass
-from enum import Enum
-from typing import Any, Callable, Sequence
-
-
-class TaskStatus(Enum):
-    """Task status enumeration"""
-
-    PENDING = "pending"
-    READY = "ready"
-    RUNNING = "running"
-    COMPLETED = "completed"
-    FAILED = "failed"
-
-
-class TaskType(Enum):
-    """Task type enumeration"""
-
-    SERIAL = "serial"  # Serial task
-    PARALLEL = "parallel"  # Parallel task
-    ASYNC = "async"  # Async I/O task
-    CPU = "cpu"  # CPU-intensive task
-
-
-@dataclass
-class TaskResult:
-    """Task execution result"""
-
-    task_id: str
-    success: bool
-    data: Any
-    execution_time: float
-    error: Exception | None = None
-
-
-class Task(ABC):
-    """Task abstract base class"""
-
-    def __init__(
-        self,
-        task_id: str,
-        task_type: TaskType,
-        dependencies: list[str] | None = None,
-        timeout: float = 30.0,
-    ):
-        self.task_id = task_id
-        self.task_type = task_type
-        self.dependencies = dependencies or []
-        self.timeout = timeout
-        self.status = TaskStatus.PENDING
-        self.result: TaskResult | None = None
-        self.start_time: float | None = None
-        self.end_time: float | None = None
-
-    def get_dependencies(self) -> list[str]:
-        """Get list of dependent task IDs"""
-        return self.dependencies.copy()
-
-    def can_execute(self, completed_tasks: set[str]) -> bool:
-        """Check if task can be executed (dependencies satisfied)"""
-        return all(dep in completed_tasks for dep in self.dependencies)
-
-    def update_status(self, status: TaskStatus):
-        """Update task status"""
-        self.status = status
-
-    @abstractmethod
-    async def execute(self, context: dict[str, TaskResult]) -> Any:
-        """Execute task logic, must be implemented by subclasses"""
-        pass
-
-    def get_execution_time(self) -> float:
-        """Get task execution time"""
-        if self.start_time and self.end_time:
-            return self.end_time - self.start_time
-        return 0.0
-
-    async def _execute_with_error_handling(
-        self, execution_func, context: dict[str, TaskResult]
-    ):
-        """Common execution wrapper with error handling"""
-        self.start_time = time.time()
-        self.update_status(TaskStatus.RUNNING)
-
-        try:
-            data = await asyncio.wait_for(execution_func(context), timeout=self.timeout)
-            self.end_time = time.time()
-            self.result = TaskResult(
-                task_id=self.task_id,
-                success=True,
-                data=data,
-                execution_time=self.get_execution_time(),
-            )
-            self.update_status(TaskStatus.COMPLETED)
-            return self.result
-        except asyncio.TimeoutError as e:
-            self.end_time = time.time()
-            self.result = TaskResult(
-                task_id=self.task_id,
-                success=False,
-                data=None,
-                execution_time=self.get_execution_time(),
-                error=e,
-            )
-            self.update_status(TaskStatus.FAILED)
-            raise
-        except Exception as e:
-            self.end_time = time.time()
-            self.result = TaskResult(
-                task_id=self.task_id,
-                success=False,
-                data=None,
-                execution_time=self.get_execution_time(),
-                error=e,
-            )
-            self.update_status(TaskStatus.FAILED)
-            raise
-
-
-class IOTask(Task):
-    """I/O-intensive task"""
-
-    def __init__(
-        self,
-        task_id: str,
-        duration: float,
-        dependencies: list[str] | None = None,
-        timeout: float = 30.0,
-    ):
-        super().__init__(task_id, TaskType.ASYNC, dependencies, timeout)
-        self.duration = duration
-
-    async def execute(self, context: dict[str, TaskResult]) -> Any:
-        """Simulate I/O operation"""
-        print(
-            f"[IO] Starting task {self.task_id}, estimated duration: {self.duration}s"
-        )
-        result = await self._execute_with_error_handling(self._execute_io, context)
-        return result.data
-
-    async def _execute_io(self, context: dict[str, TaskResult]) -> Any:
-        """Internal I/O execution method"""
-        await asyncio.sleep(self.duration)
-        return f"IO task {self.task_id} completed, dependencies: {list(context.keys())}"
-
-
-class CPUTask(Task):
-    """CPU-intensive task"""
-
-    def __init__(
-        self,
-        task_id: str,
-        iterations: int,
-        dependencies: list[str] | None = None,
-        timeout: float = 30.0,
-    ):
-        super().__init__(task_id, TaskType.CPU, dependencies, timeout)
-        self.iterations = iterations
-
-    async def execute(self, context: dict[str, TaskResult]) -> Any:
-        """CPU-intensive computation task"""
-        print(f"[CPU] Starting task {self.task_id}, iterations: {self.iterations}")
-        result = await self._execute_with_error_handling(self._execute_cpu, context)
-        return result.data
-
-    async def _execute_cpu(self, context: dict[str, TaskResult]) -> Any:
-        """Execute CPU-intensive work in thread pool"""
-
-        def cpu_intensive_work():
-            result = 0
-            for i in range(self.iterations):
-                result += i * i
-            return result
-
-        # Use run_in_executor to avoid blocking event loop
-        loop = asyncio.get_event_loop()
-        result = await loop.run_in_executor(None, cpu_intensive_work)
-        return f"CPU task {self.task_id} completed, result: {result}"
-
-
-class SerialTask(Task):
-    """Serial task (stateful, must execute sequentially)"""
-
-    def __init__(
-        self,
-        task_id: str,
-        process_func: Callable,
-        dependencies: list[str] | None = None,
-        timeout: float = 30.0,
-    ):
-        super().__init__(task_id, TaskType.SERIAL, dependencies, timeout)
-        self.process_func = process_func
-        self.state = {}
-
-    async def execute(self, context: dict[str, TaskResult]) -> Any:
-        """Execute serial task"""
-        print(f"[Serial] Starting serial task {self.task_id}")
-        result = await self._execute_with_error_handling(self._execute_serial, context)
-        return result.data
-
-    async def _execute_serial(self, context: dict[str, TaskResult]) -> Any:
-        """Execute serial task logic"""
-        # Collect results from dependent tasks
-        inputs = {dep_id: context[dep_id].data for dep_id in self.dependencies}
-
-        # Execute process function
-        if asyncio.iscoroutinefunction(self.process_func):
-            result = await self.process_func(inputs, self.state)
-        else:
-            result = self.process_func(inputs, self.state)
-
-        # Update state
-        self.state = {"last_result": result, "executed": True}
-
-        return f"Serial task {self.task_id} completed, result: {result}"
-
-
-class ParallelTask(Task):
-    """Parallel task (can execute concurrently with other tasks)"""
-
-    def __init__(
-        self,
-        task_id: str,
-        subtasks: Sequence[Task],
-        dependencies: list[str] | None = None,
-        timeout: float = 30.0,
-        max_concurrent: int = 3,
-    ):
-        super().__init__(task_id, TaskType.PARALLEL, dependencies, timeout)
-        self.subtasks = subtasks
-        self.max_concurrent = max_concurrent
-
-    async def execute(self, context: dict[str, TaskResult]) -> Any:
-        """Execute subtasks in parallel"""
-        print(
-            f"[Parallel] Starting parallel task {self.task_id}, contains {len(self.subtasks)} subtasks"
-        )
-        result = await self._execute_with_error_handling(
-            self._execute_parallel, context
-        )
-        return result.data
-
-    async def _execute_parallel(self, context: dict[str, TaskResult]) -> Any:
-        """Execute subtasks in parallel with controlled concurrency"""
-        # Create semaphore to control concurrency
-        semaphore = asyncio.Semaphore(self.max_concurrent)
-
-        async def execute_subtask_with_semaphore(subtask: Task):
-            async with semaphore:
-                # Execute subtask with its own context
-                try:
-                    data = await asyncio.wait_for(
-                        subtask.execute(context), timeout=subtask.timeout
-                    )
-                    subtask.result = TaskResult(
-                        task_id=subtask.task_id,
-                        success=True,
-                        data=data,
-                        execution_time=time.time() - subtask.start_time
-                        if subtask.start_time
-                        else 0,
-                    )
-                    subtask.update_status(TaskStatus.COMPLETED)
-                    return subtask.result
-                except Exception as e:
-                    subtask.end_time = time.time()
-                    subtask.result = TaskResult(
-                        task_id=subtask.task_id,
-                        success=False,
-                        data=None,
-                        execution_time=time.time() - subtask.start_time
-                        if subtask.start_time
-                        else 0,
-                        error=e,
-                    )
-                    subtask.update_status(TaskStatus.FAILED)
-                    return subtask.result
-
-        # Execute all subtasks in parallel
-        results = await asyncio.gather(
-            *[execute_subtask_with_semaphore(subtask) for subtask in self.subtasks],
-            return_exceptions=True,
-        )
-
-        # Process results
-        successful_results = []
-        failed_results = []
-
-        for i, result in enumerate(results):
-            subtask = self.subtasks[i]
-            if isinstance(result, Exception):
-                failed_results.append(f"Subtask {subtask.task_id} failed: {result}")
-            elif isinstance(result, TaskResult):
-                if result.success:
-                    successful_results.append(result.data)
-                else:
-                    failed_results.append(
-                        f"Subtask {subtask.task_id} failed: {result.error}"
-                    )
-
-        if failed_results:
-            return f"Parallel task {self.task_id} partially failed: {failed_results}"
-
-        return f"Parallel task {self.task_id} completed, results: {successful_results}"
-
-
-class WorkflowEngine:
-    """Workflow engine - core orchestrator"""
-
-    def __init__(self, max_concurrent: int = 4):
-        self.tasks: dict[str, Task] = {}
-        self.results: dict[str, TaskResult] = {}
-        self.max_concurrent = max_concurrent
-        self.execution_order: list[list[str]] = []
-
-    def add_task(self, task: Task):
-        """Add task to workflow"""
-        self.tasks[task.task_id] = task
-
-    def validate_dependencies(self) -> bool:
-        """Validate task dependencies, ensure no circular dependencies"""
-        # Build adjacency list
-        graph = defaultdict(list)
-        in_degree = dict.fromkeys(self.tasks, 0)
-
-        for task_id, task in self.tasks.items():
-            for dep in task.get_dependencies():
-                if dep not in self.tasks:
-                    raise ValueError(f"Task {task_id} depends on unknown task {dep}")
-                graph[dep].append(task_id)
-                in_degree[task_id] += 1
-
-        # Detect circular dependencies
-        visited = 0
-        queue = deque([task_id for task_id, degree in in_degree.items() if degree == 0])
-
-        while queue:
-            current = queue.popleft()
-            visited += 1
-
-            for neighbor in graph[current]:
-                in_degree[neighbor] -= 1
-                if in_degree[neighbor] == 0:
-                    queue.append(neighbor)
-
-        if visited != len(self.tasks):
-            raise ValueError("Circular dependency detected in workflow")
-
-        return True
-
-    def calculate_execution_order(self) -> list[list[str]]:
-        """Calculate task execution order (topological sort + level grouping)"""
-        if not self.tasks:
-            return []
-
-        # Build adjacency list
-        graph = defaultdict(list)
-        in_degree = dict.fromkeys(self.tasks, 0)
-
-        for task_id, task in self.tasks.items():
-            for dep in task.get_dependencies():
-                graph[dep].append(task_id)
-                in_degree[task_id] += 1
-
-        # Level-based topological sort
-        execution_order = []
-        queue = deque([task_id for task_id, degree in in_degree.items() if degree == 0])
-
-        while queue:
-            level_size = len(queue)
-            current_level = []
-
-            for _ in range(level_size):
-                task_id = queue.popleft()
-                current_level.append(task_id)
-
-                for neighbor in graph[task_id]:
-                    in_degree[neighbor] -= 1
-                    if in_degree[neighbor] == 0:
-                        queue.append(neighbor)
-
-            if current_level:
-                execution_order.append(current_level)
-
-        self.execution_order = execution_order
-        return execution_order
-
-    async def execute_workflow(self) -> dict[str, TaskResult]:
-        """Execute entire workflow"""
-        print("=" * 50)
-        print("Starting workflow execution")
-        print("=" * 50)
-
-        # Validate dependencies
-        self.validate_dependencies()
-
-        # Calculate execution order
-        execution_order = self.calculate_execution_order()
-        print(f"Execution plan ({len(execution_order)} phases):")
-        for i, level in enumerate(execution_order, 1):
-            print(f"  Phase {i}: {level}")
-
-        # Execute by level
-        completed_tasks: set[str] = set()
-
-        for level_index, level in enumerate(execution_order, 1):
-            print(f"\n{'=' * 20} Phase {level_index} ({len(level)} tasks) {'=' * 20}")
-
-            # Execute the current level
-            await self._execute_level(level, completed_tasks)
-
-        print(f"\n{'=' * 50}")
-        print("Workflow execution completed")
-        print(f"{'=' * 50}")
-
-        return self.results
-
-    async def _execute_level(self, level: list[str], completed_tasks: set[str]):
-        """Execute a single level of tasks with controlled concurrency."""
-        # Filter executable tasks in this level
-        ready_tasks = []
-        for task_id in level:
-            task = self.tasks[task_id]
-            if task.can_execute(completed_tasks):
-                task.update_status(TaskStatus.READY)
-                ready_tasks.append(task)
-
-        if not ready_tasks:
-            return
-
-        # Use a shared semaphore for this level to control concurrency
-        semaphore = asyncio.Semaphore(self.max_concurrent)
-
-        # Execute all ready tasks in this level in parallel
-        tasks_to_execute = [
-            self._execute_single_task_with_semaphore(task, semaphore, completed_tasks)
-            for task in ready_tasks
-        ]
-
-        # Use return_exceptions=True to ensure all tasks complete even if some fail
-        await asyncio.gather(*tasks_to_execute, return_exceptions=True)
-
-    async def _execute_single_task_with_semaphore(
-        self, task: Task, semaphore: asyncio.Semaphore, completed_tasks: set[str]
-    ):
-        """Execute a single task with semaphore control for concurrency."""
-        async with semaphore:
-            return await self._execute_single_task(task, completed_tasks)
-
-    async def _execute_single_task(self, task: Task, completed_tasks: set[str]):
-        """Execute a single task with error handling."""
-        task.start_time = time.time()
-        task.update_status(TaskStatus.RUNNING)
-
-        # Collect results from dependent tasks
-        dependency_results = {
-            dep_id: self.results[dep_id] for dep_id in task.get_dependencies()
-        }
-
-        try:
-            # Execute task using the common error handling wrapper
-            result_data = await task._execute_with_error_handling(
-                lambda ctx: task.execute(ctx), dependency_results
-            )
-            # Update task with result
-            task.result = result_data
-        except Exception:
-            # Result is already stored in task.result by _execute_with_error_handling
-            pass
-
-        # Store result and update completed tasks (even if failed)
-        self.results[task.task_id] = task.result
-        completed_tasks.add(task.task_id)
-
-        # Print appropriate message based on result
-        if task.result.success:
-            print(
-                f"[OK] Task {task.task_id} completed, duration: {task.get_execution_time():.2f}s"
-            )
-        else:
-            error_msg = (
-                "timeout"
-                if isinstance(task.result.error, asyncio.TimeoutError)
-                else str(task.result.error)
-            )
-            print(f"[FAIL] Task {task.task_id} failed: {error_msg}")
-
-        return task.result
-
-    def get_execution_summary(self) -> dict[str, Any]:
-        """Get execution summary"""
-        total_tasks = len(self.tasks)
-        if total_tasks == 0:
-            return {
-                "total_tasks": 0,
-                "completed": 0,
-                "failed": 0,
-                "pending": 0,
-                "total_execution_time": 0.0,
-                "success_rate": 0.0,
-            }
-
-        completed = 0
-        failed = 0
-        total_time = 0.0
-
-        # Single pass through tasks to calculate all metrics
-        for task in self.tasks.values():
-            if task.status == TaskStatus.COMPLETED:
-                completed += 1
-            elif task.status == TaskStatus.FAILED:
-                failed += 1
-
-            if task.result:
-                total_time += task.result.execution_time
-
-        pending = total_tasks - completed - failed
-        success_rate = completed / total_tasks if total_tasks > 0 else 0
-
-        return {
-            "total_tasks": total_tasks,
-            "completed": completed,
-            "failed": failed,
-            "pending": pending,
-            "total_execution_time": total_time,
-            "success_rate": success_rate,
-        }
{pysfi-0.1.13.dist-info → pysfi-0.1.15.dist-info}/WHEEL
File without changes