chuk-tool-processor 0.1.6__py3-none-any.whl → 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chuk-tool-processor might be problematic. Click here for more details.
- chuk_tool_processor/core/processor.py +345 -132
- chuk_tool_processor/execution/strategies/inprocess_strategy.py +522 -71
- chuk_tool_processor/execution/strategies/subprocess_strategy.py +559 -64
- chuk_tool_processor/execution/tool_executor.py +282 -24
- chuk_tool_processor/execution/wrappers/caching.py +465 -123
- chuk_tool_processor/execution/wrappers/rate_limiting.py +199 -86
- chuk_tool_processor/execution/wrappers/retry.py +133 -23
- chuk_tool_processor/logging/__init__.py +83 -10
- chuk_tool_processor/logging/context.py +218 -22
- chuk_tool_processor/logging/formatter.py +56 -13
- chuk_tool_processor/logging/helpers.py +91 -16
- chuk_tool_processor/logging/metrics.py +75 -6
- chuk_tool_processor/mcp/mcp_tool.py +80 -35
- chuk_tool_processor/mcp/register_mcp_tools.py +74 -56
- chuk_tool_processor/mcp/setup_mcp_sse.py +41 -36
- chuk_tool_processor/mcp/setup_mcp_stdio.py +39 -37
- chuk_tool_processor/mcp/transport/sse_transport.py +351 -105
- chuk_tool_processor/models/execution_strategy.py +52 -3
- chuk_tool_processor/models/streaming_tool.py +110 -0
- chuk_tool_processor/models/tool_call.py +56 -4
- chuk_tool_processor/models/tool_result.py +115 -9
- chuk_tool_processor/models/validated_tool.py +15 -13
- chuk_tool_processor/plugins/discovery.py +115 -70
- chuk_tool_processor/plugins/parsers/base.py +13 -5
- chuk_tool_processor/plugins/parsers/{function_call_tool_plugin.py → function_call_tool.py} +39 -20
- chuk_tool_processor/plugins/parsers/json_tool.py +50 -0
- chuk_tool_processor/plugins/parsers/openai_tool.py +88 -0
- chuk_tool_processor/plugins/parsers/xml_tool.py +74 -20
- chuk_tool_processor/registry/__init__.py +46 -7
- chuk_tool_processor/registry/auto_register.py +92 -28
- chuk_tool_processor/registry/decorators.py +134 -11
- chuk_tool_processor/registry/interface.py +48 -14
- chuk_tool_processor/registry/metadata.py +52 -6
- chuk_tool_processor/registry/provider.py +75 -36
- chuk_tool_processor/registry/providers/__init__.py +49 -10
- chuk_tool_processor/registry/providers/memory.py +59 -48
- chuk_tool_processor/registry/tool_export.py +208 -39
- chuk_tool_processor/utils/validation.py +18 -13
- chuk_tool_processor-0.2.dist-info/METADATA +401 -0
- chuk_tool_processor-0.2.dist-info/RECORD +58 -0
- {chuk_tool_processor-0.1.6.dist-info → chuk_tool_processor-0.2.dist-info}/WHEEL +1 -1
- chuk_tool_processor/plugins/parsers/json_tool_plugin.py +0 -38
- chuk_tool_processor/plugins/parsers/openai_tool_plugin.py +0 -76
- chuk_tool_processor-0.1.6.dist-info/METADATA +0 -462
- chuk_tool_processor-0.1.6.dist-info/RECORD +0 -57
- {chuk_tool_processor-0.1.6.dist-info → chuk_tool_processor-0.2.dist-info}/top_level.txt +0 -0
|
@@ -1,103 +1,598 @@
|
|
|
1
|
-
# chuk_tool_processor/execution/subprocess_strategy.py
|
|
1
|
+
# chuk_tool_processor/execution/strategies/subprocess_strategy.py
|
|
2
|
+
"""
|
|
3
|
+
Subprocess execution strategy - truly runs tools in separate OS processes.
|
|
4
|
+
|
|
5
|
+
This strategy executes tools in separate Python processes using a process pool,
|
|
6
|
+
providing isolation and potentially better parallelism on multi-core systems.
|
|
7
|
+
|
|
8
|
+
FIXED: Ensures consistent timeout handling across all execution paths.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
2
12
|
import asyncio
|
|
3
|
-
|
|
4
|
-
import
|
|
5
|
-
import importlib
|
|
13
|
+
import concurrent.futures
|
|
14
|
+
import functools
|
|
6
15
|
import inspect
|
|
16
|
+
import os
|
|
17
|
+
import pickle
|
|
18
|
+
import signal
|
|
19
|
+
import sys
|
|
20
|
+
import traceback
|
|
7
21
|
from datetime import datetime, timezone
|
|
8
|
-
from typing import List, Optional,
|
|
9
|
-
from concurrent.futures import ProcessPoolExecutor
|
|
22
|
+
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple, Set
|
|
10
23
|
|
|
11
|
-
# imports
|
|
12
24
|
from chuk_tool_processor.models.execution_strategy import ExecutionStrategy
|
|
13
25
|
from chuk_tool_processor.models.tool_call import ToolCall
|
|
14
26
|
from chuk_tool_processor.models.tool_result import ToolResult
|
|
15
|
-
from chuk_tool_processor.
|
|
27
|
+
from chuk_tool_processor.registry.interface import ToolRegistryInterface
|
|
28
|
+
from chuk_tool_processor.logging import get_logger, log_context_span
|
|
16
29
|
|
|
17
30
|
logger = get_logger("chuk_tool_processor.execution.subprocess_strategy")
|
|
18
31
|
|
|
19
|
-
# Define a top-level function for subprocess execution
|
|
20
|
-
def _execute_tool_in_process(tool_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
21
|
-
"""
|
|
22
|
-
Execute a tool in a separate process.
|
|
23
32
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
# --------------------------------------------------------------------------- #
|
|
34
|
+
# Module-level helper functions for worker processes - these must be at the module
|
|
35
|
+
# level so they can be pickled
|
|
36
|
+
# --------------------------------------------------------------------------- #
|
|
37
|
+
def _init_worker():
    """
    Process-pool initializer: make the worker ignore SIGINT.

    Installed as the ``initializer`` of the process pool, so it runs once in
    each worker process when that worker starts.
    """
    # Ctrl-C is left for the parent process to handle; workers disregard it.
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _pool_test_func():
    """
    Trivial probe task used to check that the process pool responds.

    Defined at module level (not as a lambda) so it can be pickled and sent
    to a worker process.

    Returns:
        The literal string ``"ok"``.
    """
    probe_reply = "ok"
    return probe_reply
|
|
31
46
|
|
|
47
|
+
|
|
48
|
+
def _process_worker(
    tool_name: str,
    namespace: str,
    module_name: str,
    class_name: str,
    arguments: Dict[str, Any],
    timeout: Optional[float]
) -> Dict[str, Any]:
    """
    Worker function that runs in a separate process.

    Imports ``module_name``, looks up ``class_name`` in it, instantiates it
    when it is a class, and runs its async ``_aexecute`` (preferred) or
    ``execute`` method on a fresh event loop. Failures are reported through
    the ``"error"`` entry of the returned dict rather than by raising.

    Args:
        tool_name: Name of the tool (echoed back in the result)
        namespace: Namespace of the tool (echoed back in the result)
        module_name: Module containing the tool class
        class_name: Name of the tool class
        arguments: Keyword arguments to pass to the tool's execute method
        timeout: Optional timeout in seconds; ``None`` or a non-positive
            value means the tool runs without a time limit

    Returns:
        Serialized result data with the keys ``tool``, ``namespace``,
        ``start_time`` / ``end_time`` (ISO-8601 strings, UTC), ``machine``,
        ``pid``, ``result`` and ``error``.
    """
    # Imports are local so the function is self-contained in the worker.
    import asyncio
    import importlib
    import inspect
    import os
    import platform
    from datetime import datetime, timezone

    def _now_iso() -> str:
        """Current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    # Data for the result.
    # platform.node() is used instead of os.uname().nodename because
    # os.uname() does not exist on Windows; there it would raise before the
    # try-block below and no error payload would be returned.
    result_data = {
        "tool": tool_name,
        "namespace": namespace,
        "start_time": _now_iso(),
        "end_time": None,
        "machine": platform.node(),
        "pid": os.getpid(),
        "result": None,
        "error": None,
    }

    try:
        if not module_name or not class_name:
            raise ValueError("Missing module or class name")

        # Import the module
        try:
            module = importlib.import_module(module_name)
        except ImportError as e:
            result_data["error"] = f"Failed to import module {module_name}: {str(e)}"
            result_data["end_time"] = _now_iso()
            return result_data

        # Get the class or function
        try:
            tool_class = getattr(module, class_name)
        except AttributeError as e:
            result_data["error"] = f"Failed to find {class_name} in {module_name}: {str(e)}"
            result_data["end_time"] = _now_iso()
            return result_data

        # Instantiate the tool (non-class objects are used as they are)
        tool_instance = tool_class() if inspect.isclass(tool_class) else tool_class

        # Find the execute method: `_aexecute` wins over `execute`, and only
        # coroutine functions defined on the class are accepted.
        if hasattr(tool_instance, "_aexecute") and inspect.iscoroutinefunction(
            getattr(tool_instance.__class__, "_aexecute", None)
        ):
            execute_fn = tool_instance._aexecute
        elif hasattr(tool_instance, "execute") and inspect.iscoroutinefunction(
            getattr(tool_instance.__class__, "execute", None)
        ):
            execute_fn = tool_instance.execute
        else:
            result_data["error"] = "Tool must have an async execute or _aexecute method"
            result_data["end_time"] = _now_iso()
            return result_data

        # Create a new event loop for this call
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        try:
            # Execute the tool, enforcing the timeout only when it is positive
            if timeout is not None and timeout > 0:
                result_value = loop.run_until_complete(
                    asyncio.wait_for(execute_fn(**arguments), timeout)
                )
            else:
                result_value = loop.run_until_complete(execute_fn(**arguments))

            # Store the result
            result_data["result"] = result_value

        except asyncio.TimeoutError:
            result_data["error"] = f"Execution timed out after {timeout}s"
        except Exception as e:
            result_data["error"] = f"Error during execution: {str(e)}"

        finally:
            # Detach the loop before closing it so a closed loop is never
            # left installed as this (reused) worker's current event loop.
            asyncio.set_event_loop(None)
            loop.close()

    except Exception as e:
        # Catch any other exceptions (bad names, failing constructor, ...)
        result_data["error"] = f"Unexpected error: {str(e)}"

    # Set end time
    result_data["end_time"] = _now_iso()
    return result_data
|
|
76
164
|
|
|
77
165
|
|
|
166
|
+
# --------------------------------------------------------------------------- #
|
|
167
|
+
# The subprocess strategy
|
|
168
|
+
# --------------------------------------------------------------------------- #
|
|
78
169
|
class SubprocessStrategy(ExecutionStrategy):
    """
    Execute tools in separate processes for isolation and parallelism.

    This strategy creates a pool of worker processes and distributes tool calls
    among them. Each tool executes in its own process, providing isolation and
    parallelism.

    Failures (unknown tool, timeout, crashed worker, cancellation, shutdown)
    are reported as ``ToolResult`` objects with ``error`` set; the execution
    methods do not raise for them.
    """

    # Extra seconds granted on top of the tool timeout before the parent gives
    # up waiting for the worker (covers hung or crashed worker processes).
    SAFETY_TIMEOUT_BUFFER = 5.0

    def __init__(
        self,
        registry: ToolRegistryInterface,
        *,
        max_workers: int = 4,
        default_timeout: Optional[float] = None,
        worker_init_timeout: float = 5.0,
    ) -> None:
        """
        Initialize the subprocess execution strategy.

        Args:
            registry: Tool registry for tool lookups
            max_workers: Maximum number of worker processes
            default_timeout: Default timeout for tool execution; a falsy value
                (``None`` or ``0``) falls back to 30 seconds
            worker_init_timeout: Timeout for worker process initialization
        """
        self.registry = registry
        self.max_workers = max_workers
        self.default_timeout = default_timeout or 30.0  # Always have a default
        self.worker_init_timeout = worker_init_timeout

        # Process pool (initialized lazily)
        self._process_pool: Optional[concurrent.futures.ProcessPoolExecutor] = None
        self._pool_lock = asyncio.Lock()

        # Task tracking for cleanup
        self._active_tasks: Set[asyncio.Task] = set()
        # Strong references to signal-triggered shutdown tasks. The event loop
        # keeps only weak references to tasks, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._signal_tasks: Set[asyncio.Task] = set()
        self._shutdown_event = asyncio.Event()
        self._shutting_down = False

        logger.debug("SubprocessStrategy initialized with timeout: %ss, max_workers: %d",
                     self.default_timeout, max_workers)

        # Register shutdown handler if in main thread.
        # NOTE(review): this replaces any SIGTERM/SIGINT handlers the host
        # application registered on the running loop - confirm that is wanted.
        try:
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                loop.add_signal_handler(
                    sig, functools.partial(self._schedule_shutdown, sig)
                )
        except (RuntimeError, NotImplementedError):
            # No running loop, not in the main thread, or not on Unix
            pass

    # ------------------------------------------------------------------ #
    #  internal helpers                                                   #
    # ------------------------------------------------------------------ #
    @staticmethod
    def _hostname() -> str:
        """Return this machine's network name (works on Windows, unlike os.uname)."""
        import platform  # local import: leaves the file-level import block untouched

        return platform.node()

    def _failure_result(
        self,
        call: ToolCall,
        error: str,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
    ) -> ToolResult:
        """Build an error ``ToolResult`` for ``call`` stamped with this process's identity."""
        now = datetime.now(timezone.utc)
        return ToolResult(
            tool=call.tool,
            result=None,
            error=error,
            start_time=start_time if start_time is not None else now,
            end_time=end_time if end_time is not None else now,
            machine=self._hostname(),
            pid=os.getpid(),
        )

    def _track(self, coro) -> asyncio.Task:
        """Schedule ``coro`` as a task and register it for cancellation on shutdown."""
        task = asyncio.create_task(coro)
        self._active_tasks.add(task)
        task.add_done_callback(self._active_tasks.discard)
        return task

    def _schedule_shutdown(self, sig: int) -> None:
        """Signal callback: start the async shutdown and keep a reference to its task."""
        task = asyncio.create_task(self._signal_handler(sig))
        self._signal_tasks.add(task)
        task.add_done_callback(self._signal_tasks.discard)

    async def _ensure_pool(self) -> None:
        """
        Initialize the process pool if not already initialized.

        Raises:
            RuntimeError: if the probe task does not complete within
                ``worker_init_timeout`` or fails for any other reason.
        """
        if self._process_pool is not None:
            return

        async with self._pool_lock:
            # Re-check: another task may have created the pool while this one
            # was waiting for the lock.
            if self._process_pool is not None:
                return

            # Create process pool
            self._process_pool = concurrent.futures.ProcessPoolExecutor(
                max_workers=self.max_workers,
                initializer=_init_worker,
            )

            # Test the pool with a simple task
            loop = asyncio.get_running_loop()
            try:
                # Use a module-level function instead of a lambda (picklable)
                await asyncio.wait_for(
                    loop.run_in_executor(self._process_pool, _pool_test_func),
                    timeout=self.worker_init_timeout
                )
                logger.info("Process pool initialized with %d workers", self.max_workers)
            except Exception as e:
                # Clean up on initialization error
                self._process_pool.shutdown(wait=False)
                self._process_pool = None
                logger.error("Failed to initialize process pool: %s", e)
                raise RuntimeError(f"Failed to initialize process pool: {e}") from e

    # ------------------------------------------------------------------ #
    # 🔌 legacy façade for older wrappers                                 #
    # ------------------------------------------------------------------ #
    async def execute(
        self,
        calls: List[ToolCall],
        *,
        timeout: Optional[float] = None,
    ) -> List[ToolResult]:
        """
        Back-compat shim.

        Old wrappers (`retry`, `rate_limit`, `cache`, …) still expect an
        ``execute()`` coroutine on an execution-strategy object.
        The real implementation lives in :meth:`run`, so we just forward.
        """
        return await self.run(calls, timeout)

    async def run(
        self,
        calls: List[ToolCall],
        timeout: Optional[float] = None,
    ) -> List[ToolResult]:
        """
        Execute tool calls in separate processes.

        Args:
            calls: List of tool calls to execute
            timeout: Optional timeout for each execution (overrides default)

        Returns:
            List of tool results in the same order as calls
        """
        if not calls:
            return []

        if self._shutting_down:
            # Return early with error results if shutting down
            return [self._failure_result(call, "System is shutting down") for call in calls]

        # Use default_timeout if no timeout specified
        effective_timeout = timeout if timeout is not None else self.default_timeout
        logger.debug("Executing %d calls in subprocesses with %ss timeout each", len(calls), effective_timeout)

        # Create one tracked task per call (always pass a concrete timeout)
        tasks = [
            self._track(self._execute_single_call(call, effective_timeout))
            for call in calls
        ]

        # Execute all tasks concurrently; gather keeps the order of `calls`
        async with log_context_span("subprocess_execution", {"num_calls": len(calls)}):
            return await asyncio.gather(*tasks)

    async def stream_run(
        self,
        calls: List[ToolCall],
        timeout: Optional[float] = None,
    ) -> AsyncIterator[ToolResult]:
        """
        Execute tool calls and yield results as they become available.

        Exactly one result is yielded per call.

        Args:
            calls: List of tool calls to execute
            timeout: Optional timeout for each execution

        Yields:
            Tool results as they complete (not necessarily in order)
        """
        if not calls:
            return

        if self._shutting_down:
            # Yield error results if shutting down
            for call in calls:
                yield self._failure_result(call, "System is shutting down")
            return

        # Use default_timeout if no timeout specified
        effective_timeout = timeout if timeout is not None else self.default_timeout

        # Create a queue for results
        queue: asyncio.Queue = asyncio.Queue()

        def _backfill(call: ToolCall, task: asyncio.Task) -> None:
            """Enqueue an error result when a producer ended without delivering one."""
            if task.cancelled():
                # e.g. cancelled before it ever ran, or while awaiting the
                # registry / pool setup
                queue.put_nowait(self._failure_result(call, "Execution was cancelled"))
                return
            exc = task.exception()
            if exc is not None:
                logger.error("Error in task: %s", exc, exc_info=exc)
                queue.put_nowait(self._failure_result(call, f"Error: {str(exc)}"))

        # Start all executions and have them put results in the queue
        for call in calls:
            task = self._track(self._execute_to_queue(call, queue, effective_timeout))
            task.add_done_callback(functools.partial(_backfill, call))

        # Drain one result per call. Counting results (instead of watching the
        # set of unfinished tasks) keeps results that were enqueued by several
        # producers finishing in the same scheduling window from being dropped.
        for _ in range(len(calls)):
            yield await queue.get()

    async def _execute_to_queue(
        self,
        call: ToolCall,
        queue: asyncio.Queue,
        timeout: float,  # Make timeout required
    ) -> None:
        """Execute a single call and put the result in the queue."""
        result = await self._execute_single_call(call, timeout)
        await queue.put(result)

    async def _execute_single_call(
        self,
        call: ToolCall,
        timeout: float,  # Make timeout required
    ) -> ToolResult:
        """
        Execute a single tool call in a separate process.

        Args:
            call: Tool call to execute
            timeout: Timeout in seconds (required). The worker enforces it; the
                parent waits ``timeout + SAFETY_TIMEOUT_BUFFER`` seconds before
                giving up on the worker itself.

        Returns:
            Tool execution result (``error`` is set on any failure)
        """
        start_time = datetime.now(timezone.utc)

        logger.debug("Executing %s in subprocess with %ss timeout", call.tool, timeout)

        try:
            # Get tool from registry
            tool_impl = await self.registry.get_tool(call.tool, call.namespace)
            if tool_impl is None:
                return self._failure_result(
                    call, f"Tool '{call.tool}' not found", start_time=start_time
                )

            # Get module and class names for import in worker process
            tool_type = tool_impl if inspect.isclass(tool_impl) else tool_impl.__class__
            module_name = tool_type.__module__
            class_name = tool_type.__name__

            # Ensure pool is initialized, then snapshot it with no `await` in
            # between. Passing `None` to run_in_executor would silently run the
            # tool on the default *thread* pool inside this process.
            await self._ensure_pool()
            pool = self._process_pool
            if pool is None:
                raise RuntimeError("Process pool is not available")

            # Execute in subprocess
            loop = asyncio.get_running_loop()

            # Add safety timeout to handle process crashes (tool timeout + buffer)
            safety_timeout = timeout + self.SAFETY_TIMEOUT_BUFFER

            try:
                result_data = await asyncio.wait_for(
                    loop.run_in_executor(
                        pool,
                        functools.partial(
                            _process_worker,
                            call.tool,
                            call.namespace,
                            module_name,
                            class_name,
                            call.arguments,
                            timeout  # Pass the actual timeout to worker
                        )
                    ),
                    timeout=safety_timeout
                )

                # Parse timestamps (the worker sends ISO-8601 strings)
                for key in ("start_time", "end_time"):
                    if isinstance(result_data.get(key), str):
                        result_data[key] = datetime.fromisoformat(result_data[key])

                end_time = datetime.now(timezone.utc)
                actual_duration = (end_time - start_time).total_seconds()

                if result_data.get("error"):
                    logger.debug("%s subprocess failed after %.3fs: %s",
                                 call.tool, actual_duration, result_data["error"])
                else:
                    logger.debug("%s subprocess completed in %.3fs (limit: %ss)",
                                 call.tool, actual_duration, timeout)

                # Create ToolResult from worker data
                return ToolResult(
                    tool=result_data.get("tool", call.tool),
                    result=result_data.get("result"),
                    error=result_data.get("error"),
                    start_time=result_data.get("start_time", start_time),
                    end_time=result_data.get("end_time", end_time),
                    machine=result_data.get("machine", self._hostname()),
                    pid=result_data.get("pid", os.getpid()),
                )

            except asyncio.TimeoutError:
                # This happens if the worker process itself hangs.
                # NOTE(review): the worker is not terminated here, so a hung
                # tool presumably keeps occupying its pool slot - confirm.
                end_time = datetime.now(timezone.utc)
                actual_duration = (end_time - start_time).total_seconds()
                logger.debug("%s subprocess timed out after %.3fs (safety limit: %ss)",
                             call.tool, actual_duration, safety_timeout)

                return self._failure_result(
                    call,
                    f"Worker process timed out after {safety_timeout}s",
                    start_time=start_time,
                    end_time=end_time,
                )

            except concurrent.futures.process.BrokenProcessPool:
                # Process pool broke - drop it so the next call recreates it.
                logger.error("Process pool broke during execution - recreating")
                # Only clear the shared reference if it still points at the
                # pool that broke; another task may already have replaced it
                # with a healthy one.
                if self._process_pool is pool:
                    self._process_pool = None
                pool.shutdown(wait=False)

                return self._failure_result(
                    call, "Worker process crashed", start_time=start_time
                )

            except asyncio.CancelledError:
                # Handle cancellation: reported as an error result instead of
                # propagating, so shutdown() can collect a result per call.
                logger.debug("%s subprocess was cancelled", call.tool)
                return self._failure_result(
                    call, "Execution was cancelled", start_time=start_time
                )

        except Exception as e:
            # Handle any other errors (registry lookup, pool setup, pickling, ...)
            logger.exception("Error executing %s in subprocess: %s", call.tool, e)
            end_time = datetime.now(timezone.utc)
            actual_duration = (end_time - start_time).total_seconds()
            logger.debug("%s subprocess setup failed after %.3fs: %s",
                         call.tool, actual_duration, e)

            return self._failure_result(
                call, f"Error: {str(e)}", start_time=start_time, end_time=end_time
            )

    @property
    def supports_streaming(self) -> bool:
        """Check if this strategy supports streaming execution."""
        return True

    async def _signal_handler(self, sig: int) -> None:
        """Handle termination signals."""
        signame = signal.Signals(sig).name
        logger.info("Received %s, shutting down process pool", signame)
        await self.shutdown()

    async def shutdown(self) -> None:
        """
        Gracefully shut down the process pool.

        This cancels all active tasks and shuts down the process pool.
        Calling it again after the first call is a no-op.
        """
        if self._shutting_down:
            return

        self._shutting_down = True
        self._shutdown_event.set()

        # Cancel all active tasks
        active_tasks = list(self._active_tasks)
        if active_tasks:
            logger.info("Cancelling %d active tool executions", len(active_tasks))
            for task in active_tasks:
                task.cancel()

            # Wait for all tasks to complete (with cancellation)
            await asyncio.gather(*active_tasks, return_exceptions=True)

        # Shut down the process pool. Detach it first so nothing submits to a
        # pool that is going away, then wait in a helper thread so the event
        # loop is not blocked while the workers exit.
        pool, self._process_pool = self._process_pool, None
        if pool is not None:
            logger.info("Shutting down process pool")
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, functools.partial(pool.shutdown, wait=True))
|