chuk-tool-processor 0.6.4__py3-none-any.whl → 0.9.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chuk-tool-processor might be problematic.
- chuk_tool_processor/core/__init__.py +32 -1
- chuk_tool_processor/core/exceptions.py +225 -13
- chuk_tool_processor/core/processor.py +135 -104
- chuk_tool_processor/execution/strategies/__init__.py +6 -0
- chuk_tool_processor/execution/strategies/inprocess_strategy.py +142 -150
- chuk_tool_processor/execution/strategies/subprocess_strategy.py +202 -206
- chuk_tool_processor/execution/tool_executor.py +82 -84
- chuk_tool_processor/execution/wrappers/__init__.py +42 -0
- chuk_tool_processor/execution/wrappers/caching.py +150 -116
- chuk_tool_processor/execution/wrappers/circuit_breaker.py +370 -0
- chuk_tool_processor/execution/wrappers/rate_limiting.py +76 -43
- chuk_tool_processor/execution/wrappers/retry.py +116 -78
- chuk_tool_processor/logging/__init__.py +23 -17
- chuk_tool_processor/logging/context.py +40 -45
- chuk_tool_processor/logging/formatter.py +22 -21
- chuk_tool_processor/logging/helpers.py +28 -42
- chuk_tool_processor/logging/metrics.py +13 -15
- chuk_tool_processor/mcp/__init__.py +8 -12
- chuk_tool_processor/mcp/mcp_tool.py +158 -114
- chuk_tool_processor/mcp/register_mcp_tools.py +22 -22
- chuk_tool_processor/mcp/setup_mcp_http_streamable.py +57 -17
- chuk_tool_processor/mcp/setup_mcp_sse.py +57 -17
- chuk_tool_processor/mcp/setup_mcp_stdio.py +11 -11
- chuk_tool_processor/mcp/stream_manager.py +333 -276
- chuk_tool_processor/mcp/transport/__init__.py +22 -29
- chuk_tool_processor/mcp/transport/base_transport.py +180 -44
- chuk_tool_processor/mcp/transport/http_streamable_transport.py +505 -325
- chuk_tool_processor/mcp/transport/models.py +100 -0
- chuk_tool_processor/mcp/transport/sse_transport.py +607 -276
- chuk_tool_processor/mcp/transport/stdio_transport.py +597 -116
- chuk_tool_processor/models/__init__.py +21 -1
- chuk_tool_processor/models/execution_strategy.py +16 -21
- chuk_tool_processor/models/streaming_tool.py +28 -25
- chuk_tool_processor/models/tool_call.py +49 -31
- chuk_tool_processor/models/tool_export_mixin.py +22 -8
- chuk_tool_processor/models/tool_result.py +40 -77
- chuk_tool_processor/models/tool_spec.py +350 -0
- chuk_tool_processor/models/validated_tool.py +36 -18
- chuk_tool_processor/observability/__init__.py +30 -0
- chuk_tool_processor/observability/metrics.py +312 -0
- chuk_tool_processor/observability/setup.py +105 -0
- chuk_tool_processor/observability/tracing.py +345 -0
- chuk_tool_processor/plugins/__init__.py +1 -1
- chuk_tool_processor/plugins/discovery.py +11 -11
- chuk_tool_processor/plugins/parsers/__init__.py +1 -1
- chuk_tool_processor/plugins/parsers/base.py +1 -2
- chuk_tool_processor/plugins/parsers/function_call_tool.py +13 -8
- chuk_tool_processor/plugins/parsers/json_tool.py +4 -3
- chuk_tool_processor/plugins/parsers/openai_tool.py +12 -7
- chuk_tool_processor/plugins/parsers/xml_tool.py +4 -4
- chuk_tool_processor/registry/__init__.py +12 -12
- chuk_tool_processor/registry/auto_register.py +22 -30
- chuk_tool_processor/registry/decorators.py +127 -129
- chuk_tool_processor/registry/interface.py +26 -23
- chuk_tool_processor/registry/metadata.py +27 -22
- chuk_tool_processor/registry/provider.py +17 -18
- chuk_tool_processor/registry/providers/__init__.py +16 -19
- chuk_tool_processor/registry/providers/memory.py +18 -25
- chuk_tool_processor/registry/tool_export.py +42 -51
- chuk_tool_processor/utils/validation.py +15 -16
- chuk_tool_processor-0.9.7.dist-info/METADATA +1813 -0
- chuk_tool_processor-0.9.7.dist-info/RECORD +67 -0
- chuk_tool_processor-0.6.4.dist-info/METADATA +0 -697
- chuk_tool_processor-0.6.4.dist-info/RECORD +0 -60
- {chuk_tool_processor-0.6.4.dist-info → chuk_tool_processor-0.9.7.dist-info}/WHEEL +0 -0
- {chuk_tool_processor-0.6.4.dist-info → chuk_tool_processor-0.9.7.dist-info}/top_level.txt +0 -0
chuk_tool_processor/execution/strategies/subprocess_strategy.py (lines elided by the diff viewer are shown as …):

@@ -7,31 +7,33 @@ This strategy executes tools in separate Python processes using a process pool,
 providing isolation and potentially better parallelism on multi-core systems.
 
 Enhanced tool name resolution that properly handles:
-- Simple names: "get_current_time"
+- Simple names: "get_current_time"
 - Namespaced names: "diagnostic_test.get_current_time"
 - Cross-namespace fallback searching
 
 Properly handles tool serialization and ensures tool_name is preserved.
 """
+
 from __future__ import annotations
 
 import asyncio
 import concurrent.futures
+import contextlib
 import functools
 import inspect
 import os
 import pickle
+import platform
 import signal
-import …
-import …
-from …
-from typing import Any, AsyncIterator, Dict, List, Optional, Tuple, Set
+from collections.abc import AsyncIterator
+from datetime import UTC, datetime
+from typing import Any
 
+from chuk_tool_processor.logging import get_logger, log_context_span
 from chuk_tool_processor.models.execution_strategy import ExecutionStrategy
 from chuk_tool_processor.models.tool_call import ToolCall
 from chuk_tool_processor.models.tool_result import ToolResult
 from chuk_tool_processor.registry.interface import ToolRegistryInterface
-from chuk_tool_processor.logging import get_logger, log_context_span
 
 logger = get_logger("chuk_tool_processor.execution.subprocess_strategy")
 
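The import changes pin down the minimum runtime: `datetime.UTC` exists only on Python 3.11+, `float | None` unions need 3.10+, and the builtin `dict[str, Any]` generics need 3.9+. A quick sanity check of those version assumptions (illustrative, not part of the package):

from datetime import UTC, timezone

assert UTC is timezone.utc  # datetime.UTC is the 3.11+ alias for timezone.utc

def f(timeout: float | None = None) -> dict[str, float]:  # PEP 604 / PEP 585 syntax
    return {"timeout": timeout or 0.0}

print(f(1.5))  # {'timeout': 1.5}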
@@ -44,7 +46,7 @@ def _init_worker():
     """Initialize worker process with signal handlers."""
     # Ignore keyboard interrupt in workers
     signal.signal(signal.SIGINT, signal.SIG_IGN)
-
+
 
 def _pool_test_func():
     """Simple function to test if the process pool is working."""
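The initializer runs once in every worker so that Ctrl-C only interrupts the parent, which can then shut the pool down in an orderly way. A minimal standalone sketch of the same pattern (names here are illustrative):

import concurrent.futures
import signal

def init_worker() -> None:
    # Workers ignore SIGINT; only the parent reacts to Ctrl-C.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def square(x: int) -> int:
    return x * x

if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=2, initializer=init_worker) as pool:
        print(list(pool.map(square, range(5))))  # [0, 1, 4, 9, 16]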
@@ -52,38 +54,34 @@
 
 
 def _serialized_tool_worker(
-    tool_name: str,
-    …
-    arguments: Dict[str, Any],
-    timeout: Optional[float],
-    serialized_tool_data: bytes
-) -> Dict[str, Any]:
+    tool_name: str, namespace: str, arguments: dict[str, Any], timeout: float | None, serialized_tool_data: bytes
+) -> dict[str, Any]:
     """
     Worker function that uses serialized tools and ensures tool_name is available.
-
+
     This worker deserializes the complete tool and executes it, with multiple
     fallbacks to ensure tool_name is properly set.
-
+
     Args:
         tool_name: Name of the tool
         namespace: Namespace of the tool
         arguments: Arguments to pass to the tool
        timeout: Optional timeout in seconds
        serialized_tool_data: Pickled tool instance
-
+
     Returns:
         Serialized result data
     """
     import asyncio
-    import pickle
-    import os
     import inspect
-    …
-    …
-    …
+    import os
+    import pickle
+    from datetime import datetime
+
+    start_time = datetime.now(UTC)
     pid = os.getpid()
-    hostname = …
-
+    hostname = platform.node()
+
     result_data = {
         "tool": tool_name,
         "namespace": namespace,
@@ -94,17 +92,18 @@ def _serialized_tool_worker(
         "result": None,
         "error": None,
     }
-
+
     try:
         # Deserialize the complete tool
-        …
-        …
+        # This is safe as the data comes from the parent process, not untrusted external sources
+        tool = pickle.loads(serialized_tool_data)  # nosec B301
+
         # Multiple fallbacks to ensure tool_name is available
-
+
         # Fallback 1: If tool doesn't have tool_name, set it directly
-        if not hasattr(tool, …
+        if not hasattr(tool, "tool_name") or not tool.tool_name:
             tool.tool_name = tool_name
-
+
         # Fallback 2: If it's a class instead of instance, instantiate it
         if inspect.isclass(tool):
             try:
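The `# nosec B301` marker suppresses Bandit's pickle warning; the rationale in the added comment is that the bytes are produced by `pickle.dumps` in the parent process and never cross a trust boundary. A toy round-trip of the same shape (class name is illustrative):

import pickle

class EchoTool:
    tool_name = "echo"

    async def execute(self, **kwargs):
        return kwargs

blob = pickle.dumps(EchoTool())           # serialized in the parent
tool = pickle.loads(blob)  # nosec B301   # safe: blob never left this process family
print(tool.tool_name)                     # echo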
@@ -112,46 +111,44 @@ def _serialized_tool_worker(
                 tool.tool_name = tool_name
             except Exception as e:
                 result_data["error"] = f"Failed to instantiate tool class: {str(e)}"
-                result_data["end_time"] = datetime.now(…
+                result_data["end_time"] = datetime.now(UTC).isoformat()
                 return result_data
-
+
         # Fallback 3: Ensure tool_name exists using setattr
-        if not getattr(tool, …
-        …
-
+        if not getattr(tool, "tool_name", None):
+            tool.tool_name = tool_name
+
         # Fallback 4: Verify execute method exists
-        if not hasattr(tool, …
-            result_data["error"] = …
-            result_data["end_time"] = datetime.now(…
+        if not hasattr(tool, "execute"):
+            result_data["error"] = "Tool missing execute method"
+            result_data["end_time"] = datetime.now(UTC).isoformat()
             return result_data
-
+
         # Create event loop for execution
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
-
+
         try:
             # Execute the tool with timeout
             if timeout is not None and timeout > 0:
-                result_value = loop.run_until_complete(
-                    asyncio.wait_for(tool.execute(**arguments), timeout)
-                )
+                result_value = loop.run_until_complete(asyncio.wait_for(tool.execute(**arguments), timeout))
             else:
                 result_value = loop.run_until_complete(tool.execute(**arguments))
-
+
             result_data["result"] = result_value
-
-        except …
+
+        except TimeoutError:
             result_data["error"] = f"Tool execution timed out after {timeout}s"
         except Exception as e:
             result_data["error"] = f"Tool execution failed: {str(e)}"
-
+
         finally:
             loop.close()
-
+
     except Exception as e:
         result_data["error"] = f"Worker error: {str(e)}"
-
-        result_data["end_time"] = datetime.now(…
+
+    result_data["end_time"] = datetime.now(UTC).isoformat()
     return result_data
 
 
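Each worker invocation spins up a fresh event loop, runs the tool's async `execute`, and tears the loop down. Since Python 3.11, `asyncio.TimeoutError` is an alias of the builtin `TimeoutError`, which is why the bare `except TimeoutError:` still catches `asyncio.wait_for` timeouts. A self-contained sketch of the run-async-in-a-worker pattern (names are illustrative):

import asyncio

async def execute(x: int) -> int:
    await asyncio.sleep(0)
    return x + 1

def run_in_worker(timeout: float | None = None) -> int:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        coro = execute(41)
        if timeout is not None and timeout > 0:
            return loop.run_until_complete(asyncio.wait_for(coro, timeout))
        return loop.run_until_complete(coro)
    finally:
        loop.close()

print(run_in_worker(1.0))  # 42
assert asyncio.TimeoutError is TimeoutError  # true on Python 3.11+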
@@ -161,11 +158,11 @@ def _serialized_tool_worker(
 class SubprocessStrategy(ExecutionStrategy):
     """
     Execute tools in separate processes for isolation and parallelism.
-
+
     This strategy creates a pool of worker processes and distributes tool calls
     among them. Each tool executes in its own process, providing isolation and
     parallelism.
-
+
     Enhanced tool name resolution and proper tool serialization.
     """
 
@@ -174,12 +171,12 @@ class SubprocessStrategy(ExecutionStrategy):
         registry: ToolRegistryInterface,
         *,
         max_workers: int = 4,
-        default_timeout: …
+        default_timeout: float | None = None,
         worker_init_timeout: float = 5.0,
     ) -> None:
         """
         Initialize the subprocess execution strategy.
-
+
         Args:
             registry: Tool registry for tool lookups
             max_workers: Maximum number of worker processes
@@ -190,26 +187,25 @@ class SubprocessStrategy(ExecutionStrategy):
         self.max_workers = max_workers
         self.default_timeout = default_timeout or 30.0  # Always have a default
         self.worker_init_timeout = worker_init_timeout
-
+
         # Process pool (initialized lazily)
-        self._process_pool: …
+        self._process_pool: concurrent.futures.ProcessPoolExecutor | None = None
         self._pool_lock = asyncio.Lock()
-
+
         # Task tracking for cleanup
-        self._active_tasks: …
+        self._active_tasks: set[asyncio.Task] = set()
         self._shutdown_event = asyncio.Event()
         self._shutting_down = False
-
-        logger.debug(…
-        …
-        …
+
+        logger.debug(
+            "SubprocessStrategy initialized with timeout: %ss, max_workers: %d", self.default_timeout, max_workers
+        )
+
         # Register shutdown handler if in main thread
         try:
             loop = asyncio.get_running_loop()
             for sig in (signal.SIGTERM, signal.SIGINT):
-                loop.add_signal_handler(
-                    sig, lambda s=sig: asyncio.create_task(self._signal_handler(s))
-                )
+                loop.add_signal_handler(sig, lambda s=sig: asyncio.create_task(self._signal_handler(s)))
         except (RuntimeError, NotImplementedError):
             # Not in the main thread or not on Unix
             pass
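Registering the handlers inside `try/except (RuntimeError, NotImplementedError)` keeps the constructor usable off the main thread and on platforms where `loop.add_signal_handler` is unsupported (notably Windows event loops). The guard in isolation (illustrative):

import asyncio
import signal

async def main() -> None:
    loop = asyncio.get_running_loop()
    try:
        loop.add_signal_handler(signal.SIGTERM, lambda: print("SIGTERM received"))
    except (RuntimeError, NotImplementedError):
        pass  # not the main thread, or the platform does not support it

asyncio.run(main())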
@@ -218,24 +214,23 @@ class SubprocessStrategy(ExecutionStrategy):
         """Initialize the process pool if not already initialized."""
         if self._process_pool is not None:
             return
-
+
         async with self._pool_lock:
             if self._process_pool is not None:
                 return
-
+
             # Create process pool
             self._process_pool = concurrent.futures.ProcessPoolExecutor(
                 max_workers=self.max_workers,
                 initializer=_init_worker,
             )
-
+
             # Test the pool with a simple task
             loop = asyncio.get_running_loop()
             try:
                 # Use a module-level function instead of a lambda
                 await asyncio.wait_for(
-                    loop.run_in_executor(self._process_pool, _pool_test_func),
-                    timeout=self.worker_init_timeout
+                    loop.run_in_executor(self._process_pool, _pool_test_func), timeout=self.worker_init_timeout
                 )
                 logger.info("Process pool initialized with %d workers", self.max_workers)
             except Exception as e:
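`_ensure_pool` is a textbook async double-checked lock: the cheap `is not None` test avoids taking the lock on the hot path, and the re-test inside the lock stops two coroutines from building two pools. Reduced to its skeleton (stand-in payload, illustrative):

import asyncio

class LazyResource:
    def __init__(self) -> None:
        self._resource: object | None = None
        self._lock = asyncio.Lock()

    async def ensure(self) -> object:
        if self._resource is not None:    # fast path, no lock taken
            return self._resource
        async with self._lock:
            if self._resource is None:    # re-check under the lock
                self._resource = object() # stand-in for ProcessPoolExecutor(...)
            return self._resource

async def main() -> None:
    r = LazyResource()
    a, b = await asyncio.gather(r.ensure(), r.ensure())
    assert a is b  # only one resource is ever created

asyncio.run(main())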
@@ -244,16 +239,16 @@ class SubprocessStrategy(ExecutionStrategy):
             self._process_pool = None
             logger.error("Failed to initialize process pool: %s", e)
             raise RuntimeError(f"Failed to initialize process pool: {e}") from e
-
+
     # ------------------------------------------------------------------ #
     # 🔌 legacy façade for older wrappers                                 #
     # ------------------------------------------------------------------ #
     async def execute(
         self,
-        calls: …
+        calls: list[ToolCall],
         *,
-        timeout: …
-    ) -> …
+        timeout: float | None = None,
+    ) -> list[ToolResult]:
         """
         Back-compat shim.
 
@@ -262,25 +257,25 @@ class SubprocessStrategy(ExecutionStrategy):
         The real implementation lives in :meth:`run`, so we just forward.
         """
         return await self.run(calls, timeout)
-
+
     async def run(
         self,
-        calls: …
-        timeout: …
-    ) -> …
+        calls: list[ToolCall],
+        timeout: float | None = None,
+    ) -> list[ToolResult]:
         """
         Execute tool calls in separate processes.
-
+
         Args:
             calls: List of tool calls to execute
             timeout: Optional timeout for each execution (overrides default)
-
+
         Returns:
             List of tool results in the same order as calls
         """
         if not calls:
             return []
-
+
         if self._shutting_down:
             # Return early with error results if shutting down
             return [
@@ -288,50 +283,53 @@ class SubprocessStrategy(ExecutionStrategy):
                     tool=call.tool,
                     result=None,
                     error="System is shutting down",
-                    start_time=datetime.now(…
-                    end_time=datetime.now(…
-                    machine=…
+                    start_time=datetime.now(UTC),
+                    end_time=datetime.now(UTC),
+                    machine=platform.node(),
                     pid=os.getpid(),
                 )
                 for call in calls
             ]
-
+
         # Use default_timeout if no timeout specified
         effective_timeout = timeout if timeout is not None else self.default_timeout
         logger.debug("Executing %d calls in subprocesses with %ss timeout each", len(calls), effective_timeout)
-
+
         # Create tasks for each call
         tasks = []
         for call in calls:
-            task = asyncio.create_task(
-                …
-            …
+            task = asyncio.create_task(
+                self._execute_single_call(
+                    call,
+                    effective_timeout,  # Always pass concrete timeout
+                )
+            )
             self._active_tasks.add(task)
             task.add_done_callback(self._active_tasks.discard)
             tasks.append(task)
-
+
         # Execute all tasks concurrently
         async with log_context_span("subprocess_execution", {"num_calls": len(calls)}):
             return await asyncio.gather(*tasks)
 
     async def stream_run(
         self,
-        calls: …
-        timeout: …
+        calls: list[ToolCall],
+        timeout: float | None = None,
     ) -> AsyncIterator[ToolResult]:
         """
         Execute tool calls and yield results as they become available.
-
+
         Args:
             calls: List of tool calls to execute
             timeout: Optional timeout for each execution
-
+
         Yields:
             Tool results as they complete (not necessarily in order)
         """
         if not calls:
             return
-
+
         if self._shutting_down:
             # Yield error results if shutting down
             for call in calls:
@@ -339,40 +337,42 @@ class SubprocessStrategy(ExecutionStrategy):
                     tool=call.tool,
                     result=None,
                     error="System is shutting down",
-                    start_time=datetime.now(…
-                    end_time=datetime.now(…
-                    machine=…
+                    start_time=datetime.now(UTC),
+                    end_time=datetime.now(UTC),
+                    machine=platform.node(),
                     pid=os.getpid(),
                 )
             return
-
+
         # Use default_timeout if no timeout specified
         effective_timeout = timeout if timeout is not None else self.default_timeout
-
+
         # Create a queue for results
         queue = asyncio.Queue()
-
+
         # Start all executions and have them put results in the queue
         pending = set()
         for call in calls:
-            task = asyncio.create_task(
-                …
-            …
+            task = asyncio.create_task(
+                self._execute_to_queue(
+                    call,
+                    queue,
+                    effective_timeout,  # Always pass concrete timeout
+                )
+            )
             self._active_tasks.add(task)
             task.add_done_callback(self._active_tasks.discard)
             pending.add(task)
-
+
         # Yield results as they become available
         while pending:
             # Get next result from queue
             result = await queue.get()
             yield result
-
+
             # Check for completed tasks
-            done, pending = await asyncio.wait(
-                …
-            )
-
+            done, pending = await asyncio.wait(pending, timeout=0, return_when=asyncio.FIRST_COMPLETED)
+
             # Handle any exceptions
             for task in done:
                 try:
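`stream_run` decouples completion order from submission order: every task pushes its result into a shared `asyncio.Queue`, and the generator yields whatever arrives first. The same shape in miniature (illustrative):

import asyncio
import random

async def work(i: int, queue: asyncio.Queue) -> None:
    await asyncio.sleep(random.random() / 10)
    await queue.put(i)

async def stream(n: int):
    queue: asyncio.Queue = asyncio.Queue()
    tasks = [asyncio.create_task(work(i, queue)) for i in range(n)]
    for _ in range(n):
        yield await queue.get()  # completion order, not submission order
    await asyncio.gather(*tasks)

async def main() -> None:
    async for result in stream(5):
        print(result)

asyncio.run(main())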
@@ -397,22 +397,22 @@ class SubprocessStrategy(ExecutionStrategy):
     ) -> ToolResult:
         """
         Execute a single tool call with enhanced tool resolution and serialization.
-
+
         Args:
             call: Tool call to execute
             timeout: Timeout in seconds (required)
-
+
         Returns:
             Tool execution result
         """
-        start_time = datetime.now(…
-
+        start_time = datetime.now(UTC)
+
         logger.debug("Executing %s in subprocess with %ss timeout", call.tool, timeout)
-
+
         try:
             # Ensure pool is initialized
             await self._ensure_pool()
-
+
             # Use enhanced tool resolution instead of direct lookup
             tool_impl, resolved_namespace = await self._resolve_tool_info(call.tool, call.namespace)
             if tool_impl is None:
@@ -421,29 +421,24 @@ class SubprocessStrategy(ExecutionStrategy):
                     result=None,
                     error=f"Tool '{call.tool}' not found in any namespace",
                     start_time=start_time,
-                    end_time=datetime.now(…
-                    machine=…
+                    end_time=datetime.now(UTC),
+                    machine=platform.node(),
                     pid=os.getpid(),
                 )
-
+
             logger.debug(f"Resolved subprocess tool '{call.tool}' to namespace '{resolved_namespace}'")
-
+
             # Ensure tool is properly prepared before serialization
-            if inspect.isclass(tool_impl) …
-            …
-            else:
-                tool = tool_impl
-
+            tool = tool_impl() if inspect.isclass(tool_impl) else tool_impl
+
             # Ensure tool_name attribute exists
-            if not hasattr(tool, …
+            if not hasattr(tool, "tool_name") or not tool.tool_name:
                 tool.tool_name = call.tool
-            …
-                tool.tool_name = call.tool
-
+
             # Also set _tool_name class attribute for consistency
-            if not hasattr(tool.__class__, …
+            if not hasattr(tool.__class__, "_tool_name"):
                 tool.__class__._tool_name = call.tool
-
+
             # Serialize the properly prepared tool
             try:
                 serialized_tool_data = pickle.dumps(tool)
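The removed branching collapses into one conditional expression that normalizes whatever the registry returned, class or instance, into an instance before pickling. In isolation (illustrative names):

import inspect

class MyTool:
    pass

def normalize(tool_impl):
    # Registries may hold either the class or an already-built instance.
    return tool_impl() if inspect.isclass(tool_impl) else tool_impl

assert isinstance(normalize(MyTool), MyTool)
assert isinstance(normalize(MyTool()), MyTool)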
@@ -455,15 +450,15 @@ class SubprocessStrategy(ExecutionStrategy):
                     result=None,
                     error=f"Tool serialization failed: {str(e)}",
                     start_time=start_time,
-                    end_time=datetime.now(…
-                    machine=…
+                    end_time=datetime.now(UTC),
+                    machine=platform.node(),
                     pid=os.getpid(),
                 )
-
+
             # Execute in subprocess using the FIXED worker
             loop = asyncio.get_running_loop()
             safety_timeout = timeout + 5.0
-
+
             try:
                 result_data = await asyncio.wait_for(
                     loop.run_in_executor(
@@ -474,29 +469,29 @@ class SubprocessStrategy(ExecutionStrategy):
                             resolved_namespace,  # Use resolved namespace
                             call.arguments,
                             timeout,
-                            serialized_tool_data  # Pass serialized tool data
-                        )
+                            serialized_tool_data,  # Pass serialized tool data
+                        ),
                     ),
-                    timeout=safety_timeout
+                    timeout=safety_timeout,
                 )
-
+
                 # Parse timestamps
                 if isinstance(result_data["start_time"], str):
                     result_data["start_time"] = datetime.fromisoformat(result_data["start_time"])
-
+
                 if isinstance(result_data["end_time"], str):
                     result_data["end_time"] = datetime.fromisoformat(result_data["end_time"])
-
-                end_time = datetime.now(…
+
+                end_time = datetime.now(UTC)
                 actual_duration = (end_time - start_time).total_seconds()
-
+
                 if result_data.get("error"):
-                    logger.debug(…
-                    …
+                    logger.debug(
+                        "%s subprocess failed after %.3fs: %s", call.tool, actual_duration, result_data["error"]
+                    )
                 else:
-                    logger.debug("%s subprocess completed in %.3fs (limit: %ss)",
-                    …
-                    …
+                    logger.debug("%s subprocess completed in %.3fs (limit: %ss)", call.tool, actual_duration, timeout)
+
                 # Create ToolResult from worker data
                 return ToolResult(
                     tool=result_data.get("tool", call.tool),
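Worker results carry timestamps as ISO-8601 strings, since processes exchange plain picklable data, so the parent re-hydrates them with `datetime.fromisoformat`. The round trip (illustrative):

from datetime import UTC, datetime

stamp = datetime.now(UTC).isoformat()   # e.g. '2025-01-01T12:00:00.000000+00:00'
parsed = datetime.fromisoformat(stamp)  # timezone-aware datetime again
assert parsed.tzinfo is not None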
@@ -504,42 +499,46 @@ class SubprocessStrategy(ExecutionStrategy):
                     error=result_data.get("error"),
                     start_time=result_data.get("start_time", start_time),
                     end_time=result_data.get("end_time", end_time),
-                    machine=result_data.get("machine", …
+                    machine=result_data.get("machine", platform.node()),
                     pid=result_data.get("pid", os.getpid()),
                 )
-
-            except …
-                end_time = datetime.now(…
+
+            except TimeoutError:
+                end_time = datetime.now(UTC)
                 actual_duration = (end_time - start_time).total_seconds()
-                logger.debug(…
-                …
-                …
+                logger.debug(
+                    "%s subprocess timed out after %.3fs (safety limit: %ss)",
+                    call.tool,
+                    actual_duration,
+                    safety_timeout,
+                )
+
                 return ToolResult(
                     tool=call.tool,
                     result=None,
                     error=f"Worker process timed out after {safety_timeout}s",
                     start_time=start_time,
                     end_time=end_time,
-                    machine=…
+                    machine=platform.node(),
                     pid=os.getpid(),
                 )
-
+
         except concurrent.futures.process.BrokenProcessPool:
             logger.error("Process pool broke during execution - recreating")
             if self._process_pool:
                 self._process_pool.shutdown(wait=False)
                 self._process_pool = None
-
+
             return ToolResult(
                 tool=call.tool,
                 result=None,
                 error="Worker process crashed",
                 start_time=start_time,
-                end_time=datetime.now(…
-                machine=…
+                end_time=datetime.now(UTC),
+                machine=platform.node(),
                 pid=os.getpid(),
             )
-
+
         except asyncio.CancelledError:
             logger.debug("%s subprocess was cancelled", call.tool)
             return ToolResult(
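Note the layered failure handling: the inner `except TimeoutError` covers the safety deadline (`timeout + 5.0`), `BrokenProcessPool` discards the pool so it is lazily rebuilt on the next call, and `CancelledError` becomes an error `ToolResult` instead of propagating. A sketch of the pool-recovery idea (illustrative names):

import concurrent.futures

def work(x: int) -> int:
    return x * 2

def run_with_recovery(pool: concurrent.futures.ProcessPoolExecutor, x: int):
    # A dead worker (segfault, OOM kill) marks the whole pool broken;
    # discard it so the caller can lazily build a fresh one.
    try:
        return pool.submit(work, x).result()
    except concurrent.futures.process.BrokenProcessPool:
        pool.shutdown(wait=False)
        return None

if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=1) as pool:
        print(run_with_recovery(pool, 21))  # 42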
@@ -547,54 +546,55 @@ class SubprocessStrategy(ExecutionStrategy):
                 result=None,
                 error="Execution was cancelled",
                 start_time=start_time,
-                end_time=datetime.now(…
-                machine=…
+                end_time=datetime.now(UTC),
+                machine=platform.node(),
                 pid=os.getpid(),
             )
-
+
         except Exception as e:
             logger.exception("Error executing %s in subprocess: %s", call.tool, e)
-            end_time = datetime.now(…
+            end_time = datetime.now(UTC)
             actual_duration = (end_time - start_time).total_seconds()
-            logger.debug("%s subprocess setup failed after %.3fs: %s",
-            …
-            …
+            logger.debug("%s subprocess setup failed after %.3fs: %s", call.tool, actual_duration, e)
+
             return ToolResult(
                 tool=call.tool,
                 result=None,
                 error=f"Error: {str(e)}",
                 start_time=start_time,
                 end_time=end_time,
-                machine=…
+                machine=platform.node(),
                 pid=os.getpid(),
             )
 
-    async def _resolve_tool_info(…
+    async def _resolve_tool_info(
+        self, tool_name: str, preferred_namespace: str = "default"
+    ) -> tuple[Any | None, str | None]:
         """
         Enhanced tool name resolution with comprehensive fallback logic.
-
+
         This method handles:
         1. Simple names: "get_current_time" -> search in specified namespace first, then all namespaces
         2. Namespaced names: "diagnostic_test.get_current_time" -> extract namespace and tool name
         3. Fallback searching across all namespaces when not found in default
-
+
         Args:
             tool_name: Name of the tool to resolve
             preferred_namespace: Preferred namespace to search first
-
+
         Returns:
             Tuple of (tool_object, resolved_namespace) or (None, None) if not found
         """
         logger.debug(f"Resolving tool: '{tool_name}' (preferred namespace: '{preferred_namespace}')")
-
+
         # Strategy 1: Handle namespaced tool names (namespace.tool_name format)
-        if …
-            parts = tool_name.split(…
+        if "." in tool_name:
+            parts = tool_name.split(".", 1)  # Split on first dot only
             namespace = parts[0]
             actual_tool_name = parts[1]
-
+
             logger.debug(f"Namespaced lookup: namespace='{namespace}', tool='{actual_tool_name}'")
-
+
             tool = await self.registry.get_tool(actual_tool_name, namespace)
             if tool is not None:
                 logger.debug(f"Found tool '{actual_tool_name}' in namespace '{namespace}'")
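Splitting on the first dot only means namespaces cannot contain dots, but tool names can. A minimal version of the name parsing (illustrative):

def split_namespaced(name: str, default: str = "default") -> tuple[str, str]:
    # "diagnostic_test.get_current_time" -> ("diagnostic_test", "get_current_time")
    if "." in name:
        namespace, tool = name.split(".", 1)  # first dot only
        return namespace, tool
    return default, name

assert split_namespaced("get_current_time") == ("default", "get_current_time")
assert split_namespaced("diag.get.time") == ("diag", "get.time")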
@@ -602,7 +602,7 @@ class SubprocessStrategy(ExecutionStrategy):
             else:
                 logger.debug(f"Tool '{actual_tool_name}' not found in namespace '{namespace}'")
                 return None, None
-
+
         # Strategy 2: Simple tool name - try preferred namespace first
         if preferred_namespace:
             logger.debug(f"Simple tool lookup: trying preferred namespace '{preferred_namespace}' for '{tool_name}'")
@@ -610,7 +610,7 @@ class SubprocessStrategy(ExecutionStrategy):
             if tool is not None:
                 logger.debug(f"Found tool '{tool_name}' in preferred namespace '{preferred_namespace}'")
                 return tool, preferred_namespace
-
+
         # Strategy 3: Try default namespace if different from preferred
         if preferred_namespace != "default":
             logger.debug(f"Simple tool lookup: trying default namespace for '{tool_name}'")
@@ -618,30 +618,30 @@ class SubprocessStrategy(ExecutionStrategy):
             if tool is not None:
                 logger.debug(f"Found tool '{tool_name}' in default namespace")
                 return tool, "default"
-
+
         # Strategy 4: Search all namespaces as fallback
         logger.debug(f"Tool '{tool_name}' not in preferred/default namespace, searching all namespaces...")
-
+
         try:
             # Get all available namespaces
             namespaces = await self.registry.list_namespaces()
             logger.debug(f"Available namespaces: {namespaces}")
-
+
             # Search each namespace
             for namespace in namespaces:
                 if namespace in [preferred_namespace, "default"]:
                     continue  # Already tried these
-
+
                 logger.debug(f"Searching namespace '{namespace}' for tool '{tool_name}'")
                 tool = await self.registry.get_tool(tool_name, namespace)
                 if tool is not None:
                     logger.debug(f"Found tool '{tool_name}' in namespace '{namespace}'")
                     return tool, namespace
-
+
             # Strategy 5: Final fallback - list all tools and do fuzzy matching
             logger.debug(f"Tool '{tool_name}' not found in any namespace, trying fuzzy matching...")
             all_tools = await self.registry.list_tools()
-
+
             # Look for exact matches in tool name (ignoring namespace)
             for namespace, registered_name in all_tools:
                 if registered_name == tool_name:
@@ -649,13 +649,13 @@ class SubprocessStrategy(ExecutionStrategy):
                     tool = await self.registry.get_tool(registered_name, namespace)
                     if tool is not None:
                         return tool, namespace
-
+
             # Log all available tools for debugging
             logger.debug(f"Available tools: {all_tools}")
-
+
         except Exception as e:
             logger.error(f"Error during namespace search: {e}")
-
+
         logger.warning(f"Tool '{tool_name}' not found in any namespace")
         return None, None
 
@@ -663,26 +663,26 @@ class SubprocessStrategy(ExecutionStrategy):
     def supports_streaming(self) -> bool:
         """Check if this strategy supports streaming execution."""
         return True
-
+
     async def _signal_handler(self, sig: int) -> None:
         """Handle termination signals."""
         signame = signal.Signals(sig).name
         logger.info("Received %s, shutting down process pool", signame)
         await self.shutdown()
-
+
     async def shutdown(self) -> None:
         """Enhanced shutdown with graceful task handling and proper null checks."""
         if self._shutting_down:
             return
-
+
         self._shutting_down = True
         self._shutdown_event.set()
-
+
         # Handle active tasks gracefully
         active_tasks = list(self._active_tasks)
         if active_tasks:
             logger.debug(f"Completing {len(active_tasks)} active operations")
-
+
             # Cancel tasks with brief intervals for clean handling
             for task in active_tasks:
                 try:
@@ -691,22 +691,18 @@ class SubprocessStrategy(ExecutionStrategy):
                 except Exception:
                     pass
                 # Small delay to prevent overwhelming the event loop
-                …
+                with contextlib.suppress(Exception):
                     await asyncio.sleep(0.001)
-                …
-                    pass
-
+
             # Allow reasonable time for completion
             try:
-                completion_task = asyncio.create_task(
-                    asyncio.gather(*active_tasks, return_exceptions=True)
-                )
+                completion_task = asyncio.create_task(asyncio.gather(*active_tasks, return_exceptions=True))
                 await asyncio.wait_for(completion_task, timeout=2.0)
-            except …
+            except TimeoutError:
                 logger.debug("Active operations completed within timeout constraints")
             except Exception:
                 logger.debug("Active operations completed successfully")
-
+
         # Handle process pool shutdown with proper null checks
         if self._process_pool is not None:
             logger.debug("Finalizing process pool")
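`contextlib.suppress` replaces a four-line try/except/pass with a single context manager; both forms swallow the exception, the new one just states the intent up front. Side by side (illustrative):

import contextlib

def flaky() -> None:
    raise ValueError("boom")

# Before: explicit try/except/pass
try:
    flaky()
except Exception:
    pass

# After: the suppression is declared up front
with contextlib.suppress(Exception):
    flaky()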
@@ -714,18 +710,18 @@ class SubprocessStrategy(ExecutionStrategy):
             # Store reference and null check before async operation
             pool_to_shutdown = self._process_pool
             self._process_pool = None  # Clear immediately to prevent race conditions
-
+
             # Create shutdown task with the stored reference
             shutdown_task = asyncio.create_task(
                 asyncio.get_event_loop().run_in_executor(
                     None, lambda: pool_to_shutdown.shutdown(wait=False) if pool_to_shutdown else None
                 )
             )
-
+
             try:
                 await asyncio.wait_for(shutdown_task, timeout=1.0)
                 logger.debug("Process pool shutdown completed")
-            except …
+            except TimeoutError:
                 logger.debug("Process pool shutdown timed out, forcing cleanup")
                 if not shutdown_task.done():
                     shutdown_task.cancel()
@@ -734,4 +730,4 @@ class SubprocessStrategy(ExecutionStrategy):
             except Exception as e:
                 logger.debug(f"Process pool finalization completed: {e}")
         else:
-            logger.debug("Process pool already cleaned up")
+            logger.debug("Process pool already cleaned up")