chuk-tool-processor 0.1.6__py3-none-any.whl → 0.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of chuk-tool-processor might be problematic. Click here for more details.

Files changed (46)
  1. chuk_tool_processor/core/processor.py +345 -132
  2. chuk_tool_processor/execution/strategies/inprocess_strategy.py +522 -71
  3. chuk_tool_processor/execution/strategies/subprocess_strategy.py +559 -64
  4. chuk_tool_processor/execution/tool_executor.py +282 -24
  5. chuk_tool_processor/execution/wrappers/caching.py +465 -123
  6. chuk_tool_processor/execution/wrappers/rate_limiting.py +199 -86
  7. chuk_tool_processor/execution/wrappers/retry.py +133 -23
  8. chuk_tool_processor/logging/__init__.py +83 -10
  9. chuk_tool_processor/logging/context.py +218 -22
  10. chuk_tool_processor/logging/formatter.py +56 -13
  11. chuk_tool_processor/logging/helpers.py +91 -16
  12. chuk_tool_processor/logging/metrics.py +75 -6
  13. chuk_tool_processor/mcp/mcp_tool.py +80 -35
  14. chuk_tool_processor/mcp/register_mcp_tools.py +74 -56
  15. chuk_tool_processor/mcp/setup_mcp_sse.py +41 -36
  16. chuk_tool_processor/mcp/setup_mcp_stdio.py +39 -37
  17. chuk_tool_processor/mcp/transport/sse_transport.py +351 -105
  18. chuk_tool_processor/models/execution_strategy.py +52 -3
  19. chuk_tool_processor/models/streaming_tool.py +110 -0
  20. chuk_tool_processor/models/tool_call.py +56 -4
  21. chuk_tool_processor/models/tool_result.py +115 -9
  22. chuk_tool_processor/models/validated_tool.py +15 -13
  23. chuk_tool_processor/plugins/discovery.py +115 -70
  24. chuk_tool_processor/plugins/parsers/base.py +13 -5
  25. chuk_tool_processor/plugins/parsers/{function_call_tool_plugin.py → function_call_tool.py} +39 -20
  26. chuk_tool_processor/plugins/parsers/json_tool.py +50 -0
  27. chuk_tool_processor/plugins/parsers/openai_tool.py +88 -0
  28. chuk_tool_processor/plugins/parsers/xml_tool.py +74 -20
  29. chuk_tool_processor/registry/__init__.py +46 -7
  30. chuk_tool_processor/registry/auto_register.py +92 -28
  31. chuk_tool_processor/registry/decorators.py +134 -11
  32. chuk_tool_processor/registry/interface.py +48 -14
  33. chuk_tool_processor/registry/metadata.py +52 -6
  34. chuk_tool_processor/registry/provider.py +75 -36
  35. chuk_tool_processor/registry/providers/__init__.py +49 -10
  36. chuk_tool_processor/registry/providers/memory.py +59 -48
  37. chuk_tool_processor/registry/tool_export.py +208 -39
  38. chuk_tool_processor/utils/validation.py +18 -13
  39. chuk_tool_processor-0.2.dist-info/METADATA +401 -0
  40. chuk_tool_processor-0.2.dist-info/RECORD +58 -0
  41. {chuk_tool_processor-0.1.6.dist-info → chuk_tool_processor-0.2.dist-info}/WHEEL +1 -1
  42. chuk_tool_processor/plugins/parsers/json_tool_plugin.py +0 -38
  43. chuk_tool_processor/plugins/parsers/openai_tool_plugin.py +0 -76
  44. chuk_tool_processor-0.1.6.dist-info/METADATA +0 -462
  45. chuk_tool_processor-0.1.6.dist-info/RECORD +0 -57
  46. {chuk_tool_processor-0.1.6.dist-info → chuk_tool_processor-0.2.dist-info}/top_level.txt +0 -0
@@ -1,103 +1,598 @@
1
- # chuk_tool_processor/execution/subprocess_strategy.py
1
+ # chuk_tool_processor/execution/strategies/subprocess_strategy.py
2
+ """
3
+ Subprocess execution strategy - truly runs tools in separate OS processes.
4
+
5
+ This strategy executes tools in separate Python processes using a process pool,
6
+ providing isolation and potentially better parallelism on multi-core systems.
7
+
8
+ FIXED: Ensures consistent timeout handling across all execution paths.
9
+ """
10
+ from __future__ import annotations
11
+
2
12
  import asyncio
3
- from chuk_tool_processor.execution.strategies.inprocess_strategy import InProcessStrategy
4
- import os
5
- import importlib
13
+ import concurrent.futures
14
+ import functools
6
15
  import inspect
16
+ import os
17
+ import pickle
18
+ import signal
19
+ import sys
20
+ import traceback
7
21
  from datetime import datetime, timezone
8
- from typing import List, Optional, Dict, Any
9
- from concurrent.futures import ProcessPoolExecutor
22
+ from typing import Any, AsyncIterator, Dict, List, Optional, Tuple, Set
10
23
 
11
- # imports
12
24
  from chuk_tool_processor.models.execution_strategy import ExecutionStrategy
13
25
  from chuk_tool_processor.models.tool_call import ToolCall
14
26
  from chuk_tool_processor.models.tool_result import ToolResult
15
- from chuk_tool_processor.logging import get_logger
27
+ from chuk_tool_processor.registry.interface import ToolRegistryInterface
28
+ from chuk_tool_processor.logging import get_logger, log_context_span
16
29
 
17
30
  logger = get_logger("chuk_tool_processor.execution.subprocess_strategy")
18
31
 
19
- # Define a top-level function for subprocess execution
20
- def _execute_tool_in_process(tool_data: Dict[str, Any]) -> Dict[str, Any]:
21
- """
22
- Execute a tool in a separate process.
23
32
 
24
- Args:
25
- tool_data: Dictionary with:
26
- - tool_name: Name of the tool
27
- - module_name: Module containing the tool class
28
- - class_name: Name of the tool class
29
- - arguments: Arguments for the tool
30
- - is_async: Whether the tool's execute is async
33
+ # --------------------------------------------------------------------------- #
34
+ # Module-level helper functions for worker processes - these must be at the module
35
+ # level so they can be pickled
36
+ # --------------------------------------------------------------------------- #
37
+ def _init_worker():
38
+ """Initialize worker process with signal handlers."""
39
+ # Ignore keyboard interrupt in workers
40
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
41
+
42
+
43
+ def _pool_test_func():
44
+ """Simple function to test if the process pool is working."""
45
+ return "ok"
31
46
 
47
+
48
+ def _process_worker(
49
+ tool_name: str,
50
+ namespace: str,
51
+ module_name: str,
52
+ class_name: str,
53
+ arguments: Dict[str, Any],
54
+ timeout: Optional[float]
55
+ ) -> Dict[str, Any]:
56
+ """
57
+ Worker function that runs in a separate process.
58
+
59
+ Args:
60
+ tool_name: Name of the tool
61
+ namespace: Namespace of the tool
62
+ module_name: Module containing the tool class
63
+ class_name: Name of the tool class
64
+ arguments: Arguments to pass to the tool
65
+ timeout: Optional timeout in seconds
66
+
32
67
  Returns:
33
- A dict containing result, error, start_time, end_time, pid, machine.
68
+ Serialized result data
34
69
  """
35
- # Extract data
36
- tool_name = tool_data.get("tool_name", "unknown")
37
- module_name = tool_data.get("module_name")
38
- class_name = tool_data.get("class_name")
39
- arguments = tool_data.get("arguments", {})
40
- is_async = tool_data.get("is_async", False)
41
-
70
+ import asyncio
71
+ import importlib
72
+ import inspect
73
+ import os
74
+ import sys
75
+ import time
76
+ from datetime import datetime, timezone
77
+
42
78
  start_time = datetime.now(timezone.utc)
43
79
  pid = os.getpid()
44
- machine = os.uname().nodename
45
- result_data = {"result": None, "error": None, "start_time": start_time, "end_time": None, "pid": pid, "machine": machine}
46
-
80
+ hostname = os.uname().nodename
81
+
82
+ # Data for the result
83
+ result_data = {
84
+ "tool": tool_name,
85
+ "namespace": namespace,
86
+ "start_time": start_time.isoformat(),
87
+ "end_time": None,
88
+ "machine": hostname,
89
+ "pid": pid,
90
+ "result": None,
91
+ "error": None,
92
+ }
93
+
47
94
  try:
95
+ # Import the module
48
96
  if not module_name or not class_name:
49
- result_data["error"] = f"Missing module_name or class_name for tool {tool_name}"
97
+ raise ValueError("Missing module or class name")
98
+
99
+ # Import the module
100
+ try:
101
+ module = importlib.import_module(module_name)
102
+ except ImportError as e:
103
+ result_data["error"] = f"Failed to import module {module_name}: {str(e)}"
104
+ result_data["end_time"] = datetime.now(timezone.utc).isoformat()
50
105
  return result_data
51
-
52
- # Load the tool class
53
- module = importlib.import_module(module_name)
54
- tool_class = getattr(module, class_name, None)
55
- if tool_class is None:
56
- result_data["error"] = f"Class {class_name} not found in module {module_name}"
106
+
107
+ # Get the class or function
108
+ try:
109
+ tool_class = getattr(module, class_name)
110
+ except AttributeError as e:
111
+ result_data["error"] = f"Failed to find {class_name} in {module_name}: {str(e)}"
112
+ result_data["end_time"] = datetime.now(timezone.utc).isoformat()
57
113
  return result_data
114
+
115
+ # Instantiate the tool
116
+ tool_instance = tool_class() if inspect.isclass(tool_class) else tool_class
58
117
 
59
- tool_instance = tool_class()
60
- # Determine execution path
61
- if is_async:
62
- import asyncio as _asyncio
63
- loop = _asyncio.new_event_loop()
64
- _asyncio.set_event_loop(loop)
65
- try:
66
- result_data["result"] = loop.run_until_complete(tool_instance.execute(**arguments))
67
- finally:
68
- loop.close()
118
+ # Find the execute method
119
+ if hasattr(tool_instance, "_aexecute") and inspect.iscoroutinefunction(
120
+ getattr(tool_instance.__class__, "_aexecute", None)
121
+ ):
122
+ execute_fn = tool_instance._aexecute
123
+ elif hasattr(tool_instance, "execute") and inspect.iscoroutinefunction(
124
+ getattr(tool_instance.__class__, "execute", None)
125
+ ):
126
+ execute_fn = tool_instance.execute
69
127
  else:
70
- result_data["result"] = tool_instance.execute(**arguments)
128
+ result_data["error"] = "Tool must have an async execute or _aexecute method"
129
+ result_data["end_time"] = datetime.now(timezone.utc).isoformat()
130
+ return result_data
131
+
132
+ # Create a new event loop for this process
133
+ loop = asyncio.new_event_loop()
134
+ asyncio.set_event_loop(loop)
135
+
136
+ try:
137
+ # Execute the tool with timeout
138
+ if timeout is not None and timeout > 0:
139
+ result_value = loop.run_until_complete(
140
+ asyncio.wait_for(execute_fn(**arguments), timeout)
141
+ )
142
+ else:
143
+ result_value = loop.run_until_complete(execute_fn(**arguments))
144
+
145
+ # Store the result
146
+ result_data["result"] = result_value
147
+
148
+ except asyncio.TimeoutError:
149
+ result_data["error"] = f"Execution timed out after {timeout}s"
150
+ except Exception as e:
151
+ result_data["error"] = f"Error during execution: {str(e)}"
152
+
153
+ finally:
154
+ # Clean up the loop
155
+ loop.close()
156
+
71
157
  except Exception as e:
72
- result_data["error"] = str(e)
73
- finally:
74
- result_data["end_time"] = datetime.now(timezone.utc)
158
+ # Catch any other exceptions
159
+ result_data["error"] = f"Unexpected error: {str(e)}"
160
+
161
+ # Set end time
162
+ result_data["end_time"] = datetime.now(timezone.utc).isoformat()
75
163
  return result_data
76
164
 
77
165
 
166
+ # --------------------------------------------------------------------------- #
167
+ # The subprocess strategy
168
+ # --------------------------------------------------------------------------- #
78
169
  class SubprocessStrategy(ExecutionStrategy):
79
170
  """
80
- Executes tool calls in-process via InProcessStrategy for compatibility with local tool definitions and tests.
171
+ Execute tools in separate processes for isolation and parallelism.
172
+
173
+ This strategy creates a pool of worker processes and distributes tool calls
174
+ among them. Each tool executes in its own process, providing isolation and
175
+ parallelism.
81
176
  """
82
- def __init__(self, registry, max_workers: int = 4, default_timeout: Optional[float] = None):
177
+
178
+ def __init__(
179
+ self,
180
+ registry: ToolRegistryInterface,
181
+ *,
182
+ max_workers: int = 4,
183
+ default_timeout: Optional[float] = None,
184
+ worker_init_timeout: float = 5.0,
185
+ ) -> None:
83
186
  """
84
- Initialize with in-process strategy delegation.
187
+ Initialize the subprocess execution strategy.
188
+
189
+ Args:
190
+ registry: Tool registry for tool lookups
191
+ max_workers: Maximum number of worker processes
192
+ default_timeout: Default timeout for tool execution
193
+ worker_init_timeout: Timeout for worker process initialization
85
194
  """
86
195
  self.registry = registry
87
- self.default_timeout = default_timeout
88
- # Use InProcessStrategy to execute calls directly
89
- self._strategy = InProcessStrategy(
90
- registry=registry,
91
- default_timeout=default_timeout,
92
- max_concurrency=max_workers
93
- )
196
+ self.max_workers = max_workers
197
+ self.default_timeout = default_timeout or 30.0 # Always have a default
198
+ self.worker_init_timeout = worker_init_timeout
199
+
200
+ # Process pool (initialized lazily)
201
+ self._process_pool: Optional[concurrent.futures.ProcessPoolExecutor] = None
202
+ self._pool_lock = asyncio.Lock()
203
+
204
+ # Task tracking for cleanup
205
+ self._active_tasks: Set[asyncio.Task] = set()
206
+ self._shutdown_event = asyncio.Event()
207
+ self._shutting_down = False
208
+
209
+ logger.debug("SubprocessStrategy initialized with timeout: %ss, max_workers: %d",
210
+ self.default_timeout, max_workers)
211
+
212
+ # Register shutdown handler if in main thread
213
+ try:
214
+ loop = asyncio.get_running_loop()
215
+ for sig in (signal.SIGTERM, signal.SIGINT):
216
+ loop.add_signal_handler(
217
+ sig, lambda s=sig: asyncio.create_task(self._signal_handler(s))
218
+ )
219
+ except (RuntimeError, NotImplementedError):
220
+ # Not in the main thread or not on Unix
221
+ pass
222
+
223
+ async def _ensure_pool(self) -> None:
224
+ """Initialize the process pool if not already initialized."""
225
+ if self._process_pool is not None:
226
+ return
227
+
228
+ async with self._pool_lock:
229
+ if self._process_pool is not None:
230
+ return
231
+
232
+ # Create process pool
233
+ self._process_pool = concurrent.futures.ProcessPoolExecutor(
234
+ max_workers=self.max_workers,
235
+ initializer=_init_worker,
236
+ )
237
+
238
+ # Test the pool with a simple task
239
+ loop = asyncio.get_running_loop()
240
+ try:
241
+ # Use a module-level function instead of a lambda
242
+ await asyncio.wait_for(
243
+ loop.run_in_executor(self._process_pool, _pool_test_func),
244
+ timeout=self.worker_init_timeout
245
+ )
246
+ logger.info("Process pool initialized with %d workers", self.max_workers)
247
+ except Exception as e:
248
+ # Clean up on initialization error
249
+ self._process_pool.shutdown(wait=False)
250
+ self._process_pool = None
251
+ logger.error("Failed to initialize process pool: %s", e)
252
+ raise RuntimeError(f"Failed to initialize process pool: {e}") from e
253
+
254
+ # ------------------------------------------------------------------ #
255
+ # 🔌 legacy façade for older wrappers #
256
+ # ------------------------------------------------------------------ #
257
+ async def execute(
258
+ self,
259
+ calls: List[ToolCall],
260
+ *,
261
+ timeout: Optional[float] = None,
262
+ ) -> List[ToolResult]:
263
+ """
264
+ Back-compat shim.
94
265
 
266
+ Old wrappers (`retry`, `rate_limit`, `cache`, …) still expect an
267
+ ``execute()`` coroutine on an execution-strategy object.
268
+ The real implementation lives in :meth:`run`, so we just forward.
269
+ """
270
+ return await self.run(calls, timeout)
271
+
95
272
  async def run(
96
273
  self,
97
274
  calls: List[ToolCall],
98
- timeout: Optional[float] = None
275
+ timeout: Optional[float] = None,
99
276
  ) -> List[ToolResult]:
100
277
  """
101
- Execute tool calls using in-process strategy.
278
+ Execute tool calls in separate processes.
279
+
280
+ Args:
281
+ calls: List of tool calls to execute
282
+ timeout: Optional timeout for each execution (overrides default)
283
+
284
+ Returns:
285
+ List of tool results in the same order as calls
286
+ """
287
+ if not calls:
288
+ return []
289
+
290
+ if self._shutting_down:
291
+ # Return early with error results if shutting down
292
+ return [
293
+ ToolResult(
294
+ tool=call.tool,
295
+ result=None,
296
+ error="System is shutting down",
297
+ start_time=datetime.now(timezone.utc),
298
+ end_time=datetime.now(timezone.utc),
299
+ machine=os.uname().nodename,
300
+ pid=os.getpid(),
301
+ )
302
+ for call in calls
303
+ ]
304
+
305
+ # Use default_timeout if no timeout specified
306
+ effective_timeout = timeout if timeout is not None else self.default_timeout
307
+ logger.debug("Executing %d calls in subprocesses with %ss timeout each", len(calls), effective_timeout)
308
+
309
+ # Create tasks for each call
310
+ tasks = []
311
+ for call in calls:
312
+ task = asyncio.create_task(self._execute_single_call(
313
+ call, effective_timeout # Always pass concrete timeout
314
+ ))
315
+ self._active_tasks.add(task)
316
+ task.add_done_callback(self._active_tasks.discard)
317
+ tasks.append(task)
318
+
319
+ # Execute all tasks concurrently
320
+ async with log_context_span("subprocess_execution", {"num_calls": len(calls)}):
321
+ return await asyncio.gather(*tasks)
322
+
323
+ async def stream_run(
324
+ self,
325
+ calls: List[ToolCall],
326
+ timeout: Optional[float] = None,
327
+ ) -> AsyncIterator[ToolResult]:
328
+ """
329
+ Execute tool calls and yield results as they become available.
330
+
331
+ Args:
332
+ calls: List of tool calls to execute
333
+ timeout: Optional timeout for each execution
334
+
335
+ Yields:
336
+ Tool results as they complete (not necessarily in order)
337
+ """
338
+ if not calls:
339
+ return
340
+
341
+ if self._shutting_down:
342
+ # Yield error results if shutting down
343
+ for call in calls:
344
+ yield ToolResult(
345
+ tool=call.tool,
346
+ result=None,
347
+ error="System is shutting down",
348
+ start_time=datetime.now(timezone.utc),
349
+ end_time=datetime.now(timezone.utc),
350
+ machine=os.uname().nodename,
351
+ pid=os.getpid(),
352
+ )
353
+ return
354
+
355
+ # Use default_timeout if no timeout specified
356
+ effective_timeout = timeout if timeout is not None else self.default_timeout
357
+
358
+ # Create a queue for results
359
+ queue = asyncio.Queue()
360
+
361
+ # Start all executions and have them put results in the queue
362
+ pending = set()
363
+ for call in calls:
364
+ task = asyncio.create_task(self._execute_to_queue(
365
+ call, queue, effective_timeout # Always pass concrete timeout
366
+ ))
367
+ self._active_tasks.add(task)
368
+ task.add_done_callback(self._active_tasks.discard)
369
+ pending.add(task)
370
+
371
+ # Yield results as they become available
372
+ while pending:
373
+ # Get next result from queue
374
+ result = await queue.get()
375
+ yield result
376
+
377
+ # Check for completed tasks
378
+ done, pending = await asyncio.wait(
379
+ pending, timeout=0, return_when=asyncio.FIRST_COMPLETED
380
+ )
381
+
382
+ # Handle any exceptions
383
+ for task in done:
384
+ try:
385
+ await task
386
+ except Exception as e:
387
+ logger.exception("Error in task: %s", e)
388
+
389
+ async def _execute_to_queue(
390
+ self,
391
+ call: ToolCall,
392
+ queue: asyncio.Queue,
393
+ timeout: float, # Make timeout required
394
+ ) -> None:
395
+ """Execute a single call and put the result in the queue."""
396
+ result = await self._execute_single_call(call, timeout)
397
+ await queue.put(result)
398
+
399
+ async def _execute_single_call(
400
+ self,
401
+ call: ToolCall,
402
+ timeout: float, # Make timeout required
403
+ ) -> ToolResult:
404
+ """
405
+ Execute a single tool call in a separate process.
406
+
407
+ Args:
408
+ call: Tool call to execute
409
+ timeout: Timeout in seconds (required)
410
+
411
+ Returns:
412
+ Tool execution result
413
+ """
414
+ start_time = datetime.now(timezone.utc)
415
+
416
+ logger.debug("Executing %s in subprocess with %ss timeout", call.tool, timeout)
417
+
418
+ try:
419
+ # Ensure pool is initialized
420
+ await self._ensure_pool()
421
+
422
+ # Get tool from registry
423
+ tool_impl = await self.registry.get_tool(call.tool, call.namespace)
424
+ if tool_impl is None:
425
+ return ToolResult(
426
+ tool=call.tool,
427
+ result=None,
428
+ error=f"Tool '{call.tool}' not found",
429
+ start_time=start_time,
430
+ end_time=datetime.now(timezone.utc),
431
+ machine=os.uname().nodename,
432
+ pid=os.getpid(),
433
+ )
434
+
435
+ # Get module and class names for import in worker process
436
+ if inspect.isclass(tool_impl):
437
+ module_name = tool_impl.__module__
438
+ class_name = tool_impl.__name__
439
+ else:
440
+ module_name = tool_impl.__class__.__module__
441
+ class_name = tool_impl.__class__.__name__
442
+
443
+ # Execute in subprocess
444
+ loop = asyncio.get_running_loop()
445
+
446
+ # Add safety timeout to handle process crashes (tool timeout + buffer)
447
+ safety_timeout = timeout + 5.0
448
+
449
+ try:
450
+ result_data = await asyncio.wait_for(
451
+ loop.run_in_executor(
452
+ self._process_pool,
453
+ functools.partial(
454
+ _process_worker,
455
+ call.tool,
456
+ call.namespace,
457
+ module_name,
458
+ class_name,
459
+ call.arguments,
460
+ timeout # Pass the actual timeout to worker
461
+ )
462
+ ),
463
+ timeout=safety_timeout
464
+ )
465
+
466
+ # Parse timestamps
467
+ if isinstance(result_data["start_time"], str):
468
+ start_time_str = result_data["start_time"]
469
+ result_data["start_time"] = datetime.fromisoformat(start_time_str)
470
+
471
+ if isinstance(result_data["end_time"], str):
472
+ end_time_str = result_data["end_time"]
473
+ result_data["end_time"] = datetime.fromisoformat(end_time_str)
474
+
475
+ end_time = datetime.now(timezone.utc)
476
+ actual_duration = (end_time - start_time).total_seconds()
477
+
478
+ if result_data.get("error"):
479
+ logger.debug("%s subprocess failed after %.3fs: %s",
480
+ call.tool, actual_duration, result_data["error"])
481
+ else:
482
+ logger.debug("%s subprocess completed in %.3fs (limit: %ss)",
483
+ call.tool, actual_duration, timeout)
484
+
485
+ # Create ToolResult from worker data
486
+ return ToolResult(
487
+ tool=result_data.get("tool", call.tool),
488
+ result=result_data.get("result"),
489
+ error=result_data.get("error"),
490
+ start_time=result_data.get("start_time", start_time),
491
+ end_time=result_data.get("end_time", end_time),
492
+ machine=result_data.get("machine", os.uname().nodename),
493
+ pid=result_data.get("pid", os.getpid()),
494
+ )
495
+
496
+ except asyncio.TimeoutError:
497
+ # This happens if the worker process itself hangs
498
+ end_time = datetime.now(timezone.utc)
499
+ actual_duration = (end_time - start_time).total_seconds()
500
+ logger.debug("%s subprocess timed out after %.3fs (safety limit: %ss)",
501
+ call.tool, actual_duration, safety_timeout)
502
+
503
+ return ToolResult(
504
+ tool=call.tool,
505
+ result=None,
506
+ error=f"Worker process timed out after {safety_timeout}s",
507
+ start_time=start_time,
508
+ end_time=end_time,
509
+ machine=os.uname().nodename,
510
+ pid=os.getpid(),
511
+ )
512
+
513
+ except concurrent.futures.process.BrokenProcessPool:
514
+ # Process pool broke - need to recreate it
515
+ logger.error("Process pool broke during execution - recreating")
516
+ if self._process_pool:
517
+ self._process_pool.shutdown(wait=False)
518
+ self._process_pool = None
519
+
520
+ return ToolResult(
521
+ tool=call.tool,
522
+ result=None,
523
+ error="Worker process crashed",
524
+ start_time=start_time,
525
+ end_time=datetime.now(timezone.utc),
526
+ machine=os.uname().nodename,
527
+ pid=os.getpid(),
528
+ )
529
+
530
+ except asyncio.CancelledError:
531
+ # Handle cancellation
532
+ logger.debug("%s subprocess was cancelled", call.tool)
533
+ return ToolResult(
534
+ tool=call.tool,
535
+ result=None,
536
+ error="Execution was cancelled",
537
+ start_time=start_time,
538
+ end_time=datetime.now(timezone.utc),
539
+ machine=os.uname().nodename,
540
+ pid=os.getpid(),
541
+ )
542
+
543
+ except Exception as e:
544
+ # Handle any other errors
545
+ logger.exception("Error executing %s in subprocess: %s", call.tool, e)
546
+ end_time = datetime.now(timezone.utc)
547
+ actual_duration = (end_time - start_time).total_seconds()
548
+ logger.debug("%s subprocess setup failed after %.3fs: %s",
549
+ call.tool, actual_duration, e)
550
+
551
+ return ToolResult(
552
+ tool=call.tool,
553
+ result=None,
554
+ error=f"Error: {str(e)}",
555
+ start_time=start_time,
556
+ end_time=end_time,
557
+ machine=os.uname().nodename,
558
+ pid=os.getpid(),
559
+ )
560
+
561
+ @property
562
+ def supports_streaming(self) -> bool:
563
+ """Check if this strategy supports streaming execution."""
564
+ return True
565
+
566
+ async def _signal_handler(self, sig: int) -> None:
567
+ """Handle termination signals."""
568
+ signame = signal.Signals(sig).name
569
+ logger.info("Received %s, shutting down process pool", signame)
570
+ await self.shutdown()
571
+
572
+ async def shutdown(self) -> None:
573
+ """
574
+ Gracefully shut down the process pool.
575
+
576
+ This cancels all active tasks and shuts down the process pool.
102
577
  """
103
- return await self._strategy.run(calls, timeout=timeout)
578
+ if self._shutting_down:
579
+ return
580
+
581
+ self._shutting_down = True
582
+ self._shutdown_event.set()
583
+
584
+ # Cancel all active tasks
585
+ active_tasks = list(self._active_tasks)
586
+ if active_tasks:
587
+ logger.info("Cancelling %d active tool executions", len(active_tasks))
588
+ for task in active_tasks:
589
+ task.cancel()
590
+
591
+ # Wait for all tasks to complete (with cancellation)
592
+ await asyncio.gather(*active_tasks, return_exceptions=True)
593
+
594
+ # Shut down the process pool
595
+ if self._process_pool:
596
+ logger.info("Shutting down process pool")
597
+ self._process_pool.shutdown(wait=True)
598
+ self._process_pool = None