chuk-tool-processor 0.1.6__py3-none-any.whl → 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of chuk-tool-processor has been flagged as potentially problematic.

Files changed (46)
  1. chuk_tool_processor/core/processor.py +345 -132
  2. chuk_tool_processor/execution/strategies/inprocess_strategy.py +522 -71
  3. chuk_tool_processor/execution/strategies/subprocess_strategy.py +559 -64
  4. chuk_tool_processor/execution/tool_executor.py +282 -24
  5. chuk_tool_processor/execution/wrappers/caching.py +465 -123
  6. chuk_tool_processor/execution/wrappers/rate_limiting.py +199 -86
  7. chuk_tool_processor/execution/wrappers/retry.py +133 -23
  8. chuk_tool_processor/logging/__init__.py +83 -10
  9. chuk_tool_processor/logging/context.py +218 -22
  10. chuk_tool_processor/logging/formatter.py +56 -13
  11. chuk_tool_processor/logging/helpers.py +91 -16
  12. chuk_tool_processor/logging/metrics.py +75 -6
  13. chuk_tool_processor/mcp/mcp_tool.py +80 -35
  14. chuk_tool_processor/mcp/register_mcp_tools.py +74 -56
  15. chuk_tool_processor/mcp/setup_mcp_sse.py +41 -36
  16. chuk_tool_processor/mcp/setup_mcp_stdio.py +39 -37
  17. chuk_tool_processor/mcp/transport/sse_transport.py +351 -105
  18. chuk_tool_processor/models/execution_strategy.py +52 -3
  19. chuk_tool_processor/models/streaming_tool.py +110 -0
  20. chuk_tool_processor/models/tool_call.py +56 -4
  21. chuk_tool_processor/models/tool_result.py +115 -9
  22. chuk_tool_processor/models/validated_tool.py +15 -13
  23. chuk_tool_processor/plugins/discovery.py +115 -70
  24. chuk_tool_processor/plugins/parsers/base.py +13 -5
  25. chuk_tool_processor/plugins/parsers/{function_call_tool_plugin.py → function_call_tool.py} +39 -20
  26. chuk_tool_processor/plugins/parsers/json_tool.py +50 -0
  27. chuk_tool_processor/plugins/parsers/openai_tool.py +88 -0
  28. chuk_tool_processor/plugins/parsers/xml_tool.py +74 -20
  29. chuk_tool_processor/registry/__init__.py +46 -7
  30. chuk_tool_processor/registry/auto_register.py +92 -28
  31. chuk_tool_processor/registry/decorators.py +134 -11
  32. chuk_tool_processor/registry/interface.py +48 -14
  33. chuk_tool_processor/registry/metadata.py +52 -6
  34. chuk_tool_processor/registry/provider.py +75 -36
  35. chuk_tool_processor/registry/providers/__init__.py +49 -10
  36. chuk_tool_processor/registry/providers/memory.py +59 -48
  37. chuk_tool_processor/registry/tool_export.py +208 -39
  38. chuk_tool_processor/utils/validation.py +18 -13
  39. chuk_tool_processor-0.2.dist-info/METADATA +401 -0
  40. chuk_tool_processor-0.2.dist-info/RECORD +58 -0
  41. {chuk_tool_processor-0.1.6.dist-info → chuk_tool_processor-0.2.dist-info}/WHEEL +1 -1
  42. chuk_tool_processor/plugins/parsers/json_tool_plugin.py +0 -38
  43. chuk_tool_processor/plugins/parsers/openai_tool_plugin.py +0 -76
  44. chuk_tool_processor-0.1.6.dist-info/METADATA +0 -462
  45. chuk_tool_processor-0.1.6.dist-info/RECORD +0 -57
  46. {chuk_tool_processor-0.1.6.dist-info → chuk_tool_processor-0.2.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,23 @@
 # chuk_tool_processor/core/processor.py
+"""
+Async-native core processor for tool execution.
+
+This module provides the central ToolProcessor class which handles:
+- Tool call parsing from various input formats
+- Tool execution using configurable strategies
+- Application of execution wrappers (caching, retries, etc.)
+"""
+from __future__ import annotations
+
 import asyncio
 import time
 import json
 import hashlib
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union, Set
 
-# imports
 from chuk_tool_processor.models.tool_call import ToolCall
 from chuk_tool_processor.models.tool_result import ToolResult
 from chuk_tool_processor.registry import ToolRegistryInterface, ToolRegistryProvider
-from chuk_tool_processor.execution.tool_executor import ToolExecutor
 from chuk_tool_processor.execution.strategies.inprocess_strategy import InProcessStrategy
 from chuk_tool_processor.execution.wrappers.caching import CacheInterface, InMemoryCache, CachingToolExecutor
 from chuk_tool_processor.execution.wrappers.rate_limiting import RateLimiter, RateLimitedToolExecutor
@@ -21,12 +29,13 @@ from chuk_tool_processor.logging import get_logger, log_context_span, request_logging
 class ToolProcessor:
     """
     Main class for processing tool calls from LLM responses.
-    Combines parsing, execution, and result handling.
+    Combines parsing, execution, and result handling with full async support.
     """
 
     def __init__(
         self,
         registry: Optional[ToolRegistryInterface] = None,
+        strategy = None,
         default_timeout: float = 10.0,
         max_concurrency: Optional[int] = None,
         enable_caching: bool = True,
@@ -43,6 +52,7 @@ class ToolProcessor:
 
         Args:
             registry: Tool registry to use. If None, uses the global registry.
+            strategy: Optional execution strategy (default: InProcessStrategy)
             default_timeout: Default timeout for tool execution in seconds.
             max_concurrency: Maximum number of concurrent tool executions.
             enable_caching: Whether to enable result caching.
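Editor's note: the new `strategy` keyword lets a caller inject the execution backend instead of always receiving an `InProcessStrategy`. A minimal usage sketch, assuming an async entry point and a hypothetical already-registered tool named "echo"; only the `strategy` keyword and the constructor arguments shown in this diff are confirmed:

```python
import asyncio

from chuk_tool_processor.core.processor import ToolProcessor
from chuk_tool_processor.execution.strategies.inprocess_strategy import InProcessStrategy
from chuk_tool_processor.models.tool_call import ToolCall
from chuk_tool_processor.registry import ToolRegistryProvider

async def main() -> None:
    # The 0.2 registry accessor is async (see the initialize() hunk below).
    registry = await ToolRegistryProvider.get_registry()

    # Inject an explicitly configured strategy instead of the one
    # that initialize() would otherwise build by default.
    strategy = InProcessStrategy(
        registry=registry,
        default_timeout=5.0,
        max_concurrency=4,
    )
    processor = ToolProcessor(registry=registry, strategy=strategy)

    # "echo" is a hypothetical tool assumed to be registered already.
    results = await processor.execute([ToolCall(tool="echo", arguments={"msg": "hi"})])
    print(results)

asyncio.run(main())
```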
@@ -56,121 +66,211 @@ class ToolProcessor:
                 If None, uses all available parsers.
         """
         self.logger = get_logger("chuk_tool_processor.processor")
-
-        # Use provided registry or global registry
-        self.registry = registry or ToolRegistryProvider.get_registry()
-
-        # Create base executor with in-process strategy
-        self.strategy = InProcessStrategy(
-            registry=self.registry,
-            default_timeout=default_timeout,
-            max_concurrency=max_concurrency,
-        )
-
-        self.executor = ToolExecutor(
-            registry=self.registry,
-            default_timeout=default_timeout,
-            strategy=self.strategy,
-        )
-
-        # Apply optional wrappers
-        if enable_retries:
-            self.logger.debug("Enabling retry logic")
-            self.executor = RetryableToolExecutor(
-                executor=self.executor,
-                default_config=RetryConfig(max_retries=max_retries),
-            )
-
-        if enable_rate_limiting:
-            self.logger.debug("Enabling rate limiting")
-            rate_limiter = RateLimiter(
-                global_limit=global_rate_limit,
-                tool_limits=tool_rate_limits,
-            )
-            self.executor = RateLimitedToolExecutor(
-                executor=self.executor,
-                rate_limiter=rate_limiter,
-            )
-
-        if enable_caching:
-            self.logger.debug("Enabling result caching")
-            cache = InMemoryCache(default_ttl=cache_ttl)
-            self.executor = CachingToolExecutor(
-                executor=self.executor,
-                cache=cache,
-                default_ttl=cache_ttl,
-            )
-
-        # Discover plugins if not already done
-        if not plugin_registry.list_plugins().get("parser", []):
-            discover_default_plugins()
-
-        # Get parser plugins
-        if parser_plugins:
-            self.parsers = [
-                plugin_registry.get_plugin("parser", name)
-                for name in parser_plugins
-                if plugin_registry.get_plugin("parser", name)
-            ]
-        else:
-            parser_names = plugin_registry.list_plugins().get("parser", [])
-            self.parsers = [
-                plugin_registry.get_plugin("parser", name) for name in parser_names
-            ]
-
-        self.logger.debug(f"Initialized with {len(self.parsers)} parser plugins")
-
-    async def process_text(
+
+        # Store initialization parameters for lazy initialization
+        self._registry = registry
+        self._strategy = strategy
+        self.default_timeout = default_timeout
+        self.max_concurrency = max_concurrency
+        self.enable_caching = enable_caching
+        self.cache_ttl = cache_ttl
+        self.enable_rate_limiting = enable_rate_limiting
+        self.global_rate_limit = global_rate_limit
+        self.tool_rate_limits = tool_rate_limits
+        self.enable_retries = enable_retries
+        self.max_retries = max_retries
+        self.parser_plugin_names = parser_plugins
+
+        # Placeholder for initialized components
+        self.registry = None
+        self.strategy = None
+        self.executor = None
+        self.parsers = []
+
+        # Flag for tracking initialization state
+        self._initialized = False
+        self._init_lock = asyncio.Lock()
+
+    async def initialize(self) -> None:
+        """
+        Initialize the processor asynchronously.
+
+        This method ensures all components are properly initialized before use.
+        It is called automatically by other methods if needed.
+        """
+        # Fast path if already initialized
+        if self._initialized:
+            return
+
+        # Ensure only one initialization happens at a time
+        async with self._init_lock:
+            # Double-check pattern after acquiring lock
+            if self._initialized:
+                return
+
+            self.logger.debug("Initializing tool processor")
+
+            # Get the registry
+            if self._registry is not None:
+                self.registry = self._registry
+            else:
+                self.registry = await ToolRegistryProvider.get_registry()
+
+            # Create execution strategy if needed
+            if self._strategy is not None:
+                self.strategy = self._strategy
+            else:
+                self.strategy = InProcessStrategy(
+                    registry=self.registry,
+                    default_timeout=self.default_timeout,
+                    max_concurrency=self.max_concurrency,
+                )
+
+            # Set up the executor chain with optional wrappers
+            executor = self.strategy
+
+            # Apply wrappers in reverse order (innermost first)
+            if self.enable_retries:
+                self.logger.debug("Enabling retry logic")
+                executor = RetryableToolExecutor(
+                    executor=executor,
+                    default_config=RetryConfig(max_retries=self.max_retries),
+                )
+
+            if self.enable_rate_limiting:
+                self.logger.debug("Enabling rate limiting")
+                rate_limiter = RateLimiter(
+                    global_limit=self.global_rate_limit,
+                    tool_limits=self.tool_rate_limits,
+                )
+                executor = RateLimitedToolExecutor(
+                    executor=executor,
+                    limiter=rate_limiter,
+                )
+
+            if self.enable_caching:
+                self.logger.debug("Enabling result caching")
+                cache = InMemoryCache(default_ttl=self.cache_ttl)
+                executor = CachingToolExecutor(
+                    executor=executor,
+                    cache=cache,
+                    default_ttl=self.cache_ttl,
+                )
+
+            self.executor = executor
+
+            # Initialize parser plugins
+            # Discover plugins if not already done
+            plugins = plugin_registry.list_plugins().get("parser", [])
+            if not plugins:
+                discover_default_plugins()
+                plugins = plugin_registry.list_plugins().get("parser", [])
+
+            # Get parser plugins
+            if self.parser_plugin_names:
+                self.parsers = [
+                    plugin_registry.get_plugin("parser", name)
+                    for name in self.parser_plugin_names
+                    if plugin_registry.get_plugin("parser", name)
+                ]
+            else:
+                self.parsers = [
+                    plugin_registry.get_plugin("parser", name) for name in plugins
+                ]
+
+            self.logger.debug(f"Initialized with {len(self.parsers)} parser plugins")
+            self._initialized = True
+
+    async def process(
         self,
-        text: str,
+        data: Union[str, Dict[str, Any], List[Dict[str, Any]]],
         timeout: Optional[float] = None,
         use_cache: bool = True,
         request_id: Optional[str] = None,
     ) -> List[ToolResult]:
         """
-        Process text to extract and execute tool calls.
-
+        Process tool calls from various input formats.
+
+        This method handles different input types:
+        - String: Parses tool calls from text using registered parsers
+        - Dict: Processes an OpenAI-style tool_calls object
+        - List[Dict]: Processes a list of individual tool calls
+
         Args:
-            text: Text to process.
-            timeout: Optional timeout for execution.
-            use_cache: Whether to use cached results.
-            request_id: Optional request ID for logging.
-
+            data: Input data containing tool calls
+            timeout: Optional timeout for execution
+            use_cache: Whether to use cached results
+            request_id: Optional request ID for logging
+
         Returns:
-            List of tool results.
+            List of tool results
         """
+        # Ensure initialization
+        await self.initialize()
+
         # Create request context
-        with request_logging(request_id) as req_id:
-            self.logger.debug(f"Processing text ({len(text)} chars)")
-
-            # Extract tool calls
-            calls = await self._extract_tool_calls(text)
-
+        async with request_logging(request_id) as req_id:
+            # Handle different input types
+            if isinstance(data, str):
+                # Text processing
+                self.logger.debug(f"Processing text ({len(data)} chars)")
+                calls = await self._extract_tool_calls(data)
+            elif isinstance(data, dict):
+                # Handle OpenAI format with tool_calls array
+                if "tool_calls" in data and isinstance(data["tool_calls"], list):
+                    calls = []
+                    for tc in data["tool_calls"]:
+                        if "function" in tc and isinstance(tc["function"], dict):
+                            function = tc["function"]
+                            name = function.get("name")
+                            args_str = function.get("arguments", "{}")
+
+                            # Parse arguments
+                            try:
+                                args = json.loads(args_str) if isinstance(args_str, str) else args_str
+                            except json.JSONDecodeError:
+                                args = {"raw": args_str}
+
+                            if name:
+                                calls.append(ToolCall(tool=name, arguments=args, id=tc.get("id")))
+                else:
+                    # Assume it's a single tool call
+                    calls = [ToolCall(**data)]
+            elif isinstance(data, list):
+                # List of tool calls
+                calls = [ToolCall(**tc) for tc in data]
+            else:
+                self.logger.warning(f"Unsupported input type: {type(data)}")
+                return []
+
             if not calls:
                 self.logger.debug("No tool calls found")
                 return []
-
+
             self.logger.debug(f"Found {len(calls)} tool calls")
-
+
             # Execute tool calls
-            with log_context_span("tool_execution", {"num_calls": len(calls)}):
+            async with log_context_span("tool_execution", {"num_calls": len(calls)}):
                 # Check if any tools are unknown
-                tool_names = {call.tool for call in calls}
-                unknown_tools = [name for name in tool_names if not self.registry.get_tool(name)]
-
+                unknown_tools = []
+                for call in calls:
+                    tool = await self.registry.get_tool(call.tool)
+                    if not tool:
+                        unknown_tools.append(call.tool)
+
                 if unknown_tools:
                     self.logger.warning(f"Unknown tools: {unknown_tools}")
-
+
                 # Execute tools
                 results = await self.executor.execute(calls, timeout=timeout)
-
+
                 # Log metrics for each tool call
                 for call, result in zip(calls, results):
-                    log_tool_call(call, result)
-
+                    await log_tool_call(call, result)
+
                     # Record metrics
                     duration = (result.end_time - result.start_time).total_seconds()
-                    metrics.log_tool_execution(
+                    await metrics.log_tool_execution(
                         tool=call.tool,
                         success=result.error is None,
                         duration=duration,
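Editor's note: the reworked process() is the heart of this hunk. It dispatches on input type, unpacks OpenAI-style function payloads (falling back to {"raw": ...} when the arguments string is not valid JSON), and awaits the registry, logging, and metrics calls that were synchronous in 0.1.6. A sketch of the three accepted input shapes, taken from the branches above; "search" is a hypothetical tool name:

```python
import asyncio

from chuk_tool_processor.core.processor import ToolProcessor

async def main() -> None:
    processor = ToolProcessor()  # initialize() runs lazily on first use

    # OpenAI chat-completion shape: "arguments" arrives as a JSON string
    # and is json.loads()-ed; invalid JSON falls back to {"raw": ...}.
    openai_style = {
        "tool_calls": [
            {"id": "call_1",
             "function": {"name": "search", "arguments": '{"query": "weather"}'}},
        ]
    }
    results = await processor.process(openai_style)

    # List shape: each dict is passed straight to ToolCall(**tc).
    results = await processor.process(
        [{"tool": "search", "arguments": {"query": "weather"}}]
    )

    # String shape: handed to the installed parser plugins; the exact text
    # formats they accept live in chuk_tool_processor/plugins/parsers/.
    results = await processor.process("...model output containing tool calls...")
    print(results)

asyncio.run(main())
```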
@@ -178,9 +278,64 @@ class ToolProcessor:
                         cached=getattr(result, "cached", False),
                         attempts=getattr(result, "attempts", 1),
                     )
-
+
             return results
 
+    async def process_text(
+        self,
+        text: str,
+        timeout: Optional[float] = None,
+        use_cache: bool = True,
+        request_id: Optional[str] = None,
+    ) -> List[ToolResult]:
+        """
+        Process text to extract and execute tool calls.
+
+        Legacy alias for process() with string input.
+
+        Args:
+            text: Text to process.
+            timeout: Optional timeout for execution.
+            use_cache: Whether to use cached results.
+            request_id: Optional request ID for logging.
+
+        Returns:
+            List of tool results.
+        """
+        return await self.process(
+            data=text,
+            timeout=timeout,
+            use_cache=use_cache,
+            request_id=request_id,
+        )
+
+    async def execute(
+        self,
+        calls: List[ToolCall],
+        timeout: Optional[float] = None,
+        use_cache: bool = True,
+    ) -> List[ToolResult]:
+        """
+        Execute a list of ToolCall objects directly.
+
+        Args:
+            calls: List of tool calls to execute
+            timeout: Optional execution timeout
+            use_cache: Whether to use cached results
+
+        Returns:
+            List of tool results
+        """
+        # Ensure initialization
+        await self.initialize()
+
+        # Execute with the configured executor
+        return await self.executor.execute(
+            calls=calls,
+            timeout=timeout,
+            use_cache=use_cache if hasattr(self.executor, "use_cache") else True
+        )
+
     async def _extract_tool_calls(self, text: str) -> List[ToolCall]:
         """
         Extract tool calls from text using all available parsers.
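Editor's note: the new execute() entry point skips parsing entirely for callers that already hold structured calls, and like process() it self-initializes on first use. A minimal sketch, with a hypothetical registered tool named "add":

```python
import asyncio

from chuk_tool_processor.core.processor import ToolProcessor
from chuk_tool_processor.models.tool_call import ToolCall

async def main() -> None:
    processor = ToolProcessor()

    # No text parsing involved: the calls are already structured.
    # "add" is a hypothetical tool assumed to be registered.
    calls = [ToolCall(tool="add", arguments={"a": 1, "b": 2})]
    results = await processor.execute(calls, timeout=5.0)

    for result in results:
        # ToolResult carries an `error` field (None on success), per this diff.
        print(result.error or result)

asyncio.run(main())
```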
@@ -194,39 +349,22 @@ class ToolProcessor:
         all_calls: List[ToolCall] = []
 
         # Try each parser
-        with log_context_span("parsing", {"text_length": len(text)}):
+        async with log_context_span("parsing", {"text_length": len(text)}):
+            parse_tasks = []
+
+            # Create parsing tasks
             for parser in self.parsers:
-                parser_name = parser.__class__.__name__
-
-                with log_context_span(f"parser.{parser_name}", log_duration=True):
-                    start_time = time.time()
-
-                    try:
-                        # Try to parse
-                        calls = parser.try_parse(text)
-
-                        # Log success
-                        duration = time.time() - start_time
-                        metrics.log_parser_metric(
-                            parser=parser_name,
-                            success=True,
-                            duration=duration,
-                            num_calls=len(calls),
-                        )
-
-                        # Add calls to result
-                        all_calls.extend(calls)
-
-                    except Exception as e:
-                        # Log failure
-                        duration = time.time() - start_time
-                        metrics.log_parser_metric(
-                            parser=parser_name,
-                            success=False,
-                            duration=duration,
-                            num_calls=0,
-                        )
-                        self.logger.error(f"Parser {parser_name} failed: {str(e)}")
+                parse_tasks.append(self._try_parser(parser, text))
+
+            # Execute all parsers concurrently
+            parser_results = await asyncio.gather(*parse_tasks, return_exceptions=True)
+
+            # Collect successful results
+            for result in parser_results:
+                if isinstance(result, Exception):
+                    continue
+                if result:
+                    all_calls.extend(result)
 
         # ------------------------------------------------------------------ #
         # Remove duplicates – use a stable digest instead of hashing a
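Editor's note: the sequential per-parser loop from 0.1.6 moves into _try_parser() (next hunk) and is fanned out with asyncio.gather; return_exceptions=True turns a raised exception into a return value, so one failing parser cannot cancel its siblings. A standalone sketch of that pattern, independent of this package:

```python
import asyncio

async def try_parse(name: str, text: str) -> list[str]:
    # Stand-in for a parser plugin's try_parse(); "bad" simulates a failure.
    if name == "bad":
        raise ValueError("unparseable")
    return [f"{name}:{text}"]

async def main() -> None:
    tasks = [try_parse(n, "hello") for n in ("xml", "json", "bad")]

    # Exceptions come back as values instead of propagating,
    # so every parser gets a chance to run to completion.
    results = await asyncio.gather(*tasks, return_exceptions=True)

    all_calls = [call for r in results if not isinstance(r, Exception) for call in r]
    print(all_calls)  # ['xml:hello', 'json:hello']

asyncio.run(main())
```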
@@ -243,11 +381,83 @@ class ToolProcessor:
             unique_calls[key] = call
 
         return list(unique_calls.values())
+
+    async def _try_parser(self, parser, text: str) -> List[ToolCall]:
+        """Try a single parser with metrics and logging."""
+        parser_name = parser.__class__.__name__
+
+        async with log_context_span(f"parser.{parser_name}", log_duration=True):
+            start_time = time.time()
+
+            try:
+                # Try to parse
+                calls = await parser.try_parse(text)
+
+                # Log success
+                duration = time.time() - start_time
+                await metrics.log_parser_metric(
+                    parser=parser_name,
+                    success=True,
+                    duration=duration,
+                    num_calls=len(calls),
+                )
+
+                return calls
+
+            except Exception as e:
+                # Log failure
+                duration = time.time() - start_time
+                await metrics.log_parser_metric(
+                    parser=parser_name,
+                    success=False,
+                    duration=duration,
+                    num_calls=0,
+                )
+                self.logger.error(f"Parser {parser_name} failed: {str(e)}")
+                return []
 
 
-# Create a global processor with default settings
-default_processor = ToolProcessor()
-
+# Create a global processor instance
+_global_processor: Optional[ToolProcessor] = None
+_processor_lock = asyncio.Lock()
+
+async def get_default_processor() -> ToolProcessor:
+    """Get or initialize the default global processor."""
+    global _global_processor
+
+    if _global_processor is None:
+        async with _processor_lock:
+            if _global_processor is None:
+                _global_processor = ToolProcessor()
+                await _global_processor.initialize()
+
+    return _global_processor
+
+async def process(
+    data: Union[str, Dict[str, Any], List[Dict[str, Any]]],
+    timeout: Optional[float] = None,
+    use_cache: bool = True,
+    request_id: Optional[str] = None,
+) -> List[ToolResult]:
+    """
+    Process tool calls with the default processor.
+
+    Args:
+        data: Input data (text, dict, or list of dicts)
+        timeout: Optional timeout for execution
+        use_cache: Whether to use cached results
+        request_id: Optional request ID for logging
+
+    Returns:
+        List of tool results
+    """
+    processor = await get_default_processor()
+    return await processor.process(
+        data=data,
+        timeout=timeout,
+        use_cache=use_cache,
+        request_id=request_id,
+    )
 
 async def process_text(
     text: str,
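Editor's note: get_default_processor() replaces the eagerly constructed default_processor of 0.1.6 with a lazily built singleton, guarded by the same double-checked locking as ToolProcessor.initialize(): the cheap None check runs without the lock, then is repeated under the lock before construction. Typical use of the module-level helper; the tool name is hypothetical:

```python
import asyncio

from chuk_tool_processor.core.processor import process

async def main() -> None:
    # First call builds and initializes the shared ToolProcessor;
    # every later call reuses the same instance.
    results = await process(
        [{"tool": "search", "arguments": {"query": "weather"}}],  # hypothetical tool
        timeout=10.0,
        request_id="req-42",
    )
    for result in results:
        print(result)

asyncio.run(main())
```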
@@ -257,6 +467,8 @@ async def process_text(
 ) -> List[ToolResult]:
     """
     Process text with the default processor.
+
+    Legacy alias for backward compatibility.
 
     Args:
         text: Text to process.
@@ -267,7 +479,8 @@ async def process_text(
     Returns:
         List of tool results.
     """
-    return await default_processor.process_text(
+    processor = await get_default_processor()
+    return await processor.process_text(
         text=text,
         timeout=timeout,
         use_cache=use_cache,