abstractcore 2.6.8__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +789 -136
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +300 -83
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +537 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +65 -28
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +5 -4
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2311 -772
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +798 -155
- abstractcore/tools/registry.py +107 -2
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
abstractcore/tools/parser.py
CHANGED
|
@@ -7,16 +7,28 @@ responses based on their architecture.
|
|
|
7
7
|
|
|
8
8
|
import re
|
|
9
9
|
import json
|
|
10
|
+
import ast
|
|
10
11
|
from typing import List, Optional, Dict, Any
|
|
11
12
|
from enum import Enum
|
|
12
13
|
|
|
13
14
|
from .core import ToolCall, ToolDefinition
|
|
14
15
|
from ..architectures import detect_architecture, get_architecture_format
|
|
16
|
+
from ..utils.jsonish import loads_dict_like as _jsonish_loads_dict_like
|
|
15
17
|
from ..utils.structured_logging import get_logger
|
|
16
18
|
|
|
17
19
|
logger = get_logger(__name__)
|
|
18
20
|
|
|
19
21
|
|
|
22
|
+
def _loads_dict_like(raw: str) -> Optional[Dict[str, Any]]:
|
|
23
|
+
"""Parse a JSON-ish or Python-literal dict safely.
|
|
24
|
+
|
|
25
|
+
Many OSS models emit tool arguments with single quotes and Python literals
|
|
26
|
+
(True/False/None) even when asked for strict JSON. We accept both to keep
|
|
27
|
+
tool calling robust.
|
|
28
|
+
"""
|
|
29
|
+
return _jsonish_loads_dict_like(raw)
|
|
30
|
+
|
|
31
|
+
|
|
20
32
|
class ToolFormat(Enum):
|
|
21
33
|
"""Tool call formats for different architectures."""
|
|
22
34
|
|
|
@@ -41,6 +53,22 @@ def _has_json_tool_pattern(response: str) -> bool:
|
|
|
41
53
|
json_pattern = r'\{[^{}]*["\']name["\'][^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
|
|
42
54
|
return bool(re.search(json_pattern, response, re.DOTALL))
|
|
43
55
|
|
|
56
|
+
def _has_bracket_tool_prefix(response: str) -> bool:
|
|
57
|
+
"""Check if response contains a `tool: [name]: {...}` style tool call prefix."""
|
|
58
|
+
if not response:
|
|
59
|
+
return False
|
|
60
|
+
return bool(re.search(r'(?im)^\s*tool\s*:\s*\[[^\]]+\]\s*:\s*\{', response))
|
|
61
|
+
|
|
62
|
+
def _has_harmony_tool_prefix(response: str) -> bool:
|
|
63
|
+
"""Check if response contains a Harmony/ChatML-style tool call marker.
|
|
64
|
+
|
|
65
|
+
Example emitted by some models:
|
|
66
|
+
<|channel|>commentary to=list_files <|constrain|>json<|message|>{"directory_path": "..."}
|
|
67
|
+
"""
|
|
68
|
+
if not response:
|
|
69
|
+
return False
|
|
70
|
+
return "<|channel|>" in response and "<|message|>" in response and "to=" in response
|
|
71
|
+
|
|
44
72
|
|
|
45
73
|
def detect_tool_calls(response: str, model_name: Optional[str] = None) -> bool:
|
|
46
74
|
"""
|
|
@@ -59,6 +87,12 @@ def detect_tool_calls(response: str, model_name: Optional[str] = None) -> bool:
|
|
|
59
87
|
# Get expected format from architecture
|
|
60
88
|
tool_format = _get_tool_format(model_name)
|
|
61
89
|
|
|
90
|
+
# Some models emit a CLI-like prefix format regardless of architecture.
|
|
91
|
+
if _has_bracket_tool_prefix(response):
|
|
92
|
+
return True
|
|
93
|
+
if _has_harmony_tool_prefix(response):
|
|
94
|
+
return True
|
|
95
|
+
|
|
62
96
|
# Check format-specific patterns (case-insensitive)
|
|
63
97
|
response_lower = response.lower()
|
|
64
98
|
if tool_format == ToolFormat.TOOL_CODE:
|
|
@@ -77,6 +111,8 @@ def detect_tool_calls(response: str, model_name: Optional[str] = None) -> bool:
|
|
|
77
111
|
"<|tool_call|>" in response_lower,
|
|
78
112
|
"<function_call" in response_lower,
|
|
79
113
|
"<tool_call>" in response_lower,
|
|
114
|
+
_has_bracket_tool_prefix(response),
|
|
115
|
+
_has_harmony_tool_prefix(response),
|
|
80
116
|
_has_json_tool_pattern(response),
|
|
81
117
|
])
|
|
82
118
|
|
|
@@ -113,16 +149,34 @@ def parse_tool_calls(response: str, model_name: Optional[str] = None) -> List[To
|
|
|
113
149
|
}
|
|
114
150
|
|
|
115
151
|
parser = parsers.get(tool_format, _parse_any_format)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
152
|
+
calls = parser(response)
|
|
153
|
+
# Fallback: some models emit tool syntax that doesn't match their expected architecture format
|
|
154
|
+
# (e.g., `tool: [name]: {...}` or partial tags). Try the generic parser when needed.
|
|
155
|
+
if not calls and parser is not _parse_any_format:
|
|
156
|
+
calls = _parse_any_format(response)
|
|
157
|
+
if calls:
|
|
158
|
+
from .arg_canonicalizer import canonicalize_tool_arguments
|
|
159
|
+
|
|
160
|
+
for call in calls:
|
|
161
|
+
call.arguments = canonicalize_tool_arguments(call.name, call.arguments)
|
|
162
|
+
return calls
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def format_tool_prompt(
|
|
166
|
+
tools: List[ToolDefinition],
|
|
167
|
+
model_name: Optional[str] = None,
|
|
168
|
+
*,
|
|
169
|
+
include_tool_list: bool = True,
|
|
170
|
+
include_examples: bool = True,
|
|
171
|
+
) -> str:
|
|
120
172
|
"""
|
|
121
173
|
Format tools into a system prompt based on model architecture.
|
|
122
174
|
|
|
123
175
|
Args:
|
|
124
176
|
tools: List of tool definitions
|
|
125
177
|
model_name: Optional model name for architecture detection
|
|
178
|
+
include_tool_list: If False, omit per-tool listings (only include tool-call protocol/rules)
|
|
179
|
+
include_examples: If False, omit examples even if tools provide them
|
|
126
180
|
|
|
127
181
|
Returns:
|
|
128
182
|
Formatted system prompt
|
|
@@ -135,15 +189,17 @@ def format_tool_prompt(tools: List[ToolDefinition], model_name: Optional[str] =
|
|
|
135
189
|
|
|
136
190
|
# Format based on architecture
|
|
137
191
|
if tool_format == ToolFormat.TOOL_CODE:
|
|
138
|
-
return _format_gemma_style(tools)
|
|
192
|
+
return _format_gemma_style(tools, include_tool_list=include_tool_list, include_examples=include_examples)
|
|
139
193
|
elif tool_format == ToolFormat.SPECIAL_TOKEN:
|
|
140
|
-
return _format_qwen_style(tools)
|
|
194
|
+
return _format_qwen_style(tools, include_tool_list=include_tool_list, include_examples=include_examples)
|
|
141
195
|
elif tool_format == ToolFormat.FUNCTION_CALL:
|
|
142
|
-
return _format_llama_style(tools)
|
|
196
|
+
return _format_llama_style(tools, include_tool_list=include_tool_list, include_examples=include_examples)
|
|
143
197
|
elif tool_format == ToolFormat.XML_WRAPPED:
|
|
144
|
-
return _format_xml_style(tools)
|
|
198
|
+
return _format_xml_style(tools, include_tool_list=include_tool_list, include_examples=include_examples)
|
|
199
|
+
elif tool_format == ToolFormat.RAW_JSON:
|
|
200
|
+
return _format_json_style(tools, include_tool_list=include_tool_list, include_examples=include_examples)
|
|
145
201
|
else:
|
|
146
|
-
return _format_generic_style(tools)
|
|
202
|
+
return _format_generic_style(tools, include_tool_list=include_tool_list, include_examples=include_examples)
|
|
147
203
|
|
|
148
204
|
|
|
149
205
|
# Internal helpers
|
|
@@ -210,19 +266,37 @@ def _get_tool_format(model_name: Optional[str]) -> ToolFormat:
|
|
|
210
266
|
architecture = detect_architecture(model_name)
|
|
211
267
|
arch_format = get_architecture_format(architecture)
|
|
212
268
|
|
|
213
|
-
tool_format = arch_format.get("tool_format", "json")
|
|
269
|
+
tool_format = str(arch_format.get("tool_format", "json") or "").strip().lower()
|
|
270
|
+
message_format = str(arch_format.get("message_format", "") or "").strip().lower()
|
|
214
271
|
|
|
272
|
+
# tool_format values are defined in `abstractcore/assets/architecture_formats.json`.
|
|
273
|
+
# We interpret them as the model's *preferred tool-call syntax* and fall back to
|
|
274
|
+
# `_parse_any_format` when the model emits a different convention.
|
|
215
275
|
if tool_format == "special_token":
|
|
216
276
|
return ToolFormat.SPECIAL_TOKEN
|
|
217
|
-
|
|
277
|
+
if tool_format == "xml":
|
|
218
278
|
return ToolFormat.XML_WRAPPED
|
|
219
|
-
|
|
279
|
+
if tool_format == "pythonic":
|
|
220
280
|
return ToolFormat.TOOL_CODE
|
|
221
|
-
|
|
281
|
+
if tool_format == "json":
|
|
282
|
+
return ToolFormat.RAW_JSON
|
|
283
|
+
if tool_format in {"openai_functions", "native", "none"}:
|
|
284
|
+
# Native/OpenAI-functions tool calls are expected in structured response fields, not text.
|
|
285
|
+
# If tool syntax leaks into content, we parse with the generic fallback.
|
|
222
286
|
return ToolFormat.NATIVE
|
|
223
|
-
|
|
287
|
+
|
|
288
|
+
if tool_format == "prompted":
|
|
289
|
+
# "prompted" indicates the model relies on prompt-injected tool syntax.
|
|
290
|
+
# Choose the most likely format based on the architecture's message format.
|
|
291
|
+
# - Qwen/ChatML-like formats generally use <|tool_call|> special tokens.
|
|
292
|
+
if message_format == "im_start_end":
|
|
293
|
+
return ToolFormat.SPECIAL_TOKEN
|
|
294
|
+
# - LLaMA-style prompted tools commonly use <function_call>...</function_call>.
|
|
224
295
|
return ToolFormat.FUNCTION_CALL
|
|
225
296
|
|
|
297
|
+
# Conservative fallback: function-call wrapper (and then _parse_any_format fallback).
|
|
298
|
+
return ToolFormat.FUNCTION_CALL
|
|
299
|
+
|
|
226
300
|
|
|
227
301
|
|
|
228
302
|
|
|
@@ -380,7 +454,9 @@ def _parse_function_call(response: str) -> List[ToolCall]:
|
|
|
380
454
|
for match in re.finditer(pattern, response, re.DOTALL):
|
|
381
455
|
try:
|
|
382
456
|
json_str = match.group(1)
|
|
383
|
-
tool_data =
|
|
457
|
+
tool_data = _loads_dict_like(json_str)
|
|
458
|
+
if not isinstance(tool_data, dict):
|
|
459
|
+
continue
|
|
384
460
|
|
|
385
461
|
tool_call = ToolCall(
|
|
386
462
|
name=tool_data.get("name", ""),
|
|
@@ -399,23 +475,73 @@ def _parse_xml_wrapped(response: str) -> List[ToolCall]:
|
|
|
399
475
|
"""Parse XML-wrapped tool calls."""
|
|
400
476
|
tool_calls = []
|
|
401
477
|
|
|
402
|
-
# Pattern for XML format
|
|
403
|
-
|
|
478
|
+
# Pattern for XML format.
|
|
479
|
+
#
|
|
480
|
+
# Supported inner payloads:
|
|
481
|
+
# 1) JSON-ish dict (our canonical prompted-tool wrapper):
|
|
482
|
+
# <tool_call>{"name":"read_file","arguments":{...}}</tool_call>
|
|
483
|
+
# 2) Nemotron XML-ish wrapper (observed in the wild):
|
|
484
|
+
# <tool_call>
|
|
485
|
+
# <function=write_file>
|
|
486
|
+
# <parameter=file_path>...</parameter>
|
|
487
|
+
# <parameter=content>...</parameter>
|
|
488
|
+
# </function>
|
|
489
|
+
# </tool_call>
|
|
490
|
+
pattern = r'<tool_call>\s*(.*?)\s*</tool_call>'
|
|
491
|
+
|
|
492
|
+
for match in re.finditer(pattern, response, re.DOTALL | re.IGNORECASE):
|
|
493
|
+
body = match.group(1)
|
|
494
|
+
if not isinstance(body, str):
|
|
495
|
+
continue
|
|
404
496
|
|
|
405
|
-
|
|
406
|
-
try:
|
|
407
|
-
json_str = match.group(1)
|
|
408
|
-
tool_data = json.loads(json_str)
|
|
497
|
+
body_stripped = body.strip()
|
|
409
498
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
499
|
+
# Case 1: JSON-ish dict inside <tool_call>...</tool_call>
|
|
500
|
+
if body_stripped.startswith("{") and body_stripped.endswith("}"):
|
|
501
|
+
try:
|
|
502
|
+
tool_data = _loads_dict_like(body_stripped)
|
|
503
|
+
if not isinstance(tool_data, dict):
|
|
504
|
+
continue
|
|
416
505
|
|
|
417
|
-
|
|
418
|
-
|
|
506
|
+
tool_calls.append(ToolCall(
|
|
507
|
+
name=tool_data.get("name", ""),
|
|
508
|
+
arguments=tool_data.get("arguments", {}),
|
|
509
|
+
call_id=tool_data.get("id")
|
|
510
|
+
))
|
|
511
|
+
continue
|
|
512
|
+
except json.JSONDecodeError as e:
|
|
513
|
+
logger.warning(f"Failed to parse XML tool call JSON: {body_stripped} - {e}")
|
|
514
|
+
continue
|
|
515
|
+
|
|
516
|
+
# Case 2: Nemotron XML-ish function/parameter encoding
|
|
517
|
+
func_match = re.search(r'<function\s*=\s*([a-zA-Z0-9_-]+)\s*>', body, re.IGNORECASE)
|
|
518
|
+
if not func_match:
|
|
519
|
+
continue
|
|
520
|
+
func_name = func_match.group(1).strip()
|
|
521
|
+
if not func_name:
|
|
522
|
+
continue
|
|
523
|
+
|
|
524
|
+
arguments: Dict[str, Any] = {}
|
|
525
|
+
for param_match in re.finditer(
|
|
526
|
+
r'<parameter\s*=\s*([a-zA-Z0-9_-]+)\s*>(.*?)</parameter>',
|
|
527
|
+
body,
|
|
528
|
+
re.DOTALL | re.IGNORECASE,
|
|
529
|
+
):
|
|
530
|
+
key = (param_match.group(1) or "").strip()
|
|
531
|
+
raw_value = param_match.group(2) or ""
|
|
532
|
+
if not key:
|
|
533
|
+
continue
|
|
534
|
+
|
|
535
|
+
# Preserve content as-is, but strip the common leading/trailing newline artifacts
|
|
536
|
+
# introduced by pretty-printed tag blocks.
|
|
537
|
+
value = raw_value.replace("\r\n", "\n")
|
|
538
|
+
if value.startswith("\n"):
|
|
539
|
+
value = value[1:]
|
|
540
|
+
if value.endswith("\n"):
|
|
541
|
+
value = value[:-1]
|
|
542
|
+
arguments[key] = value
|
|
543
|
+
|
|
544
|
+
tool_calls.append(ToolCall(name=func_name, arguments=arguments, call_id=None))
|
|
419
545
|
|
|
420
546
|
return tool_calls
|
|
421
547
|
|
|
@@ -432,7 +558,9 @@ def _parse_tool_code(response: str) -> List[ToolCall]:
|
|
|
432
558
|
|
|
433
559
|
# Try to parse as JSON first
|
|
434
560
|
try:
|
|
435
|
-
tool_data =
|
|
561
|
+
tool_data = _loads_dict_like(code_content)
|
|
562
|
+
if not isinstance(tool_data, dict):
|
|
563
|
+
raise json.JSONDecodeError("not a dict", code_content, 0)
|
|
436
564
|
tool_call = ToolCall(
|
|
437
565
|
name=tool_data.get("name", ""),
|
|
438
566
|
arguments=tool_data.get("arguments", {}),
|
|
@@ -449,14 +577,31 @@ def _parse_tool_code(response: str) -> List[ToolCall]:
|
|
|
449
577
|
func_name = func_match.group(1)
|
|
450
578
|
args_str = func_match.group(2)
|
|
451
579
|
|
|
452
|
-
# Simple argument parsing
|
|
580
|
+
# Simple, safe argument parsing for common keyword args.
|
|
453
581
|
arguments = {}
|
|
454
582
|
if args_str.strip():
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
583
|
+
arg_pattern = r'(\w+)\s*=\s*(".*?"|\'.*?\'|[^,\)]+)'
|
|
584
|
+
for arg_match in re.finditer(arg_pattern, args_str):
|
|
585
|
+
key = arg_match.group(1)
|
|
586
|
+
raw_value = arg_match.group(2).strip()
|
|
587
|
+
value: Any = raw_value
|
|
588
|
+
if (raw_value.startswith('"') and raw_value.endswith('"')) or (
|
|
589
|
+
raw_value.startswith("'") and raw_value.endswith("'")
|
|
590
|
+
):
|
|
591
|
+
value = raw_value[1:-1]
|
|
592
|
+
elif raw_value.lower() in ("true", "false"):
|
|
593
|
+
value = raw_value.lower() == "true"
|
|
594
|
+
elif raw_value.lower() in ("none", "null"):
|
|
595
|
+
value = None
|
|
596
|
+
else:
|
|
597
|
+
try:
|
|
598
|
+
value = int(raw_value)
|
|
599
|
+
except Exception:
|
|
600
|
+
try:
|
|
601
|
+
value = float(raw_value)
|
|
602
|
+
except Exception:
|
|
603
|
+
value = raw_value
|
|
604
|
+
arguments[str(key)] = value
|
|
460
605
|
|
|
461
606
|
tool_call = ToolCall(
|
|
462
607
|
name=func_name,
|
|
@@ -477,7 +622,9 @@ def _parse_raw_json(response: str) -> List[ToolCall]:
|
|
|
477
622
|
for match in re.finditer(json_pattern, response):
|
|
478
623
|
try:
|
|
479
624
|
json_str = match.group(0)
|
|
480
|
-
tool_data =
|
|
625
|
+
tool_data = _loads_dict_like(json_str)
|
|
626
|
+
if not isinstance(tool_data, dict):
|
|
627
|
+
continue
|
|
481
628
|
|
|
482
629
|
if "name" in tool_data:
|
|
483
630
|
tool_call = ToolCall(
|
|
@@ -495,7 +642,9 @@ def _parse_raw_json(response: str) -> List[ToolCall]:
|
|
|
495
642
|
for match in re.finditer(code_block_pattern, response, re.DOTALL):
|
|
496
643
|
try:
|
|
497
644
|
json_str = match.group(1).strip()
|
|
498
|
-
tool_data =
|
|
645
|
+
tool_data = _loads_dict_like(json_str)
|
|
646
|
+
if not isinstance(tool_data, dict):
|
|
647
|
+
continue
|
|
499
648
|
|
|
500
649
|
if "name" in tool_data:
|
|
501
650
|
tool_call = ToolCall(
|
|
@@ -511,6 +660,240 @@ def _parse_raw_json(response: str) -> List[ToolCall]:
|
|
|
511
660
|
return tool_calls
|
|
512
661
|
|
|
513
662
|
|
|
663
|
+
def _parse_bracket_tool_prefix(response: str) -> List[ToolCall]:
|
|
664
|
+
"""Parse `tool: [name]: { ... }` format (arguments-only JSON)."""
|
|
665
|
+
tool_calls: List[ToolCall] = []
|
|
666
|
+
if not response:
|
|
667
|
+
return tool_calls
|
|
668
|
+
|
|
669
|
+
def _find_matching_brace(text: str, start: int) -> int:
|
|
670
|
+
"""Return index of the matching '}' for a '{' at `start`, or -1."""
|
|
671
|
+
depth = 0
|
|
672
|
+
in_string = False
|
|
673
|
+
quote = ""
|
|
674
|
+
escaped = False
|
|
675
|
+
|
|
676
|
+
for i in range(start, len(text)):
|
|
677
|
+
ch = text[i]
|
|
678
|
+
|
|
679
|
+
if in_string:
|
|
680
|
+
if escaped:
|
|
681
|
+
escaped = False
|
|
682
|
+
continue
|
|
683
|
+
if ch == "\\":
|
|
684
|
+
escaped = True
|
|
685
|
+
continue
|
|
686
|
+
if ch == quote:
|
|
687
|
+
in_string = False
|
|
688
|
+
quote = ""
|
|
689
|
+
continue
|
|
690
|
+
|
|
691
|
+
if ch in ("'", '"'):
|
|
692
|
+
in_string = True
|
|
693
|
+
quote = ch
|
|
694
|
+
continue
|
|
695
|
+
|
|
696
|
+
if ch == "{":
|
|
697
|
+
depth += 1
|
|
698
|
+
continue
|
|
699
|
+
if ch == "}":
|
|
700
|
+
depth -= 1
|
|
701
|
+
if depth == 0:
|
|
702
|
+
return i
|
|
703
|
+
|
|
704
|
+
return -1
|
|
705
|
+
|
|
706
|
+
# Common in some OSS model tool conventions.
|
|
707
|
+
# Example (single-line):
|
|
708
|
+
# tool: [list_files]: {"directory_path":"rtype","recursive":true}
|
|
709
|
+
# Example (multi-line):
|
|
710
|
+
# tool: [list_files]: {
|
|
711
|
+
# "directory_path": "rtype",
|
|
712
|
+
# "recursive": true
|
|
713
|
+
# }
|
|
714
|
+
header_re = re.compile(r"(?im)^\s*tool\s*:\s*\[([a-zA-Z0-9_\-]+)\]\s*:\s*")
|
|
715
|
+
for match in header_re.finditer(response):
|
|
716
|
+
name = str(match.group(1) or "").strip()
|
|
717
|
+
if not name:
|
|
718
|
+
continue
|
|
719
|
+
|
|
720
|
+
# Find the first opening brace after the header (allow whitespace/newlines).
|
|
721
|
+
brace_start = response.find("{", match.end())
|
|
722
|
+
if brace_start == -1:
|
|
723
|
+
continue
|
|
724
|
+
|
|
725
|
+
# Only allow whitespace between header end and '{' (avoid grabbing unrelated JSON).
|
|
726
|
+
between = response[match.end() : brace_start]
|
|
727
|
+
if between and any(not c.isspace() for c in between):
|
|
728
|
+
continue
|
|
729
|
+
|
|
730
|
+
brace_end = _find_matching_brace(response, brace_start)
|
|
731
|
+
if brace_end == -1:
|
|
732
|
+
continue
|
|
733
|
+
|
|
734
|
+
raw_args = response[brace_start : brace_end + 1]
|
|
735
|
+
args = _loads_dict_like(raw_args)
|
|
736
|
+
if not isinstance(args, dict):
|
|
737
|
+
continue
|
|
738
|
+
|
|
739
|
+
tool_calls.append(ToolCall(name=name, arguments=args))
|
|
740
|
+
|
|
741
|
+
return tool_calls
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def _parse_harmony_tool_prefix(response: str) -> List[ToolCall]:
|
|
745
|
+
"""Parse Harmony/ChatML-style tool calls embedded in content.
|
|
746
|
+
|
|
747
|
+
Example:
|
|
748
|
+
<|channel|>commentary to=list_files <|constrain|>json<|message|>{"directory_path":"./x","recursive":true}
|
|
749
|
+
"""
|
|
750
|
+
tool_calls: List[ToolCall] = []
|
|
751
|
+
if not response:
|
|
752
|
+
return tool_calls
|
|
753
|
+
|
|
754
|
+
if "<|channel|>" not in response or "<|message|>" not in response or "to=" not in response:
|
|
755
|
+
return tool_calls
|
|
756
|
+
|
|
757
|
+
def _find_matching_brace(text: str, start: int) -> int:
|
|
758
|
+
"""Return index of the matching '}' for a '{' at `start`, or -1."""
|
|
759
|
+
depth = 0
|
|
760
|
+
in_string = False
|
|
761
|
+
quote = ""
|
|
762
|
+
escaped = False
|
|
763
|
+
|
|
764
|
+
for i in range(start, len(text)):
|
|
765
|
+
ch = text[i]
|
|
766
|
+
|
|
767
|
+
if in_string:
|
|
768
|
+
if escaped:
|
|
769
|
+
escaped = False
|
|
770
|
+
continue
|
|
771
|
+
if ch == "\\":
|
|
772
|
+
escaped = True
|
|
773
|
+
continue
|
|
774
|
+
if ch == quote:
|
|
775
|
+
in_string = False
|
|
776
|
+
quote = ""
|
|
777
|
+
continue
|
|
778
|
+
|
|
779
|
+
if ch in ("'", '"'):
|
|
780
|
+
in_string = True
|
|
781
|
+
quote = ch
|
|
782
|
+
continue
|
|
783
|
+
|
|
784
|
+
if ch == "{":
|
|
785
|
+
depth += 1
|
|
786
|
+
continue
|
|
787
|
+
if ch == "}":
|
|
788
|
+
depth -= 1
|
|
789
|
+
if depth == 0:
|
|
790
|
+
return i
|
|
791
|
+
|
|
792
|
+
return -1
|
|
793
|
+
|
|
794
|
+
# Match "<|channel|>... to=TOOL_NAME" and then find the following <|message|>{...}.
|
|
795
|
+
header_re = re.compile(
|
|
796
|
+
r"(?i)<\|channel\|>\s*[a-zA-Z0-9_\-]+\s+to=([a-zA-Z0-9_\-\.]+)\b"
|
|
797
|
+
)
|
|
798
|
+
for match in header_re.finditer(response):
|
|
799
|
+
raw_name = str(match.group(1) or "").strip()
|
|
800
|
+
if not raw_name:
|
|
801
|
+
continue
|
|
802
|
+
|
|
803
|
+
# Normalize common prefixes used by some tool-call transcripts.
|
|
804
|
+
name = raw_name
|
|
805
|
+
if name.startswith("functions."):
|
|
806
|
+
name = name.split(".", 1)[1].strip()
|
|
807
|
+
if not name:
|
|
808
|
+
continue
|
|
809
|
+
|
|
810
|
+
# Find the next "<|message|>" after the header.
|
|
811
|
+
msg_tag = "<|message|>"
|
|
812
|
+
msg_start = response.find(msg_tag, match.end())
|
|
813
|
+
if msg_start == -1:
|
|
814
|
+
continue
|
|
815
|
+
|
|
816
|
+
brace_start = response.find("{", msg_start + len(msg_tag))
|
|
817
|
+
if brace_start == -1:
|
|
818
|
+
continue
|
|
819
|
+
|
|
820
|
+
# Only allow whitespace between the message tag and '{'.
|
|
821
|
+
between = response[msg_start + len(msg_tag) : brace_start]
|
|
822
|
+
if between and any(not c.isspace() for c in between):
|
|
823
|
+
continue
|
|
824
|
+
|
|
825
|
+
brace_end = _find_matching_brace(response, brace_start)
|
|
826
|
+
if brace_end == -1:
|
|
827
|
+
# Some models occasionally omit the final closing brace(s) when emitting a
|
|
828
|
+
# Harmony tool transcript. Try a best-effort recovery by balancing braces
|
|
829
|
+
# to the end of the message and parsing the result.
|
|
830
|
+
raw_args = response[brace_start:].strip()
|
|
831
|
+
|
|
832
|
+
def _balance_braces(text: str) -> str:
|
|
833
|
+
depth = 0
|
|
834
|
+
in_string = False
|
|
835
|
+
quote = ""
|
|
836
|
+
escaped = False
|
|
837
|
+
for ch in text:
|
|
838
|
+
if in_string:
|
|
839
|
+
if escaped:
|
|
840
|
+
escaped = False
|
|
841
|
+
continue
|
|
842
|
+
if ch == "\\":
|
|
843
|
+
escaped = True
|
|
844
|
+
continue
|
|
845
|
+
if ch == quote:
|
|
846
|
+
in_string = False
|
|
847
|
+
quote = ""
|
|
848
|
+
continue
|
|
849
|
+
if ch in ("'", '"'):
|
|
850
|
+
in_string = True
|
|
851
|
+
quote = ch
|
|
852
|
+
continue
|
|
853
|
+
if ch == "{":
|
|
854
|
+
depth += 1
|
|
855
|
+
continue
|
|
856
|
+
if ch == "}":
|
|
857
|
+
depth -= 1
|
|
858
|
+
continue
|
|
859
|
+
if depth > 0:
|
|
860
|
+
return text + ("}" * depth)
|
|
861
|
+
return text
|
|
862
|
+
|
|
863
|
+
raw_args = _balance_braces(raw_args)
|
|
864
|
+
else:
|
|
865
|
+
raw_args = response[brace_start : brace_end + 1]
|
|
866
|
+
payload = _loads_dict_like(raw_args)
|
|
867
|
+
if not isinstance(payload, dict):
|
|
868
|
+
continue
|
|
869
|
+
|
|
870
|
+
# Some models (notably OpenAI's gpt-oss via LM Studio) emit a wrapper payload:
|
|
871
|
+
# {"name":"tool_name","arguments":{...},"call_id": "..."}
|
|
872
|
+
# In that case, unwrap `arguments` so runtime tool execution receives only
|
|
873
|
+
# the tool kwargs (and not unexpected keys like "name").
|
|
874
|
+
call_id = None
|
|
875
|
+
args: Any = payload
|
|
876
|
+
if "arguments" in payload:
|
|
877
|
+
inner_args = payload.get("arguments")
|
|
878
|
+
if isinstance(inner_args, dict):
|
|
879
|
+
args = inner_args
|
|
880
|
+
elif isinstance(inner_args, str):
|
|
881
|
+
parsed = _loads_dict_like(inner_args)
|
|
882
|
+
if isinstance(parsed, dict):
|
|
883
|
+
args = parsed
|
|
884
|
+
|
|
885
|
+
call_id_value = payload.get("call_id") or payload.get("id")
|
|
886
|
+
if isinstance(call_id_value, str) and call_id_value.strip():
|
|
887
|
+
call_id = call_id_value.strip()
|
|
888
|
+
|
|
889
|
+
if not isinstance(args, dict):
|
|
890
|
+
continue
|
|
891
|
+
|
|
892
|
+
tool_calls.append(ToolCall(name=name, arguments=args, call_id=call_id))
|
|
893
|
+
|
|
894
|
+
return tool_calls
|
|
895
|
+
|
|
896
|
+
|
|
514
897
|
def _parse_any_format(response: str) -> List[ToolCall]:
|
|
515
898
|
"""Try all parsing formats with comprehensive fallbacks."""
|
|
516
899
|
# SANITIZE FIRST: Fix malformed tags before trying any parser
|
|
@@ -524,6 +907,8 @@ def _parse_any_format(response: str) -> List[ToolCall]:
|
|
|
524
907
|
_parse_function_call,
|
|
525
908
|
_parse_xml_wrapped,
|
|
526
909
|
_parse_tool_code,
|
|
910
|
+
_parse_harmony_tool_prefix,
|
|
911
|
+
_parse_bracket_tool_prefix,
|
|
527
912
|
_parse_raw_json
|
|
528
913
|
]
|
|
529
914
|
|
|
@@ -542,7 +927,11 @@ def _parse_any_format(response: str) -> List[ToolCall]:
|
|
|
542
927
|
unique_calls = []
|
|
543
928
|
seen = set()
|
|
544
929
|
for call in tool_calls:
|
|
545
|
-
|
|
930
|
+
try:
|
|
931
|
+
args_key = json.dumps(call.arguments, sort_keys=True, ensure_ascii=False)
|
|
932
|
+
except Exception:
|
|
933
|
+
args_key = str(call.arguments)
|
|
934
|
+
call_key = (call.name, args_key)
|
|
546
935
|
if call_key not in seen:
|
|
547
936
|
seen.add(call_key)
|
|
548
937
|
unique_calls.append(call)
|
|
@@ -592,181 +981,269 @@ def _parse_python_code_blocks(response: str) -> List[ToolCall]:
|
|
|
592
981
|
|
|
593
982
|
# Formatting functions
|
|
594
983
|
|
|
595
|
-
def
|
|
984
|
+
def _format_parameters_compact(parameters: Dict[str, Any]) -> str:
|
|
985
|
+
"""Render a compact, human/LLM-friendly parameter summary.
|
|
986
|
+
|
|
987
|
+
We intentionally avoid dumping full JSON schema here to keep the tool prompt small.
|
|
988
|
+
"""
|
|
989
|
+
if not isinstance(parameters, dict) or not parameters:
|
|
990
|
+
return "(none)"
|
|
991
|
+
|
|
992
|
+
def _fmt_default(value: Any) -> str:
|
|
993
|
+
try:
|
|
994
|
+
return json.dumps(value, ensure_ascii=False)
|
|
995
|
+
except Exception:
|
|
996
|
+
return str(value)
|
|
997
|
+
|
|
998
|
+
parts: List[str] = []
|
|
999
|
+
for name in sorted([k for k in parameters.keys() if isinstance(k, str)]):
|
|
1000
|
+
meta = parameters.get(name)
|
|
1001
|
+
ptype = "any"
|
|
1002
|
+
required = True
|
|
1003
|
+
default_repr: Optional[str] = None
|
|
1004
|
+
|
|
1005
|
+
if isinstance(meta, dict):
|
|
1006
|
+
if isinstance(meta.get("type"), str) and meta.get("type"):
|
|
1007
|
+
ptype = str(meta.get("type"))
|
|
1008
|
+
required = "default" not in meta
|
|
1009
|
+
if not required:
|
|
1010
|
+
default_value = meta.get("default")
|
|
1011
|
+
# Avoid printing `default null` / `default None` in prompts; treat that as optional.
|
|
1012
|
+
if default_value is not None:
|
|
1013
|
+
default_repr = _fmt_default(default_value)
|
|
1014
|
+
else:
|
|
1015
|
+
required = True
|
|
1016
|
+
|
|
1017
|
+
if required:
|
|
1018
|
+
parts.append(f"{name}: {ptype} (required)")
|
|
1019
|
+
elif default_repr is not None:
|
|
1020
|
+
parts.append(f"{name}: {ptype} (default {default_repr})")
|
|
1021
|
+
else:
|
|
1022
|
+
parts.append(f"{name}: {ptype} (optional)")
|
|
1023
|
+
|
|
1024
|
+
return ", ".join(parts) if parts else "(none)"
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
def _append_tool_examples(
|
|
1028
|
+
prompt: str,
|
|
1029
|
+
tools: List[ToolDefinition],
|
|
1030
|
+
*,
|
|
1031
|
+
tool_format: ToolFormat,
|
|
1032
|
+
max_examples_total: int = 6,
|
|
1033
|
+
) -> str:
|
|
1034
|
+
"""Append a small, globally-capped examples section.
|
|
1035
|
+
|
|
1036
|
+
Notes:
|
|
1037
|
+
- Examples are useful, but they are extremely token-expensive when included per-tool.
|
|
1038
|
+
- We cap examples globally and prioritize the "core editing loop" tools first.
|
|
1039
|
+
"""
|
|
1040
|
+
if max_examples_total <= 0:
|
|
1041
|
+
return prompt
|
|
1042
|
+
|
|
1043
|
+
tools_with_examples = [t for t in tools if getattr(t, "examples", None)]
|
|
1044
|
+
if not tools_with_examples:
|
|
1045
|
+
return prompt
|
|
1046
|
+
|
|
1047
|
+
by_name = {t.name: t for t in tools_with_examples if isinstance(t.name, str) and t.name}
|
|
1048
|
+
preferred_order = [
|
|
1049
|
+
"list_files",
|
|
1050
|
+
"search_files",
|
|
1051
|
+
"read_file",
|
|
1052
|
+
"edit_file",
|
|
1053
|
+
"write_file",
|
|
1054
|
+
"execute_command",
|
|
1055
|
+
"fetch_url",
|
|
1056
|
+
"web_search",
|
|
1057
|
+
]
|
|
1058
|
+
|
|
1059
|
+
ordered_names = []
|
|
1060
|
+
seen: set[str] = set()
|
|
1061
|
+
for name in preferred_order:
|
|
1062
|
+
if name in by_name and name not in seen:
|
|
1063
|
+
ordered_names.append(name)
|
|
1064
|
+
seen.add(name)
|
|
1065
|
+
for name in sorted(by_name.keys()):
|
|
1066
|
+
if name not in seen:
|
|
1067
|
+
ordered_names.append(name)
|
|
1068
|
+
|
|
1069
|
+
out = prompt + "**EXAMPLES:**\n\n"
|
|
1070
|
+
added = 0
|
|
1071
|
+
for name in ordered_names:
|
|
1072
|
+
tool = by_name.get(name)
|
|
1073
|
+
if tool is None:
|
|
1074
|
+
continue
|
|
1075
|
+
examples = getattr(tool, "examples", None)
|
|
1076
|
+
if not isinstance(examples, list) or not examples:
|
|
1077
|
+
continue
|
|
1078
|
+
example = examples[0] if isinstance(examples[0], dict) else {}
|
|
1079
|
+
desc = str(example.get("description") or "Example").strip()
|
|
1080
|
+
args = example.get("arguments")
|
|
1081
|
+
args_dict = dict(args) if isinstance(args, dict) else {}
|
|
1082
|
+
|
|
1083
|
+
out += f"- {tool.name}: {desc}\n"
|
|
1084
|
+
out += _format_tool_call_example(tool.name, args_dict, tool_format) + "\n\n"
|
|
1085
|
+
added += 1
|
|
1086
|
+
if added >= max_examples_total:
|
|
1087
|
+
break
|
|
1088
|
+
|
|
1089
|
+
return out
|
|
1090
|
+
|
|
1091
|
+
|
|
1092
|
+
def _format_qwen_style(tools: List[ToolDefinition], *, include_tool_list: bool = True, include_examples: bool = True) -> str:
    """Build the tool-usage prompt for Qwen-style models (``<|tool_call|>`` blocks).

    Args:
        tools: Tool definitions to describe; an empty value yields ``""``.
        include_tool_list: When true, emit one entry per tool (name, description
            and, if present, its parameters rendered by ``_format_parameters_compact``).
        include_examples: When true, pass the finished prompt through
            ``_append_tool_examples`` using the SPECIAL_TOKEN call format.

    Returns:
        The assembled prompt text.
    """
    if not tools:
        return ""

    # Collect the fragments in order and join once at the end.
    pieces = ["You are a helpful AI assistant with access to the following tools:\n\n"]

    if include_tool_list:
        for tool in tools:
            pieces.append(f"**{tool.name}**: {tool.description}\n")
            if tool.parameters:
                pieces.append(f" • **Args**: {_format_parameters_compact(tool.parameters)}\n")
            pieces.append("\n")

    pieces.append(
        "To use a tool, respond with one or more tool-call blocks (no other text):\n"
        "<|tool_call|>\n"
        '{"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}\n'
        "</|tool_call|>\n"
        "\n"
        "To call multiple tools, repeat the block once per call.\n"
    )
    pieces.append(_critical_rules())

    rendered = "".join(pieces)
    if not include_examples:
        return rendered
    return _append_tool_examples(rendered, tools, tool_format=ToolFormat.SPECIAL_TOKEN)
|
|
640
1119
|
|
|
641
1120
|
|
|
642
|
-
def _format_llama_style(tools: List[ToolDefinition]) -> str:
|
|
1121
|
+
def _format_llama_style(tools: List[ToolDefinition], *, include_tool_list: bool = True, include_examples: bool = True) -> str:
    """Build the tool-usage prompt for LLaMA-style models (``<function_call>`` blocks).

    Args:
        tools: Tool definitions to describe; an empty value yields ``""``.
        include_tool_list: When true, emit one entry per tool (name, description
            and, if present, its parameters rendered by ``_format_parameters_compact``).
        include_examples: When true, pass the finished prompt through
            ``_append_tool_examples`` using the FUNCTION_CALL call format.

    Returns:
        The assembled prompt text.
    """
    if not tools:
        return ""

    # Collect the fragments in order and join once at the end.
    pieces = ["You have access to the following functions. Use them when needed:\n\n"]

    if include_tool_list:
        for tool in tools:
            pieces.append(f"**{tool.name}**: {tool.description}\n")
            if tool.parameters:
                pieces.append(f" • **Args**: {_format_parameters_compact(tool.parameters)}\n")
            pieces.append("\n")

    pieces.append(
        "To call a function, output one or more <function_call> blocks (no other text):\n"
        "<function_call>\n"
        '{"name": "function_name", "arguments": {"param1": "value1", "param2": "value2"}}\n'
        "</function_call>\n"
        "\n"
        "To call multiple functions, repeat the block once per call.\n"
    )
    pieces.append(_critical_rules())

    rendered = "".join(pieces)
    if not include_examples:
        return rendered
    return _append_tool_examples(rendered, tools, tool_format=ToolFormat.FUNCTION_CALL)
|
|
686
1147
|
|
|
687
1148
|
|
|
688
|
-
def _format_xml_style(tools: List[ToolDefinition]) -> str:
|
|
1149
|
+
def _format_xml_style(tools: List[ToolDefinition], *, include_tool_list: bool = True, include_examples: bool = True) -> str:
    """Build the tool-usage prompt for models that expect ``<tool_call>`` XML wrappers.

    Args:
        tools: Tool definitions to describe; an empty value yields ``""``.
        include_tool_list: When true, emit a ``<tool name="...">`` element per tool
            containing its description and, if present, its parameters rendered
            by ``_format_parameters_compact``.
        include_examples: When true, pass the finished prompt through
            ``_append_tool_examples`` using the XML_WRAPPED call format.

    Returns:
        The assembled prompt text.
    """
    if not tools:
        return ""

    # Collect the fragments in order and join once at the end.
    pieces = ["You have access to these tools:\n\n"]

    if include_tool_list:
        for tool in tools:
            pieces.append(f'<tool name="{tool.name}">\n')
            pieces.append(f" <description>{tool.description}</description>\n")
            if tool.parameters:
                pieces.append(f" <args>{_format_parameters_compact(tool.parameters)}</args>\n")
            pieces.append("</tool>\n\n")

    pieces.append(
        "To use a tool, output one or more <tool_call> blocks (no other text):\n"
        "<tool_call>\n"
        '{"name": "tool_name", "arguments": {"param1": "value1"}}\n'
        "</tool_call>\n"
        "\n"
        "To call multiple tools, repeat the block once per call.\n"
    )
    pieces.append(_critical_rules())

    rendered = "".join(pieces)
    if not include_examples:
        return rendered
    return _append_tool_examples(rendered, tools, tool_format=ToolFormat.XML_WRAPPED)
|
|
709
1176
|
|
|
710
1177
|
|
|
711
|
-
def
|
|
1178
|
+
def _format_json_style(tools: List[ToolDefinition], *, include_tool_list: bool = True, include_examples: bool = True) -> str:
    """Build the tool-usage prompt for models that emit raw JSON tool calls in content.

    Args:
        tools: Tool definitions to describe; an empty value yields ``""``.
        include_tool_list: When true, emit a ``- name: description`` bullet per tool
            followed, if present, by its parameters rendered by
            ``_format_parameters_compact``.
        include_examples: When true, pass the finished prompt through
            ``_append_tool_examples`` using the RAW_JSON call format.

    Returns:
        The assembled prompt text.
    """
    if not tools:
        return ""

    # Collect the fragments in order and join once at the end.
    pieces = ["You have access to the following tools:\n\n"]

    if include_tool_list:
        for tool in tools:
            pieces.append(f"- {tool.name}: {tool.description}\n")
            if tool.parameters:
                pieces.append(f" args: {_format_parameters_compact(tool.parameters)}\n")

    pieces.append(
        "To use a tool, respond with one or more JSON objects (no extra text):\n"
        '{"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}\n'
        "\n"
        "To call multiple tools, output multiple JSON objects (one per line/block).\n"
    )
    pieces.append(_critical_rules())

    rendered = "".join(pieces)
    if not include_examples:
        return rendered
    return _append_tool_examples(rendered, tools, tool_format=ToolFormat.RAW_JSON)
|
|
1201
|
+
|
|
1202
|
+
|
|
1203
|
+
def _format_gemma_style(tools: List[ToolDefinition], *, include_tool_list: bool = True, include_examples: bool = True) -> str:
    """Format tools for Gemma models using ```tool_code``` blocks.

    Args:
        tools: Tool definitions to describe; an empty value yields ``""``.
        include_tool_list: When true, emit one entry per tool (name, description
            and, if present, its parameters rendered by ``_format_parameters_compact``).
        include_examples: When true, pass the prompt through
            ``_append_tool_examples`` using the TOOL_CODE call format.

    Returns:
        The assembled prompt text.
    """
    if not tools:
        return ""

    prompt = "You can use these tools by writing tool_code blocks:\n\n"

    if include_tool_list:
        for tool in tools:
            prompt += f"**{tool.name}**: {tool.description}\n"
            if tool.parameters:
                prompt += f"Args: {_format_parameters_compact(tool.parameters)}\n"
            prompt += "\n"

    prompt += """To call a tool, output one or more tool_code blocks (no other text):
```tool_code
{"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
```

To call multiple tools, repeat the block once per call."""

    if include_examples:
        # The instruction text above has no trailing newline (unlike the other
        # formatters, which end with the newline-terminated critical rules), and
        # the examples helper prepends its "**EXAMPLES:**" header directly to the
        # prompt. Insert a blank line so the header does not get glued onto the
        # last sentence ("...per call.**EXAMPLES:**").
        separated = prompt + "\n\n"
        augmented = _append_tool_examples(separated, tools, tool_format=ToolFormat.TOOL_CODE)
        # Keep the previous output byte-for-byte when no examples were appended.
        if augmented != separated:
            prompt = augmented

    return prompt
|
|
731
1228
|
|
|
732
1229
|
|
|
733
|
-
def _format_generic_style(tools: List[ToolDefinition]) -> str:
|
|
1230
|
+
def _format_generic_style(tools: List[ToolDefinition], *, include_tool_list: bool = True, include_examples: bool = True) -> str:
    """Build the fallback tool-usage prompt used for unknown architectures.

    Args:
        tools: Tool definitions to describe; an empty value yields ``""``.
        include_tool_list: When true, emit a ``- name: description`` bullet per tool
            followed, if present, by its parameters rendered by
            ``_format_parameters_compact``.
        include_examples: When true, pass the finished prompt through
            ``_append_tool_examples`` using the RAW_JSON call format.

    Returns:
        The assembled prompt text.
    """
    if not tools:
        return ""

    # Collect the fragments in order and join once at the end.
    pieces = ["You have access to the following tools:\n\n"]

    if include_tool_list:
        for tool in tools:
            pieces.append(f"- {tool.name}: {tool.description}\n")
            if tool.parameters:
                pieces.append(f" args: {_format_parameters_compact(tool.parameters)}\n")

    pieces.append(_critical_rules())

    rendered = "".join(pieces)
    if not include_examples:
        return rendered
    return _append_tool_examples(rendered, tools, tool_format=ToolFormat.RAW_JSON)
|
|
772
1249
|
|
|
@@ -793,6 +1270,158 @@ def clean_tool_syntax(content: str, tool_calls: List[ToolCall] = None) -> str:
|
|
|
793
1270
|
|
|
794
1271
|
import re
|
|
795
1272
|
|
|
1273
|
+
# Strip Harmony/ChatML tool-call segments first (balanced JSON after <|message|>).
|
|
1274
|
+
# Regex alone is brittle here because tool arguments can contain nested braces.
|
|
1275
|
+
if "<|channel|>" in content and "<|message|>" in content and "to=" in content:
|
|
1276
|
+
def _find_matching_brace(text: str, start: int) -> int:
|
|
1277
|
+
depth = 0
|
|
1278
|
+
in_string = False
|
|
1279
|
+
quote = ""
|
|
1280
|
+
escaped = False
|
|
1281
|
+
for i in range(start, len(text)):
|
|
1282
|
+
ch = text[i]
|
|
1283
|
+
if in_string:
|
|
1284
|
+
if escaped:
|
|
1285
|
+
escaped = False
|
|
1286
|
+
continue
|
|
1287
|
+
if ch == "\\":
|
|
1288
|
+
escaped = True
|
|
1289
|
+
continue
|
|
1290
|
+
if ch == quote:
|
|
1291
|
+
in_string = False
|
|
1292
|
+
quote = ""
|
|
1293
|
+
continue
|
|
1294
|
+
if ch in ("'", '"'):
|
|
1295
|
+
in_string = True
|
|
1296
|
+
quote = ch
|
|
1297
|
+
continue
|
|
1298
|
+
if ch == "{":
|
|
1299
|
+
depth += 1
|
|
1300
|
+
continue
|
|
1301
|
+
if ch == "}":
|
|
1302
|
+
depth -= 1
|
|
1303
|
+
if depth == 0:
|
|
1304
|
+
return i
|
|
1305
|
+
return -1
|
|
1306
|
+
|
|
1307
|
+
def _consume_trailing_kv_fragment(text: str, start_idx: int) -> int:
|
|
1308
|
+
"""Consume malformed trailing JSON key/value fragments after a closed object.
|
|
1309
|
+
|
|
1310
|
+
Some models (notably some OSS models emitting Harmony tool transcripts) occasionally
|
|
1311
|
+
close the JSON object early and then continue emitting extra fields outside of it,
|
|
1312
|
+
e.g.:
|
|
1313
|
+
<|message|>{"name":"write_file","arguments":{...},"call_id":null},"mode":"w"}
|
|
1314
|
+
|
|
1315
|
+
Tool parsing can still succeed (the prefix is valid), but the tail fragment must
|
|
1316
|
+
not leak into cleaned assistant content (it otherwise shows up as "Thought" in UIs).
|
|
1317
|
+
"""
|
|
1318
|
+
i = start_idx
|
|
1319
|
+
while i < len(text) and text[i].isspace():
|
|
1320
|
+
i += 1
|
|
1321
|
+
if i >= len(text) or text[i] != ",":
|
|
1322
|
+
return start_idx
|
|
1323
|
+
|
|
1324
|
+
# Quick heuristic: only treat as a JSON-ish continuation if we see `,"key":...`.
|
|
1325
|
+
j = i + 1
|
|
1326
|
+
while j < len(text) and text[j].isspace():
|
|
1327
|
+
j += 1
|
|
1328
|
+
if j >= len(text) or text[j] not in ("'", '"'):
|
|
1329
|
+
return start_idx
|
|
1330
|
+
|
|
1331
|
+
in_string = False
|
|
1332
|
+
quote = ""
|
|
1333
|
+
escaped = False
|
|
1334
|
+
brace_depth = 0
|
|
1335
|
+
saw_colon = False
|
|
1336
|
+
pos = i
|
|
1337
|
+
while pos < len(text):
|
|
1338
|
+
# Do not swallow the next Harmony segment (if any).
|
|
1339
|
+
if not in_string and text.startswith("<|channel|>", pos):
|
|
1340
|
+
return pos
|
|
1341
|
+
|
|
1342
|
+
ch = text[pos]
|
|
1343
|
+
if in_string:
|
|
1344
|
+
if escaped:
|
|
1345
|
+
escaped = False
|
|
1346
|
+
pos += 1
|
|
1347
|
+
continue
|
|
1348
|
+
if ch == "\\":
|
|
1349
|
+
escaped = True
|
|
1350
|
+
pos += 1
|
|
1351
|
+
continue
|
|
1352
|
+
if ch == quote:
|
|
1353
|
+
in_string = False
|
|
1354
|
+
quote = ""
|
|
1355
|
+
pos += 1
|
|
1356
|
+
continue
|
|
1357
|
+
pos += 1
|
|
1358
|
+
continue
|
|
1359
|
+
|
|
1360
|
+
if ch in ("'", '"'):
|
|
1361
|
+
in_string = True
|
|
1362
|
+
quote = ch
|
|
1363
|
+
pos += 1
|
|
1364
|
+
continue
|
|
1365
|
+
|
|
1366
|
+
if ch == ":":
|
|
1367
|
+
saw_colon = True
|
|
1368
|
+
elif ch == "{":
|
|
1369
|
+
brace_depth += 1
|
|
1370
|
+
elif ch == "}":
|
|
1371
|
+
if saw_colon and brace_depth == 0:
|
|
1372
|
+
return pos + 1
|
|
1373
|
+
if brace_depth > 0:
|
|
1374
|
+
brace_depth -= 1
|
|
1375
|
+
pos += 1
|
|
1376
|
+
|
|
1377
|
+
return len(text) if saw_colon else start_idx
|
|
1378
|
+
|
|
1379
|
+
msg_tag = "<|message|>"
|
|
1380
|
+
out_parts = []
|
|
1381
|
+
i = 0
|
|
1382
|
+
while i < len(content):
|
|
1383
|
+
start = content.find("<|channel|>", i)
|
|
1384
|
+
if start == -1:
|
|
1385
|
+
out_parts.append(content[i:])
|
|
1386
|
+
break
|
|
1387
|
+
out_parts.append(content[i:start])
|
|
1388
|
+
|
|
1389
|
+
msg_start = content.find(msg_tag, start)
|
|
1390
|
+
if msg_start == -1:
|
|
1391
|
+
out_parts.append(content[start:])
|
|
1392
|
+
break
|
|
1393
|
+
# Only treat as a tool call when there's a `to=` directive before the message tag.
|
|
1394
|
+
if "to=" not in content[start:msg_start]:
|
|
1395
|
+
out_parts.append(content[start:msg_start])
|
|
1396
|
+
i = msg_start
|
|
1397
|
+
continue
|
|
1398
|
+
|
|
1399
|
+
brace_start = content.find("{", msg_start + len(msg_tag))
|
|
1400
|
+
if brace_start == -1:
|
|
1401
|
+
out_parts.append(content[start:msg_start])
|
|
1402
|
+
i = msg_start
|
|
1403
|
+
continue
|
|
1404
|
+
between = content[msg_start + len(msg_tag) : brace_start]
|
|
1405
|
+
if between and any(not c.isspace() for c in between):
|
|
1406
|
+
out_parts.append(content[start:brace_start])
|
|
1407
|
+
i = brace_start
|
|
1408
|
+
continue
|
|
1409
|
+
|
|
1410
|
+
brace_end = _find_matching_brace(content, brace_start)
|
|
1411
|
+
if brace_end == -1:
|
|
1412
|
+
# Best-effort: drop the remainder of this segment up to the next Harmony marker
|
|
1413
|
+
# (or to end-of-content). Leaving partial tool payloads in `content` is more
|
|
1414
|
+
# harmful (it breaks agent scratchpads and UI "Thought" rendering).
|
|
1415
|
+
next_start = content.find("<|channel|>", brace_start + 1)
|
|
1416
|
+
if next_start == -1:
|
|
1417
|
+
break
|
|
1418
|
+
i = next_start
|
|
1419
|
+
continue
|
|
1420
|
+
|
|
1421
|
+
i = _consume_trailing_kv_fragment(content, brace_end + 1)
|
|
1422
|
+
|
|
1423
|
+
content = "".join(out_parts)
|
|
1424
|
+
|
|
796
1425
|
# Use the same sophisticated patterns as the _parse_special_token function
|
|
797
1426
|
patterns = [
|
|
798
1427
|
# Strategy 1: Properly closed <|tool_call|> tags
|
|
@@ -809,6 +1438,19 @@ def clean_tool_syntax(content: str, tool_calls: List[ToolCall] = None) -> str:
|
|
|
809
1438
|
r'<tool_call>.*?</tool_call>',
|
|
810
1439
|
r'```tool_code.*?```',
|
|
811
1440
|
r'```tool_call.*?```'
|
|
1441
|
+
,
|
|
1442
|
+
# CLI-like prefix format: tool: [name]: {...}
|
|
1443
|
+
r'(?im)^\s*tool\s*:\s*\[[^\]]+\]\s*:\s*\{.*\}\s*$',
|
|
1444
|
+
# Harmony/ChatML tool-call transcript format:
|
|
1445
|
+
# <|channel|>commentary to=tool <|constrain|>json<|message|>{...}
|
|
1446
|
+
r'(?is)<\|channel\|>\s*[a-zA-Z0-9_\-]+\s+to=[a-zA-Z0-9_\-\.]+\b.*?<\|message\|>\s*\{.*?\}',
|
|
1447
|
+
# Orphan tags (some models emit a closing tag on its own line)
|
|
1448
|
+
r'(?im)^\s*<\|tool_call\|>\s*$',
|
|
1449
|
+
r'(?im)^\s*</\|tool_call\|>\s*$',
|
|
1450
|
+
r'(?im)^\s*<tool_call>\s*$',
|
|
1451
|
+
r'(?im)^\s*</tool_call>\s*$',
|
|
1452
|
+
r'(?im)^\s*<\|channel\|>\s*$',
|
|
1453
|
+
r'(?im)^\s*<\|message\|>\s*$',
|
|
812
1454
|
]
|
|
813
1455
|
|
|
814
1456
|
# Apply all patterns
|
|
@@ -831,7 +1473,7 @@ def _format_tool_call_example(tool_name: str, arguments: Dict[str, Any], tool_fo
|
|
|
831
1473
|
Returns:
|
|
832
1474
|
Formatted tool call example string
|
|
833
1475
|
"""
|
|
834
|
-
tool_call_json = json.dumps({"name": tool_name, "arguments": arguments})
|
|
1476
|
+
tool_call_json = json.dumps({"name": tool_name, "arguments": arguments}, separators=(",", ":"), ensure_ascii=False)
|
|
835
1477
|
|
|
836
1478
|
if tool_format == ToolFormat.SPECIAL_TOKEN:
|
|
837
1479
|
# Qwen3, GLM-4.5+ format
|
|
@@ -861,13 +1503,14 @@ def _critical_rules():
|
|
|
861
1503
|
Returns:
|
|
862
1504
|
str: The critical rules for tool usage.
|
|
863
1505
|
"""
|
|
864
|
-
return
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
""
|
|
1506
|
+
return (
|
|
1507
|
+
"CRITICAL RULES FOR TOOL USAGE:\n"
|
|
1508
|
+
"1. If you can answer directly, do not call a tool.\n"
|
|
1509
|
+
"2. If you need info or an action, call the smallest relevant tool.\n"
|
|
1510
|
+
"3. Do not call tools to show off; if asked, describe capabilities.\n"
|
|
1511
|
+
"4. The \"name\" field must be top-level (not inside \"arguments\").\n"
|
|
1512
|
+
"5. Use the exact tool-call JSON structure.\n"
|
|
1513
|
+
"6. Never fabricate tool results; outputs are returned separately.\n"
|
|
1514
|
+
"7. Do not write your own `tool:` result lines.\n"
|
|
1515
|
+
"8. You MAY batch multiple tool calls by repeating the tool-call block once per call (prefer independent calls).\n"
|
|
1516
|
+
)
|