abstractcore 2.6.8__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +789 -136
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +300 -83
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +537 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +65 -28
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +5 -4
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2311 -772
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +798 -155
- abstractcore/tools/registry.py +107 -2
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
abstractcore/tools/registry.py
CHANGED
|
@@ -17,6 +17,35 @@ from ..utils.structured_logging import get_logger
|
|
|
17
17
|
logger = get_logger(__name__)
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def _error_from_output(value: Any) -> Optional[str]:
    """Return an error message when a tool output signals failure, else ``None``.

    Two conventions are recognised:

    * dict outputs carrying ``"success": False`` or ``"ok": False``; the message
      is taken from ``"error"``, then ``"message"``, then a generic fallback;
    * string outputs that start with ``Error:`` or with one of the failure
      emoji markers (optionally followed by ``Error:``).

    Any other value (including dicts that do not explicitly report failure) is
    treated as a normal, successful output.
    """
    fallback = "Tool reported failure"
    error_prefix = "Error:"

    if isinstance(value, dict):
        # Only an explicit ``False`` counts; missing or falsy-but-not-False flags do not.
        flags = (value.get("success"), value.get("ok"))
        if not any(flag is False for flag in flags):
            return None
        detail = value.get("error") or value.get("message") or fallback
        return str(detail).strip() or fallback

    if not isinstance(value, str):
        return None

    stripped = value.strip()
    if stripped.startswith(error_prefix):
        remainder = stripped[len(error_prefix):].strip()
    elif stripped.startswith(("❌", "🚫", "⏰")):
        remainder = stripped.lstrip("❌🚫⏰").strip()
        if remainder.startswith(error_prefix):
            remainder = remainder[len(error_prefix):].strip()
    else:
        # Empty strings and ordinary text are not failures.
        return None

    # If nothing is left after removing the markers, report the original text.
    return remainder or stripped
|
|
47
|
+
|
|
48
|
+
|
|
20
49
|
class ToolRegistry:
|
|
21
50
|
"""Registry for managing available tools."""
|
|
22
51
|
|
|
@@ -149,10 +178,41 @@ class ToolRegistry:
|
|
|
149
178
|
return error_result
|
|
150
179
|
|
|
151
180
|
try:
|
|
181
|
+
from .arg_canonicalizer import canonicalize_tool_arguments
|
|
182
|
+
|
|
183
|
+
arguments = canonicalize_tool_arguments(tool_call.name, tool_call.arguments)
|
|
184
|
+
|
|
152
185
|
# Execute the function with the provided arguments
|
|
153
|
-
result = tool_def.function(**
|
|
186
|
+
result = tool_def.function(**arguments)
|
|
154
187
|
duration_ms = (time.time() - start_time) * 1000
|
|
155
188
|
|
|
189
|
+
implied_error = _error_from_output(result)
|
|
190
|
+
if implied_error is not None:
|
|
191
|
+
error_result = ToolResult(
|
|
192
|
+
call_id=tool_call.call_id or "",
|
|
193
|
+
# Preserve structured outputs for post-mortem evidence/provenance.
|
|
194
|
+
# For string-only error outputs, store the message in `error` and keep output empty.
|
|
195
|
+
output=result if not isinstance(result, str) else "",
|
|
196
|
+
error=implied_error,
|
|
197
|
+
success=False,
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
# Emit tool error event
|
|
201
|
+
result_data = create_tool_event(
|
|
202
|
+
tool_name=tool_call.name,
|
|
203
|
+
arguments=arguments,
|
|
204
|
+
success=False,
|
|
205
|
+
error=implied_error,
|
|
206
|
+
)
|
|
207
|
+
emit_global(
|
|
208
|
+
EventType.TOOL_COMPLETED,
|
|
209
|
+
result_data,
|
|
210
|
+
source="ToolRegistry",
|
|
211
|
+
duration_ms=duration_ms,
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
return error_result
|
|
215
|
+
|
|
156
216
|
success_result = ToolResult(
|
|
157
217
|
call_id=tool_call.call_id or "",
|
|
158
218
|
output=result,
|
|
@@ -162,7 +222,7 @@ class ToolRegistry:
|
|
|
162
222
|
# Emit successful tool result event
|
|
163
223
|
result_data = create_tool_event(
|
|
164
224
|
tool_name=tool_call.name,
|
|
165
|
-
arguments=
|
|
225
|
+
arguments=arguments,
|
|
166
226
|
result=result,
|
|
167
227
|
success=True
|
|
168
228
|
)
|
|
@@ -172,6 +232,51 @@ class ToolRegistry:
|
|
|
172
232
|
return success_result
|
|
173
233
|
|
|
174
234
|
except TypeError as e:
|
|
235
|
+
# Some models include wrapper/meta keys ("name", nested "arguments") or
|
|
236
|
+
# stray extras in tool kwargs. Retry once with a sanitized argument dict.
|
|
237
|
+
try:
|
|
238
|
+
wrapper_keys = {"name", "arguments", "call_id", "id"}
|
|
239
|
+
from .arg_canonicalizer import canonicalize_tool_arguments
|
|
240
|
+
|
|
241
|
+
args = canonicalize_tool_arguments(tool_call.name, tool_call.arguments)
|
|
242
|
+
for _ in range(4):
|
|
243
|
+
inner = args.get("arguments")
|
|
244
|
+
if not isinstance(inner, dict):
|
|
245
|
+
break
|
|
246
|
+
extras = {k: v for k, v in args.items() if k not in wrapper_keys}
|
|
247
|
+
merged = dict(inner)
|
|
248
|
+
for k, v in extras.items():
|
|
249
|
+
merged.setdefault(k, v)
|
|
250
|
+
args = merged
|
|
251
|
+
|
|
252
|
+
allowed = set(tool_def.parameters.keys()) if isinstance(tool_def.parameters, dict) else set()
|
|
253
|
+
if allowed:
|
|
254
|
+
args = {k: v for k, v in args.items() if k in allowed}
|
|
255
|
+
|
|
256
|
+
if args != dict(tool_call.arguments or {}):
|
|
257
|
+
result = tool_def.function(**args)
|
|
258
|
+
duration_ms = (time.time() - start_time) * 1000
|
|
259
|
+
success_result = ToolResult(
|
|
260
|
+
call_id=tool_call.call_id or "",
|
|
261
|
+
output=result,
|
|
262
|
+
success=True,
|
|
263
|
+
)
|
|
264
|
+
result_data = create_tool_event(
|
|
265
|
+
tool_name=tool_call.name,
|
|
266
|
+
arguments=args,
|
|
267
|
+
result=result,
|
|
268
|
+
success=True,
|
|
269
|
+
)
|
|
270
|
+
emit_global(
|
|
271
|
+
EventType.TOOL_COMPLETED,
|
|
272
|
+
result_data,
|
|
273
|
+
source="ToolRegistry",
|
|
274
|
+
duration_ms=duration_ms,
|
|
275
|
+
)
|
|
276
|
+
return success_result
|
|
277
|
+
except Exception:
|
|
278
|
+
pass
|
|
279
|
+
|
|
175
280
|
duration_ms = (time.time() - start_time) * 1000
|
|
176
281
|
error_msg = f"Invalid arguments for tool '{tool_call.name}': {e}"
|
|
177
282
|
logger.warning(error_msg)
|
|
@@ -8,7 +8,7 @@ Supports multiple target formats including OpenAI, Codex, and custom agent forma
|
|
|
8
8
|
import re
|
|
9
9
|
import json
|
|
10
10
|
import uuid
|
|
11
|
-
from typing import List, Dict, Any, Optional, Union
|
|
11
|
+
from typing import List, Dict, Any, Optional, Union, Iterable
|
|
12
12
|
from dataclasses import dataclass
|
|
13
13
|
from enum import Enum
|
|
14
14
|
|
|
@@ -87,7 +87,7 @@ class ToolCallSyntaxRewriter:
|
|
|
87
87
|
def rewrite_content(
|
|
88
88
|
self,
|
|
89
89
|
content: str,
|
|
90
|
-
detected_tool_calls: Optional[List[
|
|
90
|
+
detected_tool_calls: Optional[List[Any]] = None
|
|
91
91
|
) -> str:
|
|
92
92
|
"""
|
|
93
93
|
Rewrite tool call syntax in content.
|
|
@@ -99,7 +99,9 @@ class ToolCallSyntaxRewriter:
|
|
|
99
99
|
Returns:
|
|
100
100
|
Content with rewritten tool call syntax
|
|
101
101
|
"""
|
|
102
|
-
|
|
102
|
+
# Allow formatting tool calls even when there's no surrounding assistant text.
|
|
103
|
+
# This is useful for streaming: a chunk may contain only tool calls.
|
|
104
|
+
if (not isinstance(content, str) or not content.strip()) and not detected_tool_calls:
|
|
103
105
|
return content
|
|
104
106
|
|
|
105
107
|
# Passthrough mode - return unchanged
|
|
@@ -110,6 +112,8 @@ class ToolCallSyntaxRewriter:
|
|
|
110
112
|
if detected_tool_calls is None:
|
|
111
113
|
detected_tool_calls = parse_tool_calls(content, self.model_name)
|
|
112
114
|
logger.debug(f"Detected {len(detected_tool_calls)} tool calls in content")
|
|
115
|
+
else:
|
|
116
|
+
detected_tool_calls = list(self._coerce_tool_calls(detected_tool_calls))
|
|
113
117
|
|
|
114
118
|
# No tool calls found
|
|
115
119
|
if not detected_tool_calls:
|
|
@@ -118,7 +122,7 @@ class ToolCallSyntaxRewriter:
|
|
|
118
122
|
# Apply format-specific rewriting
|
|
119
123
|
return self._apply_format_conversion(content, detected_tool_calls)
|
|
120
124
|
|
|
121
|
-
def convert_to_openai_format(self, tool_calls: List[
|
|
125
|
+
def convert_to_openai_format(self, tool_calls: List[Any]) -> List[Dict[str, Any]]:
|
|
122
126
|
"""
|
|
123
127
|
Convert tool calls to OpenAI API format.
|
|
124
128
|
|
|
@@ -130,7 +134,7 @@ class ToolCallSyntaxRewriter:
|
|
|
130
134
|
"""
|
|
131
135
|
openai_tools = []
|
|
132
136
|
|
|
133
|
-
for tool_call in tool_calls:
|
|
137
|
+
for tool_call in self._coerce_tool_calls(tool_calls):
|
|
134
138
|
# Ensure we have a call ID
|
|
135
139
|
call_id = tool_call.call_id or f"call_{uuid.uuid4().hex[:8]}"
|
|
136
140
|
|
|
@@ -161,6 +165,54 @@ class ToolCallSyntaxRewriter:
|
|
|
161
165
|
|
|
162
166
|
return openai_tools
|
|
163
167
|
|
|
168
|
+
def _coerce_tool_calls(self, tool_calls: Iterable[Any]) -> Iterable[ToolCall]:
    """Yield ``ToolCall`` objects built from ToolCall-like inputs.

    Accepted item shapes:

    * ``ToolCall`` instances, yielded unchanged;
    * dicts with ``name``/``arguments`` and ``call_id`` or ``id``, optionally
      with a nested ``function`` dict supplying a missing name or arguments;
    * any other object exposing ``name``, ``arguments`` and ``call_id`` attributes.

    Items without a non-blank string name are skipped. A falsy ``tool_calls``
    yields nothing.
    """
    for item in tool_calls or []:
        if isinstance(item, ToolCall):
            yield item
            continue

        if not isinstance(item, dict):
            # Attribute-style object (e.g. a dataclass from another module).
            attr_name = getattr(item, "name", None)
            attr_args = getattr(item, "arguments", None)
            attr_id = getattr(item, "call_id", None)
            if isinstance(attr_name, str) and attr_name.strip():
                # String arguments are passed through untouched here; anything
                # that is neither dict nor str (including None) becomes {}.
                if not isinstance(attr_args, (dict, str)):
                    attr_args = {}
                yield ToolCall(
                    name=attr_name.strip(),
                    arguments=attr_args,
                    call_id=None if attr_id is None else str(attr_id),
                )
            continue

        call_id = item.get("call_id")
        if call_id is None:
            call_id = item.get("id")

        tool_name = item.get("name")
        payload = item.get("arguments")

        # A nested "function" dict fills in whichever of name/arguments the
        # top level did not provide. An empty dict is ignored.
        nested = item.get("function")
        if not isinstance(nested, dict):
            nested = None
        if nested:
            if not (isinstance(tool_name, str) and tool_name.strip()):
                tool_name = nested.get("name")
            if payload is None:
                payload = nested.get("arguments")

        if not (isinstance(tool_name, str) and tool_name.strip()):
            continue

        if isinstance(payload, str):
            # String arguments are parsed as JSON; unparsable input falls back to {}.
            try:
                payload = json.loads(payload)
            except Exception:
                payload = {}
        if not isinstance(payload, dict):
            # Covers None, non-dict JSON values and any other unexpected type.
            payload = {}

        yield ToolCall(
            name=tool_name.strip(),
            arguments=payload,
            call_id=None if call_id is None else str(call_id),
        )
|
|
215
|
+
|
|
164
216
|
def _apply_format_conversion(self, content: str, tool_calls: List[ToolCall]) -> str:
|
|
165
217
|
"""Apply format-specific conversion."""
|
|
166
218
|
|
|
@@ -348,10 +400,17 @@ class ToolCallSyntaxRewriter:
|
|
|
348
400
|
r'<function_call>.*?</function_call>',
|
|
349
401
|
r'<tool_call>.*?</tool_call>',
|
|
350
402
|
r'```tool_code.*?```',
|
|
403
|
+
# Harmony/ChatML tool transcript: <|channel|>... to=tool ... <|message|>{...}
|
|
404
|
+
r'<\|channel\|>\s*[a-zA-Z0-9_\-]+\s+to=[a-zA-Z0-9_\-\.]+\b.*?<\|message\|>\s*\{.*?\}',
|
|
351
405
|
]
|
|
352
406
|
|
|
353
407
|
for pattern in complete_patterns:
|
|
354
408
|
cleaned = re.sub(pattern, '', cleaned, flags=re.DOTALL | re.IGNORECASE)
|
|
409
|
+
|
|
410
|
+
# Remove any remaining Harmony conversation tags that shouldn't appear.
|
|
411
|
+
cleaned = re.sub(r'<\|channel\|>', '', cleaned)
|
|
412
|
+
cleaned = re.sub(r'<\|message\|>', '', cleaned)
|
|
413
|
+
cleaned = re.sub(r'<\|constrain\|>', '', cleaned)
|
|
355
414
|
|
|
356
415
|
# Second pass: remove orphaned tags (from malformed tool calls)
|
|
357
416
|
orphaned_patterns = [
|
|
@@ -362,6 +421,9 @@ class ToolCallSyntaxRewriter:
|
|
|
362
421
|
r'<tool_call>',
|
|
363
422
|
r'</tool_call>',
|
|
364
423
|
r'```tool_code',
|
|
424
|
+
r'<\|channel\|>',
|
|
425
|
+
r'<\|message\|>',
|
|
426
|
+
r'<\|constrain\|>',
|
|
365
427
|
]
|
|
366
428
|
|
|
367
429
|
for pattern in orphaned_patterns:
|
|
@@ -468,4 +530,4 @@ def auto_detect_format(
|
|
|
468
530
|
return SyntaxFormat.XML
|
|
469
531
|
|
|
470
532
|
# Default to OpenAI format for maximum compatibility
|
|
471
|
-
return SyntaxFormat.OPENAI
|
|
533
|
+
return SyntaxFormat.OPENAI
|
|
@@ -10,6 +10,7 @@ import json
|
|
|
10
10
|
from typing import Dict, Any, Optional, Tuple, List
|
|
11
11
|
from dataclasses import dataclass
|
|
12
12
|
from ..utils.structured_logging import get_logger
|
|
13
|
+
from ..utils.jsonish import loads_dict_like as _loads_dict_like
|
|
13
14
|
|
|
14
15
|
logger = get_logger(__name__)
|
|
15
16
|
|
|
@@ -171,6 +172,12 @@ class ToolCallTagRewriter:
|
|
|
171
172
|
logger.debug("Early return: text is empty or preserve_json is False")
|
|
172
173
|
return text
|
|
173
174
|
|
|
175
|
+
# Pre-pass: convert CLI-like `tool: [name]: {...}` calls into canonical tagged JSON.
|
|
176
|
+
# Some OSS models emit this format even when prompted for other tags.
|
|
177
|
+
text = self._rewrite_bracket_prefix_calls(text)
|
|
178
|
+
# Pre-pass: convert Harmony/ChatML tool transcript format into canonical tagged JSON.
|
|
179
|
+
text = self._rewrite_harmony_prefix_calls(text)
|
|
180
|
+
|
|
174
181
|
# Check if we already have the target format (avoid double-tagging)
|
|
175
182
|
# Check using output tags (with angle brackets)
|
|
176
183
|
if (self._output_start_tag in text and
|
|
@@ -216,6 +223,184 @@ class ToolCallTagRewriter:
|
|
|
216
223
|
|
|
217
224
|
logger.debug(f"Final rewritten text: {rewritten[:200] if rewritten else None}")
|
|
218
225
|
return rewritten
|
|
226
|
+
|
|
227
|
+
def _rewrite_bracket_prefix_calls(self, text: str) -> str:
    """Convert ``tool: [name]: {args}`` lines into the configured tag format.

    Each recognised call is replaced by the output start tag, a JSON payload
    ``{"name": ..., "arguments": {...}}`` and the output end tag. Prefixes that
    are not directly followed by a balanced, parseable ``{...}`` object are
    left untouched. Falsy input is returned as-is.
    """
    if not text:
        return text

    prefix_re = re.compile(r"(?im)^\s*tool\s*:\s*\[([a-zA-Z0-9_\-]+)\]\s*:\s*")
    prefix_hits = list(prefix_re.finditer(text))
    if not prefix_hits:
        return text

    def _closing_brace_index(source: str, opening: int) -> int:
        """Return the index of the ``}`` balancing ``source[opening]``, or -1."""
        level = 0
        active_quote = ""  # Empty while outside a quoted string.
        skip_next = False  # True right after a backslash inside a string.
        for pos in range(opening, len(source)):
            char = source[pos]
            if active_quote:
                if skip_next:
                    skip_next = False
                elif char == "\\":
                    skip_next = True
                elif char == active_quote:
                    active_quote = ""
            elif char in ("'", '"'):
                active_quote = char
            elif char == "{":
                level += 1
            elif char == "}":
                level -= 1
                if level == 0:
                    return pos
        return -1

    result = text
    # Splice from the last match backwards so earlier match offsets stay valid.
    for hit in prefix_hits[::-1]:
        tool_name = str(hit.group(1) or "").strip()
        if not tool_name:
            continue

        open_at = result.find("{", hit.end())
        if open_at == -1:
            continue

        # Only whitespace may separate the prefix from the opening brace.
        gap = result[hit.end():open_at]
        if gap and not gap.isspace():
            continue

        close_at = _closing_brace_index(result, open_at)
        if close_at == -1:
            continue

        call_args = _loads_dict_like(result[open_at:close_at + 1])
        if not isinstance(call_args, dict):
            continue

        # Unwrap payloads shaped like {"name": ..., "arguments": {...}} so the
        # wrapper is not nested inside the canonical payload built below.
        wrapped = call_args.get("arguments")
        if isinstance(wrapped, str):
            wrapped = _loads_dict_like(wrapped)
        if isinstance(wrapped, dict):
            call_args = wrapped

        payload = json.dumps({"name": tool_name, "arguments": call_args}, ensure_ascii=False)
        tagged = f"{self._output_start_tag}{payload}{self._output_end_tag}"
        result = result[:hit.start()] + tagged + result[close_at + 1:]

    return result
|
|
308
|
+
|
|
309
|
+
def _rewrite_harmony_prefix_calls(self, text: str) -> str:
    """Convert Harmony/ChatML tool transcript blocks into the configured tag format.

    A block looks like:
        <|channel|>commentary to=list_files <|constrain|>json<|message|>{"directory_path":"."}

    The span from the ``<|channel|>`` header to the end of the JSON object is
    replaced by the output start tag, a JSON payload
    ``{"name": ..., "arguments": {...}}`` and the output end tag. A leading
    ``functions.`` on the tool name is dropped. Headers without a following
    ``<|message|>`` and a balanced, parseable ``{...}`` object are left untouched.
    """
    if not text:
        return text

    # Cheap substring screen before running the regex.
    required_markers = ("<|channel|>", "<|message|>", "to=")
    if any(marker not in text for marker in required_markers):
        return text

    header_re = re.compile(
        r"(?i)<\|channel\|>\s*[a-zA-Z0-9_\-]+\s+to=([a-zA-Z0-9_\-\.]+)\b"
    )
    header_hits = list(header_re.finditer(text))
    if not header_hits:
        return text

    def _closing_brace_index(source: str, opening: int) -> int:
        """Return the index of the ``}`` balancing ``source[opening]``, or -1."""
        level = 0
        active_quote = ""  # Empty while outside a quoted string.
        skip_next = False  # True right after a backslash inside a string.
        for pos in range(opening, len(source)):
            char = source[pos]
            if active_quote:
                if skip_next:
                    skip_next = False
                elif char == "\\":
                    skip_next = True
                elif char == active_quote:
                    active_quote = ""
            elif char in ("'", '"'):
                active_quote = char
            elif char == "{":
                level += 1
            elif char == "}":
                level -= 1
                if level == 0:
                    return pos
        return -1

    message_tag = "<|message|>"
    result = text
    # Splice from the last header backwards so earlier match offsets stay valid.
    for hit in header_hits[::-1]:
        tool_name = str(hit.group(1) or "").strip()
        if not tool_name:
            continue
        if tool_name.startswith("functions."):
            # Keep only what follows the first dot.
            tool_name = tool_name.partition(".")[2].strip()
        if not tool_name:
            continue

        tag_at = result.find(message_tag, hit.end())
        if tag_at == -1:
            continue
        body_at = tag_at + len(message_tag)

        open_at = result.find("{", body_at)
        if open_at == -1:
            continue

        # Only whitespace may separate the message tag from the opening brace.
        gap = result[body_at:open_at]
        if gap and not gap.isspace():
            continue

        close_at = _closing_brace_index(result, open_at)
        if close_at == -1:
            continue

        call_args = _loads_dict_like(result[open_at:close_at + 1])
        if not isinstance(call_args, dict):
            continue

        # Unwrap payloads shaped like {"name": ..., "arguments": {...}, "call_id": ...}.
        wrapped = call_args.get("arguments")
        if isinstance(wrapped, str):
            wrapped = _loads_dict_like(wrapped)
        if isinstance(wrapped, dict):
            call_args = wrapped

        payload = json.dumps({"name": tool_name, "arguments": call_args}, ensure_ascii=False)
        tagged = f"{self._output_start_tag}{payload}{self._output_end_tag}"
        result = result[:hit.start()] + tagged + result[close_at + 1:]

    return result
|
|
219
404
|
|
|
220
405
|
def rewrite_streaming_chunk(self, chunk: str, buffer: str = "") -> Tuple[str, str]:
|
|
221
406
|
"""
|
|
@@ -503,4 +688,4 @@ def create_tag_rewriter(cli_name: str = "qwen3") -> ToolCallTagRewriter:
|
|
|
503
688
|
ToolCallTagRewriter instance
|
|
504
689
|
"""
|
|
505
690
|
tags = get_predefined_tags(cli_name)
|
|
506
|
-
return ToolCallTagRewriter(tags)
|
|
691
|
+
return ToolCallTagRewriter(tags)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Relaxed JSON/Python-literal parsing helpers.
|
|
2
|
+
|
|
3
|
+
Some models emit tool-call JSON that is "almost JSON" (e.g., Python booleans,
|
|
4
|
+
single quotes) or includes unescaped control characters (notably literal
|
|
5
|
+
newlines inside string values). These helpers provide a single, robust way to
|
|
6
|
+
parse dict-like payloads across AbstractCore.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ast
|
|
12
|
+
import json
|
|
13
|
+
import re
|
|
14
|
+
from typing import Any, Dict, Optional
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Matches either a complete quoted string (group 1) or a bare JSON keyword
# (group 2). Matching strings first lets the substitution skip over them, so
# keywords are only rewritten when they appear outside string values.
_JSON_LITERAL_TOKEN_RE = re.compile(
    r"""("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')|\b(true|false|null)\b""",
    re.IGNORECASE | re.DOTALL,
)

# JSON keyword (lower-cased) -> equivalent Python literal.
_PYTHON_LITERALS = {"true": "True", "false": "False", "null": "None"}

# Control characters that have a short escape valid in both JSON and Python.
_SHORT_ESCAPES = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}


def _pythonize_literal(match: "re.Match[str]") -> str:
    """Return quoted strings unchanged and map JSON keywords to Python literals."""
    quoted = match.group(1)
    if quoted is not None:
        return quoted
    return _PYTHON_LITERALS[match.group(2).lower()]


def _escape_control_chars_in_strings(text: str) -> str:
    """Escape literal control chars that appear inside quoted strings.

    This turns invalid JSON like:
        {"content":"line1
        line2"}
    into valid JSON:
        {"content":"line1\\nline2"}

    Newline, carriage return and tab use their short escapes; every other
    character below U+0020 is written as ``\\uXXXX``, which is valid in both
    JSON and Python string literals. Characters outside quoted strings and
    existing backslash escapes are copied through unchanged.

    Works for both single- and double-quoted strings (for Python-literal fallbacks).
    """
    if not text:
        return text

    out: list[str] = []
    in_string = False
    quote = ""
    escaped = False

    for ch in text:
        if not in_string:
            if ch in ("'", '"'):
                in_string = True
                quote = ch
            out.append(ch)
            continue

        if escaped:
            # Character following a backslash: keep it verbatim.
            out.append(ch)
            escaped = False
        elif ch == "\\":
            out.append(ch)
            escaped = True
        elif ch == quote:
            out.append(ch)
            in_string = False
            quote = ""
        elif ch < " ":
            # C0 control characters are not allowed raw inside JSON strings.
            out.append(_SHORT_ESCAPES.get(ch) or f"\\u{ord(ch):04x}")
        else:
            out.append(ch)

    return "".join(out)


def loads_dict_like(raw: Any) -> Optional[Dict[str, Any]]:
    """Parse a JSON-ish or Python-literal dict safely.

    Parsing is attempted in this order:

    1. strict ``json.loads``;
    2. ``json.loads`` after escaping raw control characters inside strings;
    3. ``ast.literal_eval`` after mapping bare ``true``/``false``/``null``
       (any case) to Python literals. Text inside quoted strings is left alone.

    Args:
        raw: Value to parse; it is converted with ``str()`` and stripped first.

    Returns:
        The parsed dict, or ``None`` when ``raw`` is ``None``, blank, cannot be
        parsed, or does not evaluate to a dict. Results from the
        ``literal_eval`` path have their keys converted to ``str``.
    """
    if raw is None:
        return None

    text = str(raw).strip()
    if not text:
        return None

    try:
        value = json.loads(text)
        if isinstance(value, dict):
            return value
    except json.JSONDecodeError:
        fixed = _escape_control_chars_in_strings(text)
        if fixed != text:
            try:
                value = json.loads(fixed)
                if isinstance(value, dict):
                    return value
            except Exception:
                # Best effort: fall through to the Python-literal parser.
                pass
    except Exception:
        # e.g. RecursionError on pathological nesting; try the fallback below.
        pass

    # Rewrite JSON keywords only outside of quoted strings so that values such
    # as "null pointer" are not corrupted.
    candidate = _JSON_LITERAL_TOKEN_RE.sub(_pythonize_literal, text)
    candidate = _escape_control_chars_in_strings(candidate)
    try:
        value = ast.literal_eval(candidate)
    except Exception:
        # literal_eval raises several exception types on malformed input.
        return None

    if not isinstance(value, dict):
        return None
    return {str(k): v for k, v in value.items()}
|
|
111
|
+
|
abstractcore/utils/version.py
CHANGED
|
@@ -11,4 +11,4 @@ including when the package is installed from PyPI where pyproject.toml is not av
|
|
|
11
11
|
|
|
12
12
|
# Package version - update this when releasing new versions
|
|
13
13
|
# This must be manually synchronized with the version in pyproject.toml
|
|
14
|
-
__version__ = "2.
|
|
14
|
+
__version__ = "2.9.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractcore
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: Unified interface to all LLM providers with essential infrastructure for tool calling, streaming, and model management
|
|
5
5
|
Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
6
6
|
Maintainer-email: Laurent-Philippe Albou <contact@abstractcore.ai>
|
|
@@ -57,8 +57,15 @@ Provides-Extra: processing
|
|
|
57
57
|
Provides-Extra: tools
|
|
58
58
|
Requires-Dist: beautifulsoup4<5.0.0,>=4.12.0; extra == "tools"
|
|
59
59
|
Requires-Dist: lxml<6.0.0,>=4.9.0; extra == "tools"
|
|
60
|
-
Requires-Dist:
|
|
60
|
+
Requires-Dist: ddgs<10.0.0,>=9.10.0; python_version >= "3.10" and extra == "tools"
|
|
61
|
+
Requires-Dist: duckduckgo-search<4.0.0,>=3.8.0; python_version < "3.10" and extra == "tools"
|
|
61
62
|
Requires-Dist: psutil<6.0.0,>=5.9.0; extra == "tools"
|
|
63
|
+
Provides-Extra: tool
|
|
64
|
+
Requires-Dist: beautifulsoup4<5.0.0,>=4.12.0; extra == "tool"
|
|
65
|
+
Requires-Dist: lxml<6.0.0,>=4.9.0; extra == "tool"
|
|
66
|
+
Requires-Dist: ddgs<10.0.0,>=9.10.0; python_version >= "3.10" and extra == "tool"
|
|
67
|
+
Requires-Dist: duckduckgo-search<4.0.0,>=3.8.0; python_version < "3.10" and extra == "tool"
|
|
68
|
+
Requires-Dist: psutil<6.0.0,>=5.9.0; extra == "tool"
|
|
62
69
|
Provides-Extra: media
|
|
63
70
|
Requires-Dist: Pillow<12.0.0,>=10.0.0; extra == "media"
|
|
64
71
|
Requires-Dist: pymupdf4llm<1.0.0,>=0.0.20; extra == "media"
|
|
@@ -282,6 +289,8 @@ print(f"Summary: {response.get_summary()}") # "Model: gpt-4o-mini | Toke
|
|
|
282
289
|
|
|
283
290
|
AbstractCore includes a comprehensive set of ready-to-use tools for common tasks:
|
|
284
291
|
|
|
292
|
+
> Note: `abstractcore.tools.common_tools` requires `abstractcore[tools]` (BeautifulSoup, lxml, web search backends, etc.).
|
|
293
|
+
|
|
285
294
|
```python
|
|
286
295
|
from abstractcore.tools.common_tools import fetch_url, search_files, read_file
|
|
287
296
|
|