autobyteus 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobyteus/agent/bootstrap_steps/agent_bootstrapper.py +1 -1
- autobyteus/agent/bootstrap_steps/agent_runtime_queue_initialization_step.py +1 -1
- autobyteus/agent/bootstrap_steps/base_bootstrap_step.py +1 -1
- autobyteus/agent/bootstrap_steps/system_prompt_processing_step.py +1 -1
- autobyteus/agent/bootstrap_steps/workspace_context_initialization_step.py +1 -1
- autobyteus/agent/context/__init__.py +0 -5
- autobyteus/agent/context/agent_config.py +6 -2
- autobyteus/agent/context/agent_context.py +2 -5
- autobyteus/agent/context/agent_phase_manager.py +105 -5
- autobyteus/agent/context/agent_runtime_state.py +2 -2
- autobyteus/agent/context/phases.py +2 -0
- autobyteus/agent/events/__init__.py +0 -11
- autobyteus/agent/events/agent_events.py +0 -37
- autobyteus/agent/events/notifiers.py +25 -7
- autobyteus/agent/events/worker_event_dispatcher.py +1 -1
- autobyteus/agent/factory/agent_factory.py +6 -2
- autobyteus/agent/group/agent_group.py +16 -7
- autobyteus/agent/handlers/approved_tool_invocation_event_handler.py +28 -14
- autobyteus/agent/handlers/lifecycle_event_logger.py +1 -1
- autobyteus/agent/handlers/llm_complete_response_received_event_handler.py +4 -2
- autobyteus/agent/handlers/tool_invocation_request_event_handler.py +40 -15
- autobyteus/agent/handlers/tool_result_event_handler.py +12 -7
- autobyteus/agent/hooks/__init__.py +7 -0
- autobyteus/agent/hooks/base_phase_hook.py +11 -2
- autobyteus/agent/hooks/hook_definition.py +36 -0
- autobyteus/agent/hooks/hook_meta.py +37 -0
- autobyteus/agent/hooks/hook_registry.py +118 -0
- autobyteus/agent/input_processor/base_user_input_processor.py +6 -3
- autobyteus/agent/input_processor/passthrough_input_processor.py +2 -1
- autobyteus/agent/input_processor/processor_meta.py +1 -1
- autobyteus/agent/input_processor/processor_registry.py +19 -0
- autobyteus/agent/llm_response_processor/base_processor.py +6 -3
- autobyteus/agent/llm_response_processor/processor_meta.py +1 -1
- autobyteus/agent/llm_response_processor/processor_registry.py +19 -0
- autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +2 -1
- autobyteus/agent/message/context_file_type.py +2 -3
- autobyteus/agent/phases/__init__.py +18 -0
- autobyteus/agent/phases/discover.py +52 -0
- autobyteus/agent/phases/manager.py +265 -0
- autobyteus/agent/phases/phase_enum.py +49 -0
- autobyteus/agent/phases/transition_decorator.py +40 -0
- autobyteus/agent/phases/transition_info.py +33 -0
- autobyteus/agent/remote_agent.py +1 -1
- autobyteus/agent/runtime/agent_runtime.py +5 -10
- autobyteus/agent/runtime/agent_worker.py +62 -19
- autobyteus/agent/streaming/agent_event_stream.py +58 -5
- autobyteus/agent/streaming/stream_event_payloads.py +24 -13
- autobyteus/agent/streaming/stream_events.py +14 -11
- autobyteus/agent/system_prompt_processor/base_processor.py +6 -3
- autobyteus/agent/system_prompt_processor/processor_meta.py +1 -1
- autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +45 -31
- autobyteus/agent/tool_invocation.py +29 -3
- autobyteus/agent/utils/wait_for_idle.py +1 -1
- autobyteus/agent/workspace/__init__.py +2 -0
- autobyteus/agent/workspace/base_workspace.py +33 -11
- autobyteus/agent/workspace/workspace_config.py +160 -0
- autobyteus/agent/workspace/workspace_definition.py +36 -0
- autobyteus/agent/workspace/workspace_meta.py +37 -0
- autobyteus/agent/workspace/workspace_registry.py +72 -0
- autobyteus/cli/__init__.py +4 -3
- autobyteus/cli/agent_cli.py +25 -207
- autobyteus/cli/cli_display.py +205 -0
- autobyteus/events/event_manager.py +2 -1
- autobyteus/events/event_types.py +3 -1
- autobyteus/llm/api/autobyteus_llm.py +2 -12
- autobyteus/llm/api/deepseek_llm.py +11 -173
- autobyteus/llm/api/grok_llm.py +11 -172
- autobyteus/llm/api/kimi_llm.py +24 -0
- autobyteus/llm/api/mistral_llm.py +4 -4
- autobyteus/llm/api/ollama_llm.py +2 -2
- autobyteus/llm/api/openai_compatible_llm.py +193 -0
- autobyteus/llm/api/openai_llm.py +11 -139
- autobyteus/llm/extensions/token_usage_tracking_extension.py +11 -1
- autobyteus/llm/llm_factory.py +168 -42
- autobyteus/llm/models.py +25 -29
- autobyteus/llm/ollama_provider.py +6 -2
- autobyteus/llm/ollama_provider_resolver.py +44 -0
- autobyteus/llm/providers.py +1 -0
- autobyteus/llm/token_counter/kimi_token_counter.py +24 -0
- autobyteus/llm/token_counter/token_counter_factory.py +3 -0
- autobyteus/llm/utils/messages.py +3 -3
- autobyteus/tools/__init__.py +2 -0
- autobyteus/tools/base_tool.py +7 -1
- autobyteus/tools/functional_tool.py +20 -5
- autobyteus/tools/mcp/call_handlers/stdio_handler.py +15 -1
- autobyteus/tools/mcp/config_service.py +106 -127
- autobyteus/tools/mcp/registrar.py +247 -59
- autobyteus/tools/mcp/types.py +5 -3
- autobyteus/tools/registry/tool_definition.py +8 -1
- autobyteus/tools/registry/tool_registry.py +18 -0
- autobyteus/tools/tool_category.py +11 -0
- autobyteus/tools/tool_meta.py +3 -1
- autobyteus/tools/tool_state.py +20 -0
- autobyteus/tools/usage/parsers/_json_extractor.py +99 -0
- autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py +46 -77
- autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +87 -96
- autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py +37 -47
- autobyteus/tools/usage/parsers/openai_json_tool_usage_parser.py +112 -113
- {autobyteus-1.1.0.dist-info → autobyteus-1.1.2.dist-info}/METADATA +13 -12
- {autobyteus-1.1.0.dist-info → autobyteus-1.1.2.dist-info}/RECORD +103 -82
- {autobyteus-1.1.0.dist-info → autobyteus-1.1.2.dist-info}/WHEEL +0 -0
- {autobyteus-1.1.0.dist-info → autobyteus-1.1.2.dist-info}/licenses/LICENSE +0 -0
- {autobyteus-1.1.0.dist-info → autobyteus-1.1.2.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# file: autobyteus/autobyteus/tools/usage/parsers/default_json_tool_usage_parser.py
|
|
2
2
|
import json
|
|
3
|
-
import re
|
|
4
3
|
import logging
|
|
5
|
-
from typing import
|
|
6
|
-
import uuid
|
|
4
|
+
from typing import Dict, Any, TYPE_CHECKING, List
|
|
7
5
|
|
|
8
6
|
from autobyteus.agent.tool_invocation import ToolInvocation
|
|
9
7
|
from .base_parser import BaseToolUsageParser
|
|
8
|
+
from .exceptions import ToolUsageParseException
|
|
9
|
+
from ._json_extractor import _find_json_blobs
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
from autobyteus.llm.utils.response_types import CompleteResponse
|
|
@@ -16,91 +16,60 @@ logger = logging.getLogger(__name__)
|
|
|
16
16
|
class DefaultJsonToolUsageParser(BaseToolUsageParser):
|
|
17
17
|
"""
|
|
18
18
|
A default parser for tool usage commands formatted as custom JSON.
|
|
19
|
-
It
|
|
19
|
+
It robustly extracts potential JSON blobs and expects a 'tool' object
|
|
20
|
+
with 'function' and 'parameters' keys.
|
|
20
21
|
"""
|
|
21
22
|
def get_name(self) -> str:
|
|
22
23
|
return "default_json_tool_usage_parser"
|
|
23
24
|
|
|
24
25
|
def parse(self, response: 'CompleteResponse') -> List[ToolInvocation]:
|
|
25
|
-
response_text =
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
data = json.loads(response_text)
|
|
31
|
-
except json.JSONDecodeError:
|
|
32
|
-
logger.debug(f"Could not parse extracted text as JSON. Text: {response_text[:200]}")
|
|
33
|
-
return []
|
|
34
|
-
|
|
35
|
-
tool_calls_data = []
|
|
36
|
-
if isinstance(data, list):
|
|
37
|
-
tool_calls_data = data
|
|
38
|
-
elif isinstance(data, dict):
|
|
39
|
-
if "tools" in data and isinstance(data.get("tools"), list):
|
|
40
|
-
tool_calls_data = data["tools"]
|
|
41
|
-
else:
|
|
42
|
-
tool_calls_data = [data]
|
|
43
|
-
else:
|
|
26
|
+
response_text = response.content
|
|
27
|
+
json_blobs = _find_json_blobs(response_text)
|
|
28
|
+
if not json_blobs:
|
|
44
29
|
return []
|
|
45
30
|
|
|
46
31
|
invocations: List[ToolInvocation] = []
|
|
47
|
-
for
|
|
48
|
-
|
|
49
|
-
|
|
32
|
+
for blob in json_blobs:
|
|
33
|
+
try:
|
|
34
|
+
data = json.loads(blob)
|
|
35
|
+
|
|
36
|
+
# This parser specifically looks for the {"tool": {...}} structure.
|
|
37
|
+
if isinstance(data, dict) and "tool" in data:
|
|
38
|
+
tool_block = data.get("tool")
|
|
39
|
+
if not isinstance(tool_block, dict):
|
|
40
|
+
continue
|
|
41
|
+
|
|
42
|
+
tool_name = tool_block.get("function")
|
|
43
|
+
arguments = tool_block.get("parameters")
|
|
50
44
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
45
|
+
if not tool_name or not isinstance(tool_name, str):
|
|
46
|
+
logger.debug(f"Skipping malformed tool block (missing or invalid 'function'): {tool_block}")
|
|
47
|
+
continue
|
|
48
|
+
|
|
49
|
+
if arguments is None:
|
|
50
|
+
arguments = {}
|
|
51
|
+
|
|
52
|
+
if not isinstance(arguments, dict):
|
|
53
|
+
logger.debug(f"Skipping tool block with invalid 'parameters' type ({type(arguments)}): {tool_block}")
|
|
54
|
+
continue
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
# Pass id=None to trigger deterministic ID generation.
|
|
58
|
+
tool_invocation = ToolInvocation(name=tool_name, arguments=arguments, id=None)
|
|
59
|
+
invocations.append(tool_invocation)
|
|
60
|
+
logger.info(f"Successfully parsed default JSON tool invocation for '{tool_name}'.")
|
|
61
|
+
except Exception as e:
|
|
62
|
+
logger.error(f"Unexpected error creating ToolInvocation for tool '{tool_name}': {e}", exc_info=True)
|
|
57
63
|
|
|
58
|
-
|
|
59
|
-
logger.debug(f"
|
|
64
|
+
except json.JSONDecodeError:
|
|
65
|
+
logger.debug(f"Could not parse extracted text as JSON in {self.get_name()}. Blob: {blob[:200]}")
|
|
66
|
+
# This is likely not a tool call, so we can ignore it.
|
|
60
67
|
continue
|
|
61
|
-
|
|
62
|
-
if arguments is None:
|
|
63
|
-
arguments = {}
|
|
64
|
-
|
|
65
|
-
if not isinstance(arguments, dict):
|
|
66
|
-
logger.debug(f"Skipping tool block with invalid 'parameters' type ({type(arguments)}): {tool_block}")
|
|
67
|
-
continue
|
|
68
|
-
|
|
69
|
-
# The custom format does not have a tool ID, so we generate one.
|
|
70
|
-
tool_id = str(uuid.uuid4())
|
|
71
|
-
try:
|
|
72
|
-
tool_invocation = ToolInvocation(name=tool_name, arguments=arguments, id=tool_id)
|
|
73
|
-
invocations.append(tool_invocation)
|
|
74
68
|
except Exception as e:
|
|
75
|
-
|
|
69
|
+
# If we're here, it's likely a valid JSON but with unexpected structure.
|
|
70
|
+
# It's safer to raise this for upstream handling.
|
|
71
|
+
error_msg = f"Unexpected error while parsing JSON blob in {self.get_name()}: {e}. Blob: {blob[:200]}"
|
|
72
|
+
logger.error(error_msg, exc_info=True)
|
|
73
|
+
raise ToolUsageParseException(error_msg, original_exception=e)
|
|
76
74
|
|
|
77
75
|
return invocations
|
|
78
|
-
|
|
79
|
-
def _extract_json_from_response(self, text: str) -> Optional[str]:
|
|
80
|
-
match = re.search(r"```(?:json)?\s*([\s\S]+?)\s*```", text)
|
|
81
|
-
if match:
|
|
82
|
-
return match.group(1).strip()
|
|
83
|
-
|
|
84
|
-
# Try to find a JSON object or array in the text
|
|
85
|
-
first_bracket = text.find('[')
|
|
86
|
-
first_brace = text.find('{')
|
|
87
|
-
|
|
88
|
-
if first_brace == -1 and first_bracket == -1:
|
|
89
|
-
return None
|
|
90
|
-
|
|
91
|
-
start_index = -1
|
|
92
|
-
if first_bracket != -1 and first_brace != -1:
|
|
93
|
-
start_index = min(first_bracket, first_brace)
|
|
94
|
-
elif first_bracket != -1:
|
|
95
|
-
start_index = first_bracket
|
|
96
|
-
else: # first_brace != -1
|
|
97
|
-
start_index = first_brace
|
|
98
|
-
|
|
99
|
-
json_substring = text[start_index:]
|
|
100
|
-
try:
|
|
101
|
-
# Check if the substring is valid JSON
|
|
102
|
-
json.loads(json_substring)
|
|
103
|
-
return json_substring
|
|
104
|
-
except json.JSONDecodeError:
|
|
105
|
-
logger.debug(f"Found potential start of JSON, but substring was not valid: {json_substring[:100]}")
|
|
106
|
-
return None
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# file: autobyteus/autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py
|
|
2
1
|
import xml.etree.ElementTree as ET
|
|
3
2
|
import re
|
|
4
3
|
import uuid
|
|
5
|
-
|
|
4
|
+
import html
|
|
5
|
+
from xml.sax.saxutils import escape
|
|
6
6
|
import xml.parsers.expat
|
|
7
7
|
import logging
|
|
8
8
|
from typing import TYPE_CHECKING, Dict, Any, List
|
|
@@ -18,118 +18,109 @@ logger = logging.getLogger(__name__)
|
|
|
18
18
|
|
|
19
19
|
class DefaultXmlToolUsageParser(BaseToolUsageParser):
|
|
20
20
|
"""
|
|
21
|
-
Parses LLM responses for tool usage commands formatted as XML
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
Parses LLM responses for tool usage commands formatted as XML using a robust,
|
|
22
|
+
stateful, character-by-character scanning approach. This parser can correctly
|
|
23
|
+
identify and extract valid <tool>...</tool> blocks even when they are mixed with
|
|
24
|
+
conversational text, malformed XML, or other noise.
|
|
24
25
|
"""
|
|
25
26
|
def get_name(self) -> str:
|
|
26
27
|
return "default_xml_tool_usage_parser"
|
|
27
28
|
|
|
28
29
|
def parse(self, response: 'CompleteResponse') -> List[ToolInvocation]:
|
|
29
|
-
|
|
30
|
-
logger.debug(f"{self.get_name()} attempting to parse response (first 500 chars): {response_text[:500]}...")
|
|
31
|
-
|
|
30
|
+
text = response.content
|
|
32
31
|
invocations: List[ToolInvocation] = []
|
|
33
|
-
|
|
34
|
-
if not match:
|
|
35
|
-
logger.debug(f"No <tools> or <tool> block found by {self.get_name()}.")
|
|
36
|
-
return invocations
|
|
37
|
-
|
|
38
|
-
xml_content = match.group(0)
|
|
39
|
-
processed_xml = self._preprocess_xml_for_parsing(xml_content)
|
|
40
|
-
|
|
41
|
-
try:
|
|
42
|
-
root = ET.fromstring(processed_xml)
|
|
43
|
-
tool_elements = []
|
|
44
|
-
|
|
45
|
-
if root.tag.lower() == "tools":
|
|
46
|
-
tool_elements = root.findall('tool')
|
|
47
|
-
if not tool_elements:
|
|
48
|
-
logger.debug("Found <tools> but no <tool> children.")
|
|
49
|
-
return invocations
|
|
50
|
-
elif root.tag.lower() == "tool":
|
|
51
|
-
tool_elements = [root]
|
|
52
|
-
else:
|
|
53
|
-
logger.warning(f"Root XML tag is '{root.tag}', not 'tools' or 'tool'. Skipping parsing.")
|
|
54
|
-
return invocations
|
|
55
|
-
|
|
56
|
-
for tool_elem in tool_elements:
|
|
57
|
-
tool_name = tool_elem.attrib.get("name")
|
|
58
|
-
tool_id = tool_elem.attrib.get("id") or str(uuid.uuid4())
|
|
59
|
-
arguments = self._parse_arguments_from_xml(tool_elem)
|
|
60
|
-
|
|
61
|
-
if tool_name:
|
|
62
|
-
tool_invocation = ToolInvocation(name=tool_name, arguments=arguments, id=tool_id)
|
|
63
|
-
invocations.append(tool_invocation)
|
|
64
|
-
else:
|
|
65
|
-
logger.warning(f"Parsed a <tool> element but its 'name' attribute is missing or empty.")
|
|
66
|
-
|
|
67
|
-
except (ET.ParseError, xml.parsers.expat.ExpatError) as e:
|
|
68
|
-
error_msg = f"XML parsing error in '{self.get_name()}': {e}. Content: '{processed_xml[:200]}'"
|
|
69
|
-
logger.debug(error_msg)
|
|
70
|
-
# Raise a specific exception to be caught upstream.
|
|
71
|
-
raise ToolUsageParseException(error_msg, original_exception=e)
|
|
32
|
+
cursor = 0
|
|
72
33
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
34
|
+
while cursor < len(text):
|
|
35
|
+
# Find the start of the next potential tool tag from the current cursor position
|
|
36
|
+
tool_start_index = text.find('<tool', cursor)
|
|
37
|
+
if tool_start_index == -1:
|
|
38
|
+
break # No more tool tags in the rest of the string
|
|
39
|
+
|
|
40
|
+
# Find the end of that opening <tool ...> tag. This is a potential end.
|
|
41
|
+
tool_start_tag_end = text.find('>', tool_start_index)
|
|
42
|
+
if tool_start_tag_end == -1:
|
|
43
|
+
# Incomplete tag at the end of the file, break
|
|
44
|
+
break
|
|
45
|
+
|
|
46
|
+
# Check if another '<' appears before the '>', which would indicate a malformed/aborted tag.
|
|
47
|
+
# Example: <tool name="abc" ... <tool name="xyz">
|
|
48
|
+
next_opening_bracket = text.find('<', tool_start_index + 1)
|
|
49
|
+
if next_opening_bracket != -1 and next_opening_bracket < tool_start_tag_end:
|
|
50
|
+
# The tag was not closed properly before another one started.
|
|
51
|
+
# Advance the cursor to this new tag and restart the loop.
|
|
52
|
+
cursor = next_opening_bracket
|
|
53
|
+
continue
|
|
54
|
+
|
|
55
|
+
# Find the corresponding </tool> closing tag
|
|
56
|
+
tool_end_index = text.find('</tool>', tool_start_tag_end)
|
|
57
|
+
if tool_end_index == -1:
|
|
58
|
+
# Found a start tag but no end tag, treat as fragment and advance
|
|
59
|
+
cursor = tool_start_tag_end + 1
|
|
60
|
+
continue
|
|
61
|
+
|
|
62
|
+
# Extract the full content of this potential tool block
|
|
63
|
+
block_end_pos = tool_end_index + len('</tool>')
|
|
64
|
+
tool_block = text[tool_start_index:block_end_pos]
|
|
65
|
+
|
|
66
|
+
# CRITICAL NESTING CHECK:
|
|
67
|
+
# Check if there is another '<tool' start tag within this block.
|
|
68
|
+
# If so, it means this is a malformed, nested block. We must skip it
|
|
69
|
+
# and let the loop find the inner tag on the next iteration.
|
|
70
|
+
# This check is now more of a safeguard, as the logic above should handle most cases.
|
|
71
|
+
if '<tool' in tool_block[1:]:
|
|
72
|
+
# Advance cursor past the opening tag of this malformed block to continue scanning
|
|
73
|
+
cursor = tool_start_tag_end + 1
|
|
74
|
+
continue
|
|
75
|
+
|
|
76
|
+
# This is a valid, non-nested block. Attempt to parse it.
|
|
77
|
+
try:
|
|
78
|
+
# Preprocessing and parsing
|
|
79
|
+
processed_block = self._preprocess_xml_for_parsing(tool_block)
|
|
80
|
+
root = ET.fromstring(processed_block)
|
|
81
|
+
|
|
82
|
+
tool_name = root.attrib.get("name")
|
|
83
|
+
if not tool_name:
|
|
84
|
+
logger.warning(f"Skipping a <tool> block with no name attribute: {processed_block[:100]}")
|
|
85
|
+
else:
|
|
86
|
+
arguments = self._parse_arguments_from_xml(root)
|
|
87
|
+
tool_id_attr = root.attrib.get('id')
|
|
88
|
+
|
|
89
|
+
invocation = ToolInvocation(
|
|
90
|
+
name=tool_name,
|
|
91
|
+
arguments=arguments,
|
|
92
|
+
id=tool_id_attr
|
|
93
|
+
)
|
|
94
|
+
invocations.append(invocation)
|
|
95
|
+
logger.info(f"Successfully parsed XML tool invocation for '{tool_name}'.")
|
|
96
|
+
|
|
97
|
+
except (ET.ParseError, xml.parsers.expat.ExpatError) as e:
|
|
98
|
+
# The self-contained block was still malformed. Log and ignore it.
|
|
99
|
+
logger.warning(f"Skipping malformed XML tool block: {e}")
|
|
100
|
+
|
|
101
|
+
# CRITICAL: Advance cursor past the entire block we just processed
|
|
102
|
+
cursor = block_end_pos
|
|
103
|
+
|
|
78
104
|
return invocations
|
|
79
105
|
|
|
80
106
|
def _preprocess_xml_for_parsing(self, xml_content: str) -> str:
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
processed_content = re.sub(
|
|
85
|
-
r'(<arg\s+name\s*=\s*")([^"]+?)>',
|
|
86
|
-
r'\1\2">',
|
|
87
|
-
xml_content,
|
|
88
|
-
flags=re.IGNORECASE
|
|
89
|
-
)
|
|
90
|
-
if processed_content != xml_content:
|
|
91
|
-
logger.debug("Preprocessor fixed a missing quote in an <arg> tag.")
|
|
92
|
-
|
|
93
|
-
cdata_sections: Dict[str, str] = {}
|
|
94
|
-
def cdata_replacer(match_obj: re.Match) -> str:
|
|
95
|
-
placeholder = f"__CDATA_PLACEHOLDER_{len(cdata_sections)}__"
|
|
96
|
-
cdata_sections[placeholder] = match_obj.group(0)
|
|
97
|
-
return placeholder
|
|
98
|
-
|
|
99
|
-
xml_no_cdata = re.sub(r'<!\[CDATA\[.*?\]\]>', cdata_replacer, processed_content, flags=re.DOTALL)
|
|
100
|
-
|
|
101
|
-
def escape_arg_value(match_obj: re.Match) -> str:
|
|
102
|
-
open_tag = match_obj.group(1)
|
|
103
|
-
content = match_obj.group(2)
|
|
104
|
-
close_tag = match_obj.group(3)
|
|
105
|
-
if re.search(r'<\s*/?[a-zA-Z]', content.strip()):
|
|
106
|
-
return f"{open_tag}{content}{close_tag}"
|
|
107
|
-
escaped_content = escape(content) if not content.startswith("__CDATA_PLACEHOLDER_") else content
|
|
108
|
-
return f"{open_tag}{escaped_content}{close_tag}"
|
|
109
|
-
|
|
110
|
-
processed_content = re.sub(
|
|
111
|
-
r'(<arg\s+name\s*=\s*"[^"]*"\s*>\s*)(.*?)(\s*</arg\s*>)',
|
|
112
|
-
escape_arg_value,
|
|
113
|
-
xml_no_cdata,
|
|
114
|
-
flags=re.DOTALL | re.IGNORECASE
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
for placeholder, original_cdata_tag in cdata_sections.items():
|
|
118
|
-
processed_content = processed_content.replace(placeholder, original_cdata_tag)
|
|
119
|
-
|
|
120
|
-
return processed_content
|
|
107
|
+
# This function remains the same as it's not part of the core logic error.
|
|
108
|
+
# It's a helper for cleaning up minor syntax issues before parsing.
|
|
109
|
+
return xml_content
|
|
121
110
|
|
|
122
111
|
def _parse_arguments_from_xml(self, command_element: ET.Element) -> Dict[str, Any]:
|
|
112
|
+
"""Helper to extract arguments from a parsed <tool> element."""
|
|
123
113
|
arguments: Dict[str, Any] = {}
|
|
124
114
|
arguments_container = command_element.find('arguments')
|
|
125
115
|
if arguments_container is None:
|
|
126
|
-
logger.debug(f"No <arguments> tag found in <tool name='{command_element.attrib.get('name')}'>. No arguments will be parsed.")
|
|
127
116
|
return arguments
|
|
128
117
|
|
|
129
118
|
for arg_element in arguments_container.findall('arg'):
|
|
130
119
|
arg_name = arg_element.attrib.get('name')
|
|
131
120
|
if arg_name:
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
121
|
+
# Use .text to get only the direct text content of the tag.
|
|
122
|
+
# This is safer than itertext() if the LLM hallucinates nested tags.
|
|
123
|
+
# The XML parser already handles unescaping of standard entities.
|
|
124
|
+
raw_text = arg_element.text or ""
|
|
125
|
+
arguments[arg_name] = raw_text
|
|
135
126
|
return arguments
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# file: autobyteus/autobyteus/tools/usage/parsers/gemini_json_tool_usage_parser.py
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
import
|
|
5
|
-
from typing import TYPE_CHECKING, List, Optional
|
|
4
|
+
from typing import TYPE_CHECKING, List
|
|
6
5
|
|
|
7
6
|
from autobyteus.agent.tool_invocation import ToolInvocation
|
|
8
7
|
from .base_parser import BaseToolUsageParser
|
|
8
|
+
from .exceptions import ToolUsageParseException
|
|
9
|
+
from ._json_extractor import _find_json_blobs
|
|
9
10
|
|
|
10
11
|
if TYPE_CHECKING:
|
|
11
12
|
from autobyteus.llm.utils.response_types import CompleteResponse
|
|
@@ -15,54 +16,43 @@ logger = logging.getLogger(__name__)
|
|
|
15
16
|
class GeminiJsonToolUsageParser(BaseToolUsageParser):
|
|
16
17
|
"""
|
|
17
18
|
Parses LLM responses for tool usage commands formatted in the Google Gemini style.
|
|
18
|
-
It expects a JSON object with "name" and "args" keys.
|
|
19
|
+
It expects a JSON object with "name" and "args" keys. It robustly extracts
|
|
20
|
+
all potential JSON objects from the response.
|
|
19
21
|
"""
|
|
20
22
|
def get_name(self) -> str:
|
|
21
23
|
return "gemini_json_tool_usage_parser"
|
|
22
24
|
|
|
23
25
|
def parse(self, response: 'CompleteResponse') -> List[ToolInvocation]:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
if not
|
|
27
|
-
return invocations
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
parsed_json = json.loads(response_text)
|
|
31
|
-
|
|
32
|
-
if isinstance(parsed_json, list):
|
|
33
|
-
tool_calls = parsed_json
|
|
34
|
-
elif isinstance(parsed_json, dict) and 'tool_calls' in parsed_json:
|
|
35
|
-
tool_calls = parsed_json['tool_calls']
|
|
36
|
-
else:
|
|
37
|
-
tool_calls = [parsed_json]
|
|
38
|
-
|
|
39
|
-
for tool_data in tool_calls:
|
|
40
|
-
tool_name = tool_data.get("name")
|
|
41
|
-
arguments = tool_data.get("args")
|
|
42
|
-
|
|
43
|
-
if tool_name and isinstance(tool_name, str) and isinstance(arguments, dict):
|
|
44
|
-
tool_invocation = ToolInvocation(name=tool_name, arguments=arguments, id=str(uuid.uuid4()))
|
|
45
|
-
invocations.append(tool_invocation)
|
|
46
|
-
else:
|
|
47
|
-
logger.debug(f"Skipping malformed Gemini tool call data: {tool_data}")
|
|
48
|
-
|
|
49
|
-
return invocations
|
|
50
|
-
except json.JSONDecodeError:
|
|
51
|
-
logger.debug(f"Failed to decode JSON for Gemini tool call: {response_text}")
|
|
52
|
-
return []
|
|
53
|
-
except Exception as e:
|
|
54
|
-
logger.error(f"Error processing Gemini tool usage in {self.get_name()}: {e}", exc_info=True)
|
|
26
|
+
response_text = response.content
|
|
27
|
+
json_blobs = _find_json_blobs(response_text)
|
|
28
|
+
if not json_blobs:
|
|
55
29
|
return []
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
30
|
+
|
|
31
|
+
invocations: List[ToolInvocation] = []
|
|
32
|
+
for blob in json_blobs:
|
|
33
|
+
try:
|
|
34
|
+
data = json.loads(blob)
|
|
35
|
+
|
|
36
|
+
# This parser specifically looks for the {"name": ..., "args": ...} structure.
|
|
37
|
+
if isinstance(data, dict) and "name" in data and "args" in data:
|
|
38
|
+
tool_name = data.get("name")
|
|
39
|
+
arguments = data.get("args")
|
|
40
|
+
|
|
41
|
+
if tool_name and isinstance(tool_name, str) and isinstance(arguments, dict):
|
|
42
|
+
# Pass id=None to trigger deterministic ID generation in ToolInvocation
|
|
43
|
+
tool_invocation = ToolInvocation(name=tool_name, arguments=arguments)
|
|
44
|
+
invocations.append(tool_invocation)
|
|
45
|
+
logger.info(f"Successfully parsed Gemini JSON tool invocation for '{tool_name}'.")
|
|
46
|
+
else:
|
|
47
|
+
logger.debug(f"Skipping malformed Gemini tool call data: {data}")
|
|
48
|
+
|
|
49
|
+
except json.JSONDecodeError:
|
|
50
|
+
logger.debug(f"Could not parse extracted text as JSON in {self.get_name()}. Blob: {blob[:200]}")
|
|
51
|
+
# Not a tool call, ignore.
|
|
52
|
+
continue
|
|
53
|
+
except Exception as e:
|
|
54
|
+
error_msg = f"Unexpected error while parsing JSON blob in {self.get_name()}: {e}. Blob: {blob[:200]}"
|
|
55
|
+
logger.error(error_msg, exc_info=True)
|
|
56
|
+
raise ToolUsageParseException(error_msg, original_exception=e)
|
|
57
|
+
|
|
58
|
+
return invocations
|