quantalogic 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantalogic/__init__.py +20 -0
- quantalogic/agent.py +638 -0
- quantalogic/agent_config.py +138 -0
- quantalogic/coding_agent.py +83 -0
- quantalogic/event_emitter.py +223 -0
- quantalogic/generative_model.py +226 -0
- quantalogic/interactive_text_editor.py +190 -0
- quantalogic/main.py +185 -0
- quantalogic/memory.py +217 -0
- quantalogic/model_names.py +19 -0
- quantalogic/print_event.py +66 -0
- quantalogic/prompts.py +99 -0
- quantalogic/server/__init__.py +3 -0
- quantalogic/server/agent_server.py +633 -0
- quantalogic/server/models.py +60 -0
- quantalogic/server/routes.py +117 -0
- quantalogic/server/state.py +199 -0
- quantalogic/server/static/js/event_visualizer.js +430 -0
- quantalogic/server/static/js/quantalogic.js +571 -0
- quantalogic/server/templates/index.html +134 -0
- quantalogic/tool_manager.py +68 -0
- quantalogic/tools/__init__.py +46 -0
- quantalogic/tools/agent_tool.py +88 -0
- quantalogic/tools/download_http_file_tool.py +64 -0
- quantalogic/tools/edit_whole_content_tool.py +70 -0
- quantalogic/tools/elixir_tool.py +240 -0
- quantalogic/tools/execute_bash_command_tool.py +116 -0
- quantalogic/tools/input_question_tool.py +57 -0
- quantalogic/tools/language_handlers/__init__.py +21 -0
- quantalogic/tools/language_handlers/c_handler.py +33 -0
- quantalogic/tools/language_handlers/cpp_handler.py +33 -0
- quantalogic/tools/language_handlers/go_handler.py +33 -0
- quantalogic/tools/language_handlers/java_handler.py +37 -0
- quantalogic/tools/language_handlers/javascript_handler.py +42 -0
- quantalogic/tools/language_handlers/python_handler.py +29 -0
- quantalogic/tools/language_handlers/rust_handler.py +33 -0
- quantalogic/tools/language_handlers/scala_handler.py +33 -0
- quantalogic/tools/language_handlers/typescript_handler.py +42 -0
- quantalogic/tools/list_directory_tool.py +123 -0
- quantalogic/tools/llm_tool.py +119 -0
- quantalogic/tools/markitdown_tool.py +105 -0
- quantalogic/tools/nodejs_tool.py +515 -0
- quantalogic/tools/python_tool.py +469 -0
- quantalogic/tools/read_file_block_tool.py +140 -0
- quantalogic/tools/read_file_tool.py +79 -0
- quantalogic/tools/replace_in_file_tool.py +300 -0
- quantalogic/tools/ripgrep_tool.py +353 -0
- quantalogic/tools/search_definition_names.py +419 -0
- quantalogic/tools/task_complete_tool.py +35 -0
- quantalogic/tools/tool.py +146 -0
- quantalogic/tools/unified_diff_tool.py +387 -0
- quantalogic/tools/write_file_tool.py +97 -0
- quantalogic/utils/__init__.py +17 -0
- quantalogic/utils/ask_user_validation.py +12 -0
- quantalogic/utils/download_http_file.py +77 -0
- quantalogic/utils/get_coding_environment.py +15 -0
- quantalogic/utils/get_environment.py +26 -0
- quantalogic/utils/get_quantalogic_rules_content.py +19 -0
- quantalogic/utils/git_ls.py +121 -0
- quantalogic/utils/read_file.py +54 -0
- quantalogic/utils/read_http_text_content.py +101 -0
- quantalogic/xml_parser.py +242 -0
- quantalogic/xml_tool_parser.py +99 -0
- quantalogic-0.2.0.dist-info/LICENSE +201 -0
- quantalogic-0.2.0.dist-info/METADATA +1034 -0
- quantalogic-0.2.0.dist-info/RECORD +68 -0
- quantalogic-0.2.0.dist-info/WHEEL +4 -0
- quantalogic-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,121 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Dict, List
|
4
|
+
|
5
|
+
from pathspec import PathSpec
|
6
|
+
from pathspec.patterns import GitWildMatchPattern
|
7
|
+
|
8
|
+
|
9
|
+
def git_ls(
    directory_path: str, recursive: bool = False, max_depth: int = 10, start_line: int = 1, end_line: int = 500
) -> str:
    """List files respecting .gitignore rules with formatted output.

    Args:
        directory_path: Path of the directory to list (supports ~ expansion).
        recursive: Whether to descend into subdirectories (accepts bool or "true"/"false").
        max_depth: Maximum recursion depth.
        start_line: First output line for pagination (1-based, inclusive).
        end_line: Last output line for pagination (inclusive).

    Returns:
        A paginated, tree-formatted string describing the directory contents.
    """
    # Arguments may arrive as strings (e.g. from a tool invocation), so coerce them.
    if not isinstance(recursive, bool):
        recursive = recursive.lower() == "true"
    max_depth = int(max_depth)
    start_line = int(start_line)
    end_line = int(end_line)

    # Resolve ~ and relative segments to an absolute path.
    root = Path(os.path.expanduser(directory_path)).absolute()

    # Combine .gitignore patterns from this directory and all of its ancestors.
    spec = load_gitignore_spec(root)

    # Walk the directory into a nested dict, then render the requested slice.
    tree = generate_file_tree(root, spec, recursive=recursive, max_depth=max_depth)
    return format_tree(tree, start_line, end_line)
|
41
|
+
|
42
|
+
|
43
|
+
def load_gitignore_spec(path: Path) -> PathSpec:
    """Collect .gitignore patterns from *path* and every ancestor into one PathSpec."""
    patterns: list = []
    current = path

    # Walk upward until the filesystem root is reached. Patterns from parent
    # directories are prepended so patterns closer to the target directory
    # keep precedence when the spec is evaluated.
    while current != current.parent:
        candidate = current / ".gitignore"
        if candidate.exists():
            with open(candidate) as fh:
                patterns = fh.readlines() + patterns
        current = current.parent

    return PathSpec.from_lines(GitWildMatchPattern, patterns)
|
58
|
+
|
59
|
+
|
60
|
+
def generate_file_tree(
    path: Path, ignore_spec: PathSpec, recursive: bool = False, max_depth: int = 1, current_depth: int = 0
) -> Dict:
    """Build a nested dict describing *path*: files carry a size, directories carry children.

    Entries beyond *max_depth*, ignored by *ignore_spec*, or named ".git"
    collapse to an empty dict so callers can prune them.
    """
    if current_depth > max_depth:
        return {}
    if path.name == ".git" or ignore_spec.match_file(path):
        return {}

    if path.is_file():
        return {"name": path.name, "type": "file", "size": f"{path.stat().st_size} bytes"}

    node: Dict = {"name": path.name, "type": "directory", "children": []}

    # Direct children are always listed; subdirectories are only descended
    # into when *recursive* is set. Sort case-insensitively for stable output.
    for entry in sorted(path.iterdir(), key=lambda p: p.name.lower()):
        if ignore_spec.match_file(entry):
            continue
        if entry.is_file():
            node["children"].append(generate_file_tree(entry, ignore_spec, recursive, max_depth, current_depth))
        elif entry.is_dir():
            if recursive:
                subtree = generate_file_tree(entry, ignore_spec, recursive, max_depth, current_depth + 1)
                if subtree:
                    node["children"].append(subtree)
            else:
                # Non-recursive mode still shows the directory, just without contents.
                node["children"].append({"name": entry.name, "type": "directory", "children": []})

    return node
|
92
|
+
|
93
|
+
|
94
|
+
def format_tree(tree: Dict, start: int, end: int) -> str:
    """Render the tree dict as indented lines and return the [start, end] slice.

    The header advertises which lines are shown; the final block is tagged
    with [LAST BLOCK] so callers know there is nothing further to paginate.
    """
    rendered: List[str] = []
    _format_tree_recursive(tree, rendered, 0)

    total = len(rendered)
    body = "\n".join(rendered[start - 1 : end])

    if end >= total:
        header = f"==== Lines: {start}-{total} of {total} ====" + f" [LAST BLOCK] (total_lines: {total})"
    else:
        header = f"==== Lines: {start}-{end} of {total} ===="
    return f"{header}\n{body}\n==== End of Block ===="
|
107
|
+
|
108
|
+
|
109
|
+
def _format_tree_recursive(node: Dict, lines: List[str], depth: int):
|
110
|
+
"""Recursively format tree nodes."""
|
111
|
+
indent = " " * depth
|
112
|
+
if node["type"] == "file":
|
113
|
+
lines.append(f"{indent}📄 {node['name']} ({node['size']})")
|
114
|
+
else:
|
115
|
+
lines.append(f"{indent}📁 {node['name']}/")
|
116
|
+
for child in node["children"]:
|
117
|
+
_format_tree_recursive(child, lines, depth + 1)
|
118
|
+
|
119
|
+
|
120
|
+
if __name__ == "__main__":
    # Manual smoke test: deep recursive listing of the current directory.
    print(git_ls("./", recursive=True, max_depth=30, start_line=1, end_line=500))
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""Reads the content of a file and returns it as a string."""
|
2
|
+
|
3
|
+
import os
|
4
|
+
|
5
|
+
|
6
|
+
def read_file(file_path: str, max_size: int = 10 * 1024 * 1024) -> str:
    """Reads the content of a file and returns it as a string.

    This function performs the following steps:
    1. Expands the tilde (~) in the file path to the user's home directory.
    2. Converts a relative path to an absolute path.
    3. Checks the file size before reading to ensure it is not too large.
    4. Reads the file content (UTF-8) and returns it as a string.

    Parameters:
        file_path (str): The path to the file to be read.
        max_size (int): Maximum allowed file size in bytes (default 10 MiB).

    Returns:
        str: The content of the file as a string.

    Raises:
        FileNotFoundError: If the file does not exist.
        PermissionError: If the file cannot be read due to permission issues.
        OSError: If the file size is too large or other OS-related errors occur.
    """
    # Expand ~ and resolve to an absolute path so error messages are unambiguous.
    absolute_path = os.path.abspath(os.path.expanduser(file_path))

    # Validate outside the read try-block. Previously the existence and size
    # errors were raised inside a try whose own handlers re-caught them: the
    # FileNotFoundError was redundantly re-raised and the size-limit OSError
    # was double-wrapped into "An error occurred while reading the file: ...".
    if not os.path.exists(absolute_path):
        raise FileNotFoundError(f"The file '{absolute_path}' does not exist.")

    # Check the size first so an oversized file is never loaded into memory.
    file_size = os.path.getsize(absolute_path)
    if file_size > max_size:
        raise OSError(f"File size ({file_size} bytes) exceeds the maximum allowed size ({max_size} bytes).")

    try:
        with open(absolute_path, encoding="utf-8") as file:
            return file.read()
    except PermissionError as e:
        raise PermissionError(f"Permission denied: Unable to read the file '{absolute_path}'.") from e
    except OSError as e:
        raise OSError(f"An error occurred while reading the file: {e}") from e
|
@@ -0,0 +1,101 @@
|
|
1
|
+
"""Utility function to read text content from a given URL and return it as a string."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
from time import sleep
|
5
|
+
|
6
|
+
import requests
|
7
|
+
from requests.exceptions import ConnectionError, HTTPError, RequestException
|
8
|
+
|
9
|
+
# Configure logging
|
10
|
+
logging.basicConfig(level=logging.ERROR, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
def read_http_text_content(
    url: str, timeout: int = 10, retries: int = 3, delay: int = 2
) -> tuple[str | None, str | None]:
    """Fetches the content from the given URL and returns it as a string.

    Args:
        url (str): The URL from which to fetch the content.
        timeout (int): Timeout in seconds for the HTTP request. Default is 10.
        retries (int): Number of retries in case of failure. Default is 3.
        delay (int): Base delay in seconds between retries (doubled each attempt). Default is 2.

    Returns:
        tuple[str | None, str | None]: A tuple containing the content as a string and an error message.
            If successful, the error message is None.
            If failed, the content is None and the error message is provided.

    Examples:
        >>> content, error = read_http_text_content("https://example.com/data.txt")
        >>> if error:
        ...     print(f"Error: {error}")
        ... else:
        ...     print(content)

        >>> content, error = read_http_text_content("https://example.com/binary.data")
        >>> if error:
        ...     print(f"Error: {error}")  # Output: Error: Expected text-based content, but received binary content with Content-Type: application/octet-stream
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/plain, application/json, application/xml, text/csv, text/html, application/javascript, application/x-yaml, application/x-www-form-urlencoded, application/octet-stream",
    }

    # Initialized up front so the final return is safe even when retries <= 0;
    # previously this raised UnboundLocalError because error_msg was only ever
    # assigned inside the loop body.
    error_msg: str | None = None

    for attempt in range(retries):
        try:
            logger.info(f"Attempt {attempt + 1} of {retries} to fetch {url}")
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)

            # Reject responses whose Content-Type is not a known text-based type.
            content_type = response.headers.get("Content-Type", "").lower()
            text_based_types = [
                "text/",
                "application/json",
                "application/xml",
                "text/csv",
                "text/html",
                "application/javascript",
                "application/x-yaml",
                "application/x-www-form-urlencoded",
            ]
            if not any(content_type.startswith(t) for t in text_based_types):
                error_msg = (
                    f"Expected text-based content, but received binary content with Content-Type: {content_type}"
                )
                logger.error(error_msg)
                return None, error_msg

            return response.text, None
        except HTTPError as http_err:
            status_code = http_err.response.status_code if http_err.response else "unknown"
            error_msg = f"HTTP error occurred (status code: {status_code}): {http_err}"
            logger.error(error_msg)
            if status_code in [404, 403, 401]:  # These will not succeed on retry
                break
        except ConnectionError as conn_err:
            error_msg = f"Connection error occurred (URL: {url}): {conn_err}"
            logger.error(error_msg)
        except requests.Timeout as timeout_err:
            error_msg = f"Request timed out after {timeout} seconds: {timeout_err}"
            logger.error(error_msg)
        except RequestException as req_err:
            error_msg = f"An unexpected error occurred (URL: {url}): {req_err}"
            logger.error(error_msg)

        if attempt < retries - 1:
            sleep_duration = delay * (2**attempt)  # Exponential backoff
            logger.info(f"Retrying in {sleep_duration} seconds...")
            sleep(sleep_duration)

    return None, error_msg
|
94
|
+
|
95
|
+
|
96
|
+
if __name__ == "__main__":
    # Manual smoke test against the project home page.
    content, error = read_http_text_content("https://www.quantalogic.app")
    if error:
        print(f"Error: {error}")
    else:
        print(content)
|
@@ -0,0 +1,242 @@
|
|
1
|
+
"""XML parsing utilities for extracting and processing XML-like elements.
|
2
|
+
|
3
|
+
This module provides tools for parsing and extracting XML-like elements from text,
|
4
|
+
with support for handling malformed XML and CDATA sections.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import html
|
8
|
+
import re
|
9
|
+
from collections import defaultdict
|
10
|
+
from typing import Self
|
11
|
+
|
12
|
+
from loguru import logger
|
13
|
+
from pydantic import BaseModel, Field, model_validator
|
14
|
+
|
15
|
+
|
16
|
+
class XMLElement(BaseModel):
    """A parsed XML element with its content and positional metadata.

    Captures everything needed to locate and reuse an element found in an
    XML-like document.

    Attributes:
        name: Tag name of the element.
        content: Text content of the element.
        raw: Complete raw string the element was parsed from.
        start_pos: Starting character offset within the original document.
        end_pos: Ending character offset within the original document.
        cdata_sections: CDATA sections found inside the element.
    """

    name: str = Field(..., description="The name of the XML element (tag name)")
    content: str = Field(..., description="The textual content of the XML element")
    raw: str = Field(..., description="The complete raw string representation")
    start_pos: int = Field(..., description="Starting character position", ge=0)
    end_pos: int = Field(..., description="Ending character position", gt=0)
    cdata_sections: list[str] = Field(default_factory=list, description="List of CDATA sections within the element")

    @model_validator(mode="after")
    def validate_positions(self) -> Self:
        """Reject elements whose character span is empty or inverted."""
        if self.start_pos >= self.end_pos:
            raise ValueError("end_pos must be greater than start_pos")
        return self
|
45
|
+
|
46
|
+
|
47
|
+
class ToleranceXMLParser:
    """A flexible XML-like parser for malformed and non-standard XML elements.

    This parser extracts XML-like elements from text, supporting various
    edge cases such as incomplete tags and CDATA sections.
    """

    def __init__(self: Self) -> None:
        """Initialize the parser with regex patterns for matching XML-like elements."""
        # Lenient element pattern: group 1 is the tag name, group 2 the body.
        # The closing alternation also accepts `<tag>` (not just `</tag>`) so
        # malformed documents still yield content. DOTALL lets bodies span lines.
        self.element_pattern = re.compile(r"<([^/>]+?)>(.*?)(?:</\1>|<\1>)", re.DOTALL)
        # Pattern for matching CDATA sections.
        self.cdata_pattern = re.compile(r"<!\[CDATA\[(.*?)]]>", re.DOTALL)
        logger.debug("Initialized ToleranceXMLParser with regex patterns")

    def _extract_and_remove_cdata(self: Self, content: str, preserve_cdata: bool = False) -> tuple[str, list[str]]:
        """Collect CDATA sections from *content*.

        Args:
            content: Input text to extract CDATA sections from.
            preserve_cdata: If True, each CDATA wrapper is replaced by its bare
                payload. If False, the raw CDATA markup is left in place
                (despite the method name) and the payloads are returned separately.

        Returns:
            A tuple containing:
                - The content with CDATA sections handled as described above
                - List of extracted CDATA payloads
        """
        cdata_sections: list[str] = []

        def replace_cdata(match: re.Match[str]) -> str:
            cdata_content = match.group(1)
            cdata_sections.append(cdata_content)
            # preserve_cdata=True strips the wrapper; otherwise keep raw markup.
            return cdata_content if preserve_cdata else match.group(0)

        cleaned_content = self.cdata_pattern.sub(replace_cdata, content)
        return cleaned_content, cdata_sections

    def _clean_content(self: Self, content: str) -> str:
        """Unescape HTML entities while preserving all other formatting exactly.

        Args:
            content: Raw XML content to clean.

        Returns:
            Content with HTML entities unescaped, formatting untouched.
        """
        return html.unescape(content)

    def _map_element_name(self: Self, name: str) -> str:
        """Map shorthand element names to their canonical form.

        Args:
            name: Raw element name from XML.

        Returns:
            Canonical element name ("o" -> "output", "i" -> "input", "opt" -> "optional").
        """
        name_map = {"o": "output", "i": "input", "opt": "optional"}
        return name_map.get(name.strip(), name.strip())

    def _extract_element_content(self: Self, text: str, preserve_cdata: bool = False) -> dict[str, str]:
        """Extract content from nested XML elements.

        Args:
            text: Input text containing XML elements.
            preserve_cdata: If True, preserve CDATA content in place.

        Returns:
            Dictionary mapping element names to their content values. Nested
            elements are surfaced at the top level; duplicates keep the last value.
        """
        elements: dict[str, str] = defaultdict(str)

        for match in self.element_pattern.finditer(text):
            name = self._map_element_name(match.group(1))
            content = match.group(2) or ""

            # Extract CDATA, then unescape HTML entities in the remaining text.
            content, cdata_sections = self._extract_and_remove_cdata(content, preserve_cdata)
            content = self._clean_content(content)

            # An element whose only payload was CDATA falls back to the first
            # CDATA body when CDATA is not being preserved in place.
            if not content.strip() and cdata_sections and not preserve_cdata:
                content = cdata_sections[0]

            elements[name] = content

            # Recurse so nested elements are also exposed at the top level.
            elements.update(self._extract_element_content(content, preserve_cdata))

        return dict(elements)  # Convert defaultdict to regular dict

    def extract_elements(
        self: Self,
        text: str,
        element_names: list[str] | None = None,
        preserve_cdata: bool = False,
    ) -> dict[str, str]:
        """Extract XML-like elements from text, grouped by element names.

        Args:
            text: Input text containing XML-like elements.
            element_names: Optional list of element names to extract.
                If None, extracts all elements.
            preserve_cdata: If True, preserve CDATA content in place.
                If False, remove CDATA sections.

        Returns:
            Dictionary mapping element names to their content values.
            For elements with multiple instances, only the last value is kept.

        Raises:
            ValueError: If the input text is invalid or contains malformed XML.
        """
        try:
            if not text or not isinstance(text, str):
                raise ValueError("Input text must be a non-empty string")

            logger.debug(f"Extracting elements: {element_names or 'all'}")

            elements = self._extract_element_content(text, preserve_cdata)

            # Filter elements if specific names were requested.
            if element_names is not None:
                elements = {name: content for name, content in elements.items() if name in element_names}

            logger.debug(f"Successfully extracted {len(elements)} elements")
            return elements

        except Exception as e:
            error_msg = f"Error extracting XML elements: {str(e)}"
            logger.error(error_msg)
            raise ValueError(error_msg) from e

    def find_elements(self: Self, text: str, element_name: str) -> list[XMLElement]:
        """Find all instances of a specific XML element in the text.

        Args:
            text: Input text to search for elements.
            element_name: Name of the element to find.

        Returns:
            List of XMLElement instances for each found element.

        Raises:
            ValueError: If the input text is invalid or contains malformed XML.
        """
        try:
            if not text or not isinstance(text, str):
                raise ValueError("Input text must be a non-empty string")

            elements: list[XMLElement] = []
            pattern = re.compile(
                f"<{element_name}>"
                r"((?:(?!<!\[CDATA\[|]]>).)*?"
                r"(?:<!\[CDATA\[.*?]]>)?"
                r"(?:(?!<!\[CDATA\[|]]>).)*?)"
                # BUG FIX: the closing alternation previously used "\1" inside a
                # non-raw f-string; the string literal turned it into the control
                # character \x01 (and group 1 here is the content, not the tag
                # name), so closing tags never matched. Interpolate the element
                # name instead, mirroring the opening tag.
                f"(?:</{element_name}>|<{element_name}>)",
                re.DOTALL,
            )

            for match in pattern.finditer(text):
                content = match.group(1)
                cleaned_content, cdata_sections = self._extract_and_remove_cdata(content)
                cleaned_content = self._clean_content(cleaned_content)

                element = XMLElement(
                    name=element_name,
                    content=cleaned_content,
                    raw=match.group(0),
                    start_pos=match.start(),
                    end_pos=match.end(),
                    cdata_sections=cdata_sections,
                )
                elements.append(element)

            return elements

        except Exception as e:
            error_msg = f"Error extracting XML elements: {str(e)}"
            logger.error(error_msg)
            raise ValueError(error_msg) from e
|
@@ -0,0 +1,99 @@
|
|
1
|
+
"""XML-based tool argument parser.
|
2
|
+
|
3
|
+
This module provides functionality for parsing tool arguments from XML-like
|
4
|
+
input, with support for validation and error handling.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Self
|
8
|
+
|
9
|
+
from loguru import logger
|
10
|
+
from pydantic import BaseModel, Field
|
11
|
+
|
12
|
+
from quantalogic.tools.tool import Tool
|
13
|
+
from quantalogic.xml_parser import ToleranceXMLParser
|
14
|
+
|
15
|
+
|
16
|
+
class ToolArguments(BaseModel):
    """Model for storing and validating tool arguments.

    This model provides a structured way to store and validate arguments
    extracted from XML input, ensuring they meet the tool's requirements.
    All values are kept as strings; type coercion is left to the tool itself.

    Attributes:
        arguments: Dictionary mapping argument names to their values.
    """

    # Defaults to an empty mapping so a tool with no arguments validates cleanly.
    arguments: dict[str, str] = Field(
        default_factory=dict, description="Dictionary mapping argument names to their values"
    )
|
29
|
+
|
30
|
+
|
31
|
+
class ToolParser:
    """Parser for extracting and validating tool arguments from XML input.

    Parses XML-like input to extract tool arguments, validates them against
    the tool's declared argument specifications, and provides uniform error
    handling and logging.

    Attributes:
        tool: The tool instance containing argument specifications.
        xml_parser: Parser for handling XML-like input.
    """

    def __init__(self: Self, tool: Tool) -> None:
        """Initialize the parser with a tool instance.

        Args:
            tool: Tool instance containing argument specifications.
        """
        self.tool = tool
        self.xml_parser = ToleranceXMLParser()

    def parse(self: Self, xml_string: str) -> dict[str, str]:
        """Parse an XML string and return validated tool arguments.

        Args:
            xml_string: The XML string containing tool arguments.

        Returns:
            A dictionary mapping argument names to their values.

        Raises:
            ValueError: If required arguments are missing or XML is invalid.
        """

        def _reject(reason: str) -> None:
            # Log and raise with the uniform error prefix used across this module.
            message = f"Error extracting XML elements: {reason}"
            logger.error(message)
            raise ValueError(message)

        try:
            if not xml_string:
                _reject("Input text must be a non-empty string")
            if not xml_string.strip().startswith("<"):
                _reject("Failed to parse XML")

            # CDATA payloads are kept in place so argument values survive intact.
            elements = self.xml_parser.extract_elements(xml_string, preserve_cdata=True)
            logger.debug(f"Extracted elements from XML: {elements}")

            # Every required argument must appear in the parsed elements.
            for arg in self.tool.arguments:
                if arg.required and arg.name not in elements:
                    _reject(f"argument {arg.name} not found")

            # Build the argument mapping (missing optionals default to "").
            values = {arg.name: elements.get(arg.name, "") for arg in self.tool.arguments}

            # Validate through the Pydantic model before returning.
            validated = ToolArguments(arguments=values)
            logger.debug(f"Successfully parsed arguments: {validated.arguments}")
            return validated.arguments

        except ValueError as e:
            # Wrap messages that do not already carry the module's error prefix.
            if str(e).startswith("Error extracting XML elements:"):
                raise
            wrapped = f"Error extracting XML elements: {str(e)}"
            logger.error(wrapped)
            raise ValueError(wrapped)
|