langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,178 @@
|
|
1
|
+
import xml.etree.ElementTree as ET
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from xml.dom import minidom
|
4
|
+
|
5
|
+
from langroid.pydantic_v1 import BaseModel
|
6
|
+
|
7
|
+
|
8
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and are parsed from, delimited text.

    A subclass provides a formatter callable (``format_spec``), a parser
    callable (``parse_spec``) and the start/end tokens that wrap the payload.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls):
        """Return a callable that is invoked with the model's fields as keyword
        arguments and returns the payload string."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls):
        """Return a callable that maps a payload string to a dict of keyword
        arguments for the model constructor."""
        pass

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the token that must open a formatted string."""
        pass

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the token that must close a formatted string."""
        pass

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` as ``start_token``, payload, ``end_token``,
        each on its own line."""
        spec = cls.format_spec()
        formatted = spec(**instance.dict())
        return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from text produced by :meth:`format`.

        Surrounding whitespace is ignored and the tokens are matched
        case-insensitively.

        Raises:
            ValueError: if the text does not start with the start token or
                does not end with the end token.
        """
        content = formatted_string.strip()
        start_token = cls.start_token().strip()
        end_token = cls.end_token().strip()

        lowered = content.lower()
        if not lowered.startswith(start_token.lower()) or not lowered.endswith(
            end_token.lower()
        ):
            raise ValueError("Invalid start or end token")

        content = content[len(start_token) :].strip()
        # Use an explicit end index: with an empty end token the original
        # `content[:-0]` would have thrown the whole payload away.
        content = content[: len(content) - len(end_token)].strip()

        spec = cls.parse_spec()
        parsed = spec(content)
        return cls(**parsed)
|
52
|
+
|
53
|
+
|
54
|
+
class CodeFileModel(FormattingModel):
    """A code file (path, language, source text) rendered as a small XML
    document wrapped in ``<format>`` ... ``</format>``.

    Parsing strips leading and trailing whitespace from every field, so code
    that itself starts or ends with whitespace does not round-trip exactly.
    """

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the formatter that builds the ``<code_file_model>`` XML."""

        def xml_formatter(file_path: str, language: str, code: str) -> str:
            root = ET.Element("code_file_model")

            file_path_elem = ET.SubElement(root, "file_path")
            file_path_elem.text = file_path

            language_elem = ET.SubElement(root, "language")
            language_elem.text = language

            # The code is put on its own lines inside the element.
            code_elem = ET.SubElement(root, "code")
            code_elem.text = f"\n{code}\n"

            xml_str = ET.tostring(root, encoding="unicode")
            pretty_xml = minidom.parseString(xml_str).toprettyxml(indent=" ")

            # Remove the XML declaration (first line of minidom's output)
            pretty_xml = "\n".join(pretty_xml.split("\n")[1:])

            return pretty_xml.strip()

        return xml_formatter

    @classmethod
    def parse_spec(cls):
        """Return the parser that turns the XML payload into constructor kwargs.

        The returned callable raises ``ValueError`` when ``<file_path>`` or
        ``<language>`` is absent; an absent or empty ``<code>`` yields ``""``.
        """

        def parse_xml(content: str) -> dict:
            root = ET.fromstring(content)

            def required_text(tag: str) -> str:
                # Previously a missing tag or an empty element surfaced as an
                # AttributeError on None; report it explicitly instead.
                element = root.find(tag)
                if element is None:
                    raise ValueError(f"Required field '{tag}' not found in XML")
                return (element.text or "").strip()

            file_path = required_text("file_path")
            language = required_text("language")

            # Always bind `code`: a missing element or empty text gives "".
            # Only the outer whitespace is removed; internal newlines stay.
            code = ""
            code_element = root.find("code")
            if code_element is not None and code_element.text:
                code = code_element.text.strip()

            return {"file_path": file_path, "language": language, "code": code}

        return parse_xml

    @classmethod
    def start_token(cls):
        """Return the opening delimiter of the formatted text."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the closing delimiter of the formatted text."""
        return "</format>"
|
114
|
+
|
115
|
+
|
116
|
+
# Test cases
|
117
|
+
if __name__ == "__main__":
    # Self-check script: format a model, parse it back and compare.

    def _expect_equal(actual, expected, failure: str) -> None:
        """Raise AssertionError with a diagnostic when the values differ."""
        # Explicit raise instead of `assert`, so the checks still run (and the
        # "passed" messages stay truthful) under `python -O`.
        if not actual == expected:
            raise AssertionError(f"{failure}. Expected:\n{expected}\nGot:\n{actual}")

    # Test formatting
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted output:")
    print(formatted)
    print("Formatting test passed.")

    # Test parsing
    parsed = CodeFileModel.parse(formatted)
    _expect_equal(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Test round-trip
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _expect_equal(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # Test with different values
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _expect_equal(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Test tolerant parsing (padding inside the tags, code on its own lines).
    # NOTE(review): the payload is spelled out line by line so its whitespace
    # is explicit and agrees with `expected_tolerant`; the indentation inside
    # <code> is reconstructed from that expected value - confirm.
    tolerant_input = (
        "<format>\n"
        "<code_file_model>\n"
        "<file_path> src/main.py </file_path>\n"
        "<language> Python </language>\n"
        "<code>\n"
        "def hello():\n"
        " print('Hello, World!')\n"
        "</code>\n"
        "</code_file_model>\n"
        "</format>"
    )
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    _expect_equal(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
|
@@ -0,0 +1,178 @@
|
|
1
|
+
import xml.etree.ElementTree as ET
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Any, Dict
|
4
|
+
from xml.dom import minidom
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and are parsed from, delimited XML text.

    A subclass provides a formatter callable (``format_spec``), a mapping from
    field name to element path (``parse_spec``) and the start/end tokens that
    wrap the payload.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls):
        """Return a callable that is invoked with the model's fields as keyword
        arguments and returns the payload string."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> Dict[str, str]:
        """Return a mapping of field name to the ElementTree path used to
        locate that field's element in the payload."""
        pass

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the token that must open a formatted string."""
        pass

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the token that must close a formatted string."""
        pass

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` as ``start_token``, payload, ``end_token``,
        each on its own line."""
        spec = cls.format_spec()
        formatted = spec(**instance.dict())
        return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from text produced by :meth:`format`.

        Surrounding whitespace is ignored and the tokens are matched
        case-insensitively.

        Raises:
            ValueError: if the start or end token is missing, or if a field
                listed in ``parse_spec`` has no element in the payload.
        """
        content = formatted_string.strip()
        start_token = cls.start_token().strip()
        end_token = cls.end_token().strip()

        lowered = content.lower()
        if not lowered.startswith(start_token.lower()) or not lowered.endswith(
            end_token.lower()
        ):
            raise ValueError("Invalid start or end token")

        content = content[len(start_token) :].strip()
        # Use an explicit end index: with an empty end token the original
        # `content[:-0]` would have thrown the whole payload away.
        content = content[: len(content) - len(end_token)].strip()

        spec = cls.parse_spec()
        parsed = cls._parse_xml(content, spec)
        return cls(**parsed)

    @staticmethod
    def _parse_xml(content: str, spec: Dict[str, str]) -> Dict[str, Any]:
        """Extract the stripped text of each element named in ``spec``.

        Args:
            content: XML document text.
            spec: mapping of field name to the path passed to ``root.find``.

        Returns:
            Mapping of field name to element text with outer whitespace
            removed (``""`` when the element has no text).

        Raises:
            ValueError: if a path in ``spec`` matches no element.
        """
        root = ET.fromstring(content)
        result: Dict[str, Any] = {}
        for field, xpath in spec.items():
            element = root.find(xpath)
            if element is None:
                raise ValueError(f"Required field '{field}' not found in XML")
            # Every field is treated alike; the former special case for
            # "code" ran exactly the same expression.
            result[field] = element.text.strip() if element.text else ""
        return result
|
68
|
+
|
69
|
+
|
70
|
+
class CodeFileModel(FormattingModel):
    """A code file (path, language, source text) rendered as a small XML
    document wrapped in ``<format>`` ... ``</format>``."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the formatter that builds the ``<code_file_model>`` XML."""

        def xml_formatter(file_path: str, language: str, code: str) -> str:
            document = ET.Element("code_file_model")

            # Children are emitted in this fixed order; the code text is put
            # on its own lines inside its element.
            children = (
                ("file_path", file_path),
                ("language", language),
                ("code", f"\n{code}\n"),
            )
            for tag, text in children:
                ET.SubElement(document, tag).text = text

            serialized = ET.tostring(document, encoding="unicode")
            pretty_lines = (
                minidom.parseString(serialized).toprettyxml(indent=" ").split("\n")
            )

            # Drop the first line (the XML declaration) and trim the result.
            return "\n".join(pretty_lines[1:]).strip()

        return xml_formatter

    @classmethod
    def parse_spec(cls) -> Dict[str, str]:
        """Map each field to a path that finds its element anywhere in the
        document."""
        return {name: f".//{name}" for name in ("file_path", "language", "code")}

    @classmethod
    def start_token(cls):
        """Return the opening delimiter of the formatted text."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the closing delimiter of the formatted text."""
        return "</format>"
|
114
|
+
|
115
|
+
|
116
|
+
# Test cases
|
117
|
+
if __name__ == "__main__":
    # Self-check script: format a model, parse it back and compare.

    def _expect_equal(actual, expected, failure: str) -> None:
        """Raise AssertionError with a diagnostic when the values differ."""
        # Explicit raise instead of `assert`, so the checks still run (and the
        # "passed" messages stay truthful) under `python -O`.
        if not actual == expected:
            raise AssertionError(f"{failure}. Expected:\n{expected}\nGot:\n{actual}")

    # Test formatting
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted output:")
    print(formatted)
    print("Formatting test passed.")

    # Test parsing
    parsed = CodeFileModel.parse(formatted)
    _expect_equal(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Test round-trip
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _expect_equal(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # Test with different values
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _expect_equal(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Test tolerant parsing (padding inside the tags, code on its own lines).
    # NOTE(review): the payload is spelled out line by line so its whitespace
    # is explicit and agrees with `expected_tolerant`; the indentation inside
    # <code> is reconstructed from that expected value - confirm.
    tolerant_input = (
        "<format>\n"
        "<code_file_model>\n"
        "<file_path> src/main.py </file_path>\n"
        "<language> Python </language>\n"
        "<code>\n"
        "def hello():\n"
        " print('Hello, World!')\n"
        "</code>\n"
        "</code_file_model>\n"
        "</format>"
    )
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    _expect_equal(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
|
@@ -0,0 +1,132 @@
|
|
1
|
+
import xml.etree.ElementTree as ET
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
|
4
|
+
from langroid.pydantic_v1 import BaseModel
|
5
|
+
|
6
|
+
|
7
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models serialized as a flat XML element with one child per field.

    ``spec`` names the fields and tags each one as ``"cdata"`` or any other
    kind (treated as plain text); ``root_element`` names the enclosing tag.
    """

    @classmethod
    @abstractmethod
    def spec(cls):
        """Return a mapping of field name to field kind (e.g. ``"text"`` or
        ``"cdata"``)."""
        pass

    @classmethod
    @abstractmethod
    def root_element(cls) -> str:
        """Return the tag name of the root XML element."""
        pass

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Serialize ``instance`` to an XML string.

        ``"cdata"`` fields are wrapped in a literal ``<![CDATA[...]]>`` marker
        that is assigned as ordinary element text before serialization;
        :meth:`parse` recognizes that marker inside the element text.
        """
        root = ET.Element(cls.root_element())
        for field, field_type in cls.spec().items():
            value = getattr(instance, field)
            elem = ET.SubElement(root, field)
            if field_type == "cdata":
                elem.text = f"<![CDATA[{value}]]>"
            else:
                # ElementTree can only serialize str text; stringify other
                # values, but keep None so the element is still emitted empty.
                elem.text = None if value is None else str(value)
        return ET.tostring(root, encoding="unicode", method="xml")

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from XML text.

        Raises:
            ValueError: if the root tag differs from ``root_element()`` or a
                field listed in ``spec()`` has no child element.
        """
        root = ET.fromstring(formatted_string)
        if root.tag != cls.root_element():
            raise ValueError(
                f"Invalid root element: expected {cls.root_element()}, got {root.tag}"
            )

        cdata_open = "<![CDATA["
        cdata_close = "]]>"

        data = {}
        for field, field_type in cls.spec().items():
            elem = root.find(field)
            if elem is None:
                raise ValueError(f"Missing field: {field}")
            # An empty element has text None; treat it as "" so the CDATA
            # lookup below cannot fail on None.
            text = elem.text or ""
            if field_type == "cdata":
                # Extract the content between the literal CDATA markers
                cdata_start = text.find(cdata_open)
                cdata_end = text.rfind(cdata_close)
                if cdata_start != -1 and cdata_end != -1:
                    data[field] = text[cdata_start + len(cdata_open) : cdata_end]
                else:
                    # No literal marker in the text: use the text unchanged.
                    data[field] = text
            else:
                data[field] = text.strip()

        return cls(**data)
|
55
|
+
|
56
|
+
|
57
|
+
class CodeFileModel(FormattingModel):
    """A code file: language and path as plain text, source text as CDATA."""

    language: str
    file_path: str
    code: str

    @classmethod
    def spec(cls):
        """Return the field-name to field-kind mapping for this model."""
        field_kinds = dict(language="text", file_path="text", code="cdata")
        return field_kinds

    @classmethod
    def root_element(cls):
        """Return the tag that encloses the serialized fields."""
        tag = "code_file_model"
        return tag
|
69
|
+
|
70
|
+
|
71
|
+
# Test cases
|
72
|
+
if __name__ == "__main__":
    # Self-check script: format a model, parse it back and compare.

    def _expect_equal(actual, expected, failure: str) -> None:
        """Raise AssertionError with a diagnostic when the values differ."""
        # Explicit raise instead of `assert`, so the checks still run (and the
        # "passed" messages stay truthful) under `python -O`.
        if not actual == expected:
            raise AssertionError(f"{failure}. Expected:\n{expected}\nGot:\n{actual}")

    # Test formatting
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted XML:")
    print(formatted)

    # Test parsing
    parsed = CodeFileModel.parse(formatted)
    _expect_equal(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Test round-trip
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _expect_equal(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # Test with different values
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _expect_equal(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Test tolerant parsing (padded text fields, a real CDATA section).
    # NOTE(review): the payload is spelled out line by line so the whitespace
    # inside the CDATA section is explicit and agrees with
    # `expected_tolerant`; that whitespace is reconstructed from the expected
    # value - confirm.
    tolerant_input = (
        "\n"
        "<code_file_model>\n"
        "<language> Python </language>\n"
        "<file_path> src/main.py </file_path>\n"
        "<code><![CDATA[\n"
        "def hello():\n"
        " print('Hello, World!')\n"
        " ]]></code>\n"
        "</code_file_model>\n"
    )
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="\ndef hello():\n print('Hello, World!')\n ",
    )
    _expect_equal(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
|
@@ -0,0 +1,130 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
|
3
|
+
from lxml import etree
|
4
|
+
|
5
|
+
from langroid.pydantic_v1 import BaseModel
|
6
|
+
|
7
|
+
|
8
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models serialized with lxml as a flat XML element, one child
    per field.

    ``spec`` names the fields and their kinds; ``root_element`` names the
    enclosing tag.
    """

    @classmethod
    @abstractmethod
    def spec(cls):
        """Return a mapping of field name to field kind (e.g. ``"text"`` or
        ``"cdata"``)."""
        pass

    @classmethod
    @abstractmethod
    def root_element(cls) -> str:
        """Return the tag name of the root XML element."""
        pass

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Serialize ``instance`` to pretty-printed XML.

        ``"cdata"`` fields are written as CDATA sections; every other field is
        written as the ``str()`` of its value.
        """
        document = etree.Element(cls.root_element())
        for name, kind in cls.spec().items():
            raw = getattr(instance, name)
            child = etree.SubElement(document, name)
            child.text = etree.CDATA(raw) if kind == "cdata" else str(raw)
        return etree.tostring(document, encoding="unicode", pretty_print=True)

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from XML text.

        Raises:
            ValueError: if the root tag differs from ``root_element()`` or a
                field listed in ``spec()`` has no child element.
        """
        document = etree.fromstring(formatted_string.encode("utf-8"))
        wanted_tag = cls.root_element()
        if document.tag != wanted_tag:
            raise ValueError(
                f"Invalid root element: expected {wanted_tag}, got {document.tag}"
            )

        values = {}
        for name, kind in cls.spec().items():
            node = document.find(name)
            if node is None:
                raise ValueError(f"Missing field: {name}")
            content = node.text
            if kind == "text":
                # Plain-text fields are trimmed; an empty element becomes "".
                content = content.strip() if content else ""
            # "cdata" and any other kind keep the element text untouched.
            values[name] = content

        return cls(**values)
|
53
|
+
|
54
|
+
|
55
|
+
class CodeFileModel(FormattingModel):
    """A code file: language and path as plain text, source text as CDATA."""

    language: str
    file_path: str
    code: str

    @classmethod
    def spec(cls):
        """Return the field-name to field-kind mapping for this model."""
        field_kinds = dict(language="text", file_path="text", code="cdata")
        return field_kinds

    @classmethod
    def root_element(cls):
        """Return the tag that encloses the serialized fields."""
        tag = "code_file_model"
        return tag
|
67
|
+
|
68
|
+
|
69
|
+
# Test cases
|
70
|
+
if __name__ == "__main__":
    # Self-check script: format a model, parse it back and compare.

    def _expect_equal(actual, expected, failure: str) -> None:
        """Raise AssertionError with a diagnostic when the values differ."""
        # Explicit raise instead of `assert`, so the checks still run (and the
        # "passed" messages stay truthful) under `python -O`.
        if not actual == expected:
            raise AssertionError(f"{failure}. Expected:\n{expected}\nGot:\n{actual}")

    # Test formatting
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted XML:")
    print(formatted)

    # Test parsing
    parsed = CodeFileModel.parse(formatted)
    _expect_equal(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Test round-trip
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _expect_equal(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # Test with different values
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _expect_equal(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Test tolerant parsing (padded text fields, a real CDATA section).
    # NOTE(review): the payload is spelled out line by line so the whitespace
    # inside the CDATA section is explicit and agrees with
    # `expected_tolerant`; that whitespace is reconstructed from the expected
    # value - confirm.
    tolerant_input = (
        "\n"
        "<code_file_model>\n"
        "<language> Python </language>\n"
        "<file_path> src/main.py </file_path>\n"
        "<code><![CDATA[\n"
        "def hello():\n"
        " print('Hello, World!')\n"
        " ]]></code>\n"
        "</code_file_model>\n"
    )
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="\ndef hello():\n print('Hello, World!')\n ",
    )
    _expect_equal(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
|