langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. langroid/agent/md_tool_message_grammar.py +455 -0
  2. langroid/agent/tools/code_file_tool_parse.py +150 -0
  3. langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
  4. langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
  5. langroid/agent/tools/formatted_model_custom.py +150 -0
  6. langroid/agent/tools/formatted_model_custom2.py +168 -0
  7. langroid/agent/tools/formatted_model_custom3.py +279 -0
  8. langroid/agent/tools/formatted_model_custom4.py +395 -0
  9. langroid/agent/tools/formatted_model_jinja.py +133 -0
  10. langroid/agent/tools/formatted_model_jinja.py-e +122 -0
  11. langroid/agent/tools/formatted_model_jinja2.py +145 -0
  12. langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
  13. langroid/agent/tools/formatted_model_lark.py +0 -0
  14. langroid/agent/tools/formatted_model_lark2.py +168 -0
  15. langroid/agent/tools/formatted_model_parse.py +105 -0
  16. langroid/agent/tools/formatted_model_parse.py-e +98 -0
  17. langroid/agent/tools/formatted_model_parse2.py +113 -0
  18. langroid/agent/tools/formatted_model_parse2.py-e +109 -0
  19. langroid/agent/tools/formatted_model_parse3.py +114 -0
  20. langroid/agent/tools/formatted_model_parse3.py-e +110 -0
  21. langroid/agent/tools/formatted_model_parsimon.py +194 -0
  22. langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
  23. langroid/agent/tools/formatted_model_pyparsing.py +169 -0
  24. langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
  25. langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
  26. langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
  27. langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
  28. langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
  29. langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
  30. langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
  31. langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
  32. langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
  33. langroid/agent/tools/formatted_model_regex.py +246 -0
  34. langroid/agent/tools/formatted_model_regex.py-e +248 -0
  35. langroid/agent/tools/formatted_model_regex2.py +250 -0
  36. langroid/agent/tools/formatted_model_regex2.py-e +253 -0
  37. langroid/agent/tools/formatted_model_tatsu.py +172 -0
  38. langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
  39. langroid/agent/tools/formatted_model_template.py +217 -0
  40. langroid/agent/tools/formatted_model_template.py-e +200 -0
  41. langroid/agent/tools/formatted_model_xml.py +178 -0
  42. langroid/agent/tools/formatted_model_xml2.py +178 -0
  43. langroid/agent/tools/formatted_model_xml3.py +132 -0
  44. langroid/agent/tools/formatted_model_xml4.py +130 -0
  45. langroid/agent/tools/formatted_model_xml5.py +130 -0
  46. langroid/agent/tools/formatted_model_xml6.py +113 -0
  47. langroid/agent/tools/formatted_model_xml7.py +117 -0
  48. langroid/agent/tools/formatted_model_xml8.py +164 -0
  49. langroid/agent/tools/generic_tool.py +165 -0
  50. langroid/agent/tools/generic_tool_tatsu.py +275 -0
  51. langroid/agent/tools/grammar_based_model.py +132 -0
  52. langroid/agent/tools/grammar_based_model.py-e +128 -0
  53. langroid/agent/tools/grammar_based_model_lark.py +156 -0
  54. langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
  55. langroid/agent/tools/grammar_based_model_parse.py +86 -0
  56. langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
  57. langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
  58. langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
  59. langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
  60. langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
  61. langroid/agent/tools/grammar_based_model_regex.py +139 -0
  62. langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
  63. langroid/agent/tools/grammar_based_model_regex2.py +124 -0
  64. langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
  65. langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
  66. langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
  67. langroid/agent/tools/lark_earley_example.py +135 -0
  68. langroid/agent/tools/lark_earley_example.py-e +117 -0
  69. langroid/agent/tools/lark_example.py +72 -0
  70. langroid/agent/tools/parse_example.py +76 -0
  71. langroid/agent/tools/parse_example2.py +87 -0
  72. langroid/agent/tools/parse_example3.py +42 -0
  73. langroid/agent/tools/parse_test.py +791 -0
  74. langroid/agent/xml_tool_message.py +106 -0
  75. langroid/language_models/openai_gpt.py +6 -1
  76. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
  77. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
  78. pyproject.toml +1 -1
  79. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
  80. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,178 @@
1
+ import xml.etree.ElementTree as ET
2
+ from abc import ABC, abstractmethod
3
+ from xml.dom import minidom
4
+
5
+ from langroid.pydantic_v1 import BaseModel
6
+
7
+
8
class FormattingModel(BaseModel, ABC):
    """Base class for models rendered to, and parsed from, a delimited block.

    Subclasses provide the delimiters plus two callables: one that turns the
    model's fields into text and one that turns that text back into a mapping
    of field values.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls):
        """Return a callable that renders the fields to a string.

        ``format`` calls it with the instance's fields as keyword arguments.
        """

    @classmethod
    @abstractmethod
    def parse_spec(cls):
        """Return a callable that parses the inner text.

        ``parse`` calls it with the text found between the tokens and passes
        the mapping it returns to the model constructor as keyword arguments.
        """

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the marker that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the marker that closes a formatted block."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` between the start and end tokens.

        Args:
            instance: Model whose ``dict()`` is passed to the format spec.

        Returns:
            The start token, the rendered fields and the end token, each on
            its own line.
        """
        render = cls.format_spec()
        formatted = render(**instance.dict())
        return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from text produced by ``format`` (or similar).

        Surrounding whitespace is ignored and the tokens are matched
        case-insensitively.

        Args:
            formatted_string: Text that starts with the start token and ends
                with the end token.

        Returns:
            A new instance built from the mapping returned by the parse spec.

        Raises:
            ValueError: If the text does not start and end with the tokens.
        """
        content = formatted_string.strip()
        start_token = cls.start_token().strip()
        end_token = cls.end_token().strip()

        lowered = content.lower()
        if not (
            lowered.startswith(start_token.lower())
            and lowered.endswith(end_token.lower())
        ):
            raise ValueError("Invalid start or end token")

        content = content[len(start_token) :].strip()
        # Guard the slice: with an empty end token, ``content[:-0]`` would
        # silently discard the whole payload.
        if end_token:
            content = content[: -len(end_token)].strip()

        parse_content = cls.parse_spec()
        parsed = parse_content(content)
        return cls(**parsed)
52
+
53
+
54
class CodeFileModel(FormattingModel):
    """A source file (path, language and code) serialized as a small XML tree."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the callable that renders the fields as pretty-printed XML."""

        def xml_formatter(file_path: str, language: str, code: str) -> str:
            root = ET.Element("code_file_model")
            ET.SubElement(root, "file_path").text = file_path
            ET.SubElement(root, "language").text = language
            # Surround the code with newlines so it starts on its own line.
            ET.SubElement(root, "code").text = f"\n{code}\n"

            xml_str = ET.tostring(root, encoding="unicode")
            pretty_xml = minidom.parseString(xml_str).toprettyxml(indent=" ")

            # Remove the XML declaration (the first line of minidom's output).
            pretty_xml = "\n".join(pretty_xml.split("\n")[1:])

            return pretty_xml.strip()

        return xml_formatter

    @classmethod
    def parse_spec(cls):
        """Return the callable that extracts the fields from the XML text.

        The returned callable raises ``ValueError`` when a required element
        is absent; an element that is present but empty yields ``""``.
        """

        def parse_xml(content: str) -> dict:
            root = ET.fromstring(content)

            def field_text(tag: str) -> str:
                element = root.find(tag)
                if element is None:
                    # Previously a missing element surfaced as an
                    # AttributeError / UnboundLocalError.
                    raise ValueError(f"Required field '{tag}' not found in XML")
                # ``text`` is None for an empty element; surrounding
                # whitespace (including the newlines around the code) is
                # dropped, internal line breaks are kept.
                return (element.text or "").strip()

            return {
                "file_path": field_text("file_path"),
                "language": field_text("language"),
                "code": field_text("code"),
            }

        return parse_xml

    @classmethod
    def start_token(cls):
        """Return the opening delimiter of a formatted block."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the closing delimiter of a formatted block."""
        return "</format>"
114
+
115
+
116
+ # Test cases
117
if __name__ == "__main__":

    def _check(got, want, failure: str) -> None:
        """Assert ``got == want``, reporting both values under ``failure``."""
        assert got == want, f"{failure}. Expected:\n{want}\nGot:\n{got}"

    # Formatting: render a sample model and show the result.
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted output:", formatted, "Formatting test passed.", sep="\n")

    # Parsing: the rendered text must yield an equal model.
    parsed = CodeFileModel.parse(formatted)
    _check(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Round-trip: format and parse in a single expression.
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _check(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # A second model with different field values.
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _check(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Tolerant parsing: hand-written input with padding inside the tags.
    tolerant_input = """<format>
<code_file_model>
<file_path> src/main.py </file_path>
<language> Python </language>
<code>
def hello():
 print('Hello, World!')
</code>
</code_file_model>
</format>"""
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    _check(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
@@ -0,0 +1,178 @@
1
+ import xml.etree.ElementTree as ET
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Dict
4
+ from xml.dom import minidom
5
+
6
+ from langroid.pydantic_v1 import BaseModel
7
+
8
+
9
class FormattingModel(BaseModel, ABC):
    """Base class for models rendered to, and parsed from, a delimited XML block.

    Subclasses provide the delimiters, a callable that renders the fields to
    text, and a mapping that tells ``parse`` where each field lives in the XML.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls):
        """Return a callable that renders the fields to a string.

        ``format`` calls it with the instance's fields as keyword arguments.
        """

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> Dict[str, str]:
        """Return a mapping of field name to the path used to locate it.

        Each path is handed to ``Element.find`` on the parsed root element.
        """

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the marker that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the marker that closes a formatted block."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` between the start and end tokens.

        Args:
            instance: Model whose ``dict()`` is passed to the format spec.

        Returns:
            The start token, the rendered fields and the end token, each on
            its own line.
        """
        render = cls.format_spec()
        formatted = render(**instance.dict())
        return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from text produced by ``format`` (or similar).

        Surrounding whitespace is ignored and the tokens are matched
        case-insensitively.

        Args:
            formatted_string: Text that starts with the start token and ends
                with the end token.

        Returns:
            A new instance built from the extracted field values.

        Raises:
            ValueError: If the tokens do not match or a field listed in the
                parse spec is missing from the XML.
        """
        content = formatted_string.strip()
        start_token = cls.start_token().strip()
        end_token = cls.end_token().strip()

        lowered = content.lower()
        if not (
            lowered.startswith(start_token.lower())
            and lowered.endswith(end_token.lower())
        ):
            raise ValueError("Invalid start or end token")

        content = content[len(start_token) :].strip()
        # Guard the slice: with an empty end token, ``content[:-0]`` would
        # silently discard the whole payload.
        if end_token:
            content = content[: -len(end_token)].strip()

        spec = cls.parse_spec()
        parsed = cls._parse_xml(content, spec)
        return cls(**parsed)

    @staticmethod
    def _parse_xml(content: str, spec: Dict[str, str]) -> Dict[str, Any]:
        """Extract the stripped text of every field listed in ``spec``.

        Args:
            content: XML document text.
            spec: Mapping of field name to the path passed to ``Element.find``.

        Returns:
            Mapping of field name to the element's text with surrounding
            whitespace removed (``""`` when the element has no text).

        Raises:
            ValueError: If no element matches a field's path.
        """
        root = ET.fromstring(content)
        result: Dict[str, Any] = {}
        for field, xpath in spec.items():
            element = root.find(xpath)
            if element is None:
                raise ValueError(f"Required field '{field}' not found in XML")
            # Every field, "code" included, is handled the same way; the
            # former per-field branches were identical.
            result[field] = (element.text or "").strip()
        return result
68
+
69
+
70
class CodeFileModel(FormattingModel):
    """A source file (path, language and code) serialized as a small XML tree."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the callable that renders the fields as pretty-printed XML."""

        def xml_formatter(file_path: str, language: str, code: str) -> str:
            document = ET.Element("code_file_model")
            children = (
                ("file_path", file_path),
                ("language", language),
                # The code is wrapped in newlines so it starts on its own line.
                ("code", f"\n{code}\n"),
            )
            for tag, text in children:
                ET.SubElement(document, tag).text = text

            serialized = ET.tostring(document, encoding="unicode")
            pretty = minidom.parseString(serialized).toprettyxml(indent=" ")

            # Keep everything after the first line break, which drops the
            # XML declaration line.
            _declaration, _newline, body = pretty.partition("\n")
            return body.strip()

        return xml_formatter

    @classmethod
    def parse_spec(cls) -> Dict[str, str]:
        """Map each field to the path that locates its element in the XML."""
        return {name: f".//{name}" for name in ("file_path", "language", "code")}

    @classmethod
    def start_token(cls):
        """Return the opening delimiter of a formatted block."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the closing delimiter of a formatted block."""
        return "</format>"
114
+
115
+
116
+ # Test cases
117
if __name__ == "__main__":

    def _check(got, want, failure: str) -> None:
        """Assert ``got == want``, reporting both values under ``failure``."""
        assert got == want, f"{failure}. Expected:\n{want}\nGot:\n{got}"

    # Formatting: render a sample model and show the result.
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted output:", formatted, "Formatting test passed.", sep="\n")

    # Parsing: the rendered text must yield an equal model.
    parsed = CodeFileModel.parse(formatted)
    _check(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Round-trip: format and parse in a single expression.
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _check(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # A second model with different field values.
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _check(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Tolerant parsing: hand-written input with padding inside the tags.
    tolerant_input = """<format>
<code_file_model>
<file_path> src/main.py </file_path>
<language> Python </language>
<code>
def hello():
 print('Hello, World!')
</code>
</code_file_model>
</format>"""
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    _check(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
@@ -0,0 +1,132 @@
1
+ import xml.etree.ElementTree as ET
2
+ from abc import ABC, abstractmethod
3
+
4
+ from langroid.pydantic_v1 import BaseModel
5
+
6
+
7
class FormattingModel(BaseModel, ABC):
    """Base class for models serialized as a flat XML element tree.

    Subclasses name the root element and declare, per field, whether it is
    plain ``"text"`` or ``"cdata"``.
    """

    @classmethod
    @abstractmethod
    def spec(cls):
        """Return a mapping of field name to field type (e.g. ``"cdata"``)."""

    @classmethod
    @abstractmethod
    def root_element(cls) -> str:
        """Return the tag name of the root XML element."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Serialize ``instance`` to an XML string.

        Fields typed ``"cdata"`` have their value wrapped in CDATA markers
        before being assigned as element text; ``parse`` strips those markers
        again.

        Args:
            instance: Model whose attributes named in ``spec`` are written.

        Returns:
            The XML document as a string.
        """
        root = ET.Element(cls.root_element())
        for field, field_type in cls.spec().items():
            value = getattr(instance, field)
            elem = ET.SubElement(root, field)
            if field_type == "cdata":
                elem.text = f"<![CDATA[{value}]]>"
            else:
                # Element text must be a string (or None) to be serialized,
                # so coerce other values instead of failing in tostring().
                elem.text = None if value is None else str(value)
        return ET.tostring(root, encoding="unicode", method="xml")

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from an XML string.

        Args:
            formatted_string: XML whose root tag equals ``root_element()``.

        Returns:
            A new instance built from the extracted field values. Text fields
            are stripped; cdata fields keep their whitespace.

        Raises:
            ValueError: If the root tag is wrong or a field element is missing.
        """
        root = ET.fromstring(formatted_string)
        if root.tag != cls.root_element():
            raise ValueError(
                f"Invalid root element: expected {cls.root_element()}, got {root.tag}"
            )

        cdata_open = "<![CDATA["
        cdata_close = "]]>"

        data = {}
        for field, field_type in cls.spec().items():
            elem = root.find(field)
            if elem is None:
                raise ValueError(f"Missing field: {field}")
            # ``text`` is None for an empty element; treat that as "" rather
            # than crashing on ``None.find``.
            text = elem.text or ""
            if field_type == "cdata":
                # Markers are only present as literal text when the document
                # came from ``format``; otherwise the text is used as-is.
                start = text.find(cdata_open)
                end = text.rfind(cdata_close)
                if start != -1 and end >= start + len(cdata_open):
                    data[field] = text[start + len(cdata_open) : end]
                else:
                    data[field] = text
            else:
                data[field] = text.strip()

        return cls(**data)
55
+
56
+
57
class CodeFileModel(FormattingModel):
    """A source file described by its language, path and code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def spec(cls):
        """Declare how each field is stored: code as cdata, the rest as text."""
        return dict(language="text", file_path="text", code="cdata")

    @classmethod
    def root_element(cls):
        """Return the tag of the XML root element for this model."""
        tag = "code_file_model"
        return tag
69
+
70
+
71
+ # Test cases
72
if __name__ == "__main__":

    def _check(got, want, failure: str) -> None:
        """Assert ``got == want``, reporting both values under ``failure``."""
        assert got == want, f"{failure}. Expected:\n{want}\nGot:\n{got}"

    # Formatting: render a sample model and show the result.
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted XML:", formatted, sep="\n")

    # Parsing: the rendered XML must yield an equal model.
    parsed = CodeFileModel.parse(formatted)
    _check(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Round-trip: format and parse in a single expression.
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _check(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # A second model with different field values.
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _check(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Tolerant parsing: hand-written XML with padded text and a real CDATA
    # section, whose whitespace is expected to survive untouched.
    tolerant_input = """
<code_file_model>
<language> Python </language>
<file_path> src/main.py </file_path>
<code><![CDATA[
def hello():
 print('Hello, World!')
 ]]></code>
</code_file_model>
    """
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="\ndef hello():\n print('Hello, World!')\n ",
    )
    _check(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
@@ -0,0 +1,130 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from lxml import etree
4
+
5
+ from langroid.pydantic_v1 import BaseModel
6
+
7
+
8
class FormattingModel(BaseModel, ABC):
    """Base class for models serialized as a flat XML element tree via lxml.

    Subclasses name the root element and declare, per field, whether it is
    plain ``"text"`` or ``"cdata"``.
    """

    @classmethod
    @abstractmethod
    def spec(cls):
        """Return a mapping of field name to field type (e.g. ``"cdata"``)."""

    @classmethod
    @abstractmethod
    def root_element(cls) -> str:
        """Return the tag name of the root XML element."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Serialize ``instance`` to a pretty-printed XML string.

        Args:
            instance: Model whose attributes named in ``spec`` are written.

        Returns:
            The XML document as a string; ``"cdata"`` fields are emitted as
            CDATA sections, all other fields as ``str(value)``.
        """
        root = etree.Element(cls.root_element())
        for field, field_type in cls.spec().items():
            value = getattr(instance, field)
            elem = etree.SubElement(root, field)
            if field_type == "cdata":
                # NOTE(review): lxml presumably rejects values containing
                # "]]>" here -- confirm before feeding arbitrary code.
                elem.text = etree.CDATA(value)
            else:
                elem.text = str(value)
        return etree.tostring(root, encoding="unicode", pretty_print=True)

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build a model from an XML string.

        Args:
            formatted_string: XML whose root tag equals ``root_element()``.

        Returns:
            A new instance built from the extracted field values. Text fields
            are stripped; cdata fields keep their whitespace.

        Raises:
            ValueError: If the root tag is wrong or a field element is missing.
        """
        root = etree.fromstring(formatted_string.encode("utf-8"))
        if root.tag != cls.root_element():
            raise ValueError(
                f"Invalid root element: expected {cls.root_element()}, got {root.tag}"
            )

        data = {}
        for field, field_type in cls.spec().items():
            elem = root.find(field)
            if elem is None:
                raise ValueError(f"Missing field: {field}")
            if field_type == "cdata":
                # An element without text reports None; fall back to "" as
                # the text branch does, so an empty value still round-trips.
                data[field] = elem.text or ""
            elif field_type == "text":
                data[field] = elem.text.strip() if elem.text else ""
            else:
                # Unknown field types are passed through untouched.
                data[field] = elem.text

        return cls(**data)
53
+
54
+
55
class CodeFileModel(FormattingModel):
    """A source file described by its language, path and code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def spec(cls):
        """Declare how each field is stored: code as cdata, the rest as text."""
        return dict(language="text", file_path="text", code="cdata")

    @classmethod
    def root_element(cls):
        """Return the tag of the XML root element for this model."""
        tag = "code_file_model"
        return tag
67
+
68
+
69
+ # Test cases
70
if __name__ == "__main__":

    def _check(got, want, failure: str) -> None:
        """Assert ``got == want``, reporting both values under ``failure``."""
        assert got == want, f"{failure}. Expected:\n{want}\nGot:\n{got}"

    # Formatting: render a sample model and show the result.
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    print("Formatted XML:", formatted, sep="\n")

    # Parsing: the rendered XML must yield an equal model.
    parsed = CodeFileModel.parse(formatted)
    _check(parsed, code_file, "Parsing failed")
    print("Parsing test passed.")

    # Round-trip: format and parse in a single expression.
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    _check(round_trip, code_file, "Round-trip failed")
    print("Round-trip test passed.")

    # A second model with different field values.
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    _check(parsed2, code_file2, "Parsing failed for different values")
    print("Different values test passed.")

    # Tolerant parsing: hand-written XML with padded text and a real CDATA
    # section, whose whitespace is expected to survive untouched.
    tolerant_input = """
<code_file_model>
<language> Python </language>
<file_path> src/main.py </file_path>
<code><![CDATA[
def hello():
 print('Hello, World!')
 ]]></code>
</code_file_model>
    """
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="\ndef hello():\n print('Hello, World!')\n ",
    )
    _check(parsed_tolerant, expected_tolerant, "Tolerant parsing failed")
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")