langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. langroid/agent/md_tool_message_grammar.py +455 -0
  2. langroid/agent/tools/code_file_tool_parse.py +150 -0
  3. langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
  4. langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
  5. langroid/agent/tools/formatted_model_custom.py +150 -0
  6. langroid/agent/tools/formatted_model_custom2.py +168 -0
  7. langroid/agent/tools/formatted_model_custom3.py +279 -0
  8. langroid/agent/tools/formatted_model_custom4.py +395 -0
  9. langroid/agent/tools/formatted_model_jinja.py +133 -0
  10. langroid/agent/tools/formatted_model_jinja.py-e +122 -0
  11. langroid/agent/tools/formatted_model_jinja2.py +145 -0
  12. langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
  13. langroid/agent/tools/formatted_model_lark.py +0 -0
  14. langroid/agent/tools/formatted_model_lark2.py +168 -0
  15. langroid/agent/tools/formatted_model_parse.py +105 -0
  16. langroid/agent/tools/formatted_model_parse.py-e +98 -0
  17. langroid/agent/tools/formatted_model_parse2.py +113 -0
  18. langroid/agent/tools/formatted_model_parse2.py-e +109 -0
  19. langroid/agent/tools/formatted_model_parse3.py +114 -0
  20. langroid/agent/tools/formatted_model_parse3.py-e +110 -0
  21. langroid/agent/tools/formatted_model_parsimon.py +194 -0
  22. langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
  23. langroid/agent/tools/formatted_model_pyparsing.py +169 -0
  24. langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
  25. langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
  26. langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
  27. langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
  28. langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
  29. langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
  30. langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
  31. langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
  32. langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
  33. langroid/agent/tools/formatted_model_regex.py +246 -0
  34. langroid/agent/tools/formatted_model_regex.py-e +248 -0
  35. langroid/agent/tools/formatted_model_regex2.py +250 -0
  36. langroid/agent/tools/formatted_model_regex2.py-e +253 -0
  37. langroid/agent/tools/formatted_model_tatsu.py +172 -0
  38. langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
  39. langroid/agent/tools/formatted_model_template.py +217 -0
  40. langroid/agent/tools/formatted_model_template.py-e +200 -0
  41. langroid/agent/tools/formatted_model_xml.py +178 -0
  42. langroid/agent/tools/formatted_model_xml2.py +178 -0
  43. langroid/agent/tools/formatted_model_xml3.py +132 -0
  44. langroid/agent/tools/formatted_model_xml4.py +130 -0
  45. langroid/agent/tools/formatted_model_xml5.py +130 -0
  46. langroid/agent/tools/formatted_model_xml6.py +113 -0
  47. langroid/agent/tools/formatted_model_xml7.py +117 -0
  48. langroid/agent/tools/formatted_model_xml8.py +164 -0
  49. langroid/agent/tools/generic_tool.py +165 -0
  50. langroid/agent/tools/generic_tool_tatsu.py +275 -0
  51. langroid/agent/tools/grammar_based_model.py +132 -0
  52. langroid/agent/tools/grammar_based_model.py-e +128 -0
  53. langroid/agent/tools/grammar_based_model_lark.py +156 -0
  54. langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
  55. langroid/agent/tools/grammar_based_model_parse.py +86 -0
  56. langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
  57. langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
  58. langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
  59. langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
  60. langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
  61. langroid/agent/tools/grammar_based_model_regex.py +139 -0
  62. langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
  63. langroid/agent/tools/grammar_based_model_regex2.py +124 -0
  64. langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
  65. langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
  66. langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
  67. langroid/agent/tools/lark_earley_example.py +135 -0
  68. langroid/agent/tools/lark_earley_example.py-e +117 -0
  69. langroid/agent/tools/lark_example.py +72 -0
  70. langroid/agent/tools/parse_example.py +76 -0
  71. langroid/agent/tools/parse_example2.py +87 -0
  72. langroid/agent/tools/parse_example3.py +42 -0
  73. langroid/agent/tools/parse_test.py +791 -0
  74. langroid/agent/xml_tool_message.py +106 -0
  75. langroid/language_models/openai_gpt.py +6 -1
  76. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
  77. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
  78. pyproject.toml +1 -1
  79. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
  80. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,250 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Type, TypeVar
4
+
5
+ from langroid.pydantic_v1 import BaseModel
6
+
7
+ T = TypeVar("T", bound="FormattingModel")
8
+
9
+
10
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a token-delimited text block.

    A subclass supplies four class-level hooks: the layout template
    (``format_spec``), the delimiters (``start_token`` / ``end_token``) and
    the field-name -> placeholder mapping (``field_mappings``).  ``generate``
    renders an instance to text and ``parse`` rebuilds an instance from text.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the layout template containing the placeholder tokens.

        ``parse`` compiles this text as a regular expression, so it may carry
        regex whitespace markers; ``generate`` strips those markers.
        """

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes a formatted block."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name -> placeholder token."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance from ``text``.

        Args:
            text: A block that starts with ``start_token()`` and ends with
                ``end_token()`` (surrounding whitespace is ignored).

        Returns:
            An instance whose fields hold the captured, stripped values.

        Raises:
            ValueError: If the delimiters are missing or the content between
                them does not match ``format_spec()``.
        """
        start, end = cls.start_token(), cls.end_token()
        stripped = text.strip()
        # Verify the delimiters instead of slicing blindly; this also keeps an
        # empty end token from producing an empty ``[:-0]`` slice.
        if (
            len(stripped) < len(start) + len(end)
            or not stripped.startswith(start)
            or not stripped.endswith(end)
        ):
            raise ValueError("Invalid format")
        content = stripped[len(start) : len(stripped) - len(end)].strip()

        # Turn every placeholder into a lazy named capture group.
        pattern = cls.format_spec()
        for field, token in cls.field_mappings().items():
            pattern = pattern.replace(token, f"(?P<{field}>.*?)")

        # fullmatch (not match): a lazy group at the very end of the pattern
        # would otherwise succeed with an empty capture.
        match = re.fullmatch(pattern, content, re.DOTALL)
        if not match:
            raise ValueError("Invalid format")

        data = {field: match.group(field).strip() for field in cls.field_mappings()}
        return cls(**data)

    def generate(self) -> str:
        """Render this instance as text wrapped in the start and end tokens.

        Returns:
            ``start_token()``, the filled-in template and ``end_token()``,
            separated by newlines.
        """
        # Convert the regex-flavoured spec to literal text *before* inserting
        # values, so a value containing ``\s*`` or a backslash-n pair is left
        # untouched.
        template = self.format_spec().replace(r"\s*", "").replace(r"\n", "\n")

        # First mapping wins if two fields share a token (as with sequential
        # replacement).
        replacements: Dict[str, str] = {}
        for field, token in self.field_mappings().items():
            replacements.setdefault(token, str(getattr(self, field)))

        if replacements:
            # Single pass over the template: text inserted for one field is
            # never rescanned for another field's token.  Longest tokens first
            # so a token that prefixes another cannot shadow it.
            alternatives = sorted(replacements, key=len, reverse=True)
            token_re = re.compile("|".join(re.escape(tok) for tok in alternatives))
            template = token_re.sub(lambda m: replacements[m.group(0)], template)

        return f"{self.start_token()}\n{template}\n{self.end_token()}"
65
+
66
+
67
class MyFormattedModel(FormattingModel):
    """Example model: a person rendered as three lines inside format tags."""

    name: str
    age: int
    city: str

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter of the rendered block."""
        return "<format>"

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter of the rendered block."""
        return "</format>"

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Map each model field to the placeholder used in the template."""
        mapping: Dict[str, str] = {
            "name": "{NAME}",
            "age": "{AGE}",
            "city": "{CITY}",
        }
        return mapping

    @classmethod
    def format_spec(cls) -> str:
        """Template with one placeholder per line: name, age, city."""
        template = "name: {NAME}\n{AGE} is the age\nlives in {CITY}"
        return template
87
+
88
+
89
if __name__ == "__main__":
    # Demo script: exercises generate()/parse() round-trips and prints the
    # results.  Nothing here is imported by other modules.

    # Object -> string.
    model = MyFormattedModel(name="John", age=30, city="Tokyo")
    generated = model.generate()
    print("Generated string:")
    print(generated)
    print()

    # String -> object.
    parsed = MyFormattedModel.parse(generated)
    print("Parsed object:")
    print(parsed)
    print()

    # Round-trip comparison.
    print("Round-trip test:")
    print("Original == Parsed:", model == parsed)

    # Same checks with a value that contains a space.
    another_model = MyFormattedModel(name="Alice", age=25, city="New York")
    another_generated = another_model.generate()
    print("\nAnother generated string:")
    print(another_generated)
    print()

    another_parsed = MyFormattedModel.parse(another_generated)
    print("Another parsed object:")
    print(another_parsed)
    print("Another Original == Another Parsed:", another_model == another_parsed)

    # Strict code-file model: the template is plain literal text.
    class CodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            return "code_file_model\nfile_path: {FILE_PATH}\n```{LANGUAGE}\n{CODE}\n```"

        @classmethod
        def start_token(cls) -> str:
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}",
            }

    print("\nTesting CodeFileModel:")
    code_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code='def hello():\n    print("Hello, World!")',
    )
    code_generated = code_model.generate()
    print("Generated CodeFileModel string:")
    print(code_generated)
    print()

    code_parsed = CodeFileModel.parse(code_generated)
    print("Parsed CodeFileModel object:")
    print(code_parsed)
    print()

    print("CodeFileModel Round-trip test:")
    print("Original == Parsed:", code_model == code_parsed)

    # Whitespace-tolerant variant.  It gets its own name so it no longer
    # silently replaces the strict CodeFileModel defined above.
    class TolerantCodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            # Raw strings: the \s* and \n sequences are regex syntax for parse().
            return (
                r"code_file_model\s*\n"
                r"file_path:\s*{FILE_PATH}\s*\n"
                r"```\s*{LANGUAGE}\s*\n"
                r"{CODE}\s*"
                r"```"
            )

        @classmethod
        def start_token(cls) -> str:
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}",
            }

    print("\nTesting CodeFileModel with various whitespace variations:")

    test_strings = [
        # Standard format
        """<format>
code_file_model
file_path: src/main.py
```python
def hello():
    print("Hello, World!")
```
</format>""",
        # Extra whitespace
        """<format>
code_file_model
file_path: src/main.py
``` python
def hello():
    print("Hello, World!")
```
</format>""",
        # Extra newlines
        """<format>
code_file_model

file_path: src/main.py

```python

def hello():
    print("Hello, World!")

```

</format>""",
    ]

    for i, test_string in enumerate(test_strings, 1):
        print(f"\nTest {i}:")
        print("Input string:")
        print(test_string)

        parsed = TolerantCodeFileModel.parse(test_string)
        print("\nParsed object:")
        print(parsed)

        regenerated = parsed.generate()
        print("\nRegenerated string:")
        print(regenerated)

        reparsed = TolerantCodeFileModel.parse(regenerated)
        print("\nRound-trip test:")
        print("Original parsed == Reparsed:", parsed == reparsed)
        print("-" * 50)
@@ -0,0 +1,253 @@
1
+ from pydantic import BaseModel
2
+ from abc import ABC, abstractmethod
3
+ import re
4
+ from typing import Dict, Type, TypeVar
5
+
6
+ T = TypeVar('T', bound='FormattingModel')
7
+
8
class FormattingModel(BaseModel, ABC):
    """Base class for models with a textual, token-delimited representation.

    Subclasses describe the layout through ``format_spec`` (a template with
    placeholder tokens), ``start_token`` / ``end_token`` (the delimiters) and
    ``field_mappings`` (field name -> placeholder token).  ``generate``
    produces the text for an instance; ``parse`` reads it back.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the layout template with its placeholder tokens.

        The template is used as a regular expression by ``parse``; ``generate``
        removes the regex whitespace markers before filling it in.
        """

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return the field name -> placeholder token mapping."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Parse ``text`` into an instance of this model.

        Args:
            text: Text beginning with ``start_token()`` and ending with
                ``end_token()``; leading/trailing whitespace is ignored.

        Returns:
            A new instance populated from the captured field values.

        Raises:
            ValueError: If a delimiter is missing or the enclosed content does
                not match ``format_spec()``.
        """
        opening = cls.start_token()
        closing = cls.end_token()
        body = text.strip()
        has_tokens = (
            len(body) >= len(opening) + len(closing)
            and body.startswith(opening)
            and body.endswith(closing)
        )
        if not has_tokens:
            # Slicing without this check would silently chop arbitrary text,
            # and an empty closing token would yield an empty ``[:-0]`` slice.
            raise ValueError("Invalid format")
        content = body[len(opening) : len(body) - len(closing)].strip()

        # Replace each placeholder with a lazy named group.
        pattern = cls.format_spec()
        for field, token in cls.field_mappings().items():
            pattern = pattern.replace(token, f"(?P<{field}>.*?)")

        # The whole content must match: with re.match a trailing lazy group
        # captures nothing and the last field comes back empty.
        match = re.fullmatch(pattern, content, re.DOTALL)
        if not match:
            raise ValueError("Invalid format")

        data = {field: match.group(field).strip() for field in cls.field_mappings()}
        return cls(**data)

    def generate(self) -> str:
        """Return this instance rendered between the start and end tokens."""
        # Strip the regex markers from the spec first so that field values
        # containing ``\s*`` or a literal backslash-n are not altered later.
        template = self.format_spec().replace(r'\s*', '').replace(r'\n', '\n')

        # Token -> replacement text; the first field mapped to a token wins.
        values: Dict[str, str] = {}
        for field, token in self.field_mappings().items():
            values.setdefault(token, str(getattr(self, field)))

        if values:
            # One regex pass, longest token first: inserted values are never
            # rescanned, so a value that looks like a token stays intact.
            ordered = sorted(values, key=len, reverse=True)
            matcher = re.compile("|".join(re.escape(tok) for tok in ordered))
            template = matcher.sub(lambda m: values[m.group(0)], template)

        return f"{self.start_token()}\n{template}\n{self.end_token()}"
63
+
64
+
65
class MyFormattedModel(FormattingModel):
    """Sample model with name, age and city, wrapped in format tags."""

    name: str
    age: int
    city: str

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Placeholder token used for each field in the template."""
        return {"name": "{NAME}", "age": "{AGE}", "city": "{CITY}"}

    @classmethod
    def format_spec(cls) -> str:
        """Three-line template: name, then age, then city."""
        spec = "name: {NAME}\n{AGE} is the age\nlives in {CITY}"
        return spec

    @classmethod
    def start_token(cls) -> str:
        """Delimiter that opens the block."""
        return "<format>"

    @classmethod
    def end_token(cls) -> str:
        """Delimiter that closes the block."""
        return "</format>"
89
+
90
if __name__ == "__main__":
    # Demo script: prints generate()/parse() round-trips for the sample models.

    # Object -> string.
    model = MyFormattedModel(name="John", age=30, city="Tokyo")
    generated = model.generate()
    print("Generated string:")
    print(generated)
    print()

    # String -> object.
    parsed = MyFormattedModel.parse(generated)
    print("Parsed object:")
    print(parsed)
    print()

    # Round-trip comparison.
    print("Round-trip test:")
    print("Original == Parsed:", model == parsed)

    # Different values, including one with an embedded space.
    another_model = MyFormattedModel(name="Alice", age=25, city="New York")
    another_generated = another_model.generate()
    print("\nAnother generated string:")
    print(another_generated)
    print()

    another_parsed = MyFormattedModel.parse(another_generated)
    print("Another parsed object:")
    print(another_parsed)
    print("Another Original == Another Parsed:", another_model == another_parsed)

    # Strict code-file model with a literal template.
    class CodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            return "code_file_model\nfile_path: {FILE_PATH}\n```{LANGUAGE}\n{CODE}\n```"

        @classmethod
        def start_token(cls) -> str:
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}"
            }

    print("\nTesting CodeFileModel:")
    code_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code="def hello():\n    print(\"Hello, World!\")"
    )
    code_generated = code_model.generate()
    print("Generated CodeFileModel string:")
    print(code_generated)
    print()

    code_parsed = CodeFileModel.parse(code_generated)
    print("Parsed CodeFileModel object:")
    print(code_parsed)
    print()

    print("CodeFileModel Round-trip test:")
    print("Original == Parsed:", code_model == code_parsed)

    # Whitespace-tolerant variant, named separately so that it does not
    # shadow the strict CodeFileModel defined earlier in this script.
    class TolerantCodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            # Raw strings: \s* and \n are regex syntax consumed by parse().
            return (
                r"code_file_model\s*\n"
                r"file_path:\s*{FILE_PATH}\s*\n"
                r"```\s*{LANGUAGE}\s*\n"
                r"{CODE}\s*"
                r"```"
            )

        @classmethod
        def start_token(cls) -> str:
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}"
            }

    print("\nTesting CodeFileModel with various whitespace variations:")

    test_strings = [
        # Standard format
        """<format>
code_file_model
file_path: src/main.py
```python
def hello():
    print("Hello, World!")
```
</format>""",
        # Extra whitespace
        """<format>
code_file_model
file_path: src/main.py
``` python
def hello():
    print("Hello, World!")
```
</format>""",
        # Extra newlines
        """<format>
code_file_model

file_path: src/main.py

```python

def hello():
    print("Hello, World!")

```

</format>"""
    ]

    for i, test_string in enumerate(test_strings, 1):
        print(f"\nTest {i}:")
        print("Input string:")
        print(test_string)

        parsed = TolerantCodeFileModel.parse(test_string)
        print("\nParsed object:")
        print(parsed)

        regenerated = parsed.generate()
        print("\nRegenerated string:")
        print(regenerated)

        reparsed = TolerantCodeFileModel.parse(regenerated)
        print("\nRound-trip test:")
        print("Original parsed == Reparsed:", parsed == reparsed)
        print("-" * 50)
@@ -0,0 +1,172 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ import tatsu
4
+ from tatsu.model import ModelBuilderSemantics
5
+
6
+ from langroid.pydantic_v1 import BaseModel
7
+
8
+
9
class FormattingModel(BaseModel, ABC):
    """Model rendered with ``str.format`` and parsed back with a TatSu grammar.

    Subclasses provide the ``str.format`` template (``format_spec``), the
    TatSu grammar text (``parse_spec``) and the delimiter lines
    (``start_token`` / ``end_token``).
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template keyed by model field names."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the grammar text that is handed to ``tatsu.compile``."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the line that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the line that closes a formatted block."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` between the start and end token lines.

        Args:
            instance: Model whose ``dict()`` supplies the template values.

        Returns:
            The start token, the filled-in template and the end token, each
            separated by a newline.
        """
        body = cls.format_spec().format(**instance.dict())
        return f"{cls.start_token()}\n{body}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Parse a delimited block into an instance.

        Args:
            formatted_string: Text whose first line is the start token and
                whose last line is the end token.

        Returns:
            An instance built from the parse result, which is unpacked as
            keyword arguments.

        Raises:
            ValueError: If the block has fewer than two lines or the first or
                last line is not the expected token.
        """
        lines = formatted_string.strip().split("\n")
        # Two distinct delimiter lines are required; comparing stripped text
        # accepts CRLF input and stray whitespace after a token.
        if (
            len(lines) < 2
            or lines[0].strip() != cls.start_token().strip()
            or lines[-1].strip() != cls.end_token().strip()
        ):
            raise ValueError("Invalid start or end token")
        content = "\n".join(lines[1:-1])

        parser = tatsu.compile(cls.parse_spec())
        ast = parser.parse(content)
        # NOTE(review): ``**ast`` needs a mapping, so the subclass grammar
        # presumably has to name its elements -- confirm against TatSu docs.
        return cls(**ast)
46
+
47
+
48
class CodeFileModel(FormattingModel):
    """A source file (path, language, code) rendered as a fenced code block."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the ``str.format`` template for a code-file block."""
        return "code_file_model\n{file_path}\n```{language}\n{code}\n```"

    @classmethod
    def parse_spec(cls):
        """Return the TatSu grammar text for a code-file block."""
        # NOTE(review): the @@whitespace directive trails the rules; TatSu's
        # grammar reference appears to expect directives first -- confirm that
        # this grammar compiles before relying on it.
        # ``\\s`` keeps the runtime text identical to the previous ``\s`` while
        # avoiding an invalid escape sequence in a non-raw string.
        return """
        @@grammar::CodeFileModel

        start = header file_path language code $ ;

        header = "code_file_model" ~;
        file_path = /[^\n]+/ ~;
        language = "```" /[a-zA-Z]+/ ~;
        code = /(?s).*?(?=```)/ "```" ~;

        @@whitespace :: /\\s*/
        """

    @classmethod
    def start_token(cls):
        """Return the line that opens the block."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the line that closes the block."""
        return "</format>"

    @classmethod
    def parse(cls, formatted_string: str) -> "CodeFileModel":
        """Parse a code-file block into a ``CodeFileModel``.

        Args:
            formatted_string: Text whose first line is the start token and
                whose last line is the end token.

        Returns:
            The model built from the parsed path, language and code.

        Raises:
            ValueError: If the block has fewer than two lines or the first or
                last line is not the expected token.
        """
        lines = formatted_string.strip().split("\n")
        # Stripped comparison tolerates CRLF / trailing spaces on the token
        # lines, in keeping with the whitespace-tolerant grammar.
        if (
            len(lines) < 2
            or lines[0].strip() != cls.start_token().strip()
            or lines[-1].strip() != cls.end_token().strip()
        ):
            raise ValueError("Invalid start or end token")
        content = "\n".join(lines[1:-1])

        # Values recorded by the semantic actions, keyed by model field name.
        captured = {}

        class CodeFileModelSemantics(ModelBuilderSemantics):
            def file_path(self, ast):
                captured["file_path"] = ast.strip()
                return captured["file_path"]

            def language(self, ast):
                # ast is (fence, name); keep the language name.
                captured["language"] = ast[1].strip()
                return captured["language"]

            def code(self, ast):
                # ast is (body, fence); keep the code body.
                captured["code"] = ast[0].strip()
                return captured["code"]

        parser = tatsu.compile(cls.parse_spec(), semantics=CodeFileModelSemantics())
        ast = parser.parse(content)
        if isinstance(ast, dict):
            # Mapping result: unpack it directly.
            return cls(**ast)
        # The grammar names no elements, so the start rule is expected to
        # yield a positional sequence that cannot be unpacked with ``**``;
        # build the model from the values the semantic actions recorded.
        return cls(**captured)
100
+
101
+
102
+ # Test cases
103
if __name__ == "__main__":
    # Self-test script: each section asserts one behaviour and prints a
    # confirmation line when it holds.

    # --- Formatting -------------------------------------------------------
    expected_format = """<format>
code_file_model
src/main.py
```Python
def hello():
    print('Hello, World!')
```
</format>"""
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n    print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    assert (
        formatted == expected_format
    ), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
    print("Formatting test passed.")

    # --- Parsing ----------------------------------------------------------
    parsed = CodeFileModel.parse(formatted)
    assert (
        parsed == code_file
    ), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
    print("Parsing test passed.")

    # --- Round-trip -------------------------------------------------------
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    assert (
        round_trip == code_file
    ), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
    print("Round-trip test passed.")

    # --- Different values -------------------------------------------------
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n    console.log('Hello, World!');\n}",
    )
    parsed2 = CodeFileModel.parse(CodeFileModel.format(code_file2))
    assert (
        parsed2 == code_file2
    ), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
    print("Different values test passed.")

    # --- Tolerant parsing (blank line, space after the fence) --------------
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n    print('Hello, World!')",
    )
    loose_text = """<format>
code_file_model
src/main.py

``` Python
def hello():
    print('Hello, World!')
```
</format>"""
    parsed_tolerant = CodeFileModel.parse(loose_text)
    assert (
        parsed_tolerant == expected_tolerant
    ), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")