langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,250 @@
|
|
1
|
+
import re
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Dict, Type, TypeVar
|
4
|
+
|
5
|
+
from langroid.pydantic_v1 import BaseModel
|
6
|
+
|
7
|
+
T = TypeVar("T", bound="FormattingModel")
|
8
|
+
|
9
|
+
|
10
|
+
class FormattingModel(BaseModel, ABC):
    r"""Base class for models that round-trip through a delimited text format.

    A subclass supplies four classmethods:

    * ``format_spec`` - the layout. ``parse`` treats it as a regular
      expression (after swapping each placeholder token for a named group);
      ``generate`` treats it as a text template (after dropping ``\s*`` and
      expanding the two-character sequence ``\n`` into a newline).
    * ``start_token`` / ``end_token`` - delimiters wrapped around the body.
    * ``field_mappings`` - model field name -> placeholder token in the spec.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the layout, containing the tokens from ``field_mappings``."""
        pass

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens the serialized text."""
        pass

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes the serialized text."""
        pass

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of field name to its placeholder token."""
        pass

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance from text produced by ``generate`` (or similar).

        Args:
            text: Serialized text; surrounding whitespace is ignored.

        Returns:
            An instance of ``cls`` whose fields hold the captured values.

        Raises:
            ValueError: If the delimiters are missing or the body does not
                match the format spec.
        """
        stripped = text.strip()
        start, end = cls.start_token(), cls.end_token()
        # Verify the delimiters instead of blindly slicing by their lengths,
        # which would silently chop arbitrary characters off malformed input.
        if (
            len(stripped) < len(start) + len(end)
            or not stripped.startswith(start)
            or not stripped.endswith(end)
        ):
            raise ValueError("Invalid format")
        # Slice with an explicit end index: ``-len(end)`` would be ``-0`` for
        # an empty end token and yield an empty string.
        content = stripped[len(start) : len(stripped) - len(end)].strip()

        # Create regex pattern from format_spec
        mappings = cls.field_mappings()
        pattern = cls.format_spec()
        for field, token in mappings.items():
            pattern = pattern.replace(token, f"(?P<{field}>.*?)")

        # The whole body must match. With re.match a lazy group at the end of
        # the pattern is satisfied by the empty string, so a spec that ends in
        # a placeholder would lose that field's value.
        match = re.fullmatch(pattern, content, re.DOTALL)
        if not match:
            raise ValueError("Invalid format")

        # Create instance with extracted data
        data = {field: match.group(field).strip() for field in mappings}
        return cls(**data)

    def generate(self) -> str:
        r"""Render this instance as delimited text.

        Returns:
            ``start_token``, the filled-in template and ``end_token``, each
            separated by a newline.
        """
        # Turn the regex-flavoured spec into a plain template BEFORE inserting
        # values, so that field values containing ``\s*`` or ``\n`` (common in
        # source code) are emitted untouched.
        template = self.format_spec().replace(r"\s*", "").replace(r"\n", "\n")

        replacements: Dict[str, str] = {}
        for field, token in self.field_mappings().items():
            if token:
                # First mapping wins when two fields share a token.
                replacements.setdefault(token, str(getattr(self, field)))

        if replacements:
            # Single pass over the template: text inserted for one field is
            # never rescanned for another field's token. Longer tokens are
            # tried first in case one token is a prefix of another.
            ordered = sorted(replacements, key=len, reverse=True)
            token_re = re.compile("|".join(re.escape(tok) for tok in ordered))
            template = token_re.sub(lambda m: replacements[m.group(0)], template)

        # Wrap with start and end tokens
        return f"{self.start_token()}\n{template}\n{self.end_token()}"
|
65
|
+
|
66
|
+
|
67
|
+
class MyFormattedModel(FormattingModel):
    """Example model serialized as three lines: name, age and city."""

    name: str
    age: int
    city: str

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""
        opening = "<format>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""
        closing = "</format>"
        return closing

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return the placeholder token used for each field."""
        tokens: Dict[str, str] = {}
        tokens["name"] = "{NAME}"
        tokens["age"] = "{AGE}"
        tokens["city"] = "{CITY}"
        return tokens

    @classmethod
    def format_spec(cls) -> str:
        """Return the three-line layout containing the placeholder tokens."""
        layout = "name: {NAME}\n{AGE} is the age\nlives in {CITY}"
        return layout
|
87
|
+
|
88
|
+
|
89
|
+
if __name__ == "__main__":
    # Manual smoke test. Each section generates text from a model, parses it
    # back and prints the comparison; nothing is asserted.
    # NOTE(review): whitespace inside the string literals below is reproduced
    # as it appears in this listing - confirm against the original file.

    # Test object to string
    model = MyFormattedModel(name="John", age=30, city="Tokyo")
    generated = model.generate()
    print("Generated string:")
    print(generated)
    print()

    # Test string to object
    parsed = MyFormattedModel.parse(generated)
    print("Parsed object:")
    print(parsed)
    print()

    # Test round-trip
    print("Round-trip test:")
    print("Original == Parsed:", model == parsed)

    # Test with different values (a city name containing a space)
    another_model = MyFormattedModel(name="Alice", age=25, city="New York")
    another_generated = another_model.generate()
    print("\nAnother generated string:")
    print(another_generated)
    print()

    another_parsed = MyFormattedModel.parse(another_generated)
    print("Another parsed object:")
    print(another_parsed)
    print("Another Original == Another Parsed:", another_model == another_parsed)

    # Code file model: strict layout with literal newlines and a fenced block.
    class CodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            """Return the layout: header, file path line, fenced code block."""
            return "code_file_model\nfile_path: {FILE_PATH}\n```{LANGUAGE}\n{CODE}\n```"

        @classmethod
        def start_token(cls) -> str:
            """Return the opening delimiter."""
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            """Return the closing delimiter."""
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            """Return the placeholder token used for each field."""
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}",
            }

    print("\nTesting CodeFileModel:")
    code_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code='def hello():\n print("Hello, World!")',
    )
    code_generated = code_model.generate()
    print("Generated CodeFileModel string:")
    print(code_generated)
    print()

    code_parsed = CodeFileModel.parse(code_generated)
    print("Parsed CodeFileModel object:")
    print(code_parsed)
    print()

    print("CodeFileModel Round-trip test:")
    print("Original == Parsed:", code_model == code_parsed)

    # Tolerant format: the spec is written as a regex with optional whitespace.
    # This rebinds the name CodeFileModel; from here on the strict definition
    # above is no longer reachable by that name.
    class CodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            """Return the layout as raw-string regex fragments."""
            return (
                r"code_file_model\s*\n"
                r"file_path:\s*{FILE_PATH}\s*\n"
                r"```\s*{LANGUAGE}\s*\n"
                r"{CODE}\s*"
                r"```"
            )

        @classmethod
        def start_token(cls) -> str:
            """Return the opening delimiter."""
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            """Return the closing delimiter."""
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            """Return the placeholder token used for each field."""
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}",
            }

    print("\nTesting CodeFileModel with various whitespace variations:")

    test_strings = [
        # Standard format
        """<format>
code_file_model
file_path: src/main.py
```python
def hello():
print("Hello, World!")
```
</format>""",
        # Extra whitespace
        """<format>
code_file_model
file_path: src/main.py
``` python
def hello():
print("Hello, World!")
```
</format>""",
        # Extra newlines
        """<format>
code_file_model

file_path: src/main.py

```python

def hello():
print("Hello, World!")

```

</format>""",
    ]

    # For every variant: parse, regenerate, parse again, and print whether the
    # two parsed objects compare equal.
    for i, test_string in enumerate(test_strings, 1):
        print(f"\nTest {i}:")
        print("Input string:")
        print(test_string)

        parsed = CodeFileModel.parse(test_string)
        print("\nParsed object:")
        print(parsed)

        regenerated = parsed.generate()
        print("\nRegenerated string:")
        print(regenerated)

        reparsed = CodeFileModel.parse(regenerated)
        print("\nRound-trip test:")
        print("Original parsed == Reparsed:", parsed == reparsed)
        print("-" * 50)
|
@@ -0,0 +1,253 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
import re
|
4
|
+
from typing import Dict, Type, TypeVar
|
5
|
+
|
6
|
+
T = TypeVar('T', bound='FormattingModel')
|
7
|
+
|
8
|
+
class FormattingModel(BaseModel, ABC):
    r"""Abstract pydantic model with a text serialization defined by a spec.

    Subclasses implement ``format_spec`` (the layout), ``start_token`` and
    ``end_token`` (the delimiters) and ``field_mappings`` (field name ->
    placeholder token). ``parse`` interprets the spec as a regular expression;
    ``generate`` interprets it as a template after removing ``\s*`` and
    turning the two-character sequence ``\n`` into a real newline.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the layout, containing the tokens from ``field_mappings``."""
        pass

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens the serialized text."""
        pass

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes the serialized text."""
        pass

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of field name to its placeholder token."""
        pass

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Parse delimited text into an instance of ``cls``.

        Args:
            text: Serialized text; leading/trailing whitespace is ignored.

        Returns:
            A new instance populated from the captured field values.

        Raises:
            ValueError: If the start/end tokens are absent or the body does
                not match the format spec.
        """
        stripped = text.strip()
        start, end = cls.start_token(), cls.end_token()
        # Check that the delimiters are really there; slicing by length alone
        # would quietly discard characters from malformed input.
        if (
            len(stripped) < len(start) + len(end)
            or not stripped.startswith(start)
            or not stripped.endswith(end)
        ):
            raise ValueError("Invalid format")
        # An explicit end index keeps this correct for an empty end token,
        # where ``[:-0]`` would produce an empty string.
        content = stripped[len(start):len(stripped) - len(end)].strip()

        # Create regex pattern from format_spec
        mappings = cls.field_mappings()
        pattern = cls.format_spec()
        for field, token in mappings.items():
            pattern = pattern.replace(token, f"(?P<{field}>.*?)")

        # Require a full match: under re.match a trailing lazy group matches
        # the empty string, dropping the value of a placeholder that ends the
        # spec.
        match = re.fullmatch(pattern, content, re.DOTALL)
        if not match:
            raise ValueError("Invalid format")

        # Create instance with extracted data
        data = {field: match.group(field).strip() for field in mappings}
        return cls(**data)

    def generate(self) -> str:
        r"""Serialize this instance.

        Returns:
            The start token, the filled-in template and the end token, joined
            by newlines.
        """
        # Clean the spec first so the ``\s*`` / ``\n`` rewriting can never
        # touch field values (e.g. source code containing "\n" escapes).
        template = self.format_spec().replace(r'\s*', '').replace(r'\n', '\n')

        replacements: Dict[str, str] = {}
        for field, token in self.field_mappings().items():
            if token:
                # First mapping wins when two fields share a token.
                replacements.setdefault(token, str(getattr(self, field)))

        if replacements:
            # One pass, so a value that happens to contain another field's
            # token is not substituted again. Longest tokens are tried first.
            ordered = sorted(replacements, key=len, reverse=True)
            token_re = re.compile("|".join(re.escape(tok) for tok in ordered))
            template = token_re.sub(lambda m: replacements[m.group(0)], template)

        # Wrap with start and end tokens
        return f"{self.start_token()}\n{template}\n{self.end_token()}"
|
63
|
+
|
64
|
+
|
65
|
+
class MyFormattedModel(FormattingModel):
    """Sample model whose text form lists a name, an age and a city."""

    name: str
    age: int
    city: str

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return the placeholder token used for each field."""
        by_field: Dict[str, str] = {}
        by_field["name"] = "{NAME}"
        by_field["age"] = "{AGE}"
        by_field["city"] = "{CITY}"
        return by_field

    @classmethod
    def format_spec(cls) -> str:
        """Return the three-line layout containing the placeholder tokens."""
        spec = "name: {NAME}\n{AGE} is the age\nlives in {CITY}"
        return spec

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""
        head = "<format>"
        return head

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""
        tail = "</format>"
        return tail
|
89
|
+
|
90
|
+
if __name__ == "__main__":
    # Manual smoke test. Each section generates text from a model, parses it
    # back and prints the comparison; nothing is asserted.
    # NOTE(review): whitespace inside the string literals below is reproduced
    # as it appears in this listing - confirm against the original file.

    # Test object to string
    model = MyFormattedModel(name="John", age=30, city="Tokyo")
    generated = model.generate()
    print("Generated string:")
    print(generated)
    print()

    # Test string to object
    parsed = MyFormattedModel.parse(generated)
    print("Parsed object:")
    print(parsed)
    print()

    # Test round-trip
    print("Round-trip test:")
    print("Original == Parsed:", model == parsed)

    # Test with different values (a city name containing a space)
    another_model = MyFormattedModel(name="Alice", age=25, city="New York")
    another_generated = another_model.generate()
    print("\nAnother generated string:")
    print(another_generated)
    print()

    another_parsed = MyFormattedModel.parse(another_generated)
    print("Another parsed object:")
    print(another_parsed)
    print("Another Original == Another Parsed:", another_model == another_parsed)


    # Code file model: strict layout with literal newlines and a fenced block.
    class CodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            """Return the layout: header, file path line, fenced code block."""
            return "code_file_model\nfile_path: {FILE_PATH}\n```{LANGUAGE}\n{CODE}\n```"

        @classmethod
        def start_token(cls) -> str:
            """Return the opening delimiter."""
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            """Return the closing delimiter."""
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            """Return the placeholder token used for each field."""
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}"
            }


    print("\nTesting CodeFileModel:")
    code_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code="def hello():\n print(\"Hello, World!\")"
    )
    code_generated = code_model.generate()
    print("Generated CodeFileModel string:")
    print(code_generated)
    print()

    code_parsed = CodeFileModel.parse(code_generated)
    print("Parsed CodeFileModel object:")
    print(code_parsed)
    print()

    print("CodeFileModel Round-trip test:")
    print("Original == Parsed:", code_model == code_parsed)

    # Tolerant format: the spec is written as a regex with optional whitespace.
    # This rebinds the name CodeFileModel; from here on the strict definition
    # above is no longer reachable by that name.
    class CodeFileModel(FormattingModel):
        language: str
        file_path: str
        code: str

        @classmethod
        def format_spec(cls) -> str:
            """Return the layout as raw-string regex fragments."""
            return (
                r"code_file_model\s*\n"
                r"file_path:\s*{FILE_PATH}\s*\n"
                r"```\s*{LANGUAGE}\s*\n"
                r"{CODE}\s*"
                r"```"
            )

        @classmethod
        def start_token(cls) -> str:
            """Return the opening delimiter."""
            return "<format>"

        @classmethod
        def end_token(cls) -> str:
            """Return the closing delimiter."""
            return "</format>"

        @classmethod
        def field_mappings(cls) -> Dict[str, str]:
            """Return the placeholder token used for each field."""
            return {
                "file_path": "{FILE_PATH}",
                "language": "{LANGUAGE}",
                "code": "{CODE}"
            }

    print("\nTesting CodeFileModel with various whitespace variations:")

    test_strings = [
        # Standard format
        """<format>
code_file_model
file_path: src/main.py
```python
def hello():
print("Hello, World!")
```
</format>""",
        # Extra whitespace
        """<format>
code_file_model
file_path: src/main.py
``` python
def hello():
print("Hello, World!")
```
</format>""",
        # Extra newlines
        """<format>
code_file_model

file_path: src/main.py

```python

def hello():
print("Hello, World!")

```

</format>"""
    ]

    # For every variant: parse, regenerate, parse again, and print whether the
    # two parsed objects compare equal.
    for i, test_string in enumerate(test_strings, 1):
        print(f"\nTest {i}:")
        print("Input string:")
        print(test_string)

        parsed = CodeFileModel.parse(test_string)
        print("\nParsed object:")
        print(parsed)

        regenerated = parsed.generate()
        print("\nRegenerated string:")
        print(regenerated)

        reparsed = CodeFileModel.parse(regenerated)
        print("\nRound-trip test:")
        print("Original parsed == Reparsed:", parsed == reparsed)
        print("-" * 50)
|
@@ -0,0 +1,172 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
|
3
|
+
import tatsu
|
4
|
+
from tatsu.model import ModelBuilderSemantics
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class FormattingModel(BaseModel, ABC):
    """Abstract model that is rendered with a ``str.format`` template and
    parsed back with a TatSu grammar, between a start and an end token."""

    @classmethod
    @abstractmethod
    def format_spec(cls):
        """Return the ``str.format`` template used by ``format``."""

    @classmethod
    @abstractmethod
    def parse_spec(cls):
        """Return the TatSu grammar source used by ``parse``."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the line that opens the serialized text."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the line that closes the serialized text."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` with the template, wrapped in the two tokens."""
        template = cls.format_spec()
        field_values = instance.dict()
        body = template.format(**field_values)
        return f"{cls.start_token()}\n{body}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Parse text whose first and last lines are the start and end tokens.

        Raises:
            ValueError: If the first or last line is not the expected token.
        """
        rows = formatted_string.strip().split("\n")
        tokens_ok = rows[0] == cls.start_token() and rows[-1] == cls.end_token()
        if not tokens_ok:
            raise ValueError("Invalid start or end token")

        # Everything between the two token lines is handed to the grammar.
        inner = "\n".join(rows[1:-1])
        compiled = tatsu.compile(cls.parse_spec())
        tree = compiled.parse(inner)
        return cls(**tree)
|
46
|
+
|
47
|
+
|
48
|
+
class CodeFileModel(FormattingModel):
    """Model of a code file: language, file path and code text."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the template: header line, path line, fenced code block."""
        return "code_file_model\n{file_path}\n```{language}\n{code}\n```"

    @classmethod
    def parse_spec(cls):
        """Return the TatSu grammar source for the serialized form."""
        # NOTE(review): this literal is not a raw string, so the `\n` in the
        # file_path regex reaches TatSu as an actual newline character.
        # NOTE(review): `@@whitespace` appears after the rules - confirm that
        # TatSu accepts directives in that position.
        return """
@@grammar::CodeFileModel

start = header file_path language code $ ;

header = "code_file_model" ~;
file_path = /[^\n]+/ ~;
language = "```" /[a-zA-Z]+/ ~;
code = /(?s).*?(?=```)/ "```" ~;

@@whitespace :: /\s*/
"""

    @classmethod
    def start_token(cls):
        """Return the line that opens the serialized text."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the line that closes the serialized text."""
        return "</format>"

    @classmethod
    def parse(cls, formatted_string: str) -> "CodeFileModel":
        """Parse serialized text using the grammar plus per-rule semantics.

        Raises:
            ValueError: If the first or last line is not the expected token.
        """
        lines = formatted_string.strip().split("\n")
        if lines[0] != cls.start_token() or lines[-1] != cls.end_token():
            raise ValueError("Invalid start or end token")
        content = "\n".join(lines[1:-1])

        # Semantic actions, named after the grammar rules they post-process.
        class CodeFileModelSemantics(ModelBuilderSemantics):
            def file_path(self, ast):
                return ast.strip()

            def language(self, ast):
                # Index 1: presumably the identifier after the opening fence.
                return ast[1].strip()

            def code(self, ast):
                # Index 0: presumably the text before the closing fence.
                return ast[0].strip()

        parser = tatsu.compile(cls.parse_spec(), semantics=CodeFileModelSemantics())
        ast = parser.parse(content)
        # NOTE(review): `start` has no named elements; confirm that the AST
        # returned here is a mapping keyed by field name, as `**ast` requires.
        return cls(**ast)
|
100
|
+
|
101
|
+
|
102
|
+
# Test cases
|
103
|
+
if __name__ == "__main__":
    # Self-test script: formatting, parsing, round-trip, a second set of
    # values, and whitespace-tolerant parsing. Each step asserts, then prints.
    # NOTE(review): whitespace inside the string literals below is reproduced
    # as it appears in this listing; the exact-match asserts depend on it, so
    # confirm against the original file.

    # Test formatting
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    expected_format = """<format>
code_file_model
src/main.py
```Python
def hello():
print('Hello, World!')
```
</format>"""
    assert (
        formatted == expected_format
    ), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
    print("Formatting test passed.")

    # Test parsing
    parsed = CodeFileModel.parse(formatted)
    assert (
        parsed == code_file
    ), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
    print("Parsing test passed.")

    # Test round-trip
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    assert (
        round_trip == code_file
    ), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
    print("Round-trip test passed.")

    # Test with different values
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    assert (
        parsed2 == code_file2
    ), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
    print("Different values test passed.")

    # Test tolerant parsing: a blank line after the path and a space between
    # the opening fence and the language name.
    tolerant_input = """<format>
code_file_model
src/main.py

``` Python
def hello():
print('Hello, World!')
```
</format>"""
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    assert (
        parsed_tolerant == expected_tolerant
    ), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
|