langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from pyparsing import *
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from typing import Dict, Any
|
5
|
+
|
6
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model that can be parsed from, and rendered to, delimited text.

    Subclasses provide a pyparsing grammar for the text between the start and
    end tokens, plus a mapping from model field names to the results names
    used inside that grammar.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> ParserElement:
        """Return the pyparsing element matching the text between the tokens."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized record."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized record."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name -> grammar results name."""

    @classmethod
    def field_templates(cls) -> Dict[str, str]:
        """Return the line templates used by :meth:`generate`, keyed by token.

        Each template is a ``str.format`` string with a single ``{value}``
        placeholder. The default table reproduces the lines that used to be
        hard-coded in :meth:`generate`; subclasses with other tokens can
        override this method instead of re-implementing :meth:`generate`.
        """
        return {
            "name": "name: {value}",
            "age": "age is {value}",
            "city": "lives in {value}",
        }

    @classmethod
    def parse(cls, text: str) -> 'GrammarBasedModel':
        """Parse ``text`` (start token, grammar body, end token) into a model.

        The whole string must match (``parseAll=True``). For every mapped
        field the last element of the corresponding named result is used as
        the field value.
        """
        full_grammar = (
            Suppress(cls.start_token())
            + cls.grammar()
            + Suppress(cls.end_token())
        )
        parsed = full_grammar.parseString(text, parseAll=True)
        return cls(
            **{
                field: parsed[token][-1]
                for field, token in cls.field_mappings().items()
            }
        )

    def generate(self) -> str:
        """Render this model as newline-separated text between the tokens.

        One line is emitted per mapped field whose token has an entry in
        :meth:`field_templates`; tokens without a template are skipped, as
        they were when the templates were hard-coded here.
        """
        templates = self.field_templates()
        result = [self.start_token()]
        for field, token in self.field_mappings().items():
            value = getattr(self, field)
            template = templates.get(token)
            if template is not None:
                result.append(template.format(value=value))
        result.append(self.end_token())
        return "\n".join(result)
|
49
|
+
|
50
|
+
class PersonSpec(GrammarBasedModel):
    """Person record serialized as ``name: X`` / ``age is N`` / ``lives in Y``."""

    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls):
        """Build the three labelled groups and chain them in order."""
        name_part, age_part, city_part = (
            Group(Literal(label) + Word(charset))(results_name)
            for results_name, label, charset in (
                ("name", "name:", alphas),
                ("age", "age is", nums),
                ("city", "lives in", alphas),
            )
        )
        return name_part + age_part + city_part

    @classmethod
    def start_token(cls):
        """Opening delimiter of a serialized person."""
        opening = "<spec>"
        return opening

    @classmethod
    def end_token(cls):
        """Closing delimiter of a serialized person."""
        closing = "</spec>"
        return closing

    @classmethod
    def field_mappings(cls):
        """Each field maps to the grammar results name of the same spelling."""
        return {key: key for key in ("name", "age", "city")}
|
77
|
+
|
78
|
+
if __name__ == "__main__":
    # Parse a hand-written sample record.
    test_string = """
<spec>
name: John
age is 30
lives in Tokyo
</spec>
"""
    print("Parsed person:", PersonSpec.parse(test_string))

    # Render a freshly constructed record.
    new_person = PersonSpec(name="Alice", age=25, city="NewYork")
    generated_string = new_person.generate()
    print("\nGenerated string:", generated_string, sep="\n")

    # Parsing the rendered text must give back an equal model.
    round_trip_person = PersonSpec.parse(generated_string)
    print("\nRound-trip parsed person:", round_trip_person)

    assert new_person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")
|
102
|
+
|
103
|
+
|
@@ -0,0 +1,139 @@
|
|
1
|
+
import re
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Dict
|
4
|
+
|
5
|
+
from langroid.pydantic_v1 import BaseModel
|
6
|
+
|
7
|
+
|
8
|
+
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a regex-flavoured text specification.

    ``format_spec()`` is a regular-expression fragment in which each
    placeholder from ``field_mappings()`` marks where a field value sits.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the regex fragment containing the field placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized record."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized record."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of field name -> placeholder text in the spec."""

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Turn the spec into a full regex with one named group per field."""
        body = cls.format_spec()
        for field_name, marker in cls.field_mappings().items():
            # Each placeholder becomes a lazy named capture group.
            body = body.replace(marker, f"(?P<{field_name}>.*?)")
        opening = re.escape(cls.start_token())
        closing = re.escape(cls.end_token())
        return opening + "\\s*" + body + "\\s*" + closing

    @classmethod
    def parse(cls, text: str) -> "FormattingModel":
        """Search ``text`` for a record and build a model from its groups.

        Raises:
            ValueError: if the pattern is not found anywhere in ``text``.
        """
        pattern = cls._create_regex_pattern()
        found = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if found is None:
            raise ValueError(
                f"Text does not match the expected format. Pattern: {pattern}, Text: {text}"
            )
        cleaned = {}
        for key, raw in found.groupdict().items():
            cleaned[key] = raw.strip()
        return cls(**cleaned)

    def generate(self) -> str:
        """Render the model: spec with values filled in, wrapped in the tokens."""
        # Every literal "\s*" in the spec is rendered as a single space.
        rendered = self.format_spec().replace("\\s*", " ")
        for field_name, marker in self.field_mappings().items():
            rendered = rendered.replace(marker, str(getattr(self, field_name)))
        return "\n".join((self.start_token(), rendered, self.end_token()))
|
56
|
+
|
57
|
+
|
58
|
+
class PersonModel(FormattingModel):
    """Person record: ``name:``, ``age is`` and ``lives in`` with loose spacing."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """Regex fragment with ``\\s*`` wherever whitespace is flexible."""
        return (
            "name:\\s*{name}"
            "\\s*age is\\s*{age}"
            "\\s*lives in\\s*{city}"
        )

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter."""
        opening = "<spec>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter."""
        closing = "</spec>"
        return closing

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Each field is marked in the spec by its name wrapped in braces."""
        return {key: "{" + key + "}" for key in ("name", "age", "city")}
|
78
|
+
|
79
|
+
|
80
|
+
def test_round_trip(model_class, input_string):
    """Check that parse -> generate -> parse yields an equal model.

    Prints each intermediate result, then asserts that both parsed models
    compare equal and that every attribute annotated directly on
    ``model_class`` exists on the parsed model with the annotated type.
    """
    first_pass = model_class.parse(input_string)
    print(f"Parsed model: {first_pass}")

    rendered = first_pass.generate()
    print(f"Generated string:\n{rendered}")

    second_pass = model_class.parse(rendered)
    print(f"Reparsed model: {second_pass}")

    assert (
        first_pass == second_pass
    ), "Round trip failed: original and reparsed models are not equal"

    declared = model_class.__annotations__
    for field in declared:
        expected_type = declared[field]
        assert hasattr(first_pass, field), f"Field {field} is missing"
        actual_value = getattr(first_pass, field)
        assert isinstance(
            actual_value, expected_type
        ), f"Field {field} has incorrect type"

    print("Round trip test passed successfully!")
|
106
|
+
|
107
|
+
|
108
|
+
if __name__ == "__main__":
    # Test case 1: Standard formatting
    test_string1 = """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
"""

    # Test case 2: Varying whitespace
    test_string2 = "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>"

    # Test case 3: Multiline values
    test_string3 = """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
"""

    divider = "\n" + "=" * 50 + "\n"
    for position, sample in enumerate((test_string1, test_string2, test_string3)):
        if position:
            # A divider goes between consecutive cases, not before the first.
            print(divider)
        test_round_trip(PersonModel, sample)

    print("All tests passed successfully!")
|
@@ -0,0 +1,130 @@
|
|
1
|
+
import re
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Dict, Any
|
4
|
+
from pydantic import BaseModel
|
5
|
+
|
6
|
+
class FormattingModel(BaseModel, ABC):
    """Base model that is parsed and rendered via a regex-style spec.

    The spec returned by ``format_spec()`` is treated as a regular-expression
    fragment; placeholders listed in ``field_mappings()`` mark field values.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the regex fragment containing the field placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of field name -> placeholder text."""

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Substitute a lazy named group for every placeholder and add tokens."""
        fragment = cls.format_spec()
        for attr, slot in cls.field_mappings().items():
            fragment = fragment.replace(slot, f"(?P<{attr}>.*?)")
        head = re.escape(cls.start_token())
        tail = re.escape(cls.end_token())
        return "\\s*".join((head, fragment, tail))

    @classmethod
    def parse(cls, text: str) -> 'FormattingModel':
        """Build a model from the first record found in ``text``.

        Raises:
            ValueError: if ``text`` contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        hit = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if hit is None:
            raise ValueError(f"Text does not match the expected format. Pattern: {pattern}, Text: {text}")
        stripped = {}
        for group_name, captured in hit.groupdict().items():
            stripped[group_name] = captured.strip()
        return cls(**stripped)

    def generate(self) -> str:
        """Render the spec with field values, wrapped in the tokens."""
        # Literal "\s*" markers in the spec are rendered as one space each.
        text = self.format_spec().replace("\\s*", " ")
        for attr, slot in self.field_mappings().items():
            text = text.replace(slot, str(getattr(self, attr)))
        return "\n".join((self.start_token(), text, self.end_token()))
|
50
|
+
|
51
|
+
class PersonModel(FormattingModel):
    """Person record with name, age and city lines inside ``<spec>`` tags."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """Regex fragment; ``\\s*`` marks flexible whitespace."""
        return (
            "name:\\s*{name}"
            "\\s*age is\\s*{age}"
            "\\s*lives in\\s*{city}"
        )

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter."""
        tag = "<spec>"
        return tag

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter."""
        tag = "</spec>"
        return tag

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Placeholders are the field names wrapped in braces."""
        return {attr: "{" + attr + "}" for attr in ("name", "age", "city")}
|
75
|
+
|
76
|
+
def test_round_trip(model_class, input_string):
    """Parse, regenerate and re-parse ``input_string``, asserting equality.

    Also asserts that each attribute annotated directly on ``model_class`` is
    present on the parsed model and is an instance of its annotation.
    """
    original = model_class.parse(input_string)
    print(f"Parsed model: {original}")

    regenerated = original.generate()
    print(f"Generated string:\n{regenerated}")

    reparsed = model_class.parse(regenerated)
    print(f"Reparsed model: {reparsed}")

    assert original == reparsed, "Round trip failed: original and reparsed models are not equal"

    annotations = model_class.__annotations__
    for field in annotations:
        field_type = annotations[field]
        assert hasattr(original, field), f"Field {field} is missing"
        current = getattr(original, field)
        assert isinstance(current, field_type), f"Field {field} has incorrect type"

    print("Round trip test passed successfully!")
|
98
|
+
|
99
|
+
if __name__ == "__main__":
    # Test case 1: Standard formatting
    test_string1 = """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
"""

    # Test case 2: Varying whitespace
    test_string2 = "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>"

    # Test case 3: Multiline values
    test_string3 = """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
"""

    separator = "\n" + "="*50 + "\n"
    cases = (test_string1, test_string2, test_string3)
    for case_number, case_text in enumerate(cases):
        if case_number > 0:
            # Separator only between cases.
            print(separator)
        test_round_trip(PersonModel, case_text)

    print("All tests passed successfully!")
|
@@ -0,0 +1,124 @@
|
|
1
|
+
import re
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
|
4
|
+
from langroid.pydantic_v1 import BaseModel
|
5
|
+
|
6
|
+
|
7
|
+
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a ``str.format``-style text template.

    ``format_spec()`` returns a template such as ``"name: {name}\\nage is {age}"``.
    The same template drives both :meth:`generate` (via ``str.format``) and
    :meth:`parse` (via a regex derived from it).
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template with ``{field}`` placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized record."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized record."""

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Derive the parsing regex from the template.

        ``{field}`` placeholders become lazy named groups and newlines become
        ``\\s*``. All other template text is escaped, so characters such as
        ``.``, ``(`` or ``?`` in the template match themselves instead of
        being interpreted as regex syntax.
        """
        # With one capture group, re.split alternates literal text (even
        # indexes) with placeholder names (odd indexes).
        pieces = re.split(r"\{(\w+)\}", cls.format_spec())
        regex_parts = []
        for index, piece in enumerate(pieces):
            if index % 2:
                regex_parts.append(f"(?P<{piece}>.*?)")
            else:
                # Newlines stay flexible; everything else is literal.
                regex_parts.append(
                    r"\s*".join(re.escape(chunk) for chunk in piece.split("\n"))
                )
        pattern = "".join(regex_parts)
        return f"{re.escape(cls.start_token())}\\s*{pattern}\\s*{re.escape(cls.end_token())}"

    @classmethod
    def parse(cls, text: str) -> "FormattingModel":
        """Build a model from the first record found in ``text``.

        Matching is case-insensitive and ``.`` spans newlines; captured
        values are stripped of surrounding whitespace.

        Raises:
            ValueError: if ``text`` contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return cls(**{k: v.strip() for k, v in match.groupdict().items()})
        raise ValueError(
            f"Text does not match the expected format. Pattern: {pattern}, Text: {text}"
        )

    def generate(self) -> str:
        """Fill the template with this model's fields and wrap it in the tokens."""
        content = self.format_spec().format(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
|
45
|
+
|
46
|
+
|
47
|
+
class PersonModel(FormattingModel):
    """Person record rendered as three lines inside ``<spec>`` tags."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """One template line per field, joined by newlines."""
        template_lines = ("name: {name}", "age is {age}", "lives in {city}")
        return "\n".join(template_lines)

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter."""
        opening = "<spec>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter."""
        closing = "</spec>"
        return closing
|
63
|
+
|
64
|
+
|
65
|
+
def test_round_trip(model_class, input_string):
    """Verify that ``input_string`` survives parse -> generate -> parse.

    Prints the intermediate values, asserts the two parsed models are equal,
    and asserts each attribute annotated directly on ``model_class`` is
    present with the annotated type.
    """
    parsed_once = model_class.parse(input_string)
    print(f"Parsed model: {parsed_once}")

    text_again = parsed_once.generate()
    print(f"Generated string:\n{text_again}")

    parsed_twice = model_class.parse(text_again)
    print(f"Reparsed model: {parsed_twice}")

    assert (
        parsed_once == parsed_twice
    ), "Round trip failed: original and reparsed models are not equal"

    type_by_field = model_class.__annotations__
    for field in type_by_field:
        wanted = type_by_field[field]
        assert hasattr(parsed_once, field), f"Field {field} is missing"
        got = getattr(parsed_once, field)
        assert isinstance(
            got, wanted
        ), f"Field {field} has incorrect type"

    print("Round trip test passed successfully!")
|
91
|
+
|
92
|
+
|
93
|
+
if __name__ == "__main__":
    # Test case 1: Standard formatting
    test_string1 = """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
"""

    # Test case 2: Varying whitespace
    test_string2 = "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>"

    # Test case 3: Multiline values
    test_string3 = """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
"""

    banner = "\n" + "=" * 50 + "\n"
    for ordinal, payload in enumerate((test_string1, test_string2, test_string3)):
        if ordinal:
            # Banner separates consecutive cases only.
            print(banner)
        test_round_trip(PersonModel, payload)

    print("All tests passed successfully!")
|
@@ -0,0 +1,116 @@
|
|
1
|
+
import re
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Dict, Any
|
4
|
+
from pydantic import BaseModel
|
5
|
+
|
6
|
+
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a ``str.format``-style text template.

    The template from ``format_spec()`` is used by :meth:`generate` through
    ``str.format`` and by :meth:`parse` through a regex derived from it.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template with ``{field}`` placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized record."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized record."""

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Derive the parsing regex from the template.

        ``{field}`` placeholders become lazy named groups and newlines become
        ``\\s*``. The remaining template text is escaped so that regex
        metacharacters in it (``.``, ``(``, ``?`` ...) match literally.
        """
        # With one capture group, re.split alternates literal text (even
        # indexes) with placeholder names (odd indexes).
        pieces = re.split(r'\{(\w+)\}', cls.format_spec())
        regex_parts = []
        for index, piece in enumerate(pieces):
            if index % 2:
                regex_parts.append(f'(?P<{piece}>.*?)')
            else:
                # Newlines stay flexible; everything else is literal.
                regex_parts.append(
                    r'\s*'.join(re.escape(chunk) for chunk in piece.split('\n'))
                )
        pattern = ''.join(regex_parts)
        return f"{re.escape(cls.start_token())}\\s*{pattern}\\s*{re.escape(cls.end_token())}"

    @classmethod
    def parse(cls, text: str) -> 'FormattingModel':
        """Build a model from the first record found in ``text``.

        Matching is case-insensitive and ``.`` spans newlines; captured
        values are stripped of surrounding whitespace.

        Raises:
            ValueError: if ``text`` contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return cls(**{k: v.strip() for k, v in match.groupdict().items()})
        raise ValueError(f"Text does not match the expected format. Pattern: {pattern}, Text: {text}")

    def generate(self) -> str:
        """Fill the template with this model's fields and wrap it in the tokens."""
        content = self.format_spec().format(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
|
42
|
+
|
43
|
+
class PersonModel(FormattingModel):
    """Person record: name, age and city lines between ``<spec>`` tags."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """Newline-joined template, one line per field."""
        rows = ("name: {name}", "age is {age}", "lives in {city}")
        return "\n".join(rows)

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter."""
        tag = "<spec>"
        return tag

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter."""
        tag = "</spec>"
        return tag
|
59
|
+
|
60
|
+
def test_round_trip(model_class, input_string):
    """Assert that parsing, rendering and re-parsing gives an equal model.

    Additionally asserts that every attribute annotated directly on
    ``model_class`` exists on the parsed model and matches its annotation.
    """
    model_a = model_class.parse(input_string)
    print(f"Parsed model: {model_a}")

    serialized = model_a.generate()
    print(f"Generated string:\n{serialized}")

    model_b = model_class.parse(serialized)
    print(f"Reparsed model: {model_b}")

    assert model_a == model_b, "Round trip failed: original and reparsed models are not equal"

    hints = model_class.__annotations__
    for field in hints:
        field_type = hints[field]
        assert hasattr(model_a, field), f"Field {field} is missing"
        observed = getattr(model_a, field)
        assert isinstance(observed, field_type), f"Field {field} has incorrect type"

    print("Round trip test passed successfully!")
|
82
|
+
|
83
|
+
if __name__ == "__main__":
    # Test case 1: Standard formatting
    test_string1 = """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
"""

    # Test case 2: Varying whitespace
    test_string2 = "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>"

    # Test case 3: Multiline values
    test_string3 = """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
"""

    rule = "\n" + "="*50 + "\n"
    inputs = (test_string1, test_string2, test_string3)
    for slot, raw_text in enumerate(inputs):
        if slot > 0:
            # Rule line goes between cases, never before the first one.
            print(rule)
        test_round_trip(PersonModel, raw_text)

    print("All tests passed successfully!")
|
115
|
+
|
116
|
+
|
@@ -0,0 +1,80 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar, Dict
|
3
|
+
|
4
|
+
from tatsu import compile as compile_grammar
|
5
|
+
from tatsu.model import ModelBuilder
|
6
|
+
|
7
|
+
from langroid.pydantic_v1 import BaseModel
|
8
|
+
|
9
|
+
|
10
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Model parsed and rendered through a TatSu grammar.

    Subclasses set the class-level ``grammar``, ``start_token``,
    ``end_token`` and ``field_mappings`` (field name -> grammar rule name)
    and implement :meth:`get_grammar`.
    """

    grammar: ClassVar[str]
    start_token: ClassVar[str]
    end_token: ClassVar[str]
    field_mappings: ClassVar[Dict[str, str]]

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return the grammar source text to compile."""

    @classmethod
    def parse(cls, text: str) -> "GrammarBasedModel":
        """Compile the grammar, parse ``text`` from rule ``start``, build a model.

        NOTE(review): ``text`` is handed to the parser unchanged, so the
        start/end tokens are not stripped here - confirm the grammar is
        expected to accept them.
        """
        compiled = compile_grammar(cls.get_grammar())
        tree = compiled.parse(text, start="start")
        values = {}
        for attr, rule_name in cls.field_mappings.items():
            values[attr] = getattr(tree, rule_name)
        return cls(**values)

    def generate(self) -> str:
        """Render this model between the start and end tokens.

        NOTE(review): relies on ``ModelBuilder`` exposing a ``start()``
        method and on re-parsing ``str(ast)`` - verify against the TatSu
        version in use.
        """
        compiled = compile_grammar(self.get_grammar())
        builder = ModelBuilder()
        for attr, rule_name in self.field_mappings.items():
            setattr(builder, rule_name, getattr(self, attr))
        tree = builder.start()
        body = compiled.parse(str(tree), start='start')
        return f"{self.start_token}\n{body}\n{self.end_token}"
|
37
|
+
|
38
|
+
|
39
|
+
class PersonSpec(GrammarBasedModel):
    """Person record described by a three-line TatSu grammar."""

    name: str
    age: int
    city: str

    # Raw string: the grammar embeds regex escapes (\s, \d, \r, \n) that must
    # reach the grammar compiler untouched. In a normal string literal \s and
    # \d are invalid escape sequences and \r / \n are replaced by Python.
    grammar = r"""
start = name_line age_line city_line;
name_line = 'name:' /\s*/ name:/.+/ EOL;
age_line = 'age is' /\s*/ age:/\d+/ EOL;
city_line = 'lives in' /\s*/ city:/.+/ EOL;
EOL = /\r?\n/;
"""
    start_token = "<spec>"
    end_token = "</spec>"
    # Each model field maps to the grammar element of the same name.
    field_mappings = {"name": "name", "age": "age", "city": "city"}

    @classmethod
    def get_grammar(cls):
        """Return the class-level grammar source."""
        return cls.grammar
|
58
|
+
|
59
|
+
|
60
|
+
if __name__ == "__main__":
    # Parse a hand-written sample record.
    input_str = """<spec>
name: John Doe
age is 30
lives in New York
</spec>"""
    person = PersonSpec.parse(input_str)
    print("Parsed person:", person)

    # Render the parsed record back to text.
    generated_str = person.generate()
    print("\nGenerated string:", generated_str, sep="\n")

    # The rendered text must parse back to an equal model.
    round_trip_person = PersonSpec.parse(generated_str)
    print("\nRound-trip parsed person:", round_trip_person)

    assert person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")
|