langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,153 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from lark import Lark, Transformer, Visitor
|
3
|
+
from abc import ABC, abstractmethod
|
4
|
+
from typing import Dict, Any
|
5
|
+
|
6
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model that is read from, and written to, text described by a Lark grammar.

    Subclasses provide the grammar text, the name of its start rule and a
    mapping from model field names to the grammar rules that carry them.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> str:
        """Return the Lark grammar text for this model."""

    @classmethod
    @abstractmethod
    def start_rule(cls) -> str:
        """Return the name of the rule parsing starts from."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name -> grammar rule name."""

    @classmethod
    def parse(cls, text: str) -> 'GrammarBasedModel':
        """Parse ``text`` with the class grammar and build a model instance.

        Every rule named in ``field_mappings()`` contributes its first kept
        child as the value of the mapped field. Errors raised by Lark or by
        model validation propagate unchanged.
        """
        parser = Lark(cls.grammar(), start=cls.start_rule())
        tree = parser.parse(text)

        class TreeToDict(Transformer):
            """Turn each mapped rule into a one-entry ``{field: value}`` dict."""

            def __init__(self, field_mappings):
                super().__init__()
                # Invert once; the first field listed for a rule wins, as
                # in a linear scan over the mapping.
                self.rule_to_field = {}
                for field, rule in field_mappings.items():
                    self.rule_to_field.setdefault(rule, field)

            def __default__(self, data, children, meta):
                field = self.rule_to_field.get(data)
                if field is not None:
                    # Store a plain str rather than the Lark token object.
                    return {field: str(children[0])}
                return children

        def collect(node: Any, into: Dict[str, Any]) -> None:
            # Mapped rules may sit below intermediate rules, and the start
            # rule need not be called "start": merge dicts at any depth.
            if isinstance(node, dict):
                into.update(node)
            elif isinstance(node, list):
                for child in node:
                    collect(child, into)

        values: Dict[str, Any] = {}
        collect(TreeToDict(cls.field_mappings()).transform(tree), values)
        return cls(**values)

    def generate(self) -> str:
        """Render this model as text that the class grammar accepts.

        The compiled grammar is walked from the start rule. String
        terminals are emitted verbatim; inside a rule named in
        ``field_mappings()`` every regex terminal is replaced by the mapped
        field's value. Pieces are joined with single spaces, which assumes
        the grammar ignores whitespace (as ``%ignore WS`` does). Only the
        first alternative of each rule is used.

        Raises:
            ValueError: if a regex terminal occurs outside any mapped rule,
                or the grammar is recursive.
        """
        parser = Lark(self.grammar(), start=self.start_rule())
        patterns = {term.name: term.pattern for term in parser.terminals}

        expansions: Dict[str, Any] = {}
        for rule in parser.rules:
            expansions.setdefault(rule.origin.name, rule.expansion)

        rule_to_field: Dict[str, str] = {}
        for field, rule_name in self.field_mappings().items():
            rule_to_field.setdefault(rule_name, field)

        missing = object()  # sentinel: no field value is in effect

        def render(rule_name, value, active):
            if rule_name in active:
                raise ValueError(
                    f"Cannot generate text for recursive rule {rule_name!r}"
                )
            if rule_name in rule_to_field:
                value = getattr(self, rule_to_field[rule_name])
            pieces = []
            for symbol in expansions[rule_name]:
                if not symbol.is_term:
                    pieces.extend(render(symbol.name, value, active | {rule_name}))
                    continue
                pattern = patterns[symbol.name]
                if pattern.type == "str":
                    pieces.append(pattern.value)
                elif value is missing:
                    raise ValueError(
                        f"Terminal {symbol.name!r} in rule {rule_name!r} "
                        "is not covered by field_mappings()"
                    )
                else:
                    pieces.append(str(value))
            return pieces

        return " ".join(render(self.start_rule(), missing, frozenset()))
|
72
|
+
|
73
|
+
class PersonSpec(GrammarBasedModel):
    """Person record written as ``<spec> name: … age is … lives in … </spec>``."""

    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls):
        """Return the Lark grammar for the ``<spec>`` text form."""
        spec_grammar = """
start: "<spec>" name age city "</spec>"
name: "name:" WORD
age: "age" "is" NUMBER
city: "lives" "in" WORD
%import common.WORD
%import common.NUMBER
%import common.WS
%ignore WS
"""
        return spec_grammar

    @classmethod
    def start_rule(cls):
        """Return the name of the top-level grammar rule."""
        return "start"

    @classmethod
    def field_mappings(cls):
        """Map each model field to the grammar rule of the same name."""
        return {field: field for field in ("name", "age", "city")}
|
102
|
+
|
103
|
+
if __name__ == "__main__":
    # Parse a hand-written sample.
    test_string = """
<spec>
name: John
age is 30
lives in Tokyo
</spec>
"""
    print("Parsed person:", PersonSpec.parse(test_string))

    # Render a model instance to text.
    new_person = PersonSpec(name="Alice", age=25, city="NewYork")
    generated_string = new_person.generate()
    print("\nGenerated string:", generated_string, sep="\n")

    # The rendered text is expected to parse back to an equal model.
    round_trip_person = PersonSpec.parse(generated_string)
    print("\nRound-trip parsed person:", round_trip_person)
    assert new_person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")

    # Same fields, different surface syntax.
    class ModifiedPersonSpec(PersonSpec):
        """PersonSpec variant using ``<person>`` tags and capitalised labels."""

        @classmethod
        def grammar(cls):
            """Return the Lark grammar for the ``<person>`` text form."""
            return """
start: "<person>" name age city "</person>"
name: "Name:" WORD
age: "Age:" NUMBER "years"
city: "City:" WORD
%import common.WORD
%import common.NUMBER
%import common.WS
%ignore WS
"""

    modified_person = ModifiedPersonSpec(name="Bob", age=40, city="London")
    modified_string = modified_person.generate()
    print("\nModified grammar generated string:", modified_string, sep="\n")

    parsed_modified = ModifiedPersonSpec.parse(modified_string)
    print("Parsed modified person:", parsed_modified)
    assert modified_person == parsed_modified, "Modified grammar round-trip failed"
    print("Modified grammar round-trip test passed!")
|
152
|
+
|
153
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
|
3
|
+
from parse import compile
|
4
|
+
|
5
|
+
from langroid.pydantic_v1 import BaseModel
|
6
|
+
|
7
|
+
|
8
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model whose text form is described by a ``parse`` format string.

    Subclasses return the pattern from ``grammar()``; named fields in the
    pattern correspond to model fields.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> str:
        """Return the ``parse``-library pattern for this model."""

    @classmethod
    def parse(cls, string: str):
        """Match ``string`` against the pattern and build a model instance.

        Raises:
            ValueError: if the string does not match the pattern.
        """
        result = compile(cls.grammar()).parse(string)
        if result is None:
            raise ValueError("Invalid string format")
        return cls(**result.named)

    def generate(self) -> str:
        """Render this model using the pattern returned by ``grammar()``.

        ``str.format`` cannot be applied to the pattern directly: anonymous
        ``{:s}`` fields have no positional argument and ``parse`` type codes
        such as ``S`` are not valid ``format`` specs. The pattern is instead
        walked field by field: literal text is copied, each anonymous
        whitespace field becomes a single space, and each named field
        becomes ``str()`` of the model value.

        Raises:
            KeyError: if the pattern names a field the model does not have.
            ValueError: if the pattern has an anonymous field other than
                ``{:s}``, for which no text can be chosen.
        """
        # Local import keeps this block self-contained.
        from string import Formatter

        values = self.dict()
        pieces = []
        for literal, field_name, format_spec, _ in Formatter().parse(self.grammar()):
            pieces.append(literal)
            if field_name is None:
                # Trailing literal text with no replacement field.
                continue
            if field_name == "" or field_name.isdigit():
                if format_spec != "s":
                    raise ValueError(
                        f"Cannot generate text for anonymous field {{:{format_spec}}}"
                    )
                pieces.append(" ")
                continue
            pieces.append(str(values[field_name]))
        return "".join(pieces)

    class Config:
        arbitrary_types_allowed = True
|
27
|
+
|
28
|
+
|
29
|
+
class Person(GrammarBasedModel):
    """Person record written as ``key=value`` lines between ``<spec>`` tags."""

    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls) -> str:
        """Return the ``parse`` pattern for the ``<spec>`` text form."""
        pattern = """
{:s}<spec>{:s}
{:s}name={name:S}{:s}
{:s}age={age:d}{:s}
{:s}city={city:S}{:s}
{:s}</spec>{:s}
"""
        return pattern
|
43
|
+
|
44
|
+
|
45
|
+
class SimpleFormat(GrammarBasedModel):
    """Single ``key : value`` pair."""

    key: str
    value: str

    @classmethod
    def grammar(cls) -> str:
        """Return the ``parse`` pattern for the ``key:value`` text form."""
        pattern = "{:s}{key:S}{:s}:{:s}{value:S}{:s}"
        return pattern
|
52
|
+
|
53
|
+
|
54
|
+
if __name__ == "__main__":
    # Person: parse a multi-line sample, then render it again.
    input_string = """
<spec>
name=John Doe
age=30
city=New York
</spec>
"""
    person = Person.parse(input_string)
    print("Parsed person:", person)
    generated_string = person.generate()
    print("Generated string:", generated_string, sep="\n")

    # SimpleFormat: parse a one-line sample, then render it again.
    simple_input = " Hello : World "
    simple = SimpleFormat.parse(simple_input)
    print("Parsed simple format:", simple)
    simple_generated = simple.generate()
    print("Generated simple format:", simple_generated)

    # Person again, with irregular whitespace between the lines.
    input_string2 = "<spec>\nname=Jane Smith\n\n age=25\n\t\tcity=London\n</spec>"
    person2 = Person.parse(input_string2)
    print("Parsed person with different whitespace:", person2)
    generated_string2 = person2.generate()
    print("Generated string for person2:", generated_string2, sep="\n")
|
@@ -0,0 +1,80 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from parse import compile
|
4
|
+
|
5
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model whose text form is described by a ``parse`` format string.

    Subclasses return the pattern from ``grammar()``; named fields in the
    pattern correspond to model fields.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> str:
        """Return the ``parse``-library pattern for this model."""

    @classmethod
    def parse(cls, string: str):
        """Match ``string`` against the pattern and build a model instance.

        Raises:
            ValueError: if the string does not match the pattern.
        """
        result = compile(cls.grammar()).parse(string)
        if result is None:
            raise ValueError("Invalid string format")
        return cls(**result.named)

    def generate(self) -> str:
        """Render this model using the pattern returned by ``grammar()``.

        ``str.format`` cannot be applied to the pattern directly: anonymous
        ``{:s}`` fields have no positional argument and ``parse`` type codes
        such as ``S`` are not valid ``format`` specs. The pattern is instead
        walked field by field: literal text is copied, each anonymous
        whitespace field becomes a single space, and each named field
        becomes ``str()`` of the model value.

        Raises:
            KeyError: if the pattern names a field the model does not have.
            ValueError: if the pattern has an anonymous field other than
                ``{:s}``, for which no text can be chosen.
        """
        # Local import keeps this block self-contained.
        from string import Formatter

        values = self.dict()
        pieces = []
        for literal, field_name, format_spec, _ in Formatter().parse(self.grammar()):
            pieces.append(literal)
            if field_name is None:
                # Trailing literal text with no replacement field.
                continue
            if field_name == "" or field_name.isdigit():
                if format_spec != "s":
                    raise ValueError(
                        f"Cannot generate text for anonymous field {{:{format_spec}}}"
                    )
                pieces.append(" ")
                continue
            pieces.append(str(values[field_name]))
        return "".join(pieces)

    class Config:
        arbitrary_types_allowed = True
|
24
|
+
|
25
|
+
class Person(GrammarBasedModel):
    """Person record written as ``key=value`` lines between ``<spec>`` tags."""

    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls) -> str:
        """Return the ``parse`` pattern for the ``<spec>`` text form."""
        pattern = """
{:s}<spec>{:s}
{:s}name={name:S}{:s}
{:s}age={age:d}{:s}
{:s}city={city:S}{:s}
{:s}</spec>{:s}
"""
        return pattern
|
39
|
+
|
40
|
+
class SimpleFormat(GrammarBasedModel):
    """Single ``key : value`` pair."""

    key: str
    value: str

    @classmethod
    def grammar(cls) -> str:
        """Return the ``parse`` pattern for the ``key:value`` text form."""
        pattern = "{:s}{key:S}{:s}:{:s}{value:S}{:s}"
        return pattern
|
47
|
+
|
48
|
+
if __name__ == "__main__":
    # Person: parse a multi-line sample, then render it again.
    input_string = """
<spec>
name=John Doe
age=30
city=New York
</spec>
"""
    person = Person.parse(input_string)
    print("Parsed person:", person)
    generated_string = person.generate()
    print("Generated string:", generated_string, sep="\n")

    # SimpleFormat: parse a one-line sample, then render it again.
    simple_input = " Hello : World "
    simple = SimpleFormat.parse(simple_input)
    print("Parsed simple format:", simple)
    simple_generated = simple.generate()
    print("Generated simple format:", simple_generated)

    # Person again, with irregular whitespace between the lines.
    input_string2 = "<spec>\nname=Jane Smith\n\n age=25\n\t\tcity=London\n</spec>"
    person2 = Person.parse(input_string2)
    print("Parsed person with different whitespace:", person2)
    generated_string2 = person2.generate()
    print("Generated string for person2:", generated_string2, sep="\n")
|
@@ -0,0 +1,129 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar, Dict
|
3
|
+
|
4
|
+
from parsimonious import Grammar, NodeVisitor
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model read from, and written to, text described by a parsimonious grammar.

    The text form is ``start_token``, a body matching the grammar, then
    ``end_token``. Subclasses set the class variables below and implement
    ``get_grammar()``.
    """

    # Grammar text in parsimonious syntax.
    grammar: ClassVar[str]
    # Delimiters wrapped around the grammar-matched body.
    start_token: ClassVar[str]
    end_token: ClassVar[str]
    # Model field name -> grammar rule name carrying that field's text.
    field_mappings: ClassVar[Dict[str, str]]

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return the parsimonious grammar text for this model."""

    @classmethod
    def parse(cls, text: str) -> "GrammarBasedModel":
        """Parse delimited ``text`` and build a model instance.

        Whitespace around the delimiters is ignored. The text of each node
        whose rule is named in ``field_mappings`` is stripped and used as
        the value of the mapped field.

        Raises:
            ValueError: if ``text`` is not wrapped in ``start_token`` and
                ``end_token``.
        """
        wrapped = text.strip()
        head, tail = cls.start_token, cls.end_token
        if (
            len(wrapped) < len(head) + len(tail)
            or not wrapped.startswith(head)
            or not wrapped.endswith(tail)
        ):
            raise ValueError(f"Text must start with {head!r} and end with {tail!r}")
        # An explicit end index stays correct when end_token is empty,
        # where a negative-length slice would drop the whole text.
        body = wrapped[len(head) : len(wrapped) - len(tail)].strip()
        tree = Grammar(cls.get_grammar()).parse(body)

        class ModelVisitor(NodeVisitor):
            """Collect the text of every node whose rule maps to a field."""

            def __init__(self, field_mappings):
                # The first field listed for a rule wins.
                self.rule_to_field = {}
                for model_field, rule in field_mappings.items():
                    self.rule_to_field.setdefault(rule, model_field)
                self.data = {}

            def generic_visit(self, node, visited_children):
                model_field = self.rule_to_field.get(node.expr_name)
                if model_field is not None:
                    self.data[model_field] = node.text.strip()
                return visited_children or node.text

        visitor = ModelVisitor(cls.field_mappings)
        visitor.visit(tree)
        return cls(**visitor.data)

    def generate(self) -> str:
        """Render this model as delimited text that ``parse`` accepts.

        The grammar's default rule is walked as an expression tree, since
        there is no parse tree to visit when generating. Rules named in
        ``field_mappings`` become the field's value and literals are copied.
        Any other regex is replaced by the first of space, newline, tab or
        empty string that it fully matches. Only the first alternative of a
        choice is used, and repeated or optional parts are emitted their
        minimum number of times. Recursive grammars are not supported.

        Raises:
            ValueError: if an unmapped regex matches none of the fillers.
        """
        # Local import: these names are not imported at module level.
        from parsimonious.expressions import Literal, OneOf, Regex, Sequence

        grammar = Grammar(self.get_grammar())
        rule_to_field = {}
        for model_field, rule in self.field_mappings.items():
            rule_to_field.setdefault(rule, model_field)
        fillers = (" ", "\n", "\t", "")

        def render(expr) -> str:
            model_field = rule_to_field.get(expr.name)
            if model_field is not None:
                return str(getattr(self, model_field))
            if isinstance(expr, Literal):
                return expr.literal
            if isinstance(expr, Regex):
                for candidate in fillers:
                    if expr.re.fullmatch(candidate):
                        return candidate
                raise ValueError(
                    f"Cannot generate text for rule {expr.name!r}; "
                    "add it to field_mappings"
                )
            if isinstance(expr, Sequence):
                return "".join(render(member) for member in expr.members)
            if isinstance(expr, OneOf):
                return render(expr.members[0])
            # Quantifiers and lookaheads: repeat the minimum number of
            # times, treating a missing ``min`` as zero.
            repeat = getattr(expr, "min", 0)
            members = getattr(expr, "members", ())
            return "".join(render(member) for member in members) * repeat

        generated_content = render(grammar.default_rule)
        return f"{self.start_token}\n{generated_content}\n{self.end_token}"
|
82
|
+
|
83
|
+
|
84
|
+
class PersonSpec(GrammarBasedModel):
    """Person record written as three labelled lines between ``<spec>`` tags."""

    name: str
    age: int
    city: str

    # Delimiters around the grammar-matched body.
    start_token = "<spec>"
    end_token = "</spec>"
    # Each model field is carried by the grammar rule of the same name.
    field_mappings = dict(name="name", age="age", city="city")

    grammar = r"""
start = name_line age_line city_line
name_line = "name:" ws name newline
age_line = "age is" ws age newline
city_line = "lives in" ws city newline?
name = ~r"[^\n]+"
age = ~r"\d+"
city = ~r"[^\n]+"
ws = ~r"\s+"
newline = ~r"\n"
"""

    @classmethod
    def get_grammar(cls):
        """Return the class-level grammar text."""
        return cls.grammar
|
107
|
+
|
108
|
+
|
109
|
+
if __name__ == "__main__":
    # Parse a hand-written sample.
    input_str = """<spec>
name: John Doe
age is 30
lives in New York
</spec>"""
    person = PersonSpec.parse(input_str)
    print("Parsed person:", person)

    # Render the parsed model back to text.
    generated_str = person.generate()
    print("\nGenerated string:", generated_str, sep="\n")

    # The rendered text is expected to parse back to an equal model.
    round_trip_person = PersonSpec.parse(generated_str)
    print("\nRound-trip parsed person:", round_trip_person)
    assert person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")
|
@@ -0,0 +1,120 @@
|
|
1
|
+
from typing import Dict, ClassVar
|
2
|
+
from pydantic import BaseModel
|
3
|
+
from parsimonious import Grammar, NodeVisitor
|
4
|
+
from abc import ABC, abstractmethod
|
5
|
+
|
6
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model read from, and written to, text described by a parsimonious grammar.

    The text form is ``start_token``, a body matching the grammar, then
    ``end_token``. Subclasses set the class variables below and implement
    ``get_grammar()``.
    """

    # Grammar text in parsimonious syntax.
    grammar: ClassVar[str]
    # Delimiters wrapped around the grammar-matched body.
    start_token: ClassVar[str]
    end_token: ClassVar[str]
    # Model field name -> grammar rule name carrying that field's text.
    field_mappings: ClassVar[Dict[str, str]]

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return the parsimonious grammar text for this model."""

    @classmethod
    def parse(cls, text: str) -> 'GrammarBasedModel':
        """Parse delimited ``text`` and build a model instance.

        Whitespace around the delimiters is ignored. The text of each node
        whose rule is named in ``field_mappings`` is stripped and used as
        the value of the mapped field.

        Raises:
            ValueError: if ``text`` is not wrapped in ``start_token`` and
                ``end_token``.
        """
        wrapped = text.strip()
        head, tail = cls.start_token, cls.end_token
        if (
            len(wrapped) < len(head) + len(tail)
            or not wrapped.startswith(head)
            or not wrapped.endswith(tail)
        ):
            raise ValueError(f"Text must start with {head!r} and end with {tail!r}")
        # An explicit end index stays correct when end_token is empty,
        # where a negative-length slice would drop the whole text.
        body = wrapped[len(head):len(wrapped) - len(tail)].strip()
        tree = Grammar(cls.get_grammar()).parse(body)

        class ModelVisitor(NodeVisitor):
            """Collect the text of every node whose rule maps to a field."""

            def __init__(self, field_mappings):
                # The first field listed for a rule wins.
                self.rule_to_field = {}
                for model_field, rule in field_mappings.items():
                    self.rule_to_field.setdefault(rule, model_field)
                self.data = {}

            def generic_visit(self, node, visited_children):
                model_field = self.rule_to_field.get(node.expr_name)
                if model_field is not None:
                    self.data[model_field] = node.text.strip()
                return visited_children or node.text

        visitor = ModelVisitor(cls.field_mappings)
        visitor.visit(tree)
        return cls(**visitor.data)

    def generate(self) -> str:
        """Render this model as delimited text that ``parse`` accepts.

        The grammar's default rule is walked as an expression tree, since
        there is no parse tree to visit when generating. Rules named in
        ``field_mappings`` become the field's value and literals are copied.
        Any other regex is replaced by the first of space, newline, tab or
        empty string that it fully matches. Only the first alternative of a
        choice is used, and repeated or optional parts are emitted their
        minimum number of times. Recursive grammars are not supported.

        Raises:
            ValueError: if an unmapped regex matches none of the fillers.
        """
        # Local import: these names are not imported at module level.
        from parsimonious.expressions import Literal, OneOf, Regex, Sequence

        grammar = Grammar(self.get_grammar())
        rule_to_field = {}
        for model_field, rule in self.field_mappings.items():
            rule_to_field.setdefault(rule, model_field)
        fillers = (" ", "\n", "\t", "")

        def render(expr) -> str:
            model_field = rule_to_field.get(expr.name)
            if model_field is not None:
                return str(getattr(self, model_field))
            if isinstance(expr, Literal):
                return expr.literal
            if isinstance(expr, Regex):
                for candidate in fillers:
                    if expr.re.fullmatch(candidate):
                        return candidate
                raise ValueError(
                    f"Cannot generate text for rule {expr.name!r}; "
                    "add it to field_mappings"
                )
            if isinstance(expr, Sequence):
                return ''.join(render(member) for member in expr.members)
            if isinstance(expr, OneOf):
                return render(expr.members[0])
            # Quantifiers and lookaheads: repeat the minimum number of
            # times, treating a missing ``min`` as zero.
            repeat = getattr(expr, 'min', 0)
            members = getattr(expr, 'members', ())
            return ''.join(render(member) for member in members) * repeat

        generated_content = render(grammar.default_rule)
        return f"{self.start_token}\n{generated_content}\n{self.end_token}"
|
69
|
+
|
70
|
+
class PersonSpec(GrammarBasedModel):
    """Person record written as three labelled lines between ``<spec>`` tags."""

    name: str
    age: int
    city: str

    # Delimiters around the grammar-matched body.
    start_token = "<spec>"
    end_token = "</spec>"
    # Each model field is carried by the grammar rule of the same name.
    field_mappings = dict(name="name", age="age", city="city")

    grammar = r"""
start = name_line age_line city_line
name_line = "name:" ws name newline
age_line = "age is" ws age newline
city_line = "lives in" ws city newline?
name = ~r"[^\n]+"
age = ~r"\d+"
city = ~r"[^\n]+"
ws = ~r"\s+"
newline = ~r"\n"
"""

    @classmethod
    def get_grammar(cls):
        """Return the class-level grammar text."""
        return cls.grammar
|
97
|
+
|
98
|
+
if __name__ == "__main__":
    # Parse a hand-written sample.
    input_str = """<spec>
name: John Doe
age is 30
lives in New York
</spec>"""
    person = PersonSpec.parse(input_str)
    print("Parsed person:", person)

    # Render the parsed model back to text.
    generated_str = person.generate()
    print("\nGenerated string:", generated_str, sep="\n")

    # The rendered text is expected to parse back to an equal model.
    round_trip_person = PersonSpec.parse(generated_str)
    print("\nRound-trip parsed person:", round_trip_person)
    assert person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")
|
119
|
+
|
120
|
+
|
@@ -0,0 +1,105 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict
|
3
|
+
|
4
|
+
from pyparsing import *
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class GrammarBasedModel(BaseModel, ABC):
    """Pydantic model read from, and written to, text described by a pyparsing grammar.

    The text form is ``start_token()``, a body matching ``grammar()``, then
    ``end_token()``. ``field_mappings()`` ties each model field to the
    results name of the grammar element that carries it.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> ParserElement:
        """Return the pyparsing expression for the body between the tokens."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens the text form."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes the text form."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name -> grammar results name."""

    @classmethod
    def parse(cls, text: str) -> "GrammarBasedModel":
        """Parse ``text`` and build a model instance.

        The whole text must match. For each mapped results name, the last
        token of the named result becomes the field value.
        """
        full_grammar = (
            Suppress(cls.start_token()) + cls.grammar() + Suppress(cls.end_token())
        )
        parsed = full_grammar.parseString(text, parseAll=True)
        return cls(
            **{
                field: parsed[token][-1]
                for field, token in cls.field_mappings().items()
            }
        )

    def generate(self) -> str:
        """Render this model as text, one line per grammar element.

        The expression returned by ``grammar()`` is walked in grammar order,
        so the output does not depend on subclass-specific prefixes. Inside
        an element whose results name is mapped to a field, literals are
        emitted verbatim and every other leaf is replaced by the field
        value, joined with single spaces. Only the first alternative of a
        choice is used.

        Raises:
            ValueError: if a mapped results name does not occur in the
                grammar, or a non-literal leaf lies outside every mapped
                element.
        """
        token_to_field: Dict[str, str] = {}
        for field, token in self.field_mappings().items():
            token_to_field.setdefault(token, field)

        missing = object()  # sentinel: no field value is in effect
        lines = [self.start_token()]
        seen = set()

        def render(expr, value):
            """Return the text fragments for ``expr`` using ``value`` for leaves."""
            if isinstance(expr, Literal):
                return [expr.match]
            if isinstance(expr, (MatchFirst, Or)):
                return render(expr.exprs[0], value)
            if isinstance(expr, ParseExpression):
                return [part for sub in expr.exprs for part in render(sub, value)]
            if isinstance(expr, ParseElementEnhance):
                return render(expr.expr, value)
            if value is missing:
                raise ValueError(
                    f"Grammar element {expr} is not covered by field_mappings()"
                )
            return [str(value)]

        def walk(expr):
            token = expr.resultsName
            if token in token_to_field:
                seen.add(token)
                value = getattr(self, token_to_field[token])
                lines.append(" ".join(render(expr, value)))
            elif isinstance(expr, (MatchFirst, Or)):
                walk(expr.exprs[0])
            elif isinstance(expr, ParseExpression):
                for sub in expr.exprs:
                    walk(sub)
            elif isinstance(expr, ParseElementEnhance):
                walk(expr.expr)
            else:
                lines.extend(render(expr, missing))

        walk(self.grammar())
        unused = sorted(set(token_to_field) - seen)
        if unused:
            raise ValueError(f"Tokens not found in grammar: {unused}")
        lines.append(self.end_token())
        return "\n".join(lines)
|
55
|
+
|
56
|
+
|
57
|
+
class PersonSpec(GrammarBasedModel):
    """Person record written as ``name: …``, ``age is …``, ``lives in …`` between ``<spec>`` tags."""

    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls):
        """Return the pyparsing expression for the three labelled entries."""

        def labelled(prefix, body, results_name):
            # A prefix literal followed by the value, grouped under a results name.
            return Group(Literal(prefix) + body)(results_name)

        name_entry = labelled("name:", Word(alphas), "name")
        age_entry = labelled("age is", Word(nums), "age")
        city_entry = labelled("lives in", Word(alphas), "city")
        return name_entry + age_entry + city_entry

    @classmethod
    def start_token(cls):
        """Return the opening tag."""
        return "<spec>"

    @classmethod
    def end_token(cls):
        """Return the closing tag."""
        return "</spec>"

    @classmethod
    def field_mappings(cls):
        """Map each model field to the results name of the same spelling."""
        return {field: field for field in ("name", "age", "city")}
|
80
|
+
|
81
|
+
|
82
|
+
if __name__ == "__main__":
    # Parse a hand-written sample.
    test_string = """
<spec>
name: John
age is 30
lives in Tokyo
</spec>
"""
    print("Parsed person:", PersonSpec.parse(test_string))

    # Render a model instance to text.
    new_person = PersonSpec(name="Alice", age=25, city="NewYork")
    generated_string = new_person.generate()
    print("\nGenerated string:", generated_string, sep="\n")

    # The rendered text is expected to parse back to an equal model.
    round_trip_person = PersonSpec.parse(generated_string)
    print("\nRound-trip parsed person:", round_trip_person)
    assert new_person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")
|