langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
from abc import abstractmethod
from typing import List

import tatsu

from langroid.agent.tool_message import ToolMessage


class GenericTool(ToolMessage):
    """
    Abstract class for a tool whose format is defined by a grammar,
    and not necessarily JSON-based.

    Especially useful for tools where we need an LLM to return code.
    Most LLMs, especially weaker ones, have significant issues
    (related to unescaped newlines, quotes, etc) when returning code within JSON.
    """

    @classmethod
    @abstractmethod
    def tool_grammar(cls) -> str:
        """Define the grammar for the `tool` rule (subclass-specific part)."""
        pass

    @classmethod
    def grammar(cls) -> str:
        """
        Full grammar, including templates for rendering.

        Combines a fixed base grammar (the `<spec>...</spec>` wrapper plus
        whitespace/word rules) with the subclass's `tool` rule.
        """
        base_grammar = """
        @@grammar :: CombinedGrammar
        @@whitespace :: /[ \\t]+/
        @@nameguard :: False

        start
            =
            "<spec>" ws?
            request:word ws?
            tool
            ws? "</spec>"
            {:
                "<spec> " {{request}} {{tool}} " </spec>"
            :}
            ;

        ws = /[\\s]+/ ;

        word = /[^\\s<>/]+/ ;
        """
        return base_grammar + "\n" + cls.tool_grammar()

    @classmethod
    def parse(cls, s: str):
        """
        Parse string `s` using the grammar and return an instance of the
        subclass.

        Raises:
            tatsu parse errors when `s` does not match the grammar;
            pydantic ValidationError when parsed fields fail validation.
        """
        # Build the parser using the provided grammar with model generation
        parser = tatsu.compile(cls.grammar(), asmodel=True)

        # Parse the input string to get a model object
        model = parser.parse(s)

        # Convert the model to a dict, keeping only the expected fields
        data = {k: getattr(model, k) for k in cls.__fields__ if hasattr(model, k)}

        return cls(**data)

    def format(self) -> str:
        """
        Generate a string representation of the instance based on the
        grammar's rendering templates.
        """
        # Build the parser using the provided grammar with model generation
        parser = tatsu.compile(self.grammar(), asmodel=True)

        # Create a model instance
        model_class = parser.model()
        model = model_class()

        # Set attributes from the instance, excluding fields not in the grammar
        for field in self.__fields__:
            if field == "purpose":
                continue  # 'purpose' is instruction metadata, never rendered
            setattr(model, field, getattr(self, field))

        # Render the model back to text using the grammar's templates
        return model.render()

    @classmethod
    def instructions(cls) -> str:
        """
        Generate formatting instructions for an LLM: a legend of placeholders
        and an example output rendered with those placeholders.
        """

        def generate_placeholders(field, prefix=""):
            """Map a (possibly nested) pydantic field to placeholder strings."""
            placeholders = {}
            if hasattr(field.type_, "__fields__"):
                # Nested model: recurse with a dotted prefix
                for sub_field in field.type_.__fields__.values():
                    placeholders.update(
                        generate_placeholders(
                            sub_field, prefix=f"{prefix}{field.name}."
                        )
                    )
            elif isinstance(field.type_, type) and issubclass(field.type_, list):
                # List field
                placeholders[field.name] = (
                    f"[<{field.name}_item1>,<{field.name}_item2>,...]"
                )
            else:
                placeholders[field.name] = f"<{prefix}{field.name}>"
            return placeholders

        # Generate placeholders for all fields
        # NOTE(review): nested sub-field names are keyed by their own name,
        # which may not exist in cls.__fields__ below -- verify with a nested
        # model before relying on this.
        placeholders = {}
        for field in cls.__fields__.values():
            placeholders.update(generate_placeholders(field))

        # Build the preamble
        preamble_lines = ["Placeholders for formatting:"]
        for field_name, placeholder in placeholders.items():
            field_type = cls.__fields__[field_name].type_.__name__
            preamble_lines.append(
                f"- `{placeholder}`: placeholder for `{field_name}` field "
                f"(type: `{field_type}`)"
            )
        preamble = "\n".join(preamble_lines)

        # Create a placeholder instance
        # NOTE(review): placeholder strings are assigned to every field, so
        # non-str fields must tolerate str values under pydantic validation --
        # confirm.
        placeholder_values = {
            field_name: placeholders[field_name] for field_name in cls.__fields__
        }
        placeholder_instance = cls(**placeholder_values)

        # Generate an example output with placeholders
        parser = tatsu.compile(cls.grammar())
        ast = placeholder_instance.to_ast()
        # Substitute the placeholders into the AST
        for key, value in ast.items():
            ast[key] = placeholders.get(key, value)
        # NOTE(review): a plain tatsu compiled parser is not documented to
        # expose `render` -- verify; rendering may require the model-based API.
        example_output = parser.render(ast)

        return f"{preamble}\n\nExample format:\n\n{example_output}"

    @classmethod
    def from_ast(cls, ast):
        """Convert a TatSu AST (a dict) into a model instance."""
        return cls(**ast)

    def to_ast(self):
        """Convert the model instance into an AST (dict) for rendering."""
        return self.dict()

    @classmethod
    def from_string(cls, input_string: str) -> "GenericTool":
        """Parse a string into an instance of this tool class."""
        # Bug fix: `parse` already returns a constructed instance; the
        # previous code unpacked it as if it were a dict (`cls(**...)`),
        # which would fail at runtime.
        parsed_instance = cls.parse(input_string)
        if parsed_instance:
            return parsed_instance
        raise ValueError("Invalid input string format")

    def to_string(self) -> str:
        """Convert this tool object to a string, using the grammar."""
        return self.format()

    @classmethod
    def find_candidates(cls, s: str) -> List[str]:
        """
        Find all substrings in `s` delimited by the start/end markers that
        also parse successfully under the grammar.
        """
        start = "<spec>"  # TODO get from TOOL_BEGIN, TOOL_END
        end = "</spec>"
        candidates = []
        start_len = len(start)
        end_len = len(end)
        index = 0
        while index < len(s):
            start_index = s.find(start, index)
            if start_index == -1:
                break
            end_index = s.find(end, start_index + start_len)
            if end_index == -1:
                break
            candidate = s[start_index : end_index + end_len]
            # Attempt to parse the candidate to ensure it's valid
            try:
                cls.parse(candidate)
                candidates.append(candidate)
            # NOTE(review): confirm `ParseException` exists in
            # tatsu.exceptions for the pinned TatSu version (FailedParse /
            # ParseError are the commonly documented names).
            except tatsu.exceptions.ParseException:
                # Ignore invalid candidates
                pass
            index = end_index + end_len
        return candidates

    def __str__(self):
        return self.to_string()


if __name__ == "__main__":
    # Example subclass

    class MyTool(GenericTool):
        request: str = "my_tool"
        purpose: str = "do something"
        value: int

        @classmethod
        def tool_grammar(cls) -> str:
            return """
            tool = "value:" value:number
                {:
                    "value:" {{value}}
                :}
                ;

            number = /\\d+/
                ;
            """

    my_tool = MyTool(value=42)

    # Generate the string from the instance using the grammar
    generated_string = my_tool.format()
    print("Formatted string:", generated_string)

    # Parse the string back into an instance using the grammar
    parsed_instance = MyTool.parse(generated_string)
    print("Parsed instance:", parsed_instance)
    print("Parsed value:", parsed_instance.value)

    # Extended example
    # Bug fix: this previously subclassed an undefined `GrammarBasedModel`
    # and called nonexistent `rest_grammar`/`generate` methods; it now uses
    # the GenericTool interface defined above.
    class ExtendedModel(GenericTool):
        request: str = "user_update"
        purpose: str = "update a user record"
        user_id: int
        action: str
        details: str

        @classmethod
        def tool_grammar(cls) -> str:
            return """
            tool = user_id:number ws action:word ws details:text
                {:
                    {{user_id}} " " {{action}} " " {{details}}
                :}
                ;

            number = /\\d+/ ;
            text = /.+/ ;
            """

    input_string_ext = "<spec> user_update 42 delete Account deletion</spec>"
    extended_instance = ExtendedModel.parse(input_string_ext)
    print("Parsed extended instance:", extended_instance)

    generated_string_ext = extended_instance.format()
    print("Generated extended string:", generated_string_ext)
from abc import ABC, abstractmethod

from lark import Lark, Token, Tree
from lark.reconstruct import Reconstructor

from langroid.pydantic_v1 import BaseModel


class GrammarBasedModel(BaseModel, ABC):
    """
    Pydantic model whose textual format is defined by a Lark grammar.

    Subclasses supply the grammar; `parse` builds an instance from text and
    stores the parse tree, and `generate` renders the (possibly modified)
    instance back to text by rewriting token values in that tree.
    """

    # NOTE(review): with pydantic, leading-underscore class attributes are
    # treated as private attributes -- confirm plain assignment in `parse`
    # works under the pinned pydantic version.
    _parse_tree: Tree = None  # parse tree from the most recent `parse`
    _parser: Lark = None  # parser used for that parse

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return this model's Lark grammar as a string."""
        pass

    @classmethod
    def get_token_field_mapping(cls):
        """
        Return a mapping from token types to model field names.

        Subclasses override this when their token types and field names
        differ; by default a token maps to its lowercased type name.
        """
        return {}

    @classmethod
    def parse(cls, text: str):
        """Parse `text` using the grammar and return a model instance."""
        parser = Lark(cls.get_grammar(), parser="lalr", propagate_positions=True)
        tree = parser.parse(text)
        model_instance = cls.from_tree(tree)
        model_instance._parse_tree = tree  # kept so `generate` can rebuild text
        model_instance._parser = parser
        return model_instance

    @classmethod
    def from_tree(cls, tree: Tree):
        """Convert a parse tree into a model instance."""
        return cls(**cls.tree_to_dict(tree))

    @classmethod
    def tree_to_dict(cls, tree: Tree):
        """Recursively convert a parse tree into a dict of field values."""
        data = {}
        token_field_mapping = cls.get_token_field_mapping()
        for child in tree.children:
            if isinstance(child, Tree):
                data.update(cls.tree_to_dict(child))
            elif isinstance(child, Token):
                field_name = token_field_mapping.get(child.type, child.type.lower())
                data[field_name] = child.value
        return data

    def generate(self) -> str:
        """
        Generate a string representation of the instance using the grammar.

        Raises:
            ValueError: if the instance was not created via `parse` (no
                stored tree/parser to reconstruct from).
        """
        if self._parse_tree is None or self._parser is None:
            raise ValueError("Cannot generate text without parsing first.")
        # Push current field values into the stored tree, then reconstruct
        self.update_tree_with_model_data(self._parse_tree)
        reconstructor = Reconstructor(self._parser)
        return reconstructor.reconstruct(self._parse_tree)

    def update_tree_with_model_data(self, tree: Tree):
        """Overwrite token values in `tree` with the current field values."""
        # (Removed an unused `reverse_mapping` dict that was rebuilt on every
        # recursive call without ever being read.)
        token_field_mapping = self.get_token_field_mapping()
        for idx, child in enumerate(tree.children):
            if isinstance(child, Tree):
                self.update_tree_with_model_data(child)
            elif isinstance(child, Token):
                field_name = token_field_mapping.get(child.type, child.type.lower())
                if hasattr(self, field_name):
                    new_value = getattr(self, field_name)
                    tree.children[idx] = Token(child.type, str(new_value))


# Example subclass
class MyModel(GrammarBasedModel):
    name: str
    age: int

    @classmethod
    def get_grammar(cls):
        return """
        start: "name:" NAME "age:" NUMBER
        %import common.CNAME -> NAME
        %import common.INT -> NUMBER
        %import common.WS
        %ignore WS
        """

    @classmethod
    def get_token_field_mapping(cls):
        return {
            "NAME": "name",
            "NUMBER": "age",
        }


# Usage example
if __name__ == "__main__":
    text = "name: Alice age: 30"
    model = MyModel.parse(text)
    print("Parsed Model:", model)

    # Generate string from the model
    generated_text = model.generate()
    print("Generated Text:", generated_text)

    # Modify the model, then regenerate
    model.age = 31
    model.name = "Bob"
    updated_text = model.generate()
    print("Updated Generated Text:", updated_text)
from abc import ABC, abstractmethod

from lark import Lark, Token, Tree
from lark.reconstruct import Reconstructor
from pydantic import BaseModel


class GrammarBasedModel(BaseModel, ABC):
    """
    Pydantic model whose textual form is described by a Lark grammar.

    `parse` builds an instance from text (keeping the parse tree around);
    `generate` writes the instance's current field values back into that
    tree and reconstructs the text.
    """

    # NOTE(review): pydantic treats underscore-prefixed class attributes as
    # private attrs -- confirm direct assignment below is permitted by the
    # pinned pydantic version.
    _parse_tree: Tree = None  # tree saved by the last `parse` call
    _parser: Lark = None  # parser saved alongside it

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return the model's grammar as a Lark grammar string."""
        pass

    @classmethod
    def get_token_field_mapping(cls):
        """
        Mapping from token types to field names; override in subclasses
        whose token types do not simply lowercase to their field names.
        """
        return {}

    @classmethod
    def parse(cls, text: str):
        """Parse `text` with the grammar into a model instance."""
        parser = Lark(cls.get_grammar(), parser='lalr', propagate_positions=True)
        tree = parser.parse(text)
        instance = cls.from_tree(tree)
        instance._parse_tree = tree  # needed later by `generate`
        instance._parser = parser
        return instance

    @classmethod
    def from_tree(cls, tree: Tree):
        """Build a model instance from a parse tree."""
        return cls(**cls.tree_to_dict(tree))

    @classmethod
    def tree_to_dict(cls, tree: Tree):
        """Walk the tree and collect token values into a field dict."""
        data = {}
        mapping = cls.get_token_field_mapping()
        for child in tree.children:
            if isinstance(child, Tree):
                data.update(cls.tree_to_dict(child))
            elif isinstance(child, Token):
                field_name = mapping.get(child.type, child.type.lower())
                data[field_name] = child.value
        return data

    def generate(self) -> str:
        """
        Render the instance back to text.

        Raises:
            ValueError: if the instance has no stored tree/parser (i.e. it
                was constructed directly rather than via `parse`).
        """
        if self._parse_tree is None or self._parser is None:
            raise ValueError("Cannot generate text without parsing first.")
        # Sync the tree with current model data, then reconstruct
        self.update_tree_with_model_data(self._parse_tree)
        return Reconstructor(self._parser).reconstruct(self._parse_tree)

    def update_tree_with_model_data(self, tree: Tree):
        """Replace token values in `tree` with this instance's field values."""
        # (Dropped the unused `reverse_mapping` local the original rebuilt on
        # every recursive call.)
        mapping = self.get_token_field_mapping()
        for idx, child in enumerate(tree.children):
            if isinstance(child, Tree):
                self.update_tree_with_model_data(child)
            elif isinstance(child, Token):
                field_name = mapping.get(child.type, child.type.lower())
                if hasattr(self, field_name):
                    tree.children[idx] = Token(
                        child.type, str(getattr(self, field_name))
                    )


# Example subclass
class MyModel(GrammarBasedModel):
    name: str
    age: int

    @classmethod
    def get_grammar(cls):
        return """
        start: "name:" NAME "age:" NUMBER
        %import common.CNAME -> NAME
        %import common.INT -> NUMBER
        %import common.WS
        %ignore WS
        """

    @classmethod
    def get_token_field_mapping(cls):
        return {
            'NAME': 'name',
            'NUMBER': 'age',
        }


# Usage example
if __name__ == "__main__":
    text = "name: Alice age: 30"
    model = MyModel.parse(text)
    print("Parsed Model:", model)

    # Generate string from the model
    generated_text = model.generate()
    print("Generated Text:", generated_text)

    # Modify the model and regenerate
    model.age = 31
    model.name = "Bob"
    updated_text = model.generate()
    print("Updated Generated Text:", updated_text)
from abc import ABC, abstractmethod
from typing import Dict

from lark import Lark, Transformer, Visitor

from langroid.pydantic_v1 import BaseModel


class GrammarBasedModel(BaseModel, ABC):
    """
    Pydantic model parsed from / rendered to text via a Lark grammar.

    Subclasses provide the grammar, the start rule, and a mapping from
    model field names to the grammar rule names that carry their values.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> str:
        """Return the Lark grammar string."""
        pass

    @classmethod
    @abstractmethod
    def start_rule(cls) -> str:
        """Return the name of the grammar's start rule."""
        pass

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping: model field name -> grammar rule name."""
        pass

    @classmethod
    def parse(cls, text: str) -> "GrammarBasedModel":
        """Parse `text` with the grammar and build a model instance."""
        parser = Lark(cls.grammar(), start=cls.start_rule())
        tree = parser.parse(text)

        class TreeToDict(Transformer):
            def __init__(self, field_mappings):
                # Bug fix: Transformer defines its own __init__ state; without
                # super().__init__() the later .transform() call breaks.
                super().__init__()
                self.field_mappings = field_mappings

            def __default__(self, data, children, meta):
                # Rules named in field_mappings yield {field: value}
                for field, rule in self.field_mappings.items():
                    if data == rule:
                        return {field: children[0]}
                return children

            def start(self, items):
                # Merge all per-field dicts produced by child rules
                result = {}
                for item in items:
                    if isinstance(item, dict):
                        result.update(item)
                return result

        data = TreeToDict(cls.field_mappings()).transform(tree)
        return cls(**data)

    def generate(self) -> str:
        """
        Render the instance as text by visiting a parse of the grammar text.

        NOTE(review): this parses the grammar source itself as input, which
        only succeeds for grammars that happen to match their own text --
        verify this approach before relying on round-trips.
        """
        parser = Lark(self.grammar(), start=self.start_rule())

        class ModelToString(Visitor):
            def __init__(self, model):
                super().__init__()
                self.model = model
                self.result = []

            def __default__(self, tree):
                if tree.data in self.model.field_mappings().values():
                    # Rule carries a field value: emit its literals + value
                    field = next(
                        k
                        for k, v in self.model.field_mappings().items()
                        if v == tree.data
                    )
                    value = getattr(self.model, field)
                    self.result.append(f"{' '.join(tree.children)} {value}")
                else:
                    for child in tree.children:
                        if isinstance(child, str):
                            self.result.append(child)
                        else:
                            self.visit(child)

        visitor = ModelToString(self)
        tree = parser.parse(" ".join(self.grammar().split()))
        visitor.visit(tree)
        return " ".join(visitor.result)


class PersonSpec(GrammarBasedModel):
    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls):
        return """
        start: "<spec>" name age city "</spec>"
        name: "name:" WORD
        age: "age" "is" NUMBER
        city: "lives" "in" WORD
        %import common.WORD
        %import common.NUMBER
        %import common.WS
        %ignore WS
        """

    @classmethod
    def start_rule(cls):
        return "start"

    @classmethod
    def field_mappings(cls):
        return {"name": "name", "age": "age", "city": "city"}


if __name__ == "__main__":
    # Test parsing
    test_string = """
    <spec>
    name: John
    age is 30
    lives in Tokyo
    </spec>
    """
    parsed_person = PersonSpec.parse(test_string)
    print("Parsed person:", parsed_person)

    # Test generating
    new_person = PersonSpec(name="Alice", age=25, city="NewYork")
    generated_string = new_person.generate()
    print("\nGenerated string:")
    print(generated_string)

    # Test round-trip
    round_trip_person = PersonSpec.parse(generated_string)
    print("\nRound-trip parsed person:", round_trip_person)

    assert new_person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")

    # Test with modified grammar
    class ModifiedPersonSpec(PersonSpec):
        @classmethod
        def grammar(cls):
            return """
            start: "<person>" name age city "</person>"
            name: "Name:" WORD
            age: "Age:" NUMBER "years"
            city: "City:" WORD
            %import common.WORD
            %import common.NUMBER
            %import common.WS
            %ignore WS
            """

    modified_person = ModifiedPersonSpec(name="Bob", age=40, city="London")
    modified_string = modified_person.generate()
    print("\nModified grammar generated string:")
    print(modified_string)

    parsed_modified = ModifiedPersonSpec.parse(modified_string)
    print("Parsed modified person:", parsed_modified)
    assert modified_person == parsed_modified, "Modified grammar round-trip failed"
    print("Modified grammar round-trip test passed!")