langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. langroid/agent/md_tool_message_grammar.py +455 -0
  2. langroid/agent/tools/code_file_tool_parse.py +150 -0
  3. langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
  4. langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
  5. langroid/agent/tools/formatted_model_custom.py +150 -0
  6. langroid/agent/tools/formatted_model_custom2.py +168 -0
  7. langroid/agent/tools/formatted_model_custom3.py +279 -0
  8. langroid/agent/tools/formatted_model_custom4.py +395 -0
  9. langroid/agent/tools/formatted_model_jinja.py +133 -0
  10. langroid/agent/tools/formatted_model_jinja.py-e +122 -0
  11. langroid/agent/tools/formatted_model_jinja2.py +145 -0
  12. langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
  13. langroid/agent/tools/formatted_model_lark.py +0 -0
  14. langroid/agent/tools/formatted_model_lark2.py +168 -0
  15. langroid/agent/tools/formatted_model_parse.py +105 -0
  16. langroid/agent/tools/formatted_model_parse.py-e +98 -0
  17. langroid/agent/tools/formatted_model_parse2.py +113 -0
  18. langroid/agent/tools/formatted_model_parse2.py-e +109 -0
  19. langroid/agent/tools/formatted_model_parse3.py +114 -0
  20. langroid/agent/tools/formatted_model_parse3.py-e +110 -0
  21. langroid/agent/tools/formatted_model_parsimon.py +194 -0
  22. langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
  23. langroid/agent/tools/formatted_model_pyparsing.py +169 -0
  24. langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
  25. langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
  26. langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
  27. langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
  28. langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
  29. langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
  30. langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
  31. langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
  32. langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
  33. langroid/agent/tools/formatted_model_regex.py +246 -0
  34. langroid/agent/tools/formatted_model_regex.py-e +248 -0
  35. langroid/agent/tools/formatted_model_regex2.py +250 -0
  36. langroid/agent/tools/formatted_model_regex2.py-e +253 -0
  37. langroid/agent/tools/formatted_model_tatsu.py +172 -0
  38. langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
  39. langroid/agent/tools/formatted_model_template.py +217 -0
  40. langroid/agent/tools/formatted_model_template.py-e +200 -0
  41. langroid/agent/tools/formatted_model_xml.py +178 -0
  42. langroid/agent/tools/formatted_model_xml2.py +178 -0
  43. langroid/agent/tools/formatted_model_xml3.py +132 -0
  44. langroid/agent/tools/formatted_model_xml4.py +130 -0
  45. langroid/agent/tools/formatted_model_xml5.py +130 -0
  46. langroid/agent/tools/formatted_model_xml6.py +113 -0
  47. langroid/agent/tools/formatted_model_xml7.py +117 -0
  48. langroid/agent/tools/formatted_model_xml8.py +164 -0
  49. langroid/agent/tools/generic_tool.py +165 -0
  50. langroid/agent/tools/generic_tool_tatsu.py +275 -0
  51. langroid/agent/tools/grammar_based_model.py +132 -0
  52. langroid/agent/tools/grammar_based_model.py-e +128 -0
  53. langroid/agent/tools/grammar_based_model_lark.py +156 -0
  54. langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
  55. langroid/agent/tools/grammar_based_model_parse.py +86 -0
  56. langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
  57. langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
  58. langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
  59. langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
  60. langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
  61. langroid/agent/tools/grammar_based_model_regex.py +139 -0
  62. langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
  63. langroid/agent/tools/grammar_based_model_regex2.py +124 -0
  64. langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
  65. langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
  66. langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
  67. langroid/agent/tools/lark_earley_example.py +135 -0
  68. langroid/agent/tools/lark_earley_example.py-e +117 -0
  69. langroid/agent/tools/lark_example.py +72 -0
  70. langroid/agent/tools/parse_example.py +76 -0
  71. langroid/agent/tools/parse_example2.py +87 -0
  72. langroid/agent/tools/parse_example3.py +42 -0
  73. langroid/agent/tools/parse_test.py +791 -0
  74. langroid/agent/xml_tool_message.py +106 -0
  75. langroid/language_models/openai_gpt.py +6 -1
  76. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
  77. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
  78. pyproject.toml +1 -1
  79. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
  80. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
# langroid/agent/tools/generic_tool_tatsu.py
from abc import abstractmethod
from typing import List

import tatsu

from langroid.agent.tool_message import ToolMessage


class GenericTool(ToolMessage):
    """
    Abstract class for a tool whose format is defined by a grammar,
    and not necessarily JSON-based.

    Especially useful for tools where we need an LLM to return code.
    Most LLMs, especially weaker ones, have significant issues
    (related to unescaped newlines, quotes, etc.) when returning code
    within JSON.
    """

    @classmethod
    @abstractmethod
    def tool_grammar(cls) -> str:
        """Return the TatSu grammar fragment defining the `tool` rule."""
        pass

    @classmethod
    def grammar(cls) -> str:
        """
        Full TatSu grammar, including templates for rendering: the shared
        `<spec> ... </spec>` wrapper rules plus the subclass-specific
        `tool` rule supplied by `tool_grammar()`.
        """
        base_grammar = """
        @@grammar :: CombinedGrammar
        @@whitespace :: /[ \\t]+/
        @@nameguard :: False

        start
            =
            "<spec>" ws?
            request:word ws?
            tool
            ws? "</spec>"
            {:
            "<spec> " {{request}} {{tool}} " </spec>"
            :}
            ;

        ws = /[\\s]+/ ;

        word = /[^\\s<>/]+/ ;
        """
        return base_grammar + "\n" + cls.tool_grammar()

    @classmethod
    def parse(cls, s: str) -> "GenericTool":
        """
        Parse string `s` using the grammar and return an instance of the
        subclass.  Raises a tatsu parse error on malformed input.
        """
        # Build the parser with model generation enabled
        parser = tatsu.compile(cls.grammar(), asmodel=True)
        model = parser.parse(s)
        # Keep only attributes corresponding to declared pydantic fields
        data = {k: getattr(model, k) for k in cls.__fields__ if hasattr(model, k)}
        return cls(**data)

    def format(self) -> str:
        """
        Generate the string representation of this instance using the
        grammar's rendering templates.
        """
        parser = tatsu.compile(self.grammar(), asmodel=True)
        model_class = parser.model()
        model = model_class()
        for field in self.__fields__:
            if field == "purpose":
                continue  # 'purpose' is instruction-only; never rendered
            setattr(model, field, getattr(self, field))
        return model.render()

    @classmethod
    def instructions(cls) -> str:
        """
        Generate formatting instructions for an LLM: a list of field
        placeholders plus an example output built from them.

        NOTE(review): `parser.render(ast)` at the end assumes a tatsu
        parser exposes a `render` method -- confirm against the tatsu
        version in use.
        """

        def generate_placeholders(field, prefix=""):
            # Recursively build `<prefix.field>` placeholders; nested
            # pydantic models expand into dotted placeholders.
            placeholders = {}
            if hasattr(field.type_, "__fields__"):
                for sub_field_name, sub_field in field.type_.__fields__.items():
                    placeholders.update(
                        generate_placeholders(sub_field, prefix=f"{prefix}{field.name}.")
                    )
            elif isinstance(field.type_, type) and issubclass(field.type_, list):
                placeholders[field.name] = (
                    f"[<{field.name}_item1>,<{field.name}_item2>,...]"
                )
            else:
                placeholders[field.name] = f"<{prefix}{field.name}>"
            return placeholders

        placeholders = {}
        for field_name, field in cls.__fields__.items():
            placeholders.update(generate_placeholders(field))

        preamble_lines = ["Placeholders for formatting:"]
        for field_name, placeholder in placeholders.items():
            field_type = cls.__fields__[field_name].type_.__name__
            preamble_lines.append(
                f"- `{placeholder}`: placeholder for `{field_name}` field "
                f"(type: `{field_type}`)"
            )
        preamble = "\n".join(preamble_lines)

        # Build an instance whose field values are the placeholders
        placeholder_values = {}
        for field_name in cls.__fields__:
            placeholder_values[field_name] = placeholders[field_name]
        placeholder_instance = cls(**placeholder_values)

        parser = tatsu.compile(cls.grammar())
        ast = placeholder_instance.to_ast()
        for key, value in ast.items():
            ast[key] = placeholders.get(key, value)
        example_output = parser.render(ast)

        return f"{preamble}\n\nExample format:\n\n{example_output}"

    @classmethod
    def from_ast(cls, ast) -> "GenericTool":
        """Convert a TatSu AST (a dict) into a model instance."""
        return cls(**ast)

    def to_ast(self):
        """Convert this instance into an AST (dict) for TatSu rendering."""
        return self.dict()

    @classmethod
    def from_string(cls, input_string: str) -> "GenericTool":
        """
        Parse a string into an instance of this tool class.

        `parse()` already returns a fully constructed instance, so it is
        returned directly (the previous `cls(**parsed_data)` re-construction
        would fail when given a model instance rather than a dict).
        """
        parsed = cls.parse(input_string)
        if parsed is not None:
            return parsed
        raise ValueError("Invalid input string format")

    def to_string(self) -> str:
        """Convert this instance to its grammar-defined string form."""
        return self.format()

    @classmethod
    def find_candidates(cls, s: str) -> List[str]:
        """
        Find all substrings of `s` that start with the start marker and
        end with the end marker, keeping only those that parse cleanly.
        """
        start = "<spec>"  # TODO get from TOOL_BEGIN, TOOL_END
        end = "</spec>"
        candidates = []
        start_len = len(start)
        end_len = len(end)
        index = 0
        while index < len(s):
            start_index = s.find(start, index)
            if start_index == -1:
                break
            end_index = s.find(end, start_index + start_len)
            if end_index == -1:
                break
            candidate = s[start_index : end_index + end_len]
            try:
                cls.parse(candidate)
                candidates.append(candidate)
            except Exception:
                # tatsu raises FailedParse (there is no
                # tatsu.exceptions.ParseException); catch broadly so an
                # invalid candidate is simply skipped.
                pass
            index = end_index + end_len
        return candidates

    def __str__(self) -> str:
        return self.to_string()


if __name__ == "__main__":
    # Example subclass with a single integer field

    class MyTool(GenericTool):
        request: str = "my_tool"
        purpose: str = "do something"
        value: int

        @classmethod
        def tool_grammar(cls) -> str:
            return """
            tool = "value:" value:number
                {:
                "value:" {{value}}
                :}
                ;

            number = /\\d+/
                ;
            """

    my_tool = MyTool(value=42)

    # Generate the string from the instance using the grammar
    generated_string = my_tool.format()
    print("Formatted string:", generated_string)

    # Parse the string back into an instance using the grammar
    parsed_instance = MyTool.parse(generated_string)
    print("Parsed instance:", parsed_instance)
    print("Parsed value:", parsed_instance.value)

    # Extended example: multiple fields.  (The original subclassed an
    # undefined `GrammarBasedModel` and called a nonexistent `generate()`;
    # it is rewritten here as a proper GenericTool subclass.)
    class ExtendedTool(GenericTool):
        request: str = "user_update"
        purpose: str = "update a user"
        user_id: int
        action: str
        details: str

        @classmethod
        def tool_grammar(cls) -> str:
            return """
            tool = user_id:number ws action:word ws details:text
                {:
                {{user_id}} " " {{action}} " " {{details}}
                :}
                ;

            number = /\\d+/ ;
            text = /.+/ ;
            """

    input_string_ext = "<spec> user_update 42 delete Account deletion</spec>"
    extended_instance = ExtendedTool.parse(input_string_ext)
    print("Parsed extended instance:", extended_instance)

    generated_string_ext = extended_instance.format()
    print("Generated extended string:", generated_string_ext)
# langroid/agent/tools/grammar_based_model_lark.py
from abc import ABC, abstractmethod

from lark import Lark, Token, Tree
from lark.reconstruct import Reconstructor

from langroid.pydantic_v1 import BaseModel


class GrammarBasedModel(BaseModel, ABC):
    """
    Pydantic model whose textual format is described by a Lark grammar.

    `parse()` builds an instance from text and caches the parse tree;
    `generate()` writes the (possibly modified) field values back into
    that tree and reconstructs the text with Lark's Reconstructor.
    """

    # Cached parse state, set by `parse()`; these are not pydantic fields.
    _parse_tree: Tree = None
    _parser: Lark = None

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return this model's Lark grammar as a string."""
        pass

    @classmethod
    def get_token_field_mapping(cls):
        """
        Mapping from token types to model field names.  Subclasses
        override this when their token types and field names differ.
        """
        return {}

    @classmethod
    def parse(cls, text: str):
        """Parse `text` with the grammar and return a model instance."""
        parser = Lark(cls.get_grammar(), parser="lalr", propagate_positions=True)
        tree = parser.parse(text)
        model_instance = cls.from_tree(tree)
        # pydantic v1's __setattr__ rejects assignment to attributes that
        # are not declared fields, so bypass it for the cached parse state.
        object.__setattr__(model_instance, "_parse_tree", tree)
        object.__setattr__(model_instance, "_parser", parser)
        return model_instance

    @classmethod
    def from_tree(cls, tree: Tree):
        """Convert a parse tree into a model instance."""
        return cls(**cls.tree_to_dict(tree))

    @classmethod
    def tree_to_dict(cls, tree: Tree):
        """Recursively collect field values from a parse tree."""
        data = {}
        token_field_mapping = cls.get_token_field_mapping()
        for child in tree.children:
            if isinstance(child, Tree):
                data.update(cls.tree_to_dict(child))
            elif isinstance(child, Token):
                # Fall back to the lowercased token type as the field name
                field_name = token_field_mapping.get(child.type, child.type.lower())
                data[field_name] = child.value
        return data

    def generate(self) -> str:
        """
        Generate a string for this instance using the grammar.
        Requires a prior `parse()` (the cached tree is the template).
        """
        if self._parse_tree is None or self._parser is None:
            raise ValueError("Cannot generate text without parsing first.")
        # Push current field values into the cached parse tree
        self.update_tree_with_model_data(self._parse_tree)
        reconstructor = Reconstructor(self._parser)
        return reconstructor.reconstruct(self._parse_tree)

    def update_tree_with_model_data(self, tree: Tree):
        """Write current model field values back into the parse tree."""
        token_field_mapping = self.get_token_field_mapping()
        for idx, child in enumerate(tree.children):
            if isinstance(child, Tree):
                self.update_tree_with_model_data(child)
            elif isinstance(child, Token):
                field_name = token_field_mapping.get(child.type, child.type.lower())
                if hasattr(self, field_name):
                    new_value = getattr(self, field_name)
                    tree.children[idx] = Token(child.type, str(new_value))


# Example subclass
class MyModel(GrammarBasedModel):
    name: str
    age: int

    @classmethod
    def get_grammar(cls):
        return """
            start: "name:" NAME "age:" NUMBER
            %import common.CNAME -> NAME
            %import common.INT -> NUMBER
            %import common.WS
            %ignore WS
        """

    @classmethod
    def get_token_field_mapping(cls):
        return {
            "NAME": "name",
            "NUMBER": "age",
        }


# Usage example
if __name__ == "__main__":
    text = "name: Alice age: 30"
    model = MyModel.parse(text)
    print("Parsed Model:", model)

    # Generate string from the model
    generated_text = model.generate()
    print("Generated Text:", generated_text)

    # Modify the model
    model.age = 31
    model.name = "Bob"

    # Generate updated string
    updated_text = model.generate()
    print("Updated Generated Text:", updated_text)
# langroid/agent/tools/grammar_based_model_lark.py-e
# (sed backup of the previous file; kept functionally equivalent)
from abc import ABC, abstractmethod

from lark import Lark, Token, Tree
from lark.reconstruct import Reconstructor

from pydantic import BaseModel


class GrammarBasedModel(BaseModel, ABC):
    """
    Pydantic model whose textual format is described by a Lark grammar:
    `parse()` builds an instance from text, `generate()` reconstructs
    text from the (possibly modified) instance.
    """

    # Cached parse state, set by `parse()`; not pydantic fields.
    _parse_tree: Tree = None
    _parser: Lark = None

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return this model's Lark grammar as a string."""
        pass

    @classmethod
    def get_token_field_mapping(cls):
        """
        Mapping from token types to model field names; override when
        token types and field names differ.
        """
        return {}

    @classmethod
    def parse(cls, text: str):
        """Parse `text` with the grammar and return a model instance."""
        parser = Lark(cls.get_grammar(), parser='lalr', propagate_positions=True)
        tree = parser.parse(text)
        model_instance = cls.from_tree(tree)
        # pydantic v1's __setattr__ rejects non-field attributes, so
        # bypass it when caching the parse state on the instance.
        object.__setattr__(model_instance, "_parse_tree", tree)
        object.__setattr__(model_instance, "_parser", parser)
        return model_instance

    @classmethod
    def from_tree(cls, tree: Tree):
        """Convert a parse tree into a model instance."""
        return cls(**cls.tree_to_dict(tree))

    @classmethod
    def tree_to_dict(cls, tree: Tree):
        """Recursively collect field values from a parse tree."""
        data = {}
        token_field_mapping = cls.get_token_field_mapping()
        for child in tree.children:
            if isinstance(child, Tree):
                data.update(cls.tree_to_dict(child))
            elif isinstance(child, Token):
                field_name = token_field_mapping.get(child.type, child.type.lower())
                data[field_name] = child.value
        return data

    def generate(self) -> str:
        """Generate text for this instance (requires a prior `parse()`)."""
        if self._parse_tree is None or self._parser is None:
            raise ValueError("Cannot generate text without parsing first.")
        self.update_tree_with_model_data(self._parse_tree)
        reconstructor = Reconstructor(self._parser)
        return reconstructor.reconstruct(self._parse_tree)

    def update_tree_with_model_data(self, tree: Tree):
        """Write current model field values back into the parse tree."""
        token_field_mapping = self.get_token_field_mapping()
        for idx, child in enumerate(tree.children):
            if isinstance(child, Tree):
                self.update_tree_with_model_data(child)
            elif isinstance(child, Token):
                field_name = token_field_mapping.get(child.type, child.type.lower())
                if hasattr(self, field_name):
                    new_value = getattr(self, field_name)
                    tree.children[idx] = Token(child.type, str(new_value))


# Example subclass
class MyModel(GrammarBasedModel):
    name: str
    age: int

    @classmethod
    def get_grammar(cls):
        return """
            start: "name:" NAME "age:" NUMBER
            %import common.CNAME -> NAME
            %import common.INT -> NUMBER
            %import common.WS
            %ignore WS
        """

    @classmethod
    def get_token_field_mapping(cls):
        return {
            'NAME': 'name',
            'NUMBER': 'age',
        }


# Usage example
if __name__ == "__main__":
    text = "name: Alice age: 30"
    model = MyModel.parse(text)
    print("Parsed Model:", model)

    # Generate string from the model
    generated_text = model.generate()
    print("Generated Text:", generated_text)

    # Modify the model
    model.age = 31
    model.name = "Bob"

    # Generate updated string
    updated_text = model.generate()
    print("Updated Generated Text:", updated_text)
# langroid/agent/tools/grammar_based_model_lark.py (Transformer/Visitor variant)
from abc import ABC, abstractmethod
from typing import Dict

from lark import Lark, Transformer, Visitor

from langroid.pydantic_v1 import BaseModel


class GrammarBasedModel(BaseModel, ABC):
    """
    Pydantic model whose textual format is described by a Lark grammar.
    Subclasses supply the grammar, the start rule, and a mapping from
    model field names to grammar rule names.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> str:
        """Lark grammar defining the model's text format."""
        pass

    @classmethod
    @abstractmethod
    def start_rule(cls) -> str:
        """Name of the grammar's start rule."""
        pass

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Map model field names to the grammar rule names that hold them."""
        pass

    @classmethod
    def parse(cls, text: str) -> "GrammarBasedModel":
        """Parse `text` with the grammar and build a model instance."""
        parser = Lark(cls.grammar(), start=cls.start_rule())
        tree = parser.parse(text)

        class TreeToDict(Transformer):
            def __init__(self, field_mappings):
                # Lark's Transformer requires base initialization (it
                # configures token visiting); the original omitted this.
                super().__init__()
                self.field_mappings = field_mappings

            def __default__(self, data, children, meta):
                # A rule mapped to a field contributes {field: value}
                for field, rule in self.field_mappings.items():
                    if data == rule:
                        return {field: children[0]}
                return children

            def start(self, items):
                # Merge the per-field dicts produced by the mapped rules
                result = {}
                for item in items:
                    if isinstance(item, dict):
                        result.update(item)
                return result

        transformer = TreeToDict(cls.field_mappings())
        data = transformer.transform(tree)
        return cls(**data)

    def generate(self) -> str:
        # NOTE(review): this method feeds the *grammar text itself* back
        # into the parser (`parser.parse(" ".join(self.grammar().split()))`),
        # which is not a sentence of the grammar and will fail for any
        # non-trivial grammar.  Left structurally as-is pending a proper
        # template-based renderer -- confirm intended behavior before use.
        parser = Lark(self.grammar(), start=self.start_rule())

        class ModelToString(Visitor):
            def __init__(self, model):
                self.model = model
                self.result = []

            def __default__(self, tree):
                if tree.data in self.model.field_mappings().values():
                    field = next(
                        k
                        for k, v in self.model.field_mappings().items()
                        if v == tree.data
                    )
                    value = getattr(self.model, field)
                    self.result.append(f"{' '.join(tree.children)} {value}")
                else:
                    for child in tree.children:
                        if isinstance(child, str):
                            self.result.append(child)
                        else:
                            self.visit(child)

        visitor = ModelToString(self)
        tree = parser.parse(" ".join(self.grammar().split()))
        visitor.visit(tree)
        return " ".join(visitor.result)


class PersonSpec(GrammarBasedModel):
    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls):
        return """
            start: "<spec>" name age city "</spec>"
            name: "name:" WORD
            age: "age" "is" NUMBER
            city: "lives" "in" WORD
            %import common.WORD
            %import common.NUMBER
            %import common.WS
            %ignore WS
        """

    @classmethod
    def start_rule(cls):
        return "start"

    @classmethod
    def field_mappings(cls):
        return {"name": "name", "age": "age", "city": "city"}


if __name__ == "__main__":
    # Test parsing
    test_string = """
    <spec>
    name: John
    age is 30
    lives in Tokyo
    </spec>
    """
    parsed_person = PersonSpec.parse(test_string)
    print("Parsed person:", parsed_person)

    # Test generating
    new_person = PersonSpec(name="Alice", age=25, city="NewYork")
    generated_string = new_person.generate()
    print("\nGenerated string:")
    print(generated_string)

    # Test round-trip
    round_trip_person = PersonSpec.parse(generated_string)
    print("\nRound-trip parsed person:", round_trip_person)

    assert new_person == round_trip_person, "Round-trip parsing failed"
    print("\nRound-trip test passed!")

    # Test with modified grammar
    class ModifiedPersonSpec(PersonSpec):
        @classmethod
        def grammar(cls):
            return """
                start: "<person>" name age city "</person>"
                name: "Name:" WORD
                age: "Age:" NUMBER "years"
                city: "City:" WORD
                %import common.WORD
                %import common.NUMBER
                %import common.WS
                %ignore WS
            """

    modified_person = ModifiedPersonSpec(name="Bob", age=40, city="London")
    modified_string = modified_person.generate()
    print("\nModified grammar generated string:")
    print(modified_string)

    parsed_modified = ModifiedPersonSpec.parse(modified_string)
    print("Parsed modified person:", parsed_modified)
    assert modified_person == parsed_modified, "Modified grammar round-trip failed"
    print("Modified grammar round-trip test passed!")