langroid 0.16.7__py3-none-any.whl → 0.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. langroid/agent/base.py +45 -21
  2. langroid/agent/chat_agent.py +22 -14
  3. langroid/agent/chat_document.py +22 -13
  4. langroid/agent/tool_message.py +11 -11
  5. langroid/agent/tools/file_tools.py +234 -0
  6. langroid/agent/xml_tool_message.py +179 -45
  7. langroid/utils/constants.py +2 -0
  8. langroid/utils/git_utils.py +251 -0
  9. langroid/utils/system.py +78 -0
  10. {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/METADATA +6 -3
  11. {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/RECORD +14 -89
  12. pyproject.toml +3 -2
  13. langroid/agent/md_tool_message_grammar.py +0 -455
  14. langroid/agent/tools/code_file_tool_parse.py +0 -150
  15. langroid/agent/tools/code_file_tool_pyparsing.py +0 -194
  16. langroid/agent/tools/code_file_tool_pyparsing2.py +0 -199
  17. langroid/agent/tools/extract_tool.py +0 -96
  18. langroid/agent/tools/formatted_model_custom.py +0 -150
  19. langroid/agent/tools/formatted_model_custom2.py +0 -168
  20. langroid/agent/tools/formatted_model_custom3.py +0 -279
  21. langroid/agent/tools/formatted_model_custom4.py +0 -395
  22. langroid/agent/tools/formatted_model_jinja.py +0 -133
  23. langroid/agent/tools/formatted_model_jinja.py-e +0 -122
  24. langroid/agent/tools/formatted_model_jinja2.py +0 -145
  25. langroid/agent/tools/formatted_model_jinja2.py-e +0 -135
  26. langroid/agent/tools/formatted_model_lark.py +0 -0
  27. langroid/agent/tools/formatted_model_lark2.py +0 -168
  28. langroid/agent/tools/formatted_model_parse.py +0 -105
  29. langroid/agent/tools/formatted_model_parse.py-e +0 -98
  30. langroid/agent/tools/formatted_model_parse2.py +0 -113
  31. langroid/agent/tools/formatted_model_parse2.py-e +0 -109
  32. langroid/agent/tools/formatted_model_parse3.py +0 -114
  33. langroid/agent/tools/formatted_model_parse3.py-e +0 -110
  34. langroid/agent/tools/formatted_model_parsimon.py +0 -194
  35. langroid/agent/tools/formatted_model_parsimon.py-e +0 -186
  36. langroid/agent/tools/formatted_model_pyparsing.py +0 -169
  37. langroid/agent/tools/formatted_model_pyparsing.py-e +0 -149
  38. langroid/agent/tools/formatted_model_pyparsing2.py +0 -159
  39. langroid/agent/tools/formatted_model_pyparsing2.py-e +0 -143
  40. langroid/agent/tools/formatted_model_pyparsing3.py +0 -133
  41. langroid/agent/tools/formatted_model_pyparsing3.py-e +0 -121
  42. langroid/agent/tools/formatted_model_pyparsing4.py +0 -213
  43. langroid/agent/tools/formatted_model_pyparsing4.py-e +0 -176
  44. langroid/agent/tools/formatted_model_pyparsing5.py +0 -173
  45. langroid/agent/tools/formatted_model_pyparsing5.py-e +0 -142
  46. langroid/agent/tools/formatted_model_regex.py +0 -246
  47. langroid/agent/tools/formatted_model_regex.py-e +0 -248
  48. langroid/agent/tools/formatted_model_regex2.py +0 -250
  49. langroid/agent/tools/formatted_model_regex2.py-e +0 -253
  50. langroid/agent/tools/formatted_model_tatsu.py +0 -172
  51. langroid/agent/tools/formatted_model_tatsu.py-e +0 -160
  52. langroid/agent/tools/formatted_model_template.py +0 -217
  53. langroid/agent/tools/formatted_model_template.py-e +0 -200
  54. langroid/agent/tools/formatted_model_xml.py +0 -178
  55. langroid/agent/tools/formatted_model_xml2.py +0 -178
  56. langroid/agent/tools/formatted_model_xml3.py +0 -132
  57. langroid/agent/tools/formatted_model_xml4.py +0 -130
  58. langroid/agent/tools/formatted_model_xml5.py +0 -130
  59. langroid/agent/tools/formatted_model_xml6.py +0 -113
  60. langroid/agent/tools/formatted_model_xml7.py +0 -117
  61. langroid/agent/tools/formatted_model_xml8.py +0 -164
  62. langroid/agent/tools/generator_tool.py +0 -20
  63. langroid/agent/tools/generic_tool.py +0 -165
  64. langroid/agent/tools/generic_tool_tatsu.py +0 -275
  65. langroid/agent/tools/grammar_based_model.py +0 -132
  66. langroid/agent/tools/grammar_based_model.py-e +0 -128
  67. langroid/agent/tools/grammar_based_model_lark.py +0 -156
  68. langroid/agent/tools/grammar_based_model_lark.py-e +0 -153
  69. langroid/agent/tools/grammar_based_model_parse.py +0 -86
  70. langroid/agent/tools/grammar_based_model_parse.py-e +0 -80
  71. langroid/agent/tools/grammar_based_model_parsimonious.py +0 -129
  72. langroid/agent/tools/grammar_based_model_parsimonious.py-e +0 -120
  73. langroid/agent/tools/grammar_based_model_pyparsing.py +0 -105
  74. langroid/agent/tools/grammar_based_model_pyparsing.py-e +0 -103
  75. langroid/agent/tools/grammar_based_model_regex.py +0 -139
  76. langroid/agent/tools/grammar_based_model_regex.py-e +0 -130
  77. langroid/agent/tools/grammar_based_model_regex2.py +0 -124
  78. langroid/agent/tools/grammar_based_model_regex2.py-e +0 -116
  79. langroid/agent/tools/grammar_based_model_tatsu.py +0 -80
  80. langroid/agent/tools/grammar_based_model_tatsu.py-e +0 -77
  81. langroid/agent/tools/lark_earley_example.py +0 -135
  82. langroid/agent/tools/lark_earley_example.py-e +0 -117
  83. langroid/agent/tools/lark_example.py +0 -72
  84. langroid/agent/tools/note_tool.py +0 -0
  85. langroid/agent/tools/parse_example.py +0 -76
  86. langroid/agent/tools/parse_example2.py +0 -87
  87. langroid/agent/tools/parse_example3.py +0 -42
  88. langroid/agent/tools/parse_test.py +0 -791
  89. langroid/agent/tools/run_python_code.py +0 -60
  90. {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/LICENSE +0 -0
  91. {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/WHEEL +0 -0
@@ -1,165 +0,0 @@
1
- from abc import abstractmethod
2
- from typing import Any, Dict, List
3
-
4
- from pyparsing import (
5
- LineEnd,
6
- Literal,
7
- ParserElement,
8
- )
9
-
10
- from langroid.agent.tool_message import ToolMessage
11
-
12
-
13
- class GenericTool(ToolMessage):
14
- """
15
- Abstract class for a tool whose format is defined by a grammar,
16
- and not necessarily JSON-based.
17
- Especially useful for tools where we need an LLM to return code.
18
- Most LLMs, especially weaker ones, have significant issues
19
- (related to unescaped newlines, quotes, etc) when returning code within JSON.
20
- """
21
-
22
- @classmethod
23
- @abstractmethod
24
- def define_grammar(cls):
25
- """Define the grammar for the specific tool."""
26
- pass
27
-
28
- @classmethod
29
- def create_parser(cls):
30
- """Create a parser based on the defined grammar."""
31
- grammar = cls.define_grammar()
32
- # Use the grammar to create and return a parser
33
- return grammar
34
-
35
- @classmethod
36
- def parse(cls, string) -> Dict[str, Any]:
37
- parser = cls.create_parser()
38
- try:
39
- result = parser.parseString(string, parseAll=True)
40
- return {
41
- name: result[name]
42
- for name in result.keys()
43
- if name and not name.startswith("_")
44
- }
45
- except Exception as e:
46
- print(f"Parsing failed: {e}")
47
- return {}
48
-
49
- @classmethod
50
- def instructions(cls) -> str:
51
- preamble = "Preamble:\n"
52
- for field, field_info in cls.__dict__["__fields__"].items():
53
- preamble += (
54
- f"<{field}> denotes the value of the `{field}` field "
55
- f"(type: {field_info.type_})\n"
56
- )
57
-
58
- parser = cls.create_parser()
59
-
60
- def format_element(element):
61
- if isinstance(element, Literal):
62
- return element.match
63
- elif hasattr(element, "resultsName") and element.resultsName:
64
- return f"<{element.resultsName}>"
65
- elif isinstance(element, LineEnd):
66
- return "\n"
67
- return ""
68
-
69
- def traverse_parser(parser_element):
70
- if isinstance(parser_element, ParserElement):
71
- if hasattr(parser_element, "exprs"):
72
- return "".join(
73
- traverse_parser(expr) for expr in parser_element.exprs
74
- )
75
- else:
76
- return format_element(parser_element)
77
- return str(parser_element)
78
-
79
- template = traverse_parser(parser)
80
-
81
- return f"{preamble}\nFormatted Example:\n{template.strip()}"
82
-
83
- @classmethod
84
- def parse(cls, string) -> Dict[str, Any]:
85
- parser = cls.create_parser()
86
- try:
87
- result = parser.parseString(string, parseAll=True)
88
- return {
89
- name: result[name]
90
- for name in result.keys()
91
- if name and not name.startswith("_")
92
- }
93
- except Exception as e:
94
- print(f"Parsing failed: {e}")
95
- return {}
96
-
97
- @classmethod
98
- def format(cls, instance) -> str:
99
- parser = cls.create_parser()
100
-
101
- def format_element(element):
102
- if isinstance(element, Literal):
103
- return element.match
104
- elif hasattr(element, "resultsName") and element.resultsName:
105
- return getattr(instance, element.resultsName, "")
106
- elif isinstance(element, LineEnd):
107
- return "\n"
108
- return ""
109
-
110
- def traverse_parser(parser_element):
111
- if isinstance(parser_element, ParserElement):
112
- if hasattr(parser_element, "exprs"):
113
- return "".join(
114
- traverse_parser(expr) for expr in parser_element.exprs
115
- )
116
- else:
117
- return format_element(parser_element)
118
- return str(parser_element)
119
-
120
- formatted_string = traverse_parser(parser)
121
-
122
- return formatted_string.strip()
123
-
124
- @classmethod
125
- def from_string(cls, input_string: str) -> "CodeFileTool":
126
- """Parse a string into a CodeFileTool object, using the TEMPLATE."""
127
- parsed_data = cls.parse(input_string)
128
- if parsed_data:
129
- return cls(**parsed_data)
130
- raise ValueError("Invalid input string format")
131
-
132
- @classmethod
133
- def to_string(cls, instance) -> str:
134
- """Convert a CodeFileTool object to a string, using the TEMPLATE."""
135
- return cls.format(instance)
136
-
137
- @classmethod
138
- def find_candidates(cls, input_str: str) -> List[str]:
139
- """
140
- Find all possible (top-level) candidates for
141
- CodeFileTool in the input string.
142
- """
143
- parser = cls.create_parser()
144
- candidates = []
145
-
146
- for tokens, start, end in parser.scanString(input_str):
147
- candidates.append(input_str[start:end])
148
-
149
- return candidates
150
-
151
- def __str__(self):
152
- return self.to_string()
153
-
154
- # def __repr__(self) -> str:
155
- # class_name = self.__class__.__name__
156
- # attributes = []
157
- # for key, value in self.__dict__.items():
158
- # if not key.startswith('_'): # Skip private attributes
159
- # if isinstance(value, str):
160
- # # Escape quotes and newlines in string values
161
- # value_repr = f"'{value.replace('\\', '\\\\').replace(\"'\", \"\\'\").replace('\\n', '\\n')}'"
162
- # else:
163
- # value_repr = repr(value)
164
- # attributes.append(f"{key}={value_repr}")
165
- # return f"{class_name}({', '.join(attributes)})"
@@ -1,275 +0,0 @@
1
- from abc import abstractmethod
2
- from typing import List
3
-
4
- import tatsu
5
-
6
- from langroid.agent.tool_message import ToolMessage
7
-
8
-
9
- class GenericTool(ToolMessage):
10
- """
11
- Abstract class for a tool whose format is defined by a grammar,
12
- and not necessarily JSON-based.
13
- Especially useful for tools where we need an LLM to return code.
14
- Most LLMs, especially weaker ones, have significant issues
15
- (related to unescaped newlines, quotes, etc) when returning code within JSON.
16
- """
17
-
18
- @classmethod
19
- @abstractmethod
20
- def tool_grammar(cls) -> str:
21
- """Define the grammar for the `tool` rule"""
22
- pass
23
-
24
- @classmethod
25
- def grammar(cls) -> str:
26
- """
27
- Full grammar, including templates for rendering.
28
- """
29
- base_grammar = """
30
- @@grammar :: CombinedGrammar
31
- @@whitespace :: /[ \\t]+/
32
- @@nameguard :: False
33
-
34
- start
35
- =
36
- "<spec>" ws?
37
- request:word ws?
38
- tool
39
- ws? "</spec>"
40
- {:
41
- "<spec> " {{request}} {{tool}} " </spec>"
42
- :}
43
- ;
44
-
45
- ws = /[\\s]+/ ;
46
-
47
- word = /[^\\s<>/]+/ ;
48
- """
49
- full_grammar = base_grammar + "\n" + cls.tool_grammar()
50
- return full_grammar
51
-
52
- @classmethod
53
- def parse(cls, s: str):
54
- """
55
- Parses a string `s` using the grammar and returns an instance of the subclass.
56
- """
57
- # Build the parser using the provided grammar with model generation
58
- parser = tatsu.compile(cls.grammar(), asmodel=True)
59
-
60
- # Parse the input string to get a model object
61
- model = parser.parse(s)
62
-
63
- # Convert the model to a dict, filtering only the expected fields
64
- data = {k: getattr(model, k) for k in cls.__fields__ if hasattr(model, k)}
65
-
66
- # Create an instance of the subclass with the parsed data
67
- model_instance = cls(**data)
68
- return model_instance
69
-
70
- def format(self) -> str:
71
- """
72
- Generates a string representation of the instance based on the grammar.
73
- """
74
- # Build the parser using the provided grammar with model generation
75
- parser = tatsu.compile(self.grammar(), asmodel=True)
76
-
77
- # Create a model instance
78
- model_class = parser.model()
79
- model = model_class()
80
-
81
- # Set attributes from the instance, excluding fields not in the grammar
82
- for field in self.__fields__:
83
- if field == "purpose":
84
- continue # Exclude 'purpose' from rendering
85
- setattr(model, field, getattr(self, field))
86
-
87
- # Render the model back to text using the grammar's templates
88
- generated_string = model.render()
89
- return generated_string
90
-
91
- @classmethod
92
- def instructions(cls) -> str:
93
- """
94
- Generates instructions for formatting an instance, including placeholders
95
- and an example output with placeholders.
96
- """
97
-
98
- def generate_placeholders(field, prefix=""):
99
- placeholders = {}
100
- if hasattr(field.type_, "__fields__"):
101
- # Nested model
102
- for sub_field_name, sub_field in field.type_.__fields__.items():
103
- placeholders.update(
104
- generate_placeholders(
105
- sub_field, prefix=f"{prefix}{field.name}."
106
- )
107
- )
108
- elif isinstance(field.type_, type) and issubclass(field.type_, list):
109
- # List field
110
- placeholders[field.name] = (
111
- f"[<{field.name}_item1>,<{field.name}_item2>,...]"
112
- )
113
- else:
114
- placeholders[field.name] = f"<{prefix}{field.name}>"
115
- return placeholders
116
-
117
- # Generate placeholders for all fields
118
- placeholders = {}
119
- for field_name, field in cls.__fields__.items():
120
- placeholders.update(generate_placeholders(field))
121
-
122
- # Build the preamble
123
- preamble_lines = ["Placeholders for formatting:"]
124
- for field_name, placeholder in placeholders.items():
125
- field_type = cls.__fields__[field_name].type_.__name__
126
- preamble_lines.append(
127
- f"- `{placeholder}`: placeholder for `{field_name}` field (type: `{field_type}`)"
128
- )
129
- preamble = "\n".join(preamble_lines)
130
-
131
- # Create a placeholder instance
132
- placeholder_values = {}
133
- for field_name in cls.__fields__:
134
- placeholder_values[field_name] = placeholders[field_name]
135
- placeholder_instance = cls(**placeholder_values)
136
-
137
- # Generate an example output with placeholders
138
- parser = tatsu.compile(cls.grammar())
139
- ast = placeholder_instance.to_ast()
140
- # Use the placeholders in the AST
141
- for key, value in ast.items():
142
- ast[key] = placeholders.get(key, value)
143
- example_output = parser.render(ast)
144
-
145
- # Combine preamble and example output
146
- instructions = f"{preamble}\n\nExample format:\n\n{example_output}"
147
- return instructions
148
-
149
- @classmethod
150
- def from_ast(cls, ast):
151
- """
152
- Converts an AST into a model instance.
153
- """
154
- # Since TatSu produces dicts, we can convert the AST dict to the model
155
- return cls(**ast)
156
-
157
- def to_ast(self):
158
- """
159
- Converts the model instance into an AST (dict).
160
- """
161
- # Since TatSu expects dicts for rendering, we can use the model's dict
162
- return self.dict()
163
-
164
- @classmethod
165
- def from_string(cls, input_string: str) -> "CodeFileTool":
166
- """Parse a string into a CodeFileTool object, using the TEMPLATE."""
167
- parsed_data = cls.parse(input_string)
168
- if parsed_data:
169
- return cls(**parsed_data)
170
- raise ValueError("Invalid input string format")
171
-
172
- def to_string(self) -> str:
173
- """Convert a CodeFileTool object to a string, using the TEMPLATE."""
174
- return self.format()
175
-
176
- @classmethod
177
- def find_candidates(cls, s: str) -> List[str]:
178
- """
179
- Finds all substrings in `s` that start with start_marker and end with end_marker.
180
- """
181
- start = "<spec>" # TODO get from TOOL_BEGIN, TOOL_END
182
- end = "</spec>"
183
- candidates = []
184
- start_len = len(start)
185
- end_len = len(end)
186
- index = 0
187
- while index < len(s):
188
- start_index = s.find(start, index)
189
- if start_index == -1:
190
- break
191
- end_index = s.find(end, start_index + start_len)
192
- if end_index == -1:
193
- break
194
- candidate = s[start_index : end_index + end_len]
195
- # Attempt to parse the candidate to ensure it's valid
196
- try:
197
- cls.parse(candidate)
198
- candidates.append(candidate)
199
- except tatsu.exceptions.ParseException:
200
- # Ignore invalid candidates
201
- pass
202
- index = end_index + end_len
203
- return candidates
204
-
205
- def __str__(self):
206
- return self.to_string()
207
-
208
- # def __repr__(self) -> str:
209
- # class_name = self.__class__.__name__
210
- # attributes = []
211
- # for key, value in self.__dict__.items():
212
- # if not key.startswith('_'): # Skip private attributes
213
- # if isinstance(value, str):
214
- # # Escape quotes and newlines in string values
215
- # value_repr = f"'{value.replace('\\', '\\\\').replace(\"'\", \"\\'\").replace('\\n', '\\n')}'"
216
- # else:
217
- # value_repr = repr(value)
218
- # attributes.append(f"{key}={value_repr}")
219
- # return f"{class_name}({', '.join(attributes)})"
220
-
221
-
222
- if __name__ == "__main__":
223
- # Example subclass
224
-
225
- class MyTool(GenericTool):
226
- request: str = "my_tool"
227
- purpose: str = "do something"
228
- value: int
229
-
230
- @classmethod
231
- def tool_grammar(cls) -> str:
232
- return """
233
- tool = "value:" value:number
234
- {:
235
- "value:" {{value}}
236
- :}
237
- ;
238
-
239
- number = /\\d+/
240
- ;
241
- """
242
-
243
- my_tool = MyTool(value=42)
244
-
245
- # Generate the string from the instance using the grammar
246
- generated_string = my_tool.format()
247
- print("Formatted string:", generated_string)
248
-
249
- # Parse the string back into an instance using the grammar
250
- parsed_instance = MyTool.parse(generated_string)
251
- print("Parsed instance:", parsed_instance)
252
- print("Parsed value:", parsed_instance.value)
253
-
254
- # Extended example
255
- class ExtendedModel(GrammarBasedModel):
256
- request: str
257
- user_id: int
258
- action: str
259
- details: str
260
-
261
- @classmethod
262
- def rest_grammar(cls) -> str:
263
- return """
264
- rest = user_id:number ws action:word ws details:text ;
265
-
266
- number = /\d+/ ;
267
- text = /.+/ ;
268
- """
269
-
270
- input_string_ext = "<spec> user_update 42 delete Account deletion</spec>"
271
- extended_instance = ExtendedModel.parse(input_string_ext)
272
- print("Parsed extended instance:", extended_instance)
273
-
274
- generated_string_ext = extended_instance.generate()
275
- print("Generated extended string:", generated_string_ext)
@@ -1,132 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
- from lark import Lark, Token, Tree
4
- from lark.reconstruct import Reconstructor
5
-
6
- from langroid.pydantic_v1 import BaseModel
7
-
8
-
9
- class GrammarBasedModel(BaseModel, ABC):
10
- _parse_tree: Tree = None # Store the parse tree
11
- _parser: Lark = None # Store the parser instance
12
-
13
- @classmethod
14
- @abstractmethod
15
- def get_grammar(cls) -> str:
16
- """
17
- Subclasses must implement this method to return their grammar as a string.
18
- """
19
- pass
20
-
21
- @classmethod
22
- def get_token_field_mapping(cls):
23
- """
24
- Returns a mapping from token types to model field names. Subclasses can override this
25
- if their token types and field names differ.
26
- """
27
- return {}
28
-
29
- @classmethod
30
- def parse(cls, text: str):
31
- """
32
- Parse the input text using the grammar to create an instance of the model.
33
- """
34
- parser = Lark(cls.get_grammar(), parser="lalr", propagate_positions=True)
35
- tree = parser.parse(text)
36
- model_instance = cls.from_tree(tree)
37
- model_instance._parse_tree = tree # Store the parse tree in the instance
38
- model_instance._parser = parser # Store the parser in the instance
39
- return model_instance
40
-
41
- @classmethod
42
- def from_tree(cls, tree: Tree):
43
- """
44
- Convert a parse tree into a model instance.
45
- """
46
- data = cls.tree_to_dict(tree)
47
- return cls(**data)
48
-
49
- @classmethod
50
- def tree_to_dict(cls, tree: Tree):
51
- """
52
- Recursively convert a parse tree into a dictionary of field values.
53
- """
54
- data = {}
55
- token_field_mapping = cls.get_token_field_mapping()
56
- for child in tree.children:
57
- if isinstance(child, Tree):
58
- data.update(cls.tree_to_dict(child))
59
- elif isinstance(child, Token):
60
- token_type = child.type
61
- field_name = token_field_mapping.get(token_type, token_type.lower())
62
- data[field_name] = child.value
63
- return data
64
-
65
- def generate(self) -> str:
66
- """
67
- Generate a string representation of the model instance using the grammar.
68
- """
69
- if self._parse_tree is None or self._parser is None:
70
- raise ValueError("Cannot generate text without parsing first.")
71
- # Update the parse tree with current model data
72
- self.update_tree_with_model_data(self._parse_tree)
73
- reconstructor = Reconstructor(self._parser)
74
- text = reconstructor.reconstruct(self._parse_tree)
75
- return text
76
-
77
- def update_tree_with_model_data(self, tree: Tree):
78
- """
79
- Update the parse tree with the current model data.
80
- """
81
- token_field_mapping = self.get_token_field_mapping()
82
- reverse_mapping = {v: k for k, v in token_field_mapping.items()}
83
- for idx, child in enumerate(tree.children):
84
- if isinstance(child, Tree):
85
- self.update_tree_with_model_data(child)
86
- elif isinstance(child, Token):
87
- field_name = token_field_mapping.get(child.type, child.type.lower())
88
- if hasattr(self, field_name):
89
- new_value = getattr(self, field_name)
90
- tree.children[idx] = Token(child.type, str(new_value))
91
-
92
-
93
- # Example subclass
94
- class MyModel(GrammarBasedModel):
95
- name: str
96
- age: int
97
-
98
- @classmethod
99
- def get_grammar(cls):
100
- return """
101
- start: "name:" NAME "age:" NUMBER
102
- %import common.CNAME -> NAME
103
- %import common.INT -> NUMBER
104
- %import common.WS
105
- %ignore WS
106
- """
107
-
108
- @classmethod
109
- def get_token_field_mapping(cls):
110
- return {
111
- "NAME": "name",
112
- "NUMBER": "age",
113
- }
114
-
115
-
116
- # Usage example
117
- if __name__ == "__main__":
118
- text = "name: Alice age: 30"
119
- model = MyModel.parse(text)
120
- print("Parsed Model:", model)
121
-
122
- # Generate string from the model
123
- generated_text = model.generate()
124
- print("Generated Text:", generated_text)
125
-
126
- # Modify the model
127
- model.age = 31
128
- model.name = "Bob"
129
-
130
- # Generate updated string
131
- updated_text = model.generate()
132
- print("Updated Generated Text:", updated_text)
@@ -1,128 +0,0 @@
1
- from pydantic import BaseModel
2
- from abc import ABC, abstractmethod
3
- from lark import Lark, Tree, Token
4
- from lark.reconstruct import Reconstructor
5
-
6
- class GrammarBasedModel(BaseModel, ABC):
7
- _parse_tree: Tree = None # Store the parse tree
8
- _parser: Lark = None # Store the parser instance
9
-
10
- @classmethod
11
- @abstractmethod
12
- def get_grammar(cls) -> str:
13
- """
14
- Subclasses must implement this method to return their grammar as a string.
15
- """
16
- pass
17
-
18
- @classmethod
19
- def get_token_field_mapping(cls):
20
- """
21
- Returns a mapping from token types to model field names. Subclasses can override this
22
- if their token types and field names differ.
23
- """
24
- return {}
25
-
26
- @classmethod
27
- def parse(cls, text: str):
28
- """
29
- Parse the input text using the grammar to create an instance of the model.
30
- """
31
- parser = Lark(cls.get_grammar(), parser='lalr', propagate_positions=True)
32
- tree = parser.parse(text)
33
- model_instance = cls.from_tree(tree)
34
- model_instance._parse_tree = tree # Store the parse tree in the instance
35
- model_instance._parser = parser # Store the parser in the instance
36
- return model_instance
37
-
38
- @classmethod
39
- def from_tree(cls, tree: Tree):
40
- """
41
- Convert a parse tree into a model instance.
42
- """
43
- data = cls.tree_to_dict(tree)
44
- return cls(**data)
45
-
46
- @classmethod
47
- def tree_to_dict(cls, tree: Tree):
48
- """
49
- Recursively convert a parse tree into a dictionary of field values.
50
- """
51
- data = {}
52
- token_field_mapping = cls.get_token_field_mapping()
53
- for child in tree.children:
54
- if isinstance(child, Tree):
55
- data.update(cls.tree_to_dict(child))
56
- elif isinstance(child, Token):
57
- token_type = child.type
58
- field_name = token_field_mapping.get(token_type, token_type.lower())
59
- data[field_name] = child.value
60
- return data
61
-
62
- def generate(self) -> str:
63
- """
64
- Generate a string representation of the model instance using the grammar.
65
- """
66
- if self._parse_tree is None or self._parser is None:
67
- raise ValueError("Cannot generate text without parsing first.")
68
- # Update the parse tree with current model data
69
- self.update_tree_with_model_data(self._parse_tree)
70
- reconstructor = Reconstructor(self._parser)
71
- text = reconstructor.reconstruct(self._parse_tree)
72
- return text
73
-
74
- def update_tree_with_model_data(self, tree: Tree):
75
- """
76
- Update the parse tree with the current model data.
77
- """
78
- token_field_mapping = self.get_token_field_mapping()
79
- reverse_mapping = {v: k for k, v in token_field_mapping.items()}
80
- for idx, child in enumerate(tree.children):
81
- if isinstance(child, Tree):
82
- self.update_tree_with_model_data(child)
83
- elif isinstance(child, Token):
84
- field_name = token_field_mapping.get(child.type, child.type.lower())
85
- if hasattr(self, field_name):
86
- new_value = getattr(self, field_name)
87
- tree.children[idx] = Token(child.type, str(new_value))
88
-
89
- # Example subclass
90
- class MyModel(GrammarBasedModel):
91
- name: str
92
- age: int
93
-
94
- @classmethod
95
- def get_grammar(cls):
96
- return """
97
- start: "name:" NAME "age:" NUMBER
98
- %import common.CNAME -> NAME
99
- %import common.INT -> NUMBER
100
- %import common.WS
101
- %ignore WS
102
- """
103
-
104
- @classmethod
105
- def get_token_field_mapping(cls):
106
- return {
107
- 'NAME': 'name',
108
- 'NUMBER': 'age',
109
- }
110
-
111
- # Usage example
112
- if __name__ == "__main__":
113
- text = "name: Alice age: 30"
114
- model = MyModel.parse(text)
115
- print("Parsed Model:", model)
116
-
117
- # Generate string from the model
118
- generated_text = model.generate()
119
- print("Generated Text:", generated_text)
120
-
121
- # Modify the model
122
- model.age = 31
123
- model.name = "Bob"
124
-
125
- # Generate updated string
126
- updated_text = model.generate()
127
- print("Updated Generated Text:", updated_text)
128
-