langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. langroid/agent/md_tool_message_grammar.py +455 -0
  2. langroid/agent/tools/code_file_tool_parse.py +150 -0
  3. langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
  4. langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
  5. langroid/agent/tools/formatted_model_custom.py +150 -0
  6. langroid/agent/tools/formatted_model_custom2.py +168 -0
  7. langroid/agent/tools/formatted_model_custom3.py +279 -0
  8. langroid/agent/tools/formatted_model_custom4.py +395 -0
  9. langroid/agent/tools/formatted_model_jinja.py +133 -0
  10. langroid/agent/tools/formatted_model_jinja.py-e +122 -0
  11. langroid/agent/tools/formatted_model_jinja2.py +145 -0
  12. langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
  13. langroid/agent/tools/formatted_model_lark.py +0 -0
  14. langroid/agent/tools/formatted_model_lark2.py +168 -0
  15. langroid/agent/tools/formatted_model_parse.py +105 -0
  16. langroid/agent/tools/formatted_model_parse.py-e +98 -0
  17. langroid/agent/tools/formatted_model_parse2.py +113 -0
  18. langroid/agent/tools/formatted_model_parse2.py-e +109 -0
  19. langroid/agent/tools/formatted_model_parse3.py +114 -0
  20. langroid/agent/tools/formatted_model_parse3.py-e +110 -0
  21. langroid/agent/tools/formatted_model_parsimon.py +194 -0
  22. langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
  23. langroid/agent/tools/formatted_model_pyparsing.py +169 -0
  24. langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
  25. langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
  26. langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
  27. langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
  28. langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
  29. langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
  30. langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
  31. langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
  32. langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
  33. langroid/agent/tools/formatted_model_regex.py +246 -0
  34. langroid/agent/tools/formatted_model_regex.py-e +248 -0
  35. langroid/agent/tools/formatted_model_regex2.py +250 -0
  36. langroid/agent/tools/formatted_model_regex2.py-e +253 -0
  37. langroid/agent/tools/formatted_model_tatsu.py +172 -0
  38. langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
  39. langroid/agent/tools/formatted_model_template.py +217 -0
  40. langroid/agent/tools/formatted_model_template.py-e +200 -0
  41. langroid/agent/tools/formatted_model_xml.py +178 -0
  42. langroid/agent/tools/formatted_model_xml2.py +178 -0
  43. langroid/agent/tools/formatted_model_xml3.py +132 -0
  44. langroid/agent/tools/formatted_model_xml4.py +130 -0
  45. langroid/agent/tools/formatted_model_xml5.py +130 -0
  46. langroid/agent/tools/formatted_model_xml6.py +113 -0
  47. langroid/agent/tools/formatted_model_xml7.py +117 -0
  48. langroid/agent/tools/formatted_model_xml8.py +164 -0
  49. langroid/agent/tools/generic_tool.py +165 -0
  50. langroid/agent/tools/generic_tool_tatsu.py +275 -0
  51. langroid/agent/tools/grammar_based_model.py +132 -0
  52. langroid/agent/tools/grammar_based_model.py-e +128 -0
  53. langroid/agent/tools/grammar_based_model_lark.py +156 -0
  54. langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
  55. langroid/agent/tools/grammar_based_model_parse.py +86 -0
  56. langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
  57. langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
  58. langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
  59. langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
  60. langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
  61. langroid/agent/tools/grammar_based_model_regex.py +139 -0
  62. langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
  63. langroid/agent/tools/grammar_based_model_regex2.py +124 -0
  64. langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
  65. langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
  66. langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
  67. langroid/agent/tools/lark_earley_example.py +135 -0
  68. langroid/agent/tools/lark_earley_example.py-e +117 -0
  69. langroid/agent/tools/lark_example.py +72 -0
  70. langroid/agent/tools/parse_example.py +76 -0
  71. langroid/agent/tools/parse_example2.py +87 -0
  72. langroid/agent/tools/parse_example3.py +42 -0
  73. langroid/agent/tools/parse_test.py +791 -0
  74. langroid/agent/xml_tool_message.py +106 -0
  75. langroid/language_models/openai_gpt.py +6 -1
  76. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
  77. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
  78. pyproject.toml +1 -1
  79. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
  80. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,103 @@
1
+ from pydantic import BaseModel
2
+ from pyparsing import *
3
+ from abc import ABC, abstractmethod
4
+ from typing import Dict, Any
5
+
6
class GrammarBasedModel(BaseModel, ABC):
    """Model that is parsed from, and rendered to, delimiter-wrapped text.

    Subclasses provide the pyparsing grammar for the body, the start and
    end delimiters, and a mapping from model field names to the results
    names used inside the grammar.
    """

    @classmethod
    @abstractmethod
    def grammar(cls) -> ParserElement:
        """Return the pyparsing expression for the text between the delimiters."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized instance."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized instance."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name to grammar results name."""

    @classmethod
    def token_prefixes(cls) -> Dict[str, str]:
        """Return the label written before each token's value by generate().

        The defaults are the three labels generate() used to hard-code.
        Override this in a subclass whose grammar uses other tokens.
        """
        return {"name": "name:", "age": "age is", "city": "lives in"}

    @classmethod
    def parse(cls, text: str) -> 'GrammarBasedModel':
        """Parse *text* and build an instance from the named results.

        The whole input must match (``parseAll=True``); the delimiters are
        suppressed from the results. For each mapped field the last item
        of the corresponding named result is used as the field value.
        """
        full_grammar = (
            Suppress(cls.start_token())
            + cls.grammar()
            + Suppress(cls.end_token())
        )
        parsed = full_grammar.parseString(text, parseAll=True)
        return cls(
            **{
                field: parsed[token][-1]
                for field, token in cls.field_mappings().items()
            }
        )

    def generate(self) -> str:
        """Render this instance as newline-joined text between the delimiters.

        Each mapped field becomes one ``"<prefix> <value>"`` line, where the
        prefix comes from token_prefixes().
        """
        prefixes = self.token_prefixes()
        result = [self.start_token()]
        for field, token in self.field_mappings().items():
            value = getattr(self, field)
            # Tokens with no registered prefix are skipped, as before.
            if token in prefixes:
                result.append(f"{prefixes[token]} {value}")
        result.append(self.end_token())
        return "\n".join(result)
49
+
50
class PersonSpec(GrammarBasedModel):
    """Example model: a person described by three labelled values."""

    name: str
    age: int
    city: str

    @classmethod
    def grammar(cls):
        """Build the body grammar: three labelled groups in fixed order."""
        # (results name, label literal, characters allowed in the value)
        layout = (
            ("name", "name:", alphas),
            ("age", "age is", nums),
            ("city", "lives in", alphas),
        )
        name_part, age_part, city_part = (
            Group(Literal(label) + Word(charset))(results_name)
            for results_name, label, charset in layout
        )
        return name_part + age_part + city_part

    @classmethod
    def start_token(cls):
        """Opening delimiter."""
        return "<spec>"

    @classmethod
    def end_token(cls):
        """Closing delimiter."""
        return "</spec>"

    @classmethod
    def field_mappings(cls):
        """Each field maps to the results name of the same spelling."""
        return {key: key for key in ("name", "age", "city")}
77
+
78
if __name__ == "__main__":
    # Parse a hand-written sample.
    sample_text = """
<spec>
name: John
age is 30
lives in Tokyo
</spec>
"""
    print("Parsed person:", PersonSpec.parse(sample_text))

    # Render a freshly constructed model.
    alice = PersonSpec(name="Alice", age=25, city="NewYork")
    rendered = alice.generate()
    print("\nGenerated string:")
    print(rendered)

    # Parse the rendered text back and compare with the source model.
    alice_again = PersonSpec.parse(rendered)
    print("\nRound-trip parsed person:", alice_again)

    assert alice == alice_again, "Round-trip parsing failed"
    print("\nRound-trip test passed!")
102
+
103
+
@@ -0,0 +1,139 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict
4
+
5
+ from langroid.pydantic_v1 import BaseModel
6
+
7
+
8
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a regex-flavoured text template.

    ``format_spec()`` is a regular-expression fragment in which each
    placeholder listed by ``field_mappings()`` marks where a field value
    goes. The same spec drives both parsing and generation.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the template containing the field placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized instance."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized instance."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name to its placeholder text."""

    @classmethod
    def _substitute_placeholders(cls, template: str, render) -> str:
        """Replace every placeholder in *template* in one pass.

        *render* is called with the field name and returns the replacement
        text. A single pass means text produced by one substitution is
        never rescanned, so a value that happens to contain another
        placeholder is left untouched.
        """
        field_by_placeholder: Dict[str, str] = {}
        for field, placeholder in cls.field_mappings().items():
            # First field wins if two fields share a placeholder.
            field_by_placeholder.setdefault(placeholder, field)
        if not field_by_placeholder:
            return template
        # Longest first so one placeholder cannot shadow a longer one
        # that starts with the same characters.
        alternation = "|".join(
            re.escape(placeholder)
            for placeholder in sorted(field_by_placeholder, key=len, reverse=True)
        )
        return re.sub(
            alternation,
            lambda match: render(field_by_placeholder[match.group(0)]),
            template,
        )

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Return the regex for one serialized instance, delimiters included."""
        spec = cls._substitute_placeholders(
            cls.format_spec(), lambda field: f"(?P<{field}>.*?)"
        )
        pattern = (
            f"{re.escape(cls.start_token())}\\s*{spec}\\s*{re.escape(cls.end_token())}"
        )
        return pattern

    @classmethod
    def parse(cls, text: str) -> "FormattingModel":
        """Find the first serialized instance in *text* and build a model.

        Matching is case-insensitive and ``.`` spans newlines; captured
        values are stripped of surrounding whitespace.

        Raises:
            ValueError: if *text* contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return cls(**{k: v.strip() for k, v in match.groupdict().items()})
        raise ValueError(
            f"Text does not match the expected format. Pattern: {pattern}, Text: {text}"
        )

    def generate(self) -> str:
        """Render this instance as delimiter-wrapped text."""
        # Each literal ``\s*`` in the spec becomes a single space.
        content = re.sub(r"\\s\*", " ", self.format_spec())
        content = self._substitute_placeholders(
            content, lambda field: str(getattr(self, field))
        )
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
56
+
57
+
58
class PersonModel(FormattingModel):
    """Example model: a person described by three labelled values."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """Labels and placeholders, each separated by optional whitespace."""
        segments = ("name:", "{name}", "age is", "{age}", "lives in", "{city}")
        return r"\s*".join(segments)

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter."""
        return "<spec>"

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter."""
        return "</spec>"

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Each field's placeholder is its own name wrapped in braces."""
        return {field: "{" + field + "}" for field in ("name", "age", "city")}
78
+
79
+
80
def test_round_trip(model_class, input_string):
    """Check that parse -> generate -> parse yields an equal model.

    Args:
        model_class: class exposing a ``parse(text)`` classmethod whose
            result has a ``generate()`` method and supports ``==``.
        input_string: serialized text to start from.

    Raises:
        AssertionError: if the reparsed model differs from the first one,
            or an annotated field is missing or has the wrong type.
    """
    # Parse the input string
    parsed_model = model_class.parse(input_string)
    print(f"Parsed model: {parsed_model}")

    # Generate a string from the parsed model
    generated_string = parsed_model.generate()
    print(f"Generated string:\n{generated_string}")

    # Parse the generated string
    reparsed_model = model_class.parse(generated_string)
    print(f"Reparsed model: {reparsed_model}")

    # Raised explicitly so the checks still run under ``python -O``.
    if not (parsed_model == reparsed_model):
        raise AssertionError(
            "Round trip failed: original and reparsed models are not equal"
        )

    # Every annotated field must be present and have the annotated type.
    for field, field_type in getattr(model_class, "__annotations__", {}).items():
        if not hasattr(parsed_model, field):
            raise AssertionError(f"Field {field} is missing")
        # Annotations such as List[str] or strings cannot be given to
        # isinstance(); only plain classes are type-checked.
        if isinstance(field_type, type) and not isinstance(
            getattr(parsed_model, field), field_type
        ):
            raise AssertionError(f"Field {field} has incorrect type")

    print("Round trip test passed successfully!")


# This is a helper taking arguments, not a test case; stop pytest from
# collecting it because of its ``test_`` prefix.
test_round_trip.__test__ = False
106
+
107
+
108
if __name__ == "__main__":
    divider = "\n" + "=" * 50 + "\n"
    samples = [
        # Test case 1: Standard formatting
        """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
""",
        # Test case 2: Varying whitespace
        "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>",
        # Test case 3: Multiline values
        """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
""",
    ]

    for position, sample in enumerate(samples):
        # A divider goes between consecutive cases, not before the first.
        if position:
            print(divider)
        test_round_trip(PersonModel, sample)

    print("All tests passed successfully!")
@@ -0,0 +1,130 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Any
4
+ from pydantic import BaseModel
5
+
6
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a regex-flavoured text template.

    ``format_spec()`` is a regular-expression fragment in which each
    placeholder listed by ``field_mappings()`` marks where a field value
    goes. The same spec drives both parsing and generation.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the template containing the field placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized instance."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized instance."""

    @classmethod
    @abstractmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Return a mapping of model field name to its placeholder text."""

    @classmethod
    def _fill_placeholders(cls, template: str, render) -> str:
        """Substitute all placeholders of *template* in a single scan.

        *render* maps a field name to its replacement text. Replacement
        text is never rescanned, so a value containing another field's
        placeholder is not substituted a second time.
        """
        owners: Dict[str, str] = {}
        for field, placeholder in cls.field_mappings().items():
            # Keep the first field when placeholders collide.
            owners.setdefault(placeholder, field)
        if not owners:
            return template
        # Try longer placeholders first so a shorter one sharing the same
        # leading characters cannot match in its place.
        ordered = sorted(owners, key=len, reverse=True)
        scanner = "|".join(re.escape(placeholder) for placeholder in ordered)
        return re.sub(scanner, lambda hit: render(owners[hit.group(0)]), template)

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Return the regex for one serialized instance, delimiters included."""
        spec = cls._fill_placeholders(
            cls.format_spec(), lambda field: f"(?P<{field}>.*?)"
        )
        pattern = f"{re.escape(cls.start_token())}\\s*{spec}\\s*{re.escape(cls.end_token())}"
        return pattern

    @classmethod
    def parse(cls, text: str) -> 'FormattingModel':
        """Find the first serialized instance in *text* and build a model.

        Matching is case-insensitive and ``.`` spans newlines; captured
        values are stripped of surrounding whitespace.

        Raises:
            ValueError: if *text* contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return cls(**{k: v.strip() for k, v in match.groupdict().items()})
        raise ValueError(f"Text does not match the expected format. Pattern: {pattern}, Text: {text}")

    def generate(self) -> str:
        """Render this instance as delimiter-wrapped text."""
        # Each literal ``\s*`` in the spec becomes a single space.
        content = re.sub(r'\\s\*', ' ', self.format_spec())
        content = self._fill_placeholders(
            content, lambda field: str(getattr(self, field))
        )
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
50
+
51
class PersonModel(FormattingModel):
    """Sample model holding a person's name, age and city."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """Template: three labelled placeholders joined by optional whitespace."""
        gap = "\\s*"
        return gap.join(["name:", "{name}", "age is", "{age}", "lives in", "{city}"])

    @classmethod
    def start_token(cls) -> str:
        """Text that begins a serialized person."""
        return "<spec>"

    @classmethod
    def end_token(cls) -> str:
        """Text that ends a serialized person."""
        return "</spec>"

    @classmethod
    def field_mappings(cls) -> Dict[str, str]:
        """Map each field to its brace-wrapped placeholder."""
        return dict(name="{name}", age="{age}", city="{city}")
75
+
76
def test_round_trip(model_class, input_string):
    """Check that parse -> generate -> parse yields an equal model.

    Args:
        model_class: class exposing a ``parse(text)`` classmethod whose
            result has a ``generate()`` method and supports ``==``.
        input_string: serialized text to start from.

    Raises:
        AssertionError: if the reparsed model differs from the first one,
            or an annotated field is missing or has the wrong type.
    """
    # Parse the input string
    parsed_model = model_class.parse(input_string)
    print(f"Parsed model: {parsed_model}")

    # Generate a string from the parsed model
    generated_string = parsed_model.generate()
    print(f"Generated string:\n{generated_string}")

    # Parse the generated string
    reparsed_model = model_class.parse(generated_string)
    print(f"Reparsed model: {reparsed_model}")

    # Raised explicitly so the checks still run under ``python -O``.
    if not (parsed_model == reparsed_model):
        raise AssertionError("Round trip failed: original and reparsed models are not equal")

    # Every annotated field must be present and have the annotated type.
    for field, field_type in getattr(model_class, "__annotations__", {}).items():
        if not hasattr(parsed_model, field):
            raise AssertionError(f"Field {field} is missing")
        # Annotations such as List[str] or strings cannot be given to
        # isinstance(); only plain classes are type-checked.
        if isinstance(field_type, type) and not isinstance(getattr(parsed_model, field), field_type):
            raise AssertionError(f"Field {field} has incorrect type")

    print("Round trip test passed successfully!")


# This is a helper taking arguments, not a test case; stop pytest from
# collecting it because of its ``test_`` prefix.
test_round_trip.__test__ = False
98
+
99
if __name__ == "__main__":
    rule = "\n" + "="*50 + "\n"

    # Test case 1: Standard formatting
    standard_text = """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
"""
    # Test case 2: Varying whitespace
    ragged_text = "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>"
    # Test case 3: Multiline values
    multiline_text = """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
"""

    test_round_trip(PersonModel, standard_text)
    # The remaining cases are each preceded by a divider line.
    for later_text in (ragged_text, multiline_text):
        print(rule)
        test_round_trip(PersonModel, later_text)

    print("All tests passed successfully!")
@@ -0,0 +1,124 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+
4
+ from langroid.pydantic_v1 import BaseModel
5
+
6
+
7
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a ``str.format``-style template.

    ``format_spec()`` returns a template with ``{field}`` placeholders.
    generate() fills it with ``str.format``; parse() turns the same
    template into a regular expression.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the template containing ``{field}`` placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized instance."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized instance."""

    @staticmethod
    def _literal_to_regex(text: str) -> str:
        """Escape template text that lies between placeholders.

        Newlines become ``\\s*`` to allow flexible whitespace; every other
        character is escaped so it only matches itself.
        """
        return r"\s*".join(re.escape(line) for line in text.split("\n"))

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Return the regex for one serialized instance, delimiters included."""
        spec = cls.format_spec()
        pieces = []
        cursor = 0
        for placeholder in re.finditer(r"\{(\w+)\}", spec):
            # Literal text before the placeholder must match verbatim, so
            # regex metacharacters in the template are escaped.
            pieces.append(cls._literal_to_regex(spec[cursor : placeholder.start()]))
            # Replace {field_name} with (?P<field_name>.*?)
            pieces.append(f"(?P<{placeholder.group(1)}>.*?)")
            cursor = placeholder.end()
        pieces.append(cls._literal_to_regex(spec[cursor:]))
        pattern = "".join(pieces)
        return f"{re.escape(cls.start_token())}\\s*{pattern}\\s*{re.escape(cls.end_token())}"

    @classmethod
    def parse(cls, text: str) -> "FormattingModel":
        """Find the first serialized instance in *text* and build a model.

        Matching is case-insensitive and ``.`` spans newlines; captured
        values are stripped of surrounding whitespace.

        Raises:
            ValueError: if *text* contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return cls(**{k: v.strip() for k, v in match.groupdict().items()})
        raise ValueError(
            f"Text does not match the expected format. Pattern: {pattern}, Text: {text}"
        )

    def generate(self) -> str:
        """Render this instance as delimiter-wrapped text."""
        content = self.format_spec().format(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
45
+
46
+
47
class PersonModel(FormattingModel):
    """Example model: a person rendered as three labelled lines."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """One labelled placeholder per line."""
        rows = ("name: {name}", "age is {age}", "lives in {city}")
        return "\n".join(rows)

    @classmethod
    def start_token(cls) -> str:
        """Opening delimiter."""
        return "<spec>"

    @classmethod
    def end_token(cls) -> str:
        """Closing delimiter."""
        return "</spec>"
63
+
64
+
65
def test_round_trip(model_class, input_string):
    """Check that parse -> generate -> parse yields an equal model.

    Args:
        model_class: class exposing a ``parse(text)`` classmethod whose
            result has a ``generate()`` method and supports ``==``.
        input_string: serialized text to start from.

    Raises:
        AssertionError: if the reparsed model differs from the first one,
            or an annotated field is missing or has the wrong type.
    """
    # Parse the input string
    parsed_model = model_class.parse(input_string)
    print(f"Parsed model: {parsed_model}")

    # Generate a string from the parsed model
    generated_string = parsed_model.generate()
    print(f"Generated string:\n{generated_string}")

    # Parse the generated string
    reparsed_model = model_class.parse(generated_string)
    print(f"Reparsed model: {reparsed_model}")

    # Raised explicitly so the checks still run under ``python -O``.
    if not (parsed_model == reparsed_model):
        raise AssertionError(
            "Round trip failed: original and reparsed models are not equal"
        )

    # Every annotated field must be present and have the annotated type.
    for field, field_type in getattr(model_class, "__annotations__", {}).items():
        if not hasattr(parsed_model, field):
            raise AssertionError(f"Field {field} is missing")
        # Annotations such as List[str] or strings cannot be given to
        # isinstance(); only plain classes are type-checked.
        if isinstance(field_type, type) and not isinstance(
            getattr(parsed_model, field), field_type
        ):
            raise AssertionError(f"Field {field} has incorrect type")

    print("Round trip test passed successfully!")


# This is a helper taking arguments, not a test case; stop pytest from
# collecting it because of its ``test_`` prefix.
test_round_trip.__test__ = False
91
+
92
+
93
if __name__ == "__main__":
    banner = "\n" + "=" * 50 + "\n"
    cases = (
        # Test case 1: Standard formatting
        """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
""",
        # Test case 2: Varying whitespace
        "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>",
        # Test case 3: Multiline values
        """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
""",
    )

    first_case, *other_cases = cases
    test_round_trip(PersonModel, first_case)
    for case_text in other_cases:
        # Separate consecutive cases in the console output.
        print(banner)
        test_round_trip(PersonModel, case_text)

    print("All tests passed successfully!")
@@ -0,0 +1,116 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Any
4
+ from pydantic import BaseModel
5
+
6
class FormattingModel(BaseModel, ABC):
    """Model that round-trips through a ``str.format``-style template.

    ``format_spec()`` returns a template with ``{field}`` placeholders.
    generate() fills it with ``str.format``; parse() turns the same
    template into a regular expression.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the template containing ``{field}`` placeholders."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the literal that opens a serialized instance."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the literal that closes a serialized instance."""

    @staticmethod
    def _escape_template_text(text: str) -> str:
        """Turn literal template text into an equivalent regex fragment.

        Newlines become ``\\s*`` to allow flexible whitespace; every other
        character is escaped so it only matches itself.
        """
        return r'\s*'.join(re.escape(chunk) for chunk in text.split('\n'))

    @classmethod
    def _create_regex_pattern(cls) -> str:
        """Return the regex for one serialized instance, delimiters included."""
        spec = cls.format_spec()
        fragments = []
        consumed = 0
        for found in re.finditer(r'\{(\w+)\}', spec):
            # Text between placeholders is data, not regex syntax, so it
            # is escaped before being added to the pattern.
            fragments.append(cls._escape_template_text(spec[consumed:found.start()]))
            # Replace {field_name} with (?P<field_name>.*?)
            fragments.append(f'(?P<{found.group(1)}>.*?)')
            consumed = found.end()
        fragments.append(cls._escape_template_text(spec[consumed:]))
        pattern = ''.join(fragments)
        return f"{re.escape(cls.start_token())}\\s*{pattern}\\s*{re.escape(cls.end_token())}"

    @classmethod
    def parse(cls, text: str) -> 'FormattingModel':
        """Find the first serialized instance in *text* and build a model.

        Matching is case-insensitive and ``.`` spans newlines; captured
        values are stripped of surrounding whitespace.

        Raises:
            ValueError: if *text* contains no match for the pattern.
        """
        pattern = cls._create_regex_pattern()
        match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
        if match:
            return cls(**{k: v.strip() for k, v in match.groupdict().items()})
        raise ValueError(f"Text does not match the expected format. Pattern: {pattern}, Text: {text}")

    def generate(self) -> str:
        """Render this instance as delimiter-wrapped text."""
        content = self.format_spec().format(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
42
+
43
class PersonModel(FormattingModel):
    """Sample model holding a person's name, age and city."""

    name: str
    age: int
    city: str

    @classmethod
    def format_spec(cls) -> str:
        """Template with one labelled placeholder on each of three lines."""
        return (
            "name: {name}\n"
            "age is {age}\n"
            "lives in {city}"
        )

    @classmethod
    def start_token(cls) -> str:
        """Text that begins a serialized person."""
        return "<spec>"

    @classmethod
    def end_token(cls) -> str:
        """Text that ends a serialized person."""
        return "</spec>"
59
+
60
def test_round_trip(model_class, input_string):
    """Check that parse -> generate -> parse yields an equal model.

    Args:
        model_class: class exposing a ``parse(text)`` classmethod whose
            result has a ``generate()`` method and supports ``==``.
        input_string: serialized text to start from.

    Raises:
        AssertionError: if the reparsed model differs from the first one,
            or an annotated field is missing or has the wrong type.
    """
    # Parse the input string
    parsed_model = model_class.parse(input_string)
    print(f"Parsed model: {parsed_model}")

    # Generate a string from the parsed model
    generated_string = parsed_model.generate()
    print(f"Generated string:\n{generated_string}")

    # Parse the generated string
    reparsed_model = model_class.parse(generated_string)
    print(f"Reparsed model: {reparsed_model}")

    # Raised explicitly so the checks still run under ``python -O``.
    if not (parsed_model == reparsed_model):
        raise AssertionError("Round trip failed: original and reparsed models are not equal")

    # Every annotated field must be present and have the annotated type.
    for field, field_type in getattr(model_class, "__annotations__", {}).items():
        if not hasattr(parsed_model, field):
            raise AssertionError(f"Field {field} is missing")
        # Annotations such as List[str] or strings cannot be given to
        # isinstance(); only plain classes are type-checked.
        if isinstance(field_type, type) and not isinstance(getattr(parsed_model, field), field_type):
            raise AssertionError(f"Field {field} has incorrect type")

    print("Round trip test passed successfully!")


# This is a helper taking arguments, not a test case; stop pytest from
# collecting it because of its ``test_`` prefix.
test_round_trip.__test__ = False
82
+
83
if __name__ == "__main__":
    spacer = "\n" + "="*50 + "\n"

    # Test case 1: Standard formatting
    plain_sample = """
<spec>
name: John Doe
age is 30
lives in New York
</spec>
"""
    # Test case 2: Varying whitespace
    uneven_sample = "<spec>name: Alice \nage is 25 \nlives in Tokyo</spec>"
    # Test case 3: Multiline values
    wrapped_sample = """
<spec>
name: Bob
Smith
age is 40
lives in San
Francisco
</spec>
"""

    ordered_samples = [plain_sample, uneven_sample, wrapped_sample]
    for number, sample in enumerate(ordered_samples, start=1):
        test_round_trip(PersonModel, sample)
        # A spacer follows every case except the last one.
        if number < len(ordered_samples):
            print(spacer)

    print("All tests passed successfully!")
115
+
116
+
@@ -0,0 +1,80 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import ClassVar, Dict
3
+
4
+ from tatsu import compile as compile_grammar
5
+ from tatsu.model import ModelBuilder
6
+
7
+ from langroid.pydantic_v1 import BaseModel
8
+
9
+
10
class GrammarBasedModel(BaseModel, ABC):
    """Base for models parsed from, and rendered to, text via a TatSu grammar.

    Concrete subclasses set the class-level values below and implement
    get_grammar().
    """

    # Class-level configuration supplied by each concrete subclass.
    grammar: ClassVar[str]  # grammar source text
    start_token: ClassVar[str]  # written before the body by generate()
    end_token: ClassVar[str]  # written after the body by generate()
    field_mappings: ClassVar[Dict[str, str]]  # model field -> attribute read from the parse result

    @classmethod
    @abstractmethod
    def get_grammar(cls) -> str:
        """Return the grammar source that is compiled by parse() and generate()."""
        pass

    @classmethod
    def parse(cls, text: str) -> "GrammarBasedModel":
        """Parse *text* with the grammar's ``start`` rule and build an instance.

        Each model field is read from the parse result as the attribute
        named by ``field_mappings``.
        """
        # The grammar is compiled on every call.
        parser = compile_grammar(cls.get_grammar())
        # NOTE(review): *text* is parsed as given; start_token/end_token are
        # not removed here although generate() adds them - confirm the
        # grammar is expected to accept the delimiters.
        ast = parser.parse(text, start="start")
        model_dict = {
            field: getattr(ast, rule) for field, rule in cls.field_mappings.items()
        }
        return cls(**model_dict)

    def generate(self) -> str:
        """Render this instance between ``start_token`` and ``end_token``.

        NOTE(review): this sets attributes on a ``ModelBuilder`` and then
        calls ``model_builder.start()``; it is not visible from this file
        that TatSu provides such an API - verify before relying on it.
        """
        grammar = compile_grammar(self.get_grammar())
        model_builder = ModelBuilder()
        # Copy each field value onto the builder under its rule name.
        for field, rule in self.field_mappings.items():
            setattr(model_builder, rule, getattr(self, field))
        ast = model_builder.start()
        # The middle line is the result of parsing str(ast), not str(ast) itself.
        return f"{self.start_token}\n{grammar.parse(str(ast), start='start')}\n{self.end_token}"
37
+
38
+
39
class PersonSpec(GrammarBasedModel):
    """Example model: a person described by three labelled lines."""

    name: str
    age: int
    city: str

    # Raw string: the grammar contains regex escapes (\s, \d, \r, \n) that
    # must reach the grammar compiler unchanged. In a normal string \s and
    # \d are invalid escape sequences, and \r and \n would be turned into
    # real control characters inside the /.../ patterns.
    grammar = r"""
start = name_line age_line city_line;
name_line = 'name:' /\s*/ name:/.+/ EOL;
age_line = 'age is' /\s*/ age:/\d+/ EOL;
city_line = 'lives in' /\s*/ city:/.+/ EOL;
EOL = /\r?\n/;
"""
    start_token = "<spec>"
    end_token = "</spec>"
    # Each field is read from the parse-result attribute of the same name.
    field_mappings = {"name": "name", "age": "age", "city": "city"}

    @classmethod
    def get_grammar(cls):
        """Return the grammar source defined on the class."""
        return cls.grammar
58
+
59
+
60
if __name__ == "__main__":
    # Parse a hand-written sample.
    sample_text = """<spec>
name: John Doe
age is 30
lives in New York
</spec>"""
    original = PersonSpec.parse(sample_text)
    print("Parsed person:", original)

    # Render the parsed model back to text.
    rendered = original.generate()
    print("\nGenerated string:")
    print(rendered)

    # Parse the rendered text and compare with the first result.
    reparsed = PersonSpec.parse(rendered)
    print("\nRound-trip parsed person:", reparsed)

    assert original == reparsed, "Round-trip parsing failed"
    print("\nRound-trip test passed!")