langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. langroid/agent/md_tool_message_grammar.py +455 -0
  2. langroid/agent/tools/code_file_tool_parse.py +150 -0
  3. langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
  4. langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
  5. langroid/agent/tools/formatted_model_custom.py +150 -0
  6. langroid/agent/tools/formatted_model_custom2.py +168 -0
  7. langroid/agent/tools/formatted_model_custom3.py +279 -0
  8. langroid/agent/tools/formatted_model_custom4.py +395 -0
  9. langroid/agent/tools/formatted_model_jinja.py +133 -0
  10. langroid/agent/tools/formatted_model_jinja.py-e +122 -0
  11. langroid/agent/tools/formatted_model_jinja2.py +145 -0
  12. langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
  13. langroid/agent/tools/formatted_model_lark.py +0 -0
  14. langroid/agent/tools/formatted_model_lark2.py +168 -0
  15. langroid/agent/tools/formatted_model_parse.py +105 -0
  16. langroid/agent/tools/formatted_model_parse.py-e +98 -0
  17. langroid/agent/tools/formatted_model_parse2.py +113 -0
  18. langroid/agent/tools/formatted_model_parse2.py-e +109 -0
  19. langroid/agent/tools/formatted_model_parse3.py +114 -0
  20. langroid/agent/tools/formatted_model_parse3.py-e +110 -0
  21. langroid/agent/tools/formatted_model_parsimon.py +194 -0
  22. langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
  23. langroid/agent/tools/formatted_model_pyparsing.py +169 -0
  24. langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
  25. langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
  26. langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
  27. langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
  28. langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
  29. langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
  30. langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
  31. langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
  32. langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
  33. langroid/agent/tools/formatted_model_regex.py +246 -0
  34. langroid/agent/tools/formatted_model_regex.py-e +248 -0
  35. langroid/agent/tools/formatted_model_regex2.py +250 -0
  36. langroid/agent/tools/formatted_model_regex2.py-e +253 -0
  37. langroid/agent/tools/formatted_model_tatsu.py +172 -0
  38. langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
  39. langroid/agent/tools/formatted_model_template.py +217 -0
  40. langroid/agent/tools/formatted_model_template.py-e +200 -0
  41. langroid/agent/tools/formatted_model_xml.py +178 -0
  42. langroid/agent/tools/formatted_model_xml2.py +178 -0
  43. langroid/agent/tools/formatted_model_xml3.py +132 -0
  44. langroid/agent/tools/formatted_model_xml4.py +130 -0
  45. langroid/agent/tools/formatted_model_xml5.py +130 -0
  46. langroid/agent/tools/formatted_model_xml6.py +113 -0
  47. langroid/agent/tools/formatted_model_xml7.py +117 -0
  48. langroid/agent/tools/formatted_model_xml8.py +164 -0
  49. langroid/agent/tools/generic_tool.py +165 -0
  50. langroid/agent/tools/generic_tool_tatsu.py +275 -0
  51. langroid/agent/tools/grammar_based_model.py +132 -0
  52. langroid/agent/tools/grammar_based_model.py-e +128 -0
  53. langroid/agent/tools/grammar_based_model_lark.py +156 -0
  54. langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
  55. langroid/agent/tools/grammar_based_model_parse.py +86 -0
  56. langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
  57. langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
  58. langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
  59. langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
  60. langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
  61. langroid/agent/tools/grammar_based_model_regex.py +139 -0
  62. langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
  63. langroid/agent/tools/grammar_based_model_regex2.py +124 -0
  64. langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
  65. langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
  66. langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
  67. langroid/agent/tools/lark_earley_example.py +135 -0
  68. langroid/agent/tools/lark_earley_example.py-e +117 -0
  69. langroid/agent/tools/lark_example.py +72 -0
  70. langroid/agent/tools/parse_example.py +76 -0
  71. langroid/agent/tools/parse_example2.py +87 -0
  72. langroid/agent/tools/parse_example3.py +42 -0
  73. langroid/agent/tools/parse_test.py +791 -0
  74. langroid/agent/xml_tool_message.py +106 -0
  75. langroid/language_models/openai_gpt.py +6 -1
  76. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
  77. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
  78. pyproject.toml +1 -1
  79. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
  80. {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,133 @@
1
+ import re
2
+ from abc import ABC, abstractmethod
3
+ from typing import Type, TypeVar
4
+
5
+ from jinja2 import BaseLoader, Environment
6
+
7
+ from langroid.pydantic_v1 import BaseModel
8
+
9
+ T = TypeVar("T", bound="FormattingModel")
10
+
11
+
12
class FormattingModel(BaseModel, ABC):
    """Pydantic model that can be rendered to text and parsed back from it.

    Subclasses declare their fields and implement ``format_spec`` (a Jinja2
    template with ``{{field}}`` placeholders), ``start_token`` and
    ``end_token`` (the delimiters placed around the rendered template).
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja2 template that lays out the model's fields."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes a formatted block."""

    @classmethod
    def _spec_pattern(cls) -> str:
        """Translate ``format_spec`` into a regex with one group per field.

        Literal template text is escaped so characters such as ``.`` or ``(``
        match themselves. A placeholder that occurs more than once becomes a
        backreference, because a regex cannot define a group name twice.
        """
        spec = cls.format_spec()
        chunks = []
        seen = set()
        cursor = 0
        for placeholder in re.finditer(r"\{\{\s*(\w+)\s*\}\}", spec):
            name = placeholder.group(1)
            if name not in cls.__fields__:
                # Not a model field: keep it in place as literal text.
                continue
            chunks.append(re.escape(spec[cursor : placeholder.start()]))
            if name in seen:
                chunks.append(f"(?P={name})")
            else:
                seen.add(name)
                chunks.append(f"(?P<{name}>.*?)")
            cursor = placeholder.end()
        chunks.append(re.escape(spec[cursor:]))
        return "".join(chunks)

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance from text produced in the ``generate`` layout.

        Args:
            text: The delimited block; surrounding whitespace is ignored.

        Returns:
            An instance whose fields hold the whitespace-stripped captures.

        Raises:
            ValueError: If the delimiters are missing or the content between
                them does not match ``format_spec``.
        """
        body = text.strip()
        start, end = cls.start_token(), cls.end_token()
        if (
            len(body) < len(start) + len(end)
            or not body.startswith(start)
            or not body.endswith(end)
        ):
            raise ValueError(f"Input must start with {start!r} and end with {end!r}")
        # Use an explicit end index: ``[:-len(end)]`` collapses to an empty
        # string when the end token is empty.
        content = body[len(start) : len(body) - len(end)].strip()

        # fullmatch forces the lazy groups to cover the whole content, so a
        # placeholder at the end of the spec is not captured as "" and
        # trailing garbage is rejected instead of silently ignored.
        match = re.fullmatch(cls._spec_pattern(), content, re.DOTALL)
        if not match:
            raise ValueError("Failed to parse the input string")

        parsed_data = {name: value.strip() for name, value in match.groupdict().items()}
        return cls(**parsed_data)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` between the two tokens."""
        env = Environment(loader=BaseLoader())
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
48
+
49
+
50
class CodeFileModel(FormattingModel):
    """A source file: its path, its language tag and its code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the template: a header, the path, then a fenced code block."""
        spec = (
            "code_file_model\n"
            "file_path: {{file_path}}\n"
            "```{{language}}\n"
            "{{code}}\n"
            "```"
        )
        return spec

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""
        opening = "<format>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""
        closing = "</format>"
        return closing
68
+
69
+
70
if __name__ == "__main__":
    # Round trip 1: a small Python snippet.
    code_model = CodeFileModel(
        code='def hello():\n print("Hello, World!")',
        file_path="src/main.py",
        language="python",
    )
    print("Original CodeFileModel:", code_model, "", sep="\n")

    generated = code_model.generate()
    print("Generated string:", generated, "", sep="\n")

    parsed = CodeFileModel.parse(generated)
    print("Parsed CodeFileModel:", parsed, "", sep="\n")

    print("Round-trip test:")
    assert (
        code_model == parsed
    ), "Round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Round trip 2: different field values.
    another_model = CodeFileModel(
        code="function greet(name) {\n console.log(`Hello, ${name}!`);\n}",
        file_path="src/app.js",
        language="javascript",
    )
    print("\nAnother CodeFileModel:", another_model, "", sep="\n")

    another_generated = another_model.generate()
    print("Another generated string:", another_generated, "", sep="\n")

    another_parsed = CodeFileModel.parse(another_generated)
    print("Another parsed CodeFileModel:", another_parsed, "", sep="\n")

    print("Another round-trip test:")
    assert (
        another_model == another_parsed
    ), "Another round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Malformed input must be rejected with ValueError.
    print("\nTesting error handling:")
    try:
        CodeFileModel.parse("Invalid format string")
    except ValueError as e:
        print(f"Correctly raised ValueError: {e}")
    else:
        assert False, "Should have raised a ValueError"

    print("\nAll tests passed successfully!")
@@ -0,0 +1,122 @@
1
+ from pydantic import BaseModel
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Type, TypeVar
4
+ from jinja2 import Environment, BaseLoader
5
+ import re
6
+
7
+ T = TypeVar('T', bound='FormattingModel')
8
+
9
class FormattingModel(BaseModel, ABC):
    """Pydantic model that can be rendered to text and parsed back from it.

    Subclasses declare their fields and implement ``format_spec`` (a Jinja2
    template with ``{{field}}`` placeholders), ``start_token`` and
    ``end_token`` (the delimiters placed around the rendered template).
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja2 template that lays out the model's fields."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes a formatted block."""

    @classmethod
    def _spec_pattern(cls) -> str:
        """Translate ``format_spec`` into a regex with one group per field.

        Literal template text is escaped so characters such as ``.`` or ``(``
        match themselves. A placeholder that occurs more than once becomes a
        backreference, because a regex cannot define a group name twice.
        """
        spec = cls.format_spec()
        chunks = []
        seen = set()
        cursor = 0
        for placeholder in re.finditer(r"\{\{\s*(\w+)\s*\}\}", spec):
            name = placeholder.group(1)
            if name not in cls.__fields__:
                # Not a model field: keep it in place as literal text.
                continue
            chunks.append(re.escape(spec[cursor : placeholder.start()]))
            if name in seen:
                chunks.append(f"(?P={name})")
            else:
                seen.add(name)
                chunks.append(f"(?P<{name}>.*?)")
            cursor = placeholder.end()
        chunks.append(re.escape(spec[cursor:]))
        return "".join(chunks)

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance from text produced in the ``generate`` layout.

        Args:
            text: The delimited block; surrounding whitespace is ignored.

        Returns:
            An instance whose fields hold the whitespace-stripped captures.

        Raises:
            ValueError: If the delimiters are missing or the content between
                them does not match ``format_spec``.
        """
        body = text.strip()
        start, end = cls.start_token(), cls.end_token()
        if (
            len(body) < len(start) + len(end)
            or not body.startswith(start)
            or not body.endswith(end)
        ):
            raise ValueError(f"Input must start with {start!r} and end with {end!r}")
        # Use an explicit end index: ``[:-len(end)]`` collapses to an empty
        # string when the end token is empty.
        content = body[len(start) : len(body) - len(end)].strip()

        # fullmatch forces the lazy groups to cover the whole content, so a
        # placeholder at the end of the spec is not captured as "" and
        # trailing garbage is rejected instead of silently ignored.
        match = re.fullmatch(cls._spec_pattern(), content, re.DOTALL)
        if not match:
            raise ValueError("Failed to parse the input string")

        parsed_data = {name: value.strip() for name, value in match.groupdict().items()}
        return cls(**parsed_data)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` between the two tokens."""
        env = Environment(loader=BaseLoader())
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
45
+
46
class CodeFileModel(FormattingModel):
    """A source file: its path, its language tag and its code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the template: a header, the path, then a fenced code block."""
        spec = (
            "code_file_model\n"
            "file_path: {{file_path}}\n"
            "```{{language}}\n"
            "{{code}}\n"
            "```"
        )
        return spec

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""
        opening = "<format>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""
        closing = "</format>"
        return closing
62
+
63
if __name__ == "__main__":
    # Round trip 1: a small Python snippet.
    code_model = CodeFileModel(
        code="def hello():\n print(\"Hello, World!\")",
        file_path="src/main.py",
        language="python",
    )
    print("Original CodeFileModel:", code_model, "", sep="\n")

    generated = code_model.generate()
    print("Generated string:", generated, "", sep="\n")

    parsed = CodeFileModel.parse(generated)
    print("Parsed CodeFileModel:", parsed, "", sep="\n")

    print("Round-trip test:")
    assert code_model == parsed, "Round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Round trip 2: different field values.
    another_model = CodeFileModel(
        code="function greet(name) {\n console.log(`Hello, ${name}!`);\n}",
        file_path="src/app.js",
        language="javascript",
    )
    print("\nAnother CodeFileModel:", another_model, "", sep="\n")

    another_generated = another_model.generate()
    print("Another generated string:", another_generated, "", sep="\n")

    another_parsed = CodeFileModel.parse(another_generated)
    print("Another parsed CodeFileModel:", another_parsed, "", sep="\n")

    print("Another round-trip test:")
    assert another_model == another_parsed, "Another round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Malformed input must be rejected with ValueError.
    print("\nTesting error handling:")
    try:
        CodeFileModel.parse("Invalid format string")
    except ValueError as e:
        print(f"Correctly raised ValueError: {e}")
    else:
        assert False, "Should have raised a ValueError"

    print("\nAll tests passed successfully!")
@@ -0,0 +1,145 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Type, TypeVar
3
+
4
+ from jinja2 import BaseLoader, Environment
5
+ from parse import parse as str_parse
6
+ from parse import with_pattern
7
+
8
+ from langroid.pydantic_v1 import BaseModel
9
+
10
+ T = TypeVar("T", bound="FormattingModel")
11
+
12
+
13
@with_pattern(r"[\s\S]*?")
def _match_multiline(text):
    """Converter for the ``multiline`` parse type: trim surrounding whitespace.

    The attached pattern lazily matches any characters, newlines included.
    """
    trimmed = text.strip()
    return trimmed
16
+
17
+
18
class FormattingModel(BaseModel, ABC):
    """Pydantic model rendered with Jinja2 and parsed back with ``parse``.

    Subclasses declare their fields and implement ``format_spec`` (Jinja2
    template used by ``generate``), ``parse_spec`` (``parse``-library format
    string whose *named* fields match the model's fields), ``start_token``
    and ``end_token``.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja2 template used to render an instance."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` format string used to read an instance."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes a formatted block."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance from a delimited block of text.

        Args:
            text: The delimited block; surrounding whitespace is ignored.

        Returns:
            An instance built from the named fields captured by ``parse_spec``.

        Raises:
            ValueError: If the delimiters are missing or the content between
                them does not match ``parse_spec``.
        """
        body = text.strip()
        start, end = cls.start_token(), cls.end_token()
        head = body[: len(start)]
        tail = body[len(body) - len(end) :]
        # Tokens are compared case-insensitively, in line with the
        # case-insensitive matching requested from ``parse`` below.
        if (
            len(body) < len(start) + len(end)
            or head.lower() != start.lower()
            or tail.lower() != end.lower()
        ):
            raise ValueError(f"Input must start with {start!r} and end with {end!r}")
        # Use an explicit end index: ``[:-len(end)]`` collapses to an empty
        # string when the end token is empty.
        content = body[len(start) : len(body) - len(end)].strip()

        spec = cls.parse_spec()
        result = str_parse(
            spec,
            content,
            dict(multiline=_match_multiline),
            case_sensitive=False,
        )
        if result is None:
            raise ValueError(
                f"Failed to parse the input string using spec: {spec}\nInput: {content}"
            )
        return cls(**result.named)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` between the two tokens."""
        env = Environment(loader=BaseLoader(), trim_blocks=True, lstrip_blocks=True)
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content.strip()}\n{self.end_token()}"
59
+
60
+
61
class CodeFileModel(FormattingModel):
    """A source file: its path, its language tag and its code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the Jinja2 template: header, path line, fenced code block.

        Plain ``{{ ... }}`` is used on purpose. The ``{{- ... -}}`` form strips
        adjacent whitespace, newlines included, which would fuse the path,
        the fence and the code onto a single line.
        """
        return (
            "code_file_model\n"
            "file_path: {{ file_path }}\n"
            "```{{ language }}\n"
            "{{ code }}\n"
            "```"
        )

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` format string mirroring ``format_spec``.

        Fields are named so they land in ``result.named``, which is what
        ``FormattingModel.parse`` passes to the constructor. ``:^`` strips
        surrounding spaces from the captured value; the ``s`` type would
        match whitespace only, not a string.
        """
        return (
            "code_file_model\n"
            "file_path:{file_path:^}\n"
            "```{language:^}\n"
            "{code:multiline}"
            "```"
        )

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""
        return "<format>"

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""
        return "</format>"
87
+
88
+
89
+ # Test code
90
if __name__ == "__main__":
    # Case 1: input with extra whitespace around the values.
    test_string = """
<format>
code_file_model
file_path: src/main.py
``` python
def hello():
print("Hello, World!")

```
</format>
"""

    parsed = CodeFileModel.parse(test_string)
    print("Parsed model:", parsed, sep="\n")

    generated = parsed.generate()
    print("\nGenerated string:", generated, sep="\n")

    reparsed = CodeFileModel.parse(generated)
    print("\nReparsed model:", reparsed, sep="\n")

    print("\nRound trip test:")
    assert parsed == reparsed, "Round trip test failed"
    print("Passed!")

    # Case 2: different values, no space after "file_path:".
    another_test = """
<format>
code_file_model
file_path:src/app.js
``` javascript
function greet(name) {
console.log(`Hello, ${name}!`);
}
```
</format>
"""

    another_parsed = CodeFileModel.parse(another_test)
    print("\nAnother parsed model:", another_parsed, sep="\n")

    another_generated = another_parsed.generate()
    print("\nAnother generated string:", another_generated, sep="\n")

    print("\nAnother round trip test:")
    assert another_parsed == CodeFileModel.parse(
        another_generated
    ), "Another round trip test failed"
    print("Passed!")
@@ -0,0 +1,135 @@
1
+ from pydantic import BaseModel
2
+ from abc import ABC, abstractmethod
3
+ from typing import Type, TypeVar
4
+ from jinja2 import Environment, BaseLoader
5
+ from parse import parse as str_parse, with_pattern
6
+
7
+ T = TypeVar('T', bound='FormattingModel')
8
+
9
@with_pattern(r'[\s\S]*?')
def _match_multiline(text):
    """Converter for the ``multiline`` parse type: trim surrounding whitespace.

    The attached pattern lazily matches any characters, newlines included.
    """
    trimmed = text.strip()
    return trimmed
12
+
13
class FormattingModel(BaseModel, ABC):
    """Pydantic model rendered with Jinja2 and parsed back with ``parse``.

    Subclasses declare their fields and implement ``format_spec`` (Jinja2
    template used by ``generate``), ``parse_spec`` (``parse``-library format
    string whose *named* fields match the model's fields), ``start_token``
    and ``end_token``.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja2 template used to render an instance."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` format string used to read an instance."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter that closes a formatted block."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance from a delimited block of text.

        Args:
            text: The delimited block; surrounding whitespace is ignored.

        Returns:
            An instance built from the named fields captured by ``parse_spec``.

        Raises:
            ValueError: If the delimiters are missing or the content between
                them does not match ``parse_spec``.
        """
        body = text.strip()
        start, end = cls.start_token(), cls.end_token()
        head = body[: len(start)]
        tail = body[len(body) - len(end) :]
        # Tokens are compared case-insensitively, in line with the
        # case-insensitive matching requested from ``parse`` below.
        if (
            len(body) < len(start) + len(end)
            or head.lower() != start.lower()
            or tail.lower() != end.lower()
        ):
            raise ValueError(f"Input must start with {start!r} and end with {end!r}")
        # Use an explicit end index: ``[:-len(end)]`` collapses to an empty
        # string when the end token is empty.
        content = body[len(start) : len(body) - len(end)].strip()

        spec = cls.parse_spec()
        result = str_parse(
            spec,
            content,
            dict(multiline=_match_multiline),
            case_sensitive=False,
        )
        if result is None:
            raise ValueError(
                f"Failed to parse the input string using spec: {spec}\nInput: {content}"
            )
        return cls(**result.named)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` between the two tokens."""
        env = Environment(loader=BaseLoader(), trim_blocks=True, lstrip_blocks=True)
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content.strip()}\n{self.end_token()}"
47
+
48
class CodeFileModel(FormattingModel):
    """A source file: its path, its language tag and its code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the Jinja2 template: header, path line, fenced code block.

        Plain ``{{ ... }}`` is used on purpose. The ``{{- ... -}}`` form strips
        adjacent whitespace, newlines included, which would fuse the path,
        the fence and the code onto a single line.
        """
        return (
            "code_file_model\n"
            "file_path: {{ file_path }}\n"
            "```{{ language }}\n"
            "{{ code }}\n"
            "```"
        )

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` format string mirroring ``format_spec``.

        Fields are named so they land in ``result.named``, which is what
        ``FormattingModel.parse`` passes to the constructor. ``:^`` strips
        surrounding spaces from the captured value; the ``s`` type would
        match whitespace only, not a string.
        """
        return (
            "code_file_model\n"
            "file_path:{file_path:^}\n"
            "```{language:^}\n"
            "{code:multiline}"
            "```"
        )

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter."""
        return "<format>"

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter."""
        return "</format>"
80
+
81
+ # Test code
82
if __name__ == "__main__":
    # Case 1: input with extra whitespace around the values.
    test_string = """
<format>
code_file_model
file_path: src/main.py
``` python
def hello():
print("Hello, World!")

```
</format>
"""

    parsed = CodeFileModel.parse(test_string)
    print("Parsed model:", parsed, sep="\n")

    generated = parsed.generate()
    print("\nGenerated string:", generated, sep="\n")

    reparsed = CodeFileModel.parse(generated)
    print("\nReparsed model:", reparsed, sep="\n")

    print("\nRound trip test:")
    assert parsed == reparsed, "Round trip test failed"
    print("Passed!")

    # Case 2: different values, no space after "file_path:".
    another_test = """
<format>
code_file_model
file_path:src/app.js
``` javascript
function greet(name) {
console.log(`Hello, ${name}!`);
}
```
</format>
"""

    another_parsed = CodeFileModel.parse(another_test)
    print("\nAnother parsed model:", another_parsed, sep="\n")

    another_generated = another_parsed.generate()
    print("\nAnother generated string:", another_generated, sep="\n")

    print("\nAnother round trip test:")
    assert another_parsed == CodeFileModel.parse(another_generated), "Another round trip test failed"
    print("Passed!")
File without changes
@@ -0,0 +1,168 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from lark import Lark, Transformer, v_args
4
+
5
+ from langroid.pydantic_v1 import BaseModel
6
+
7
+
8
class FormattingModel(BaseModel, ABC):
    """Pydantic model rendered with ``str.format`` and parsed with Lark.

    Subclasses declare their fields and implement ``format_spec`` (a
    ``str.format`` template), ``parse_spec`` (a Lark grammar whose start rule
    is ``start``), ``start_token`` and ``end_token``.

    Contract between grammar and model: every grammar rule named after a
    model field supplies that field's value, taken from the tokens directly
    under the rule with surrounding whitespace stripped.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render an instance."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the Lark grammar used to read an instance."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the delimiter line that opens a formatted block."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the delimiter line that closes a formatted block."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` with ``format_spec`` between the two tokens."""
        spec = cls.format_spec()
        formatted = spec.format(**instance.dict())
        return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build an instance from a delimited block of text.

        Args:
            formatted_string: Text whose first and last lines are the start
                and end tokens; surrounding whitespace is ignored.

        Returns:
            An instance built from the grammar rules named after fields.

        Raises:
            ValueError: If the first or last line is not the expected token.
        """
        lines = formatted_string.strip().split("\n")
        if (
            len(lines) < 2
            or lines[0].strip() != cls.start_token()
            or lines[-1].strip() != cls.end_token()
        ):
            raise ValueError("Invalid start or end token")
        content = "\n".join(lines[1:-1])

        parser = Lark(cls.parse_spec(), start="start", parser="lalr")
        tree = parser.parse(content)

        # Collect values by rule name instead of by child position, so the
        # base class holds no knowledge of any one subclass's layout.
        data = {}
        for subtree in tree.iter_subtrees():
            rule_name = str(subtree.data)
            if rule_name in cls.__fields__:
                # Tokens are str subclasses; nested subtrees are skipped.
                tokens = [child for child in subtree.children if isinstance(child, str)]
                data[rule_name] = "".join(tokens).strip()
        return cls(**data)


class CodeFileModel(FormattingModel):
    """A source file: its path, its language tag and its code."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the template: header, path line, fenced code block."""
        return "code_file_model\n{file_path}\n```{language}\n{code}\n```"

    @classmethod
    def parse_spec(cls):
        """Return the Lark grammar mirroring ``format_spec``.

        Newlines are matched explicitly by ``_NL``; whitespace is not ignored
        globally, since an ignored whitespace terminal would swallow the
        newlines that separate the sections. Rules ``file_path``,
        ``language`` and ``code`` are named after the model fields.
        """
        return r"""
            start: "code_file_model" _NL+ file_path _NL+ code_block _NL*
            file_path: LINE
            code_block: "```" language _NL code _NL "```"
            language: LINE
            code: CODE
            LINE: /[^\n]+/
            CODE: /.+?(?=\n```)/s
            _NL: /\n/
        """

    @classmethod
    def start_token(cls):
        """Return the opening delimiter."""
        return "<format>"

    @classmethod
    def end_token(cls):
        """Return the closing delimiter."""
        return "</format>"
96
+
97
+
98
+ # Test cases
99
if __name__ == "__main__":
    # Expected strings are assembled from the same ``code`` value given to the
    # model, so they cannot drift from it through indentation differences
    # inside a hand-written multi-line literal.
    hello_code = "def hello():\n print('Hello, World!')"

    # Test formatting
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code=hello_code,
    )
    formatted = CodeFileModel.format(code_file)
    expected_format = "\n".join(
        [
            "<format>",
            "code_file_model",
            "src/main.py",
            "```Python",
            hello_code,
            "```",
            "</format>",
        ]
    )
    assert (
        formatted == expected_format
    ), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
    print("Formatting test passed.")

    # Test parsing
    parsed = CodeFileModel.parse(formatted)
    assert (
        parsed == code_file
    ), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
    print("Parsing test passed.")

    # Test round-trip
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    assert (
        round_trip == code_file
    ), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
    print("Round-trip test passed.")

    # Test with different values
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    assert (
        parsed2 == code_file2
    ), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
    print("Different values test passed.")

    # Test tolerant parsing: a blank line after the path and a space after
    # the opening fence must not change the parsed values.
    tolerant_input = "\n".join(
        [
            "<format>",
            "code_file_model",
            "src/main.py",
            "",
            "``` Python",
            hello_code,
            "```",
            "</format>",
        ]
    )
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code=hello_code,
    )
    assert (
        parsed_tolerant == expected_tolerant
    ), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")