langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
import re
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Type, TypeVar
|
4
|
+
|
5
|
+
from jinja2 import BaseLoader, Environment
|
6
|
+
|
7
|
+
from langroid.pydantic_v1 import BaseModel
|
8
|
+
|
9
|
+
T = TypeVar("T", bound="FormattingModel")
|
10
|
+
|
11
|
+
|
12
|
+
class FormattingModel(BaseModel, ABC):
    """Pydantic model that can be rendered to, and parsed from, delimited text.

    Subclasses provide a Jinja template (``format_spec``) in which each model
    field appears as ``{{field_name}}``, plus the start and end tokens that
    wrap the rendered body. ``generate`` renders an instance; ``parse``
    rebuilds an instance from such text.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja template describing the body of the format."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the token that opens the formatted text."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the token that closes the formatted text."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance of ``cls`` from text produced by ``generate``.

        Args:
            text: The formatted string, including start and end tokens.
                Surrounding whitespace is ignored.

        Returns:
            A new instance whose fields hold the whitespace-stripped values
            captured from ``text``.

        Raises:
            ValueError: If ``text`` is not wrapped in the start/end tokens or
                its body does not match ``format_spec``.
        """
        stripped = text.strip()
        start, end = cls.start_token(), cls.end_token()
        has_tokens = stripped.startswith(start) and stripped.endswith(end)
        if not has_tokens or len(stripped) < len(start) + len(end):
            raise ValueError("Failed to parse the input string")
        # Use an explicit end index: ``[: -len(end)]`` would produce an empty
        # string whenever the end token is empty.
        content = stripped[len(start) : len(stripped) - len(end)].strip()

        # Split the spec into literal text and ``{{field}}`` placeholders so
        # that the literal text can be regex-escaped instead of being
        # interpreted as a pattern.
        spec = cls.format_spec()
        field_names = list(cls.__fields__)
        if field_names:
            alternation = "|".join(re.escape(name) for name in field_names)
            pieces = re.split(r"\{\{\s*(" + alternation + r")\s*\}\}", spec)
        else:
            pieces = [spec]

        seen = set()
        regex_parts = []
        for index, piece in enumerate(pieces):
            if index % 2 == 0:
                # Even positions are literal text between placeholders.
                regex_parts.append(re.escape(piece))
            elif piece in seen:
                # A repeated placeholder must match the earlier capture; a
                # second named group with the same name would be a re.error.
                regex_parts.append(f"(?P={piece})")
            else:
                seen.add(piece)
                regex_parts.append(f"(?P<{piece}>.*?)")

        # fullmatch makes the lazy groups extend to the end of the content, so
        # a field containing the literal that follows it is not cut short.
        match = re.fullmatch("".join(regex_parts), content, re.DOTALL)
        if not match:
            raise ValueError("Failed to parse the input string")

        parsed_data = {k: v.strip() for k, v in match.groupdict().items()}
        return cls(**parsed_data)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` and wrap it in the tokens.

        Returns:
            The start token, the rendered template and the end token, joined
            by newlines.
        """
        env = Environment(loader=BaseLoader())
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
|
48
|
+
|
49
|
+
|
50
|
+
class CodeFileModel(FormattingModel):
    """A code file described by its language, its path and its contents."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the template: marker line, path line, then a fenced block."""
        return (
            "code_file_model\n"
            "file_path: {{file_path}}\n"
            "```{{language}}\n"
            "{{code}}\n"
            "```"
        )

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter of the formatted text."""
        opening = "<format>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter of the formatted text."""
        closing = "</format>"
        return closing
|
68
|
+
|
69
|
+
|
70
|
+
if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    # It checks generate() -> parse() round trips and the error path.
    # NOTE(review): runs of whitespace inside the string literals below may
    # have been collapsed when this listing was extracted - confirm against
    # the packaged file.

    # First sample: a small Python snippet.
    code_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code='def hello():\n print("Hello, World!")',
    )

    print("Original CodeFileModel:")
    print(code_model)
    print()

    # Render the model to its delimited text form.
    generated = code_model.generate()
    print("Generated string:")
    print(generated)
    print()

    # Parse the rendered text back into a model.
    parsed = CodeFileModel.parse(generated)
    print("Parsed CodeFileModel:")
    print(parsed)
    print()

    # The parsed model must compare equal to the one we started from.
    print("Round-trip test:")
    assert (
        code_model == parsed
    ), "Round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Second sample: JavaScript containing braces, backticks and `${...}`.
    another_model = CodeFileModel(
        language="javascript",
        file_path="src/app.js",
        code="function greet(name) {\n console.log(`Hello, ${name}!`);\n}",
    )

    print("\nAnother CodeFileModel:")
    print(another_model)
    print()

    another_generated = another_model.generate()
    print("Another generated string:")
    print(another_generated)
    print()

    another_parsed = CodeFileModel.parse(another_generated)
    print("Another parsed CodeFileModel:")
    print(another_parsed)
    print()

    print("Another round-trip test:")
    assert (
        another_model == another_parsed
    ), "Another round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Error path: input without the expected structure must raise ValueError.
    print("\nTesting error handling:")
    try:
        CodeFileModel.parse("Invalid format string")
        # Reached only if parse() did not raise; AssertionError is not caught
        # by the handler below, so it propagates and fails the script.
        assert False, "Should have raised a ValueError"
    except ValueError as e:
        print(f"Correctly raised ValueError: {e}")

    print("\nAll tests passed successfully!")
|
@@ -0,0 +1,122 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Dict, Type, TypeVar
|
4
|
+
from jinja2 import Environment, BaseLoader
|
5
|
+
import re
|
6
|
+
|
7
|
+
T = TypeVar('T', bound='FormattingModel')
|
8
|
+
|
9
|
+
class FormattingModel(BaseModel, ABC):
    """Pydantic model that can be rendered to, and parsed from, delimited text.

    Subclasses provide a Jinja template (``format_spec``) in which each model
    field appears as ``{{field_name}}``, plus the start and end tokens that
    wrap the rendered body. ``generate`` renders an instance; ``parse``
    rebuilds an instance from such text.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja template describing the body of the format."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the token that opens the formatted text."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the token that closes the formatted text."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance of ``cls`` from text produced by ``generate``.

        Args:
            text: The formatted string, including start and end tokens.
                Surrounding whitespace is ignored.

        Returns:
            A new instance whose fields hold the whitespace-stripped values
            captured from ``text``.

        Raises:
            ValueError: If ``text`` is not wrapped in the start/end tokens or
                its body does not match ``format_spec``.
        """
        stripped = text.strip()
        start, end = cls.start_token(), cls.end_token()
        has_tokens = stripped.startswith(start) and stripped.endswith(end)
        if not has_tokens or len(stripped) < len(start) + len(end):
            raise ValueError("Failed to parse the input string")
        # Use an explicit end index: ``[:-len(end)]`` would produce an empty
        # string whenever the end token is empty.
        content = stripped[len(start):len(stripped) - len(end)].strip()

        # Split the spec into literal text and ``{{field}}`` placeholders so
        # that the literal text can be regex-escaped instead of being
        # interpreted as a pattern.
        spec = cls.format_spec()
        field_names = list(cls.__fields__)
        if field_names:
            alternation = "|".join(re.escape(name) for name in field_names)
            pieces = re.split(r"\{\{\s*(" + alternation + r")\s*\}\}", spec)
        else:
            pieces = [spec]

        seen = set()
        regex_parts = []
        for index, piece in enumerate(pieces):
            if index % 2 == 0:
                # Even positions are literal text between placeholders.
                regex_parts.append(re.escape(piece))
            elif piece in seen:
                # A repeated placeholder must match the earlier capture; a
                # second named group with the same name would be a re.error.
                regex_parts.append(f"(?P={piece})")
            else:
                seen.add(piece)
                regex_parts.append(f"(?P<{piece}>.*?)")

        # fullmatch makes the lazy groups extend to the end of the content, so
        # a field containing the literal that follows it is not cut short.
        match = re.fullmatch("".join(regex_parts), content, re.DOTALL)
        if not match:
            raise ValueError("Failed to parse the input string")

        parsed_data = {k: v.strip() for k, v in match.groupdict().items()}
        return cls(**parsed_data)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` and wrap it in the tokens.

        Returns:
            The start token, the rendered template and the end token, joined
            by newlines.
        """
        env = Environment(loader=BaseLoader())
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content}\n{self.end_token()}"
|
45
|
+
|
46
|
+
class CodeFileModel(FormattingModel):
    """A code file described by its language, its path and its contents."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the template: marker line, path line, then a fenced block."""
        return (
            "code_file_model\n"
            "file_path: {{file_path}}\n"
            "```{{language}}\n"
            "{{code}}\n"
            "```"
        )

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter of the formatted text."""
        opening = "<format>"
        return opening

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter of the formatted text."""
        closing = "</format>"
        return closing
|
62
|
+
|
63
|
+
if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    # It checks generate() -> parse() round trips and the error path.
    # NOTE(review): runs of whitespace inside the string literals below may
    # have been collapsed when this listing was extracted - confirm against
    # the packaged file.

    # First sample: a small Python snippet.
    code_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code="def hello():\n print(\"Hello, World!\")"
    )

    print("Original CodeFileModel:")
    print(code_model)
    print()

    # Render the model to its delimited text form.
    generated = code_model.generate()
    print("Generated string:")
    print(generated)
    print()

    # Parse the rendered text back into a model.
    parsed = CodeFileModel.parse(generated)
    print("Parsed CodeFileModel:")
    print(parsed)
    print()

    # The parsed model must compare equal to the one we started from.
    print("Round-trip test:")
    assert code_model == parsed, "Round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Second sample: JavaScript containing braces, backticks and `${...}`.
    another_model = CodeFileModel(
        language="javascript",
        file_path="src/app.js",
        code="function greet(name) {\n console.log(`Hello, ${name}!`);\n}"
    )

    print("\nAnother CodeFileModel:")
    print(another_model)
    print()

    another_generated = another_model.generate()
    print("Another generated string:")
    print(another_generated)
    print()

    another_parsed = CodeFileModel.parse(another_generated)
    print("Another parsed CodeFileModel:")
    print(another_parsed)
    print()

    print("Another round-trip test:")
    assert another_model == another_parsed, "Another round-trip test failed: original and parsed models are not equal"
    print("Passed!")

    # Error path: input without the expected structure must raise ValueError.
    print("\nTesting error handling:")
    try:
        CodeFileModel.parse("Invalid format string")
        # Reached only if parse() did not raise; AssertionError is not caught
        # by the handler below, so it propagates and fails the script.
        assert False, "Should have raised a ValueError"
    except ValueError as e:
        print(f"Correctly raised ValueError: {e}")

    print("\nAll tests passed successfully!")
|
@@ -0,0 +1,145 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Type, TypeVar
|
3
|
+
|
4
|
+
from jinja2 import BaseLoader, Environment
|
5
|
+
from parse import parse as str_parse
|
6
|
+
from parse import with_pattern
|
7
|
+
|
8
|
+
from langroid.pydantic_v1 import BaseModel
|
9
|
+
|
10
|
+
T = TypeVar("T", bound="FormattingModel")
|
11
|
+
|
12
|
+
|
13
|
+
@with_pattern(r"[\s\S]*?")
def _match_multiline(text):
    """Convert a captured span into its whitespace-trimmed form.

    The attached pattern matches any characters, newlines included, as
    lazily as possible.
    """
    trimmed = text.strip()
    return trimmed
|
16
|
+
|
17
|
+
|
18
|
+
class FormattingModel(BaseModel, ABC):
    """Pydantic model that can be rendered to, and parsed from, delimited text.

    Subclasses provide a Jinja template for rendering (``format_spec``), a
    ``parse``-library pattern for reading the text back (``parse_spec``), and
    the start and end tokens that wrap the body.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja template used by ``generate``."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse``-library pattern used by ``parse``."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the token that opens the formatted text."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the token that closes the formatted text."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance of ``cls`` from formatted text.

        Args:
            text: The formatted string, including start and end tokens.
                Surrounding whitespace is ignored.

        Returns:
            A new instance built from the named fields captured by
            ``parse_spec``.

        Raises:
            ValueError: If ``text`` is not wrapped in the start/end tokens or
                its body does not match ``parse_spec``.
        """
        stripped = text.strip()
        start, end = cls.start_token(), cls.end_token()
        has_tokens = stripped.startswith(start) and stripped.endswith(end)
        if not has_tokens or len(stripped) < len(start) + len(end):
            raise ValueError(
                f"Input is not wrapped in {start!r} and {end!r}\nInput: {text}"
            )
        # Use an explicit end index: ``[: -len(end)]`` would produce an empty
        # string whenever the end token is empty.
        content = stripped[len(start) : len(stripped) - len(end)].strip()

        spec = cls.parse_spec()
        result = str_parse(
            spec,
            content,
            dict(multiline=_match_multiline),
            case_sensitive=False,
        )
        if result is None:
            raise ValueError(
                f"Failed to parse the input string using spec: {spec}\nInput: {content}"
            )
        return cls(**result.named)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` and wrap it in the tokens.

        Returns:
            The start token, the stripped rendered template and the end
            token, joined by newlines.
        """
        env = Environment(loader=BaseLoader(), trim_blocks=True, lstrip_blocks=True)
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content.strip()}\n{self.end_token()}"
|
59
|
+
|
60
|
+
|
61
|
+
class CodeFileModel(FormattingModel):
    """A code file described by its language, its path and its contents."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the Jinja template: marker, path line, fenced code block.

        Plain ``{{ ... }}`` expressions are used on purpose. The ``{{- ... -}}``
        form strips the whitespace around the expression, newlines included,
        which would fuse the path line, the fence and the code together.
        """
        return (
            "code_file_model\n"
            "file_path: {{ file_path }}\n"
            "```{{ language }}\n"
            "{{ code }}\n"
            "```"
        )

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern that mirrors ``format_spec``.

        Each field is named so it shows up in ``result.named``, which is what
        ``FormattingModel.parse`` passes to the constructor. The custom
        ``multiline`` type is used for every field because its converter
        strips surrounding whitespace, which makes the pattern tolerant of
        spaces after ``file_path:`` and after the opening fence. The built-in
        ``s`` type matches whitespace only and cannot capture a path or a
        language name.
        """
        return (
            "code_file_model\n"
            "file_path:{file_path:multiline}\n"
            "```{language:multiline}\n"
            "{code:multiline}"
            "```"
        )

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter of the formatted text."""
        return "<format>"

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter of the formatted text."""
        return "</format>"
|
87
|
+
|
88
|
+
|
89
|
+
# Test code
|
90
|
+
if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    # Each case parses hand-written text, regenerates it, and checks that
    # parsing the regenerated text yields an equal model.
    # NOTE(review): leading whitespace inside the triple-quoted literals
    # below may have been lost when this listing was extracted - confirm
    # against the packaged file.

    # Case 1: a space after "file_path:" and after the opening fence, and a
    # blank line before the closing fence.
    test_string = """
<format>
code_file_model
file_path: src/main.py
``` python
def hello():
print("Hello, World!")

```
</format>
"""

    parsed = CodeFileModel.parse(test_string)
    print("Parsed model:")
    print(parsed)

    generated = parsed.generate()
    print("\nGenerated string:")
    print(generated)

    reparsed = CodeFileModel.parse(generated)
    print("\nReparsed model:")
    print(reparsed)

    # parse -> generate -> parse must reproduce the same model.
    print("\nRound trip test:")
    assert parsed == reparsed, "Round trip test failed"
    print("Passed!")

    # Case 2: no space after "file_path:", different language and code.
    another_test = """
<format>
code_file_model
file_path:src/app.js
``` javascript
function greet(name) {
console.log(`Hello, ${name}!`);
}
```
</format>
"""

    another_parsed = CodeFileModel.parse(another_test)
    print("\nAnother parsed model:")
    print(another_parsed)

    another_generated = another_parsed.generate()
    print("\nAnother generated string:")
    print(another_generated)

    print("\nAnother round trip test:")
    assert another_parsed == CodeFileModel.parse(
        another_generated
    ), "Another round trip test failed"
    print("Passed!")
|
@@ -0,0 +1,135 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from abc import ABC, abstractmethod
|
3
|
+
from typing import Type, TypeVar
|
4
|
+
from jinja2 import Environment, BaseLoader
|
5
|
+
from parse import parse as str_parse, with_pattern
|
6
|
+
|
7
|
+
T = TypeVar('T', bound='FormattingModel')
|
8
|
+
|
9
|
+
@with_pattern(r'[\s\S]*?')
def _match_multiline(text):
    """Convert a captured span into its whitespace-trimmed form.

    The attached pattern matches any characters, newlines included, as
    lazily as possible.
    """
    trimmed = text.strip()
    return trimmed
|
12
|
+
|
13
|
+
class FormattingModel(BaseModel, ABC):
    """Pydantic model that can be rendered to, and parsed from, delimited text.

    Subclasses provide a Jinja template for rendering (``format_spec``), a
    ``parse``-library pattern for reading the text back (``parse_spec``), and
    the start and end tokens that wrap the body.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the Jinja template used by ``generate``."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse``-library pattern used by ``parse``."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the token that opens the formatted text."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the token that closes the formatted text."""

    @classmethod
    def parse(cls: Type[T], text: str) -> T:
        """Build an instance of ``cls`` from formatted text.

        Args:
            text: The formatted string, including start and end tokens.
                Surrounding whitespace is ignored.

        Returns:
            A new instance built from the named fields captured by
            ``parse_spec``.

        Raises:
            ValueError: If ``text`` is not wrapped in the start/end tokens or
                its body does not match ``parse_spec``.
        """
        stripped = text.strip()
        start, end = cls.start_token(), cls.end_token()
        has_tokens = stripped.startswith(start) and stripped.endswith(end)
        if not has_tokens or len(stripped) < len(start) + len(end):
            raise ValueError(
                f"Input is not wrapped in {start!r} and {end!r}\nInput: {text}"
            )
        # Use an explicit end index: ``[:-len(end)]`` would produce an empty
        # string whenever the end token is empty.
        content = stripped[len(start):len(stripped) - len(end)].strip()

        spec = cls.parse_spec()
        result = str_parse(spec, content, dict(multiline=_match_multiline), case_sensitive=False)
        if result is None:
            raise ValueError(f"Failed to parse the input string using spec: {spec}\nInput: {content}")
        return cls(**result.named)

    def generate(self) -> str:
        """Render this instance with ``format_spec`` and wrap it in the tokens.

        Returns:
            The start token, the stripped rendered template and the end
            token, joined by newlines.
        """
        env = Environment(loader=BaseLoader(), trim_blocks=True, lstrip_blocks=True)
        template = env.from_string(self.format_spec())
        content = template.render(**self.dict())
        return f"{self.start_token()}\n{content.strip()}\n{self.end_token()}"
|
47
|
+
|
48
|
+
class CodeFileModel(FormattingModel):
    """A code file described by its language, its path and its contents."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls) -> str:
        """Return the Jinja template: marker, path line, fenced code block.

        Plain ``{{ ... }}`` expressions are used on purpose. The ``{{- ... -}}``
        form strips the whitespace around the expression, newlines included,
        which would fuse the path line, the fence and the code together.
        """
        return (
            "code_file_model\n"
            "file_path: {{ file_path }}\n"
            "```{{ language }}\n"
            "{{ code }}\n"
            "```"
        )

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern that mirrors ``format_spec``.

        Each field is named so it shows up in ``result.named``, which is what
        ``FormattingModel.parse`` passes to the constructor. The custom
        ``multiline`` type is used for every field because its converter
        strips surrounding whitespace, which makes the pattern tolerant of
        spaces after ``file_path:`` and after the opening fence. The built-in
        ``s`` type matches whitespace only and cannot capture a path or a
        language name.
        """
        return (
            "code_file_model\n"
            "file_path:{file_path:multiline}\n"
            "```{language:multiline}\n"
            "{code:multiline}"
            "```"
        )

    @classmethod
    def start_token(cls) -> str:
        """Return the opening delimiter of the formatted text."""
        return "<format>"

    @classmethod
    def end_token(cls) -> str:
        """Return the closing delimiter of the formatted text."""
        return "</format>"
|
80
|
+
|
81
|
+
# Test code
|
82
|
+
if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    # Each case parses hand-written text, regenerates it, and checks that
    # parsing the regenerated text yields an equal model.
    # NOTE(review): leading whitespace inside the triple-quoted literals
    # below may have been lost when this listing was extracted - confirm
    # against the packaged file.

    # Case 1: a space after "file_path:" and after the opening fence, and a
    # blank line before the closing fence.
    test_string = """
<format>
code_file_model
file_path: src/main.py
``` python
def hello():
print("Hello, World!")

```
</format>
"""

    parsed = CodeFileModel.parse(test_string)
    print("Parsed model:")
    print(parsed)

    generated = parsed.generate()
    print("\nGenerated string:")
    print(generated)

    reparsed = CodeFileModel.parse(generated)
    print("\nReparsed model:")
    print(reparsed)

    # parse -> generate -> parse must reproduce the same model.
    print("\nRound trip test:")
    assert parsed == reparsed, "Round trip test failed"
    print("Passed!")

    # Case 2: no space after "file_path:", different language and code.
    another_test = """
<format>
code_file_model
file_path:src/app.js
``` javascript
function greet(name) {
console.log(`Hello, ${name}!`);
}
```
</format>
"""

    another_parsed = CodeFileModel.parse(another_test)
    print("\nAnother parsed model:")
    print(another_parsed)

    another_generated = another_parsed.generate()
    print("\nAnother generated string:")
    print(another_generated)

    print("\nAnother round trip test:")
    assert another_parsed == CodeFileModel.parse(another_generated), "Another round trip test failed"
    print("Passed!")
|
File without changes
|
@@ -0,0 +1,168 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
|
3
|
+
from lark import Lark, Transformer, v_args
|
4
|
+
|
5
|
+
from langroid.pydantic_v1 import BaseModel
|
6
|
+
|
7
|
+
|
8
|
+
class FormattingModel(BaseModel, ABC):
    """Pydantic model that is formatted with ``str.format`` and parsed with Lark.

    Subclasses supply the ``str.format`` template, the Lark grammar, and the
    start and end tokens that must sit on the first and last lines.
    """

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the body."""

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the Lark grammar (start rule ``start``) for the body."""

    @classmethod
    @abstractmethod
    def start_token(cls) -> str:
        """Return the line that opens the formatted text."""

    @classmethod
    @abstractmethod
    def end_token(cls) -> str:
        """Return the line that closes the formatted text."""

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Fill ``format_spec`` with the instance's fields and add the tokens."""
        body = cls.format_spec().format(**instance.dict())
        return f"{cls.start_token()}\n{body}\n{cls.end_token()}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Parse token-delimited text into an instance of ``cls``.

        Raises:
            ValueError: If the first or last line is not the expected token.
        """
        rows = formatted_string.strip().split("\n")
        if not (rows[0] == cls.start_token() and rows[-1] == cls.end_token()):
            raise ValueError("Invalid start or end token")
        body = "\n".join(rows[1:-1])

        grammar_parser = Lark(cls.parse_spec(), start="start", parser="lalr")

        # NOTE(review): the callbacks below are named after the rules
        # start / file_path / language / code, so this base class only works
        # with a grammar that defines exactly those rules.
        # NOTE(review): `start` takes three inlined children and reads
        # code_block.children[1] and [3]; confirm this matches the tree Lark
        # builds for the grammar (NEWLINE tokens may shift the positions).
        @v_args(inline=True)
        class _FieldCollector(Transformer):
            def start(self, _, file_path, code_block):
                parts = code_block.children
                return dict(file_path=file_path, language=parts[1], code=parts[3])

            def file_path(self, token):
                return token.value

            def language(self, token):
                return token.value

            def code(self, token):
                return token.value.strip()

        parse_tree = grammar_parser.parse(body)
        fields = _FieldCollector().transform(parse_tree)
        return cls(**fields)
|
65
|
+
|
66
|
+
|
67
|
+
class CodeFileModel(FormattingModel):
    """A code file described by its language, its path and its contents."""

    language: str
    file_path: str
    code: str

    @classmethod
    def format_spec(cls):
        """Return the template: marker line, path line, then a fenced block."""
        return (
            "code_file_model\n"
            "{file_path}\n"
            "```{language}\n"
            "{code}\n"
            "```"
        )

    @classmethod
    def parse_spec(cls):
        """Return the Lark grammar that reads the layout of ``format_spec``."""
        # NOTE(review): the grammar keeps NEWLINE as a token and also ignores
        # WS; confirm the two interact as intended.
        grammar = """
start: "code_file_model" NEWLINE file_path NEWLINE code_block
file_path: /[^\\n]+/
code_block: "```" language NEWLINE code "```"
language: /[^\\n]+/
code: /.+?(?=\\n```)/s
NEWLINE: "\\n"
%import common.WS
%ignore WS
"""
        return grammar

    @classmethod
    def start_token(cls):
        """Return the opening delimiter line."""
        opening = "<format>"
        return opening

    @classmethod
    def end_token(cls):
        """Return the closing delimiter line."""
        closing = "</format>"
        return closing
|
96
|
+
|
97
|
+
|
98
|
+
# Test cases
|
99
|
+
if __name__ == "__main__":
    # Manual smoke test, run only when the module is executed as a script.
    # NOTE(review): whitespace inside the string literals below (code
    # indentation in particular) may have been lost when this listing was
    # extracted - confirm against the packaged file.

    # Formatting: the rendered text must equal the expected literal exactly.
    code_file = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    formatted = CodeFileModel.format(code_file)
    expected_format = """<format>
code_file_model
src/main.py
```Python
def hello():
print('Hello, World!')
```
</format>"""
    assert (
        formatted == expected_format
    ), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
    print("Formatting test passed.")

    # Parsing: the formatted text must parse back to an equal model.
    parsed = CodeFileModel.parse(formatted)
    assert (
        parsed == code_file
    ), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
    print("Parsing test passed.")

    # Round trip: format and parse in a single expression.
    round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
    assert (
        round_trip == code_file
    ), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
    print("Round-trip test passed.")

    # Same round trip with a different language, path and code.
    code_file2 = CodeFileModel(
        language="JavaScript",
        file_path="src/app.js",
        code="function greet() {\n console.log('Hello, World!');\n}",
    )
    formatted2 = CodeFileModel.format(code_file2)
    parsed2 = CodeFileModel.parse(formatted2)
    assert (
        parsed2 == code_file2
    ), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
    print("Different values test passed.")

    # Tolerant parsing: a blank line after the path and a space after the
    # opening fence must still yield the same model.
    tolerant_input = """<format>
code_file_model
src/main.py

``` Python
def hello():
print('Hello, World!')
```
</format>"""
    parsed_tolerant = CodeFileModel.parse(tolerant_input)
    expected_tolerant = CodeFileModel(
        language="Python",
        file_path="src/main.py",
        code="def hello():\n print('Hello, World!')",
    )
    assert (
        parsed_tolerant == expected_tolerant
    ), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
    print("Tolerant parsing test passed.")

    print("All tests passed successfully!")
|