langroid 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +6 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.5.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar
|
3
|
+
|
4
|
+
from parse import parse
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and parse from, a delimited text block.

    The rendered text is ``START_TOKEN``, a newline, the body produced from
    ``format_spec()``, a newline, and ``END_TOKEN``. Subclasses describe the
    body twice: as a ``str.format`` template (``format_spec``) and as a
    ``parse`` pattern (``parse_spec``).
    """

    class Config:
        arbitrary_types_allowed = True

    # Delimiters placed around the body; subclasses may override them.
    START_TOKEN: ClassVar[str] = "<format>"
    END_TOKEN: ClassVar[str] = "</format>"

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render the body."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern used to read the body back."""
        pass

    @classmethod
    def format(cls, instance: "FormattingModel") -> str:
        """Render ``instance`` as a token-delimited block.

        Args:
            instance: model whose ``dict()`` supplies the template values.

        Returns:
            ``START_TOKEN``, the formatted body and ``END_TOKEN``, joined by
            newlines.
        """
        body = cls.format_spec().format(**instance.dict())
        # Interpolate the tokens directly rather than passing them through a
        # second ``str.format`` call, so a token containing ``{`` or ``}`` is
        # emitted literally instead of being read as a placeholder.
        return f"{cls.START_TOKEN}\n{body}\n{cls.END_TOKEN}"

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build an instance from text produced by :meth:`format`.

        Args:
            formatted_string: text holding one token-delimited block.

        Returns:
            An instance of ``cls`` built from the named fields that
            ``parse_spec()`` extracts from the body.

        Raises:
            ValueError: if the delimiters are not found, or the body does not
                match ``parse_spec()``.
        """
        # Double any braces in the tokens so ``parse`` treats them as literal
        # text rather than as field markers.
        start = cls.START_TOKEN.replace("{", "{{").replace("}", "}}")
        end = cls.END_TOKEN.replace("{", "{{").replace("}", "}}")
        outer_template = f"{start}\n{{content}}\n{end}"

        outer_parsed = parse(outer_template, formatted_string, case_sensitive=False)
        if not outer_parsed:
            # The pattern has to match the whole string, so retry without
            # leading/trailing whitespace before giving up.
            outer_parsed = parse(
                outer_template, formatted_string.strip(), case_sensitive=False
            )
        if not outer_parsed:
            raise ValueError("Invalid outer format")

        content = outer_parsed["content"]
        parse_template = cls.parse_spec()
        parsed = parse(parse_template, content, case_sensitive=False)
        if not parsed:
            raise ValueError(
                f"Failed to parse content:\n{content}\nusing spec:\n{parse_template}"
            )

        return cls(**parsed.named)
|
49
|
+
|
50
|
+
|
51
|
+
class PersonModel(FormattingModel):
    """Example model rendered between ``<person>`` and ``</person>``."""

    name: str
    age: int
    city: str

    START_TOKEN: ClassVar[str] = "<person>"
    END_TOKEN: ClassVar[str] = "</person>"

    @classmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the three fields."""
        return "name: {name}\n{age} is the age\nlives in {city}"

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern mirroring :meth:`format_spec`."""
        # ``name`` and ``city`` are left untyped: the ``S`` type matches only
        # non-whitespace characters, so values such as "John Doe" or
        # "New York" written by ``format_spec`` could not be read back.
        return "name: {name}\n{age:d} is the age\nlives in {city}"
|
66
|
+
|
67
|
+
|
68
|
+
# Tests
|
69
|
+
if __name__ == "__main__":
    # Reference instance that every check below compares against.
    original = PersonModel(name="John Doe", age=30, city="New York")

    # Rendering must produce exactly this block.
    expected_text = "\n".join(
        [
            "<person>",
            "name: John Doe",
            "30 is the age",
            "lives in New York",
            "</person>",
        ]
    )
    rendered = PersonModel.format(original)
    print("Formatted string:")
    print(rendered)
    assert rendered == expected_text

    # Parsing the rendered text must give back an equal model.
    reparsed = PersonModel.parse(rendered)
    print("\nParsed person:", reparsed)
    assert reparsed == original

    # Same round trip, done in a single expression.
    assert PersonModel.parse(PersonModel.format(original)) == original

    # Surrounding newlines and different casing are expected to parse too.
    messy_text = """
<PERSON>
Name: John Doe
30 IS THE AGE
Lives in New York
</person>
"""
    assert PersonModel.parse(messy_text) == original

    print("All tests passed!")
|
@@ -0,0 +1,98 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar
|
3
|
+
from pydantic import BaseModel
|
4
|
+
from parse import parse
|
5
|
+
|
6
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and parse from, a delimited text block.

    The rendered text is ``START_TOKEN``, a newline, the body produced from
    ``format_spec()``, a newline, and ``END_TOKEN``. Subclasses describe the
    body twice: as a ``str.format`` template (``format_spec``) and as a
    ``parse`` pattern (``parse_spec``).
    """

    class Config:
        arbitrary_types_allowed = True

    # Delimiters placed around the body; subclasses may override them.
    START_TOKEN: ClassVar[str] = "<format>"
    END_TOKEN: ClassVar[str] = "</format>"

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render the body."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern used to read the body back."""
        pass

    @classmethod
    def format(cls, instance: 'FormattingModel') -> str:
        """Render ``instance`` as a token-delimited block.

        Args:
            instance: model whose ``dict()`` supplies the template values.

        Returns:
            ``START_TOKEN``, the formatted body and ``END_TOKEN``, joined by
            newlines.
        """
        body = cls.format_spec().format(**instance.dict())
        # Interpolate the tokens directly rather than passing them through a
        # second ``str.format`` call, so a token containing ``{`` or ``}`` is
        # emitted literally instead of being read as a placeholder.
        return f"{cls.START_TOKEN}\n{body}\n{cls.END_TOKEN}"

    @classmethod
    def parse(cls, formatted_string: str) -> 'FormattingModel':
        """Build an instance from text produced by :meth:`format`.

        Args:
            formatted_string: text holding one token-delimited block.

        Returns:
            An instance of ``cls`` built from the named fields that
            ``parse_spec()`` extracts from the body.

        Raises:
            ValueError: if the delimiters are not found, or the body does not
                match ``parse_spec()``.
        """
        # Double any braces in the tokens so ``parse`` treats them as literal
        # text rather than as field markers.
        start = cls.START_TOKEN.replace("{", "{{").replace("}", "}}")
        end = cls.END_TOKEN.replace("{", "{{").replace("}", "}}")
        outer_template = f"{start}\n{{content}}\n{end}"

        outer_parsed = parse(outer_template, formatted_string, case_sensitive=False)
        if not outer_parsed:
            # The pattern has to match the whole string, so retry without
            # leading/trailing whitespace before giving up.
            outer_parsed = parse(
                outer_template, formatted_string.strip(), case_sensitive=False
            )
        if not outer_parsed:
            raise ValueError("Invalid outer format")

        content = outer_parsed['content']
        parse_template = cls.parse_spec()
        parsed = parse(parse_template, content, case_sensitive=False)
        if not parsed:
            raise ValueError(
                f"Failed to parse content:\n{content}\nusing spec:\n{parse_template}"
            )

        return cls(**parsed.named)
|
44
|
+
|
45
|
+
class PersonModel(FormattingModel):
    """Example model rendered between ``<person>`` and ``</person>``."""

    name: str
    age: int
    city: str

    START_TOKEN: ClassVar[str] = "<person>"
    END_TOKEN: ClassVar[str] = "</person>"

    @classmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the three fields."""
        return "name: {name}\n{age} is the age\nlives in {city}"

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern mirroring :meth:`format_spec`."""
        # ``name`` and ``city`` are left untyped: the ``S`` type matches only
        # non-whitespace characters, so values such as "John Doe" or
        # "New York" written by ``format_spec`` could not be read back.
        return "name: {name}\n{age:d} is the age\nlives in {city}"
|
60
|
+
|
61
|
+
# Tests
|
62
|
+
if __name__ == "__main__":
    # Reference instance that every check below compares against.
    original = PersonModel(name="John Doe", age=30, city="New York")

    # Rendering must produce exactly this block.
    expected_text = "\n".join(
        [
            "<person>",
            "name: John Doe",
            "30 is the age",
            "lives in New York",
            "</person>",
        ]
    )
    rendered = PersonModel.format(original)
    print("Formatted string:")
    print(rendered)
    assert rendered == expected_text

    # Parsing the rendered text must give back an equal model.
    reparsed = PersonModel.parse(rendered)
    print("\nParsed person:", reparsed)
    assert reparsed == original

    # Same round trip, done in a single expression.
    assert PersonModel.parse(PersonModel.format(original)) == original

    # Surrounding newlines and different casing are expected to parse too.
    messy_text = """
<PERSON>
Name: John Doe
30 IS THE AGE
Lives in New York
</person>
"""
    assert PersonModel.parse(messy_text) == original

    print("All tests passed!")
|
@@ -0,0 +1,113 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar, Dict
|
3
|
+
|
4
|
+
import parse
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and parse from, a text layout.

    Subclasses supply the layout twice: ``format_spec()`` is a ``str.format``
    template used by :meth:`format`, and ``parse_spec()`` is a ``parse``
    pattern used by :meth:`parse`.
    """

    # Delimiters available to subclasses when they build their specs.
    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"
    # Not read anywhere in this class.
    field_token_map: ClassVar[Dict[str, str]] = {}

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render an instance."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern used to read an instance back."""
        pass

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build an instance from text matching ``parse_spec()``.

        Args:
            formatted_string: text to parse.

        Returns:
            An instance of ``cls`` built from the pattern's named fields.

        Raises:
            ValueError: if the text does not match the pattern.
        """
        parser = parse.compile(cls.parse_spec())
        result = parser.parse(formatted_string)
        if result is None:
            # The pattern has to match the whole string, so retry without
            # leading/trailing whitespace before giving up.
            result = parser.parse(formatted_string.strip())
        if result is None:
            raise ValueError(f"Unable to parse: {formatted_string}")
        return cls(**result.named)

    def format(self) -> str:
        """Render this instance with ``format_spec()``.

        Returns:
            The template filled in with this instance's attribute values.
        """
        # Gather annotated names from every FormattingModel class in the MRO.
        # ``self.__annotations__`` alone holds only the most-derived class's
        # own annotations, which drops fields declared on a parent model.
        names = {}
        for klass in reversed(type(self).__mro__):
            if issubclass(klass, FormattingModel):
                names.update(getattr(klass, "__annotations__", {}))
        field_values = {name: getattr(self, name) for name in names}
        return self.format_spec().format(**field_values)
|
36
|
+
|
37
|
+
|
38
|
+
class CodeFileModel(FormattingModel):
    """A source file (language, path, code) laid out as a fenced code block.

    The layout is the start token, the literal line ``code_file_model``, the
    file path, a backtick fence tagged with the language, the code, the
    closing fence, and the end token, one per line.
    """

    language: str
    file_path: str
    code: str

    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"

    @classmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the layout."""
        layout = [
            cls.start_token,
            "code_file_model",
            "{file_path}",
            "```{language}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(layout)

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern; it is the same text as the template."""
        pattern_lines = [
            cls.start_token,
            "code_file_model",
            "{file_path}",
            "```{language}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(pattern_lines)
|
69
|
+
|
70
|
+
|
71
|
+
# Informal tests
|
72
|
+
if __name__ == "__main__":
    # Informal self-checks. Failures are raised explicitly rather than with
    # ``assert`` so they still fire when Python runs with ``-O``.
    my_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code='def hello():\n print("hello world")',
    )

    formatted = my_model.format()
    print("Formatted:")
    print(formatted)

    parsed = CodeFileModel.parse(formatted)
    print("\nParsed:")
    print(parsed)

    if not (my_model == parsed):
        raise AssertionError("Round trip failed")

    # Parse a hand-written block; the result is only printed, not checked.
    lenient_formatted = """<format>
code_file_model
src/test.py
```javascript
function test() {
console.log("Hello, world!");
}
```
</format>"""

    parsed_lenient = CodeFileModel.parse(lenient_formatted)
    print("\nParsed lenient:")
    print(parsed_lenient)

    # Text that does not follow the layout must be rejected.
    try:
        CodeFileModel.parse("Invalid format")
    except ValueError as e:
        print(f"\nCaught expected ValueError: {e}")
    else:
        raise AssertionError("Expected ValueError was not raised")

    print("\nAll tests passed!")
|
@@ -0,0 +1,109 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, ClassVar
|
3
|
+
from pydantic import BaseModel
|
4
|
+
import parse
|
5
|
+
|
6
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and parse from, a text layout.

    Subclasses supply the layout twice: ``format_spec()`` is a ``str.format``
    template used by :meth:`format`, and ``parse_spec()`` is a ``parse``
    pattern used by :meth:`parse`.
    """

    # Delimiters available to subclasses when they build their specs.
    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"
    # Not read anywhere in this class.
    field_token_map: ClassVar[Dict[str, str]] = {}

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render an instance."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern used to read an instance back."""
        pass

    @classmethod
    def parse(cls, formatted_string: str) -> 'FormattingModel':
        """Build an instance from text matching ``parse_spec()``.

        Args:
            formatted_string: text to parse.

        Returns:
            An instance of ``cls`` built from the pattern's named fields.

        Raises:
            ValueError: if the text does not match the pattern.
        """
        parser = parse.compile(cls.parse_spec())
        result = parser.parse(formatted_string)
        if result is None:
            # The pattern has to match the whole string, so retry without
            # leading/trailing whitespace before giving up.
            result = parser.parse(formatted_string.strip())
        if result is None:
            raise ValueError(f"Unable to parse: {formatted_string}")
        return cls(**result.named)

    def format(self) -> str:
        """Render this instance with ``format_spec()``.

        Returns:
            The template filled in with this instance's attribute values.
        """
        # Gather annotated names from every FormattingModel class in the MRO.
        # ``self.__annotations__`` alone holds only the most-derived class's
        # own annotations, which drops fields declared on a parent model.
        names = {}
        for klass in reversed(type(self).__mro__):
            if issubclass(klass, FormattingModel):
                names.update(getattr(klass, "__annotations__", {}))
        field_values = {name: getattr(self, name) for name in names}
        return self.format_spec().format(**field_values)
|
33
|
+
|
34
|
+
class CodeFileModel(FormattingModel):
    """A source file (language, path, code) laid out as a fenced code block.

    The layout is the start token, the literal line ``code_file_model``, the
    file path, a backtick fence tagged with the language, the code, the
    closing fence, and the end token, one per line.
    """

    language: str
    file_path: str
    code: str

    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"

    @classmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the layout."""
        layout = [
            cls.start_token,
            "code_file_model",
            "{file_path}",
            "```{language}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(layout)

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern; it is the same text as the template."""
        pattern_lines = [
            cls.start_token,
            "code_file_model",
            "{file_path}",
            "```{language}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(pattern_lines)
|
65
|
+
|
66
|
+
# Informal tests
|
67
|
+
if __name__ == "__main__":
    # Informal self-checks. Failures are raised explicitly rather than with
    # ``assert`` so they still fire when Python runs with ``-O``.
    my_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code="def hello():\n print(\"hello world\")"
    )

    formatted = my_model.format()
    print("Formatted:")
    print(formatted)

    parsed = CodeFileModel.parse(formatted)
    print("\nParsed:")
    print(parsed)

    if not (my_model == parsed):
        raise AssertionError("Round trip failed")

    # Parse a hand-written block; the result is only printed, not checked.
    lenient_formatted = """<format>
code_file_model
src/test.py
```javascript
function test() {
console.log("Hello, world!");
}
```
</format>"""

    parsed_lenient = CodeFileModel.parse(lenient_formatted)
    print("\nParsed lenient:")
    print(parsed_lenient)

    # Text that does not follow the layout must be rejected.
    try:
        CodeFileModel.parse("Invalid format")
    except ValueError as e:
        print(f"\nCaught expected ValueError: {e}")
    else:
        raise AssertionError("Expected ValueError was not raised")

    print("\nAll tests passed!")
|
109
|
+
|
@@ -0,0 +1,114 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import ClassVar, Dict
|
3
|
+
|
4
|
+
import parse
|
5
|
+
|
6
|
+
from langroid.pydantic_v1 import BaseModel
|
7
|
+
|
8
|
+
|
9
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and parse from, a text layout.

    Subclasses supply the layout twice: ``format_spec()`` is a ``str.format``
    template used by :meth:`format`, and ``parse_spec()`` is a ``parse``
    pattern used by :meth:`parse`.
    """

    # Delimiters available to subclasses when they build their specs.
    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"
    # Not read anywhere in this class.
    field_token_map: ClassVar[Dict[str, str]] = {}

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render an instance."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern used to read an instance back."""
        pass

    @classmethod
    def parse(cls, formatted_string: str) -> "FormattingModel":
        """Build an instance from text matching ``parse_spec()``.

        Args:
            formatted_string: text to parse.

        Returns:
            An instance of ``cls`` built from the pattern's named fields.

        Raises:
            ValueError: if the text does not match the pattern.
        """
        parser = parse.compile(cls.parse_spec())
        result = parser.parse(formatted_string)
        if result is None:
            # The pattern has to match the whole string, so retry without
            # leading/trailing whitespace before giving up.
            result = parser.parse(formatted_string.strip())
        if result is None:
            raise ValueError(f"Unable to parse: {formatted_string}")
        return cls(**result.named)

    def format(self) -> str:
        """Render this instance with ``format_spec()``.

        Returns:
            The template filled in with this instance's attribute values.
        """
        # Gather annotated names from every FormattingModel class in the MRO.
        # ``self.__annotations__`` alone holds only the most-derived class's
        # own annotations, which drops fields declared on a parent model.
        names = {}
        for klass in reversed(type(self).__mro__):
            if issubclass(klass, FormattingModel):
                names.update(getattr(klass, "__annotations__", {}))
        field_values = {name: getattr(self, name) for name in names}
        return self.format_spec().format(**field_values)
|
36
|
+
|
37
|
+
|
38
|
+
class CodeFileModel(FormattingModel):
    """A source file (language, path, code) laid out as a fenced code block.

    The layout is the start token, the literal line ``code_file_model``, the
    file path, a backtick fence tagged with the language, the code, the
    closing fence, and the end token, one per line.
    """

    language: str
    file_path: str
    code: str

    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"

    @classmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the layout."""
        layout = [
            cls.start_token,
            "code_file_model",
            "{file_path}",
            "```{language}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(layout)

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern for the layout.

        It differs from the template only in giving ``file_path`` and
        ``language`` the ``^`` alignment spec.
        """
        pattern_lines = [
            cls.start_token,
            "code_file_model",
            "{file_path:^}",
            "```{language:^}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(pattern_lines)
|
69
|
+
|
70
|
+
|
71
|
+
# Informal tests
|
72
|
+
if __name__ == "__main__":
    # Informal self-checks. Failures are raised explicitly rather than with
    # ``assert`` so they still fire when Python runs with ``-O``.
    my_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code='def hello():\n print("hello world")',
    )

    formatted = my_model.format()
    print("Formatted:")
    print(formatted)

    parsed = CodeFileModel.parse(formatted)
    print("\nParsed:")
    print(parsed)

    if not (my_model == parsed):
        raise AssertionError("Round trip failed")

    # Parse a hand-written block with a blank line before the fence and a
    # space after it; the result is only printed, not checked.
    lenient_formatted = """<format>
code_file_model
src/test.py

``` javascript
function test() {
console.log("Hello, world!");
}
```
</format>"""

    parsed_lenient = CodeFileModel.parse(lenient_formatted)
    print("\nParsed lenient:")
    print(parsed_lenient)

    # Text that does not follow the layout must be rejected.
    try:
        CodeFileModel.parse("Invalid format")
    except ValueError as e:
        print(f"\nCaught expected ValueError: {e}")
    else:
        raise AssertionError("Expected ValueError was not raised")

    print("\nAll tests passed!")
|
@@ -0,0 +1,110 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, ClassVar
|
3
|
+
from pydantic import BaseModel
|
4
|
+
import parse
|
5
|
+
|
6
|
+
class FormattingModel(BaseModel, ABC):
    """Base for models that render to, and parse from, a text layout.

    Subclasses supply the layout twice: ``format_spec()`` is a ``str.format``
    template used by :meth:`format`, and ``parse_spec()`` is a ``parse``
    pattern used by :meth:`parse`.
    """

    # Delimiters available to subclasses when they build their specs.
    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"
    # Not read anywhere in this class.
    field_token_map: ClassVar[Dict[str, str]] = {}

    @classmethod
    @abstractmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template used to render an instance."""
        pass

    @classmethod
    @abstractmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern used to read an instance back."""
        pass

    @classmethod
    def parse(cls, formatted_string: str) -> 'FormattingModel':
        """Build an instance from text matching ``parse_spec()``.

        Args:
            formatted_string: text to parse.

        Returns:
            An instance of ``cls`` built from the pattern's named fields.

        Raises:
            ValueError: if the text does not match the pattern.
        """
        parser = parse.compile(cls.parse_spec())
        result = parser.parse(formatted_string)
        if result is None:
            # The pattern has to match the whole string, so retry without
            # leading/trailing whitespace before giving up.
            result = parser.parse(formatted_string.strip())
        if result is None:
            raise ValueError(f"Unable to parse: {formatted_string}")
        return cls(**result.named)

    def format(self) -> str:
        """Render this instance with ``format_spec()``.

        Returns:
            The template filled in with this instance's attribute values.
        """
        # Gather annotated names from every FormattingModel class in the MRO.
        # ``self.__annotations__`` alone holds only the most-derived class's
        # own annotations, which drops fields declared on a parent model.
        names = {}
        for klass in reversed(type(self).__mro__):
            if issubclass(klass, FormattingModel):
                names.update(getattr(klass, "__annotations__", {}))
        field_values = {name: getattr(self, name) for name in names}
        return self.format_spec().format(**field_values)
|
33
|
+
|
34
|
+
class CodeFileModel(FormattingModel):
    """A source file (language, path, code) laid out as a fenced code block.

    The layout is the start token, the literal line ``code_file_model``, the
    file path, a backtick fence tagged with the language, the code, the
    closing fence, and the end token, one per line.
    """

    language: str
    file_path: str
    code: str

    start_token: ClassVar[str] = "<format>"
    end_token: ClassVar[str] = "</format>"

    @classmethod
    def format_spec(cls) -> str:
        """Return the ``str.format`` template for the layout."""
        layout = [
            cls.start_token,
            "code_file_model",
            "{file_path}",
            "```{language}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(layout)

    @classmethod
    def parse_spec(cls) -> str:
        """Return the ``parse`` pattern for the layout.

        It differs from the template only in giving ``file_path`` and
        ``language`` the ``^`` alignment spec.
        """
        pattern_lines = [
            cls.start_token,
            "code_file_model",
            "{file_path:^}",
            "```{language:^}",
            "{code}",
            "```",
            cls.end_token,
        ]
        return "\n".join(pattern_lines)
|
65
|
+
|
66
|
+
# Informal tests
|
67
|
+
if __name__ == "__main__":
    # Informal self-checks. Failures are raised explicitly rather than with
    # ``assert`` so they still fire when Python runs with ``-O``.
    my_model = CodeFileModel(
        language="python",
        file_path="src/main.py",
        code="def hello():\n print(\"hello world\")"
    )

    formatted = my_model.format()
    print("Formatted:")
    print(formatted)

    parsed = CodeFileModel.parse(formatted)
    print("\nParsed:")
    print(parsed)

    if not (my_model == parsed):
        raise AssertionError("Round trip failed")

    # Parse a hand-written block with a blank line before the fence and a
    # space after it; the result is only printed, not checked.
    lenient_formatted = """<format>
code_file_model
src/test.py

``` javascript
function test() {
console.log("Hello, world!");
}
```
</format>"""

    parsed_lenient = CodeFileModel.parse(lenient_formatted)
    print("\nParsed lenient:")
    print(parsed_lenient)

    # Text that does not follow the layout must be rejected.
    try:
        CodeFileModel.parse("Invalid format")
    except ValueError as e:
        print(f"\nCaught expected ValueError: {e}")
    else:
        raise AssertionError("Expected ValueError was not raised")

    print("\nAll tests passed!")
|
110
|
+
|