langroid 0.16.7__py3-none-any.whl → 0.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +45 -21
- langroid/agent/chat_agent.py +22 -14
- langroid/agent/chat_document.py +22 -13
- langroid/agent/tool_message.py +11 -11
- langroid/agent/tools/file_tools.py +234 -0
- langroid/agent/xml_tool_message.py +179 -45
- langroid/utils/constants.py +2 -0
- langroid/utils/git_utils.py +251 -0
- langroid/utils/system.py +78 -0
- {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/METADATA +6 -3
- {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/RECORD +14 -89
- pyproject.toml +3 -2
- langroid/agent/md_tool_message_grammar.py +0 -455
- langroid/agent/tools/code_file_tool_parse.py +0 -150
- langroid/agent/tools/code_file_tool_pyparsing.py +0 -194
- langroid/agent/tools/code_file_tool_pyparsing2.py +0 -199
- langroid/agent/tools/extract_tool.py +0 -96
- langroid/agent/tools/formatted_model_custom.py +0 -150
- langroid/agent/tools/formatted_model_custom2.py +0 -168
- langroid/agent/tools/formatted_model_custom3.py +0 -279
- langroid/agent/tools/formatted_model_custom4.py +0 -395
- langroid/agent/tools/formatted_model_jinja.py +0 -133
- langroid/agent/tools/formatted_model_jinja.py-e +0 -122
- langroid/agent/tools/formatted_model_jinja2.py +0 -145
- langroid/agent/tools/formatted_model_jinja2.py-e +0 -135
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +0 -168
- langroid/agent/tools/formatted_model_parse.py +0 -105
- langroid/agent/tools/formatted_model_parse.py-e +0 -98
- langroid/agent/tools/formatted_model_parse2.py +0 -113
- langroid/agent/tools/formatted_model_parse2.py-e +0 -109
- langroid/agent/tools/formatted_model_parse3.py +0 -114
- langroid/agent/tools/formatted_model_parse3.py-e +0 -110
- langroid/agent/tools/formatted_model_parsimon.py +0 -194
- langroid/agent/tools/formatted_model_parsimon.py-e +0 -186
- langroid/agent/tools/formatted_model_pyparsing.py +0 -169
- langroid/agent/tools/formatted_model_pyparsing.py-e +0 -149
- langroid/agent/tools/formatted_model_pyparsing2.py +0 -159
- langroid/agent/tools/formatted_model_pyparsing2.py-e +0 -143
- langroid/agent/tools/formatted_model_pyparsing3.py +0 -133
- langroid/agent/tools/formatted_model_pyparsing3.py-e +0 -121
- langroid/agent/tools/formatted_model_pyparsing4.py +0 -213
- langroid/agent/tools/formatted_model_pyparsing4.py-e +0 -176
- langroid/agent/tools/formatted_model_pyparsing5.py +0 -173
- langroid/agent/tools/formatted_model_pyparsing5.py-e +0 -142
- langroid/agent/tools/formatted_model_regex.py +0 -246
- langroid/agent/tools/formatted_model_regex.py-e +0 -248
- langroid/agent/tools/formatted_model_regex2.py +0 -250
- langroid/agent/tools/formatted_model_regex2.py-e +0 -253
- langroid/agent/tools/formatted_model_tatsu.py +0 -172
- langroid/agent/tools/formatted_model_tatsu.py-e +0 -160
- langroid/agent/tools/formatted_model_template.py +0 -217
- langroid/agent/tools/formatted_model_template.py-e +0 -200
- langroid/agent/tools/formatted_model_xml.py +0 -178
- langroid/agent/tools/formatted_model_xml2.py +0 -178
- langroid/agent/tools/formatted_model_xml3.py +0 -132
- langroid/agent/tools/formatted_model_xml4.py +0 -130
- langroid/agent/tools/formatted_model_xml5.py +0 -130
- langroid/agent/tools/formatted_model_xml6.py +0 -113
- langroid/agent/tools/formatted_model_xml7.py +0 -117
- langroid/agent/tools/formatted_model_xml8.py +0 -164
- langroid/agent/tools/generator_tool.py +0 -20
- langroid/agent/tools/generic_tool.py +0 -165
- langroid/agent/tools/generic_tool_tatsu.py +0 -275
- langroid/agent/tools/grammar_based_model.py +0 -132
- langroid/agent/tools/grammar_based_model.py-e +0 -128
- langroid/agent/tools/grammar_based_model_lark.py +0 -156
- langroid/agent/tools/grammar_based_model_lark.py-e +0 -153
- langroid/agent/tools/grammar_based_model_parse.py +0 -86
- langroid/agent/tools/grammar_based_model_parse.py-e +0 -80
- langroid/agent/tools/grammar_based_model_parsimonious.py +0 -129
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +0 -120
- langroid/agent/tools/grammar_based_model_pyparsing.py +0 -105
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +0 -103
- langroid/agent/tools/grammar_based_model_regex.py +0 -139
- langroid/agent/tools/grammar_based_model_regex.py-e +0 -130
- langroid/agent/tools/grammar_based_model_regex2.py +0 -124
- langroid/agent/tools/grammar_based_model_regex2.py-e +0 -116
- langroid/agent/tools/grammar_based_model_tatsu.py +0 -80
- langroid/agent/tools/grammar_based_model_tatsu.py-e +0 -77
- langroid/agent/tools/lark_earley_example.py +0 -135
- langroid/agent/tools/lark_earley_example.py-e +0 -117
- langroid/agent/tools/lark_example.py +0 -72
- langroid/agent/tools/note_tool.py +0 -0
- langroid/agent/tools/parse_example.py +0 -76
- langroid/agent/tools/parse_example2.py +0 -87
- langroid/agent/tools/parse_example3.py +0 -42
- langroid/agent/tools/parse_test.py +0 -791
- langroid/agent/tools/run_python_code.py +0 -60
- {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/LICENSE +0 -0
- {langroid-0.16.7.dist-info → langroid-0.17.1.dist-info}/WHEEL +0 -0
@@ -1,150 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import List, Tuple
|
3
|
-
|
4
|
-
from langroid.pydantic_v1 import BaseModel
|
5
|
-
|
6
|
-
|
7
|
-
class FormattingModel(BaseModel, ABC):
|
8
|
-
@classmethod
|
9
|
-
@abstractmethod
|
10
|
-
def format_spec(cls) -> str:
|
11
|
-
pass
|
12
|
-
|
13
|
-
@classmethod
|
14
|
-
@abstractmethod
|
15
|
-
def parse_spec(cls) -> List[Tuple[str, str, str]]:
|
16
|
-
pass
|
17
|
-
|
18
|
-
@classmethod
|
19
|
-
@abstractmethod
|
20
|
-
def start_token(cls) -> str:
|
21
|
-
pass
|
22
|
-
|
23
|
-
@classmethod
|
24
|
-
@abstractmethod
|
25
|
-
def end_token(cls) -> str:
|
26
|
-
pass
|
27
|
-
|
28
|
-
@classmethod
|
29
|
-
def format(cls, instance: "FormattingModel") -> str:
|
30
|
-
spec = cls.format_spec()
|
31
|
-
formatted = spec.format(**instance.dict())
|
32
|
-
return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"
|
33
|
-
|
34
|
-
@classmethod
|
35
|
-
def parse(cls, formatted_string: str) -> "FormattingModel":
|
36
|
-
lines = formatted_string.strip().split("\n")
|
37
|
-
if lines[0] != cls.start_token() or lines[-1] != cls.end_token():
|
38
|
-
raise ValueError("Invalid start or end token")
|
39
|
-
content = "\n".join(lines[1:-1])
|
40
|
-
|
41
|
-
parsed_data = {}
|
42
|
-
for field, start, end in cls.parse_spec():
|
43
|
-
start_index = content.find(start)
|
44
|
-
if start_index == -1:
|
45
|
-
raise ValueError(f"Could not find start of {field}")
|
46
|
-
end_index = content.find(end, start_index + len(start))
|
47
|
-
if end_index == -1:
|
48
|
-
raise ValueError(f"Could not find end of {field}")
|
49
|
-
value = content[start_index + len(start) : end_index].strip()
|
50
|
-
parsed_data[field] = value
|
51
|
-
|
52
|
-
return cls(**parsed_data)
|
53
|
-
|
54
|
-
|
55
|
-
class CodeFileModel(FormattingModel):
|
56
|
-
file_path: str
|
57
|
-
language: str
|
58
|
-
code: str
|
59
|
-
|
60
|
-
@classmethod
|
61
|
-
def format_spec(cls):
|
62
|
-
return "file_path: {file_path}\nlanguage: {language}\n```\n{code}\n```"
|
63
|
-
|
64
|
-
@classmethod
|
65
|
-
def parse_spec(cls):
|
66
|
-
return [
|
67
|
-
("file_path", "file_path:", "\n"),
|
68
|
-
("language", "language:", "\n"),
|
69
|
-
("code", "```\n", "\n```"),
|
70
|
-
]
|
71
|
-
|
72
|
-
@classmethod
|
73
|
-
def start_token(cls):
|
74
|
-
return "<code_file>"
|
75
|
-
|
76
|
-
@classmethod
|
77
|
-
def end_token(cls):
|
78
|
-
return "</code_file>"
|
79
|
-
|
80
|
-
|
81
|
-
# Test cases
|
82
|
-
if __name__ == "__main__":
|
83
|
-
# Test formatting
|
84
|
-
code_file = CodeFileModel(
|
85
|
-
file_path="src/main.py",
|
86
|
-
language="python",
|
87
|
-
code="def main():\n print('Hello, World!')",
|
88
|
-
)
|
89
|
-
formatted = CodeFileModel.format(code_file)
|
90
|
-
expected_format = """<code_file>
|
91
|
-
file_path: src/main.py
|
92
|
-
language: python
|
93
|
-
```
|
94
|
-
def main():
|
95
|
-
print('Hello, World!')
|
96
|
-
```
|
97
|
-
</code_file>"""
|
98
|
-
assert (
|
99
|
-
formatted == expected_format
|
100
|
-
), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
|
101
|
-
print("Formatting test passed.")
|
102
|
-
|
103
|
-
# Test parsing
|
104
|
-
parsed = CodeFileModel.parse(formatted)
|
105
|
-
assert (
|
106
|
-
parsed == code_file
|
107
|
-
), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
|
108
|
-
print("Parsing test passed.")
|
109
|
-
|
110
|
-
# Test round-trip
|
111
|
-
round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
|
112
|
-
assert (
|
113
|
-
round_trip == code_file
|
114
|
-
), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
|
115
|
-
print("Round-trip test passed.")
|
116
|
-
|
117
|
-
# Test with different values
|
118
|
-
code_file2 = CodeFileModel(
|
119
|
-
file_path="src/app.js",
|
120
|
-
language="javascript",
|
121
|
-
code="function greet() {\n console.log('Hello, World!');\n}",
|
122
|
-
)
|
123
|
-
formatted2 = CodeFileModel.format(code_file2)
|
124
|
-
parsed2 = CodeFileModel.parse(formatted2)
|
125
|
-
assert (
|
126
|
-
parsed2 == code_file2
|
127
|
-
), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
|
128
|
-
print("Different values test passed.")
|
129
|
-
|
130
|
-
# Test tolerant parsing
|
131
|
-
tolerant_input = """<code_file>
|
132
|
-
file_path: src/main.py
|
133
|
-
language: python
|
134
|
-
```
|
135
|
-
def main():
|
136
|
-
print('Hello, World!')
|
137
|
-
```
|
138
|
-
</code_file>"""
|
139
|
-
parsed_tolerant = CodeFileModel.parse(tolerant_input)
|
140
|
-
expected_tolerant = CodeFileModel(
|
141
|
-
file_path="src/main.py",
|
142
|
-
language="python",
|
143
|
-
code="def main():\n print('Hello, World!')",
|
144
|
-
)
|
145
|
-
assert (
|
146
|
-
parsed_tolerant == expected_tolerant
|
147
|
-
), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
|
148
|
-
print("Tolerant parsing test passed.")
|
149
|
-
|
150
|
-
print("All tests passed successfully!")
|
@@ -1,168 +0,0 @@
|
|
1
|
-
from abc import ABC
|
2
|
-
from typing import Dict
|
3
|
-
|
4
|
-
from langroid.pydantic_v1 import BaseModel, Field
|
5
|
-
|
6
|
-
|
7
|
-
class FormatMetadata(BaseModel):
|
8
|
-
prefix: str = ""
|
9
|
-
suffix: str = ""
|
10
|
-
multiline: bool = False
|
11
|
-
|
12
|
-
|
13
|
-
class FormattingModel(BaseModel, ABC):
|
14
|
-
@classmethod
|
15
|
-
def format_spec(cls) -> str:
|
16
|
-
lines = []
|
17
|
-
for name, field in cls.__fields__.items():
|
18
|
-
metadata: FormatMetadata = field.field_info.extra.get(
|
19
|
-
"format_metadata", FormatMetadata()
|
20
|
-
)
|
21
|
-
if metadata.multiline:
|
22
|
-
lines.append(f"{metadata.prefix}{{{name}}}{metadata.suffix}")
|
23
|
-
else:
|
24
|
-
lines.append(f"{metadata.prefix}{{{name}}}{metadata.suffix}")
|
25
|
-
return "\n".join(lines)
|
26
|
-
|
27
|
-
@classmethod
|
28
|
-
def parse_spec(cls) -> Dict[str, FormatMetadata]:
|
29
|
-
return {
|
30
|
-
name: field.field_info.extra.get("format_metadata", FormatMetadata())
|
31
|
-
for name, field in cls.__fields__.items()
|
32
|
-
}
|
33
|
-
|
34
|
-
@classmethod
|
35
|
-
def start_token(cls) -> str:
|
36
|
-
return getattr(cls.Config, "start_token", "<format>")
|
37
|
-
|
38
|
-
@classmethod
|
39
|
-
def end_token(cls) -> str:
|
40
|
-
return getattr(cls.Config, "end_token", "</format>")
|
41
|
-
|
42
|
-
@classmethod
|
43
|
-
def format(cls, instance: "FormattingModel") -> str:
|
44
|
-
spec = cls.format_spec()
|
45
|
-
formatted = spec.format(**instance.dict())
|
46
|
-
return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"
|
47
|
-
|
48
|
-
@classmethod
|
49
|
-
def parse(cls, formatted_string: str) -> "FormattingModel":
|
50
|
-
lines = formatted_string.strip().split("\n")
|
51
|
-
if lines[0] != cls.start_token() or lines[-1] != cls.end_token():
|
52
|
-
raise ValueError("Invalid start or end token")
|
53
|
-
content = "\n".join(lines[1:-1])
|
54
|
-
|
55
|
-
parsed_data = {}
|
56
|
-
parse_spec = cls.parse_spec()
|
57
|
-
|
58
|
-
for field, metadata in parse_spec.items():
|
59
|
-
if metadata.multiline:
|
60
|
-
start = f"{metadata.prefix}"
|
61
|
-
end = f"{metadata.suffix}"
|
62
|
-
start_index = content.find(start)
|
63
|
-
if start_index == -1:
|
64
|
-
raise ValueError(f"Could not find start of {field}")
|
65
|
-
end_index = content.find(end, start_index + len(start))
|
66
|
-
if end_index == -1:
|
67
|
-
raise ValueError(f"Could not find end of {field}")
|
68
|
-
value = content[start_index + len(start) : end_index].strip()
|
69
|
-
else:
|
70
|
-
line_start = f"{metadata.prefix}"
|
71
|
-
line_end = metadata.suffix or "\n"
|
72
|
-
start_index = content.find(line_start)
|
73
|
-
if start_index == -1:
|
74
|
-
raise ValueError(f"Could not find {field}")
|
75
|
-
end_index = content.find(line_end, start_index + len(line_start))
|
76
|
-
if end_index == -1:
|
77
|
-
end_index = len(content)
|
78
|
-
value = content[start_index + len(line_start) : end_index].strip()
|
79
|
-
|
80
|
-
parsed_data[field] = value
|
81
|
-
|
82
|
-
return cls(**parsed_data)
|
83
|
-
|
84
|
-
|
85
|
-
class CodeFileModel(FormattingModel):
|
86
|
-
file_path: str = Field(..., format_metadata=FormatMetadata(prefix="file_path: "))
|
87
|
-
language: str = Field(..., format_metadata=FormatMetadata(prefix="language: "))
|
88
|
-
code: str = Field(
|
89
|
-
...,
|
90
|
-
format_metadata=FormatMetadata(prefix="```\n", suffix="\n```", multiline=True),
|
91
|
-
)
|
92
|
-
|
93
|
-
class Config:
|
94
|
-
start_token = "<code_file>"
|
95
|
-
end_token = "</code_file>"
|
96
|
-
|
97
|
-
|
98
|
-
# Test cases
|
99
|
-
#
|
100
|
-
if __name__ == "__main__":
|
101
|
-
# Test formatting
|
102
|
-
code_file = CodeFileModel(
|
103
|
-
file_path="src/main.py",
|
104
|
-
language="python",
|
105
|
-
code="def main():\n print('Hello, World!')",
|
106
|
-
)
|
107
|
-
formatted = CodeFileModel.format(code_file)
|
108
|
-
expected_format = """<code_file>
|
109
|
-
file_path: src/main.py
|
110
|
-
language: python
|
111
|
-
```
|
112
|
-
def main():
|
113
|
-
print('Hello, World!')
|
114
|
-
```
|
115
|
-
</code_file>"""
|
116
|
-
assert (
|
117
|
-
formatted == expected_format
|
118
|
-
), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
|
119
|
-
print("Formatting test passed.")
|
120
|
-
|
121
|
-
# Test parsing
|
122
|
-
parsed = CodeFileModel.parse(formatted)
|
123
|
-
assert (
|
124
|
-
parsed == code_file
|
125
|
-
), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
|
126
|
-
print("Parsing test passed.")
|
127
|
-
|
128
|
-
# Test round-trip
|
129
|
-
round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
|
130
|
-
assert (
|
131
|
-
round_trip == code_file
|
132
|
-
), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
|
133
|
-
print("Round-trip test passed.")
|
134
|
-
|
135
|
-
# Test with different values
|
136
|
-
code_file2 = CodeFileModel(
|
137
|
-
file_path="src/app.js",
|
138
|
-
language="javascript",
|
139
|
-
code="function greet() {\n console.log('Hello, World!');\n}",
|
140
|
-
)
|
141
|
-
formatted2 = CodeFileModel.format(code_file2)
|
142
|
-
parsed2 = CodeFileModel.parse(formatted2)
|
143
|
-
assert (
|
144
|
-
parsed2 == code_file2
|
145
|
-
), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
|
146
|
-
print("Different values test passed.")
|
147
|
-
|
148
|
-
# Test tolerant parsing
|
149
|
-
tolerant_input = """<code_file>
|
150
|
-
file_path: src/main.py
|
151
|
-
language: python
|
152
|
-
```
|
153
|
-
def main():
|
154
|
-
print('Hello, World!')
|
155
|
-
```
|
156
|
-
</code_file>"""
|
157
|
-
parsed_tolerant = CodeFileModel.parse(tolerant_input)
|
158
|
-
expected_tolerant = CodeFileModel(
|
159
|
-
file_path="src/main.py",
|
160
|
-
language="python",
|
161
|
-
code="def main():\n print('Hello, World!')",
|
162
|
-
)
|
163
|
-
assert (
|
164
|
-
parsed_tolerant == expected_tolerant
|
165
|
-
), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
|
166
|
-
print("Tolerant parsing test passed.")
|
167
|
-
|
168
|
-
print("All tests passed successfully!")
|
@@ -1,279 +0,0 @@
|
|
1
|
-
from abc import ABC
|
2
|
-
from typing import Dict, List
|
3
|
-
|
4
|
-
from langroid.pydantic_v1 import BaseModel, Field
|
5
|
-
|
6
|
-
|
7
|
-
class FormatMetadata(BaseModel):
|
8
|
-
prefix: str = ""
|
9
|
-
suffix: str = ""
|
10
|
-
multiline: bool = False
|
11
|
-
|
12
|
-
|
13
|
-
class FormattingModel(BaseModel, ABC):
|
14
|
-
@classmethod
|
15
|
-
def format_spec(cls) -> str:
|
16
|
-
lines = []
|
17
|
-
for name, field in cls.__fields__.items():
|
18
|
-
metadata: FormatMetadata = field.field_info.extra.get(
|
19
|
-
"format_metadata", FormatMetadata()
|
20
|
-
)
|
21
|
-
if metadata.multiline:
|
22
|
-
lines.append(f"{metadata.prefix}{{{name}}}{metadata.suffix}")
|
23
|
-
else:
|
24
|
-
lines.append(f"{metadata.prefix}{{{name}}}{metadata.suffix}")
|
25
|
-
return "\n".join(lines)
|
26
|
-
|
27
|
-
@classmethod
|
28
|
-
def parse_spec(cls) -> Dict[str, FormatMetadata]:
|
29
|
-
return {
|
30
|
-
name: field.field_info.extra.get("format_metadata", FormatMetadata())
|
31
|
-
for name, field in cls.__fields__.items()
|
32
|
-
}
|
33
|
-
|
34
|
-
@classmethod
|
35
|
-
def start_token(cls) -> str:
|
36
|
-
return getattr(cls.Config, "start_token", "<format>")
|
37
|
-
|
38
|
-
@classmethod
|
39
|
-
def end_token(cls) -> str:
|
40
|
-
return getattr(cls.Config, "end_token", "</format>")
|
41
|
-
|
42
|
-
@classmethod
|
43
|
-
def format(cls, instance: "FormattingModel") -> str:
|
44
|
-
spec = cls.format_spec()
|
45
|
-
formatted = spec.format(**instance.dict())
|
46
|
-
return f"{cls.start_token()}\n{formatted}\n{cls.end_token()}"
|
47
|
-
|
48
|
-
@classmethod
|
49
|
-
def parse(cls, formatted_string: str) -> "FormattingModel":
|
50
|
-
lines = formatted_string.strip().split("\n")
|
51
|
-
if lines[0] != cls.start_token():
|
52
|
-
raise ValueError("Invalid start token")
|
53
|
-
|
54
|
-
content = "\n".join(lines[1:])
|
55
|
-
if content.endswith(cls.end_token()):
|
56
|
-
content = content[: -len(cls.end_token())].strip()
|
57
|
-
|
58
|
-
parsed_data = {}
|
59
|
-
parse_spec = cls.parse_spec()
|
60
|
-
field_names = list(parse_spec.keys())
|
61
|
-
|
62
|
-
for i, (field, metadata) in enumerate(parse_spec.items()):
|
63
|
-
is_last_field = i == len(field_names) - 1
|
64
|
-
if metadata.multiline:
|
65
|
-
start = f"{metadata.prefix}"
|
66
|
-
end = f"{metadata.suffix}"
|
67
|
-
start_index = content.find(start)
|
68
|
-
if start_index == -1:
|
69
|
-
raise ValueError(f"Could not find start of {field}")
|
70
|
-
start_index += len(start)
|
71
|
-
if is_last_field:
|
72
|
-
end_index = content.find(end, start_index)
|
73
|
-
if end_index == -1:
|
74
|
-
end_index = len(content)
|
75
|
-
value = content[start_index:end_index].strip()
|
76
|
-
else:
|
77
|
-
end_index = content.find(end, start_index)
|
78
|
-
if end_index == -1:
|
79
|
-
raise ValueError(f"Could not find end of {field}")
|
80
|
-
value = content[start_index:end_index].strip()
|
81
|
-
else:
|
82
|
-
line_start = f"{metadata.prefix}"
|
83
|
-
line_end = metadata.suffix or "\n"
|
84
|
-
start_index = content.find(line_start)
|
85
|
-
if start_index == -1:
|
86
|
-
raise ValueError(f"Could not find {field}")
|
87
|
-
start_index += len(line_start)
|
88
|
-
if is_last_field:
|
89
|
-
end_index = content.find(line_end, start_index)
|
90
|
-
if end_index == -1:
|
91
|
-
end_index = len(content)
|
92
|
-
value = content[start_index:end_index].strip()
|
93
|
-
else:
|
94
|
-
end_index = content.find(line_end, start_index)
|
95
|
-
if end_index == -1:
|
96
|
-
raise ValueError(f"Could not find end of {field}")
|
97
|
-
value = content[start_index:end_index].strip()
|
98
|
-
|
99
|
-
parsed_data[field] = value
|
100
|
-
content = content[
|
101
|
-
end_index + len(end if metadata.multiline else line_end) :
|
102
|
-
].strip()
|
103
|
-
|
104
|
-
return cls(**parsed_data)
|
105
|
-
|
106
|
-
@staticmethod
|
107
|
-
def find_all_candidates(string: str, begin_token: str, end_token: str) -> List[str]:
|
108
|
-
candidates = []
|
109
|
-
start = 0
|
110
|
-
while True:
|
111
|
-
start_index = string.find(begin_token, start)
|
112
|
-
if start_index == -1:
|
113
|
-
break
|
114
|
-
|
115
|
-
end_index = string.find(end_token, start_index + len(begin_token))
|
116
|
-
if end_index == -1:
|
117
|
-
# If no end token is found, assume it extends to the end of the string
|
118
|
-
candidates.append(string[start_index:])
|
119
|
-
break
|
120
|
-
|
121
|
-
# Check if there's a nested begin token before the end token
|
122
|
-
next_start = string.find(
|
123
|
-
begin_token, start_index + len(begin_token), end_index
|
124
|
-
)
|
125
|
-
if next_start != -1:
|
126
|
-
# If there's a nested begin token, continue searching from there
|
127
|
-
start = next_start
|
128
|
-
continue
|
129
|
-
|
130
|
-
candidates.append(string[start_index : end_index + len(end_token)])
|
131
|
-
start = end_index + len(end_token)
|
132
|
-
|
133
|
-
return candidates
|
134
|
-
|
135
|
-
|
136
|
-
class CodeFileModel(FormattingModel):
|
137
|
-
file_path: str = Field(..., format_metadata=FormatMetadata(prefix="file_path: "))
|
138
|
-
language: str = Field(..., format_metadata=FormatMetadata(prefix="language: "))
|
139
|
-
code: str = Field(
|
140
|
-
...,
|
141
|
-
format_metadata=FormatMetadata(prefix="```\n", suffix="\n```", multiline=True),
|
142
|
-
)
|
143
|
-
|
144
|
-
class Config:
|
145
|
-
start_token = "<code_file>"
|
146
|
-
end_token = "</code_file>"
|
147
|
-
|
148
|
-
|
149
|
-
if __name__ == "__main__":
|
150
|
-
# Test formatting
|
151
|
-
code_file = CodeFileModel(
|
152
|
-
file_path="src/main.py",
|
153
|
-
language="python",
|
154
|
-
code="def main():\n print('Hello, World!')",
|
155
|
-
)
|
156
|
-
formatted = CodeFileModel.format(code_file)
|
157
|
-
expected_format = """<code_file>
|
158
|
-
file_path: src/main.py
|
159
|
-
language: python
|
160
|
-
```
|
161
|
-
def main():
|
162
|
-
print('Hello, World!')
|
163
|
-
```
|
164
|
-
</code_file>"""
|
165
|
-
assert (
|
166
|
-
formatted == expected_format
|
167
|
-
), f"Formatting failed. Expected:\n{expected_format}\nGot:\n{formatted}"
|
168
|
-
print("Formatting test passed.")
|
169
|
-
|
170
|
-
# Test parsing
|
171
|
-
parsed = CodeFileModel.parse(formatted)
|
172
|
-
assert (
|
173
|
-
parsed == code_file
|
174
|
-
), f"Parsing failed. Expected:\n{code_file}\nGot:\n{parsed}"
|
175
|
-
print("Parsing test passed.")
|
176
|
-
|
177
|
-
# Test round-trip
|
178
|
-
round_trip = CodeFileModel.parse(CodeFileModel.format(code_file))
|
179
|
-
assert (
|
180
|
-
round_trip == code_file
|
181
|
-
), f"Round-trip failed. Expected:\n{code_file}\nGot:\n{round_trip}"
|
182
|
-
print("Round-trip test passed.")
|
183
|
-
|
184
|
-
# Test with different values
|
185
|
-
code_file2 = CodeFileModel(
|
186
|
-
file_path="src/app.js",
|
187
|
-
language="javascript",
|
188
|
-
code="function greet() {\n console.log('Hello, World!');\n}",
|
189
|
-
)
|
190
|
-
formatted2 = CodeFileModel.format(code_file2)
|
191
|
-
parsed2 = CodeFileModel.parse(formatted2)
|
192
|
-
assert (
|
193
|
-
parsed2 == code_file2
|
194
|
-
), f"Parsing failed for different values. Expected:\n{code_file2}\nGot:\n{parsed2}"
|
195
|
-
print("Different values test passed.")
|
196
|
-
|
197
|
-
# Test tolerant parsing
|
198
|
-
tolerant_input = """<code_file>
|
199
|
-
file_path: src/main.py
|
200
|
-
language: python
|
201
|
-
```
|
202
|
-
def main():
|
203
|
-
print('Hello, World!')
|
204
|
-
```
|
205
|
-
</code_file>"""
|
206
|
-
parsed_tolerant = CodeFileModel.parse(tolerant_input)
|
207
|
-
expected_tolerant = CodeFileModel(
|
208
|
-
file_path="src/main.py",
|
209
|
-
language="python",
|
210
|
-
code="def main():\n print('Hello, World!')",
|
211
|
-
)
|
212
|
-
assert (
|
213
|
-
parsed_tolerant == expected_tolerant
|
214
|
-
), f"Tolerant parsing failed. Expected:\n{expected_tolerant}\nGot:\n{parsed_tolerant}"
|
215
|
-
print("Tolerant parsing test passed.")
|
216
|
-
|
217
|
-
# Test tolerant parsing without end token and last field suffix
|
218
|
-
tolerant_input_no_end = """<code_file>
|
219
|
-
file_path: src/main.py
|
220
|
-
language: python
|
221
|
-
```
|
222
|
-
def main():
|
223
|
-
print('Hello, World!')"""
|
224
|
-
parsed_tolerant_no_end = CodeFileModel.parse(tolerant_input_no_end)
|
225
|
-
expected_tolerant_no_end = CodeFileModel(
|
226
|
-
file_path="src/main.py",
|
227
|
-
language="python",
|
228
|
-
code="def main():\n print('Hello, World!')",
|
229
|
-
)
|
230
|
-
assert (
|
231
|
-
parsed_tolerant_no_end == expected_tolerant_no_end
|
232
|
-
), f"Tolerant parsing without end token failed. Expected:\n{expected_tolerant_no_end}\nGot:\n{parsed_tolerant_no_end}"
|
233
|
-
print("Tolerant parsing without end token test passed.")
|
234
|
-
|
235
|
-
# Test find_all_candidates method
|
236
|
-
test_string = """
|
237
|
-
Some text before
|
238
|
-
<code_file>
|
239
|
-
file_path: src/main.py
|
240
|
-
language: python
|
241
|
-
```
|
242
|
-
def main():
|
243
|
-
print('Hello, World!')
|
244
|
-
```
|
245
|
-
</code_file>
|
246
|
-
Some text in between
|
247
|
-
<code_file>
|
248
|
-
file_path: src/helper.py
|
249
|
-
language: python
|
250
|
-
```
|
251
|
-
def helper():
|
252
|
-
return 'Helper function'
|
253
|
-
```
|
254
|
-
</code_file>
|
255
|
-
<code_file>
|
256
|
-
file_path: src/incomplete.py
|
257
|
-
language: python
|
258
|
-
```
|
259
|
-
def incomplete():
|
260
|
-
print('No end token')
|
261
|
-
Some text after
|
262
|
-
"""
|
263
|
-
|
264
|
-
candidates = FormattingModel.find_all_candidates(
|
265
|
-
test_string, "<code_file>", "</code_file>"
|
266
|
-
)
|
267
|
-
assert len(candidates) == 3, f"Expected 3 candidates, got {len(candidates)}"
|
268
|
-
assert candidates[0].startswith("<code_file>") and candidates[0].endswith(
|
269
|
-
"</code_file>"
|
270
|
-
), "First candidate is incorrect"
|
271
|
-
assert candidates[1].startswith("<code_file>") and candidates[1].endswith(
|
272
|
-
"</code_file>"
|
273
|
-
), "Second candidate is incorrect"
|
274
|
-
assert candidates[2].startswith("<code_file>") and not candidates[2].endswith(
|
275
|
-
"</code_file>"
|
276
|
-
), "Third candidate is incorrect"
|
277
|
-
print("find_all_candidates test passed.")
|
278
|
-
|
279
|
-
print("All tests passed successfully!")
|