langroid 0.16.7__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +45 -21
- langroid/agent/chat_agent.py +22 -14
- langroid/agent/chat_document.py +22 -13
- langroid/agent/tool_message.py +11 -11
- langroid/agent/tools/file_tools.py +234 -0
- langroid/agent/xml_tool_message.py +178 -45
- langroid/utils/constants.py +2 -0
- langroid/utils/git_utils.py +251 -0
- langroid/utils/system.py +78 -0
- {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/METADATA +6 -3
- {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/RECORD +14 -89
- pyproject.toml +3 -2
- langroid/agent/md_tool_message_grammar.py +0 -455
- langroid/agent/tools/code_file_tool_parse.py +0 -150
- langroid/agent/tools/code_file_tool_pyparsing.py +0 -194
- langroid/agent/tools/code_file_tool_pyparsing2.py +0 -199
- langroid/agent/tools/extract_tool.py +0 -96
- langroid/agent/tools/formatted_model_custom.py +0 -150
- langroid/agent/tools/formatted_model_custom2.py +0 -168
- langroid/agent/tools/formatted_model_custom3.py +0 -279
- langroid/agent/tools/formatted_model_custom4.py +0 -395
- langroid/agent/tools/formatted_model_jinja.py +0 -133
- langroid/agent/tools/formatted_model_jinja.py-e +0 -122
- langroid/agent/tools/formatted_model_jinja2.py +0 -145
- langroid/agent/tools/formatted_model_jinja2.py-e +0 -135
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +0 -168
- langroid/agent/tools/formatted_model_parse.py +0 -105
- langroid/agent/tools/formatted_model_parse.py-e +0 -98
- langroid/agent/tools/formatted_model_parse2.py +0 -113
- langroid/agent/tools/formatted_model_parse2.py-e +0 -109
- langroid/agent/tools/formatted_model_parse3.py +0 -114
- langroid/agent/tools/formatted_model_parse3.py-e +0 -110
- langroid/agent/tools/formatted_model_parsimon.py +0 -194
- langroid/agent/tools/formatted_model_parsimon.py-e +0 -186
- langroid/agent/tools/formatted_model_pyparsing.py +0 -169
- langroid/agent/tools/formatted_model_pyparsing.py-e +0 -149
- langroid/agent/tools/formatted_model_pyparsing2.py +0 -159
- langroid/agent/tools/formatted_model_pyparsing2.py-e +0 -143
- langroid/agent/tools/formatted_model_pyparsing3.py +0 -133
- langroid/agent/tools/formatted_model_pyparsing3.py-e +0 -121
- langroid/agent/tools/formatted_model_pyparsing4.py +0 -213
- langroid/agent/tools/formatted_model_pyparsing4.py-e +0 -176
- langroid/agent/tools/formatted_model_pyparsing5.py +0 -173
- langroid/agent/tools/formatted_model_pyparsing5.py-e +0 -142
- langroid/agent/tools/formatted_model_regex.py +0 -246
- langroid/agent/tools/formatted_model_regex.py-e +0 -248
- langroid/agent/tools/formatted_model_regex2.py +0 -250
- langroid/agent/tools/formatted_model_regex2.py-e +0 -253
- langroid/agent/tools/formatted_model_tatsu.py +0 -172
- langroid/agent/tools/formatted_model_tatsu.py-e +0 -160
- langroid/agent/tools/formatted_model_template.py +0 -217
- langroid/agent/tools/formatted_model_template.py-e +0 -200
- langroid/agent/tools/formatted_model_xml.py +0 -178
- langroid/agent/tools/formatted_model_xml2.py +0 -178
- langroid/agent/tools/formatted_model_xml3.py +0 -132
- langroid/agent/tools/formatted_model_xml4.py +0 -130
- langroid/agent/tools/formatted_model_xml5.py +0 -130
- langroid/agent/tools/formatted_model_xml6.py +0 -113
- langroid/agent/tools/formatted_model_xml7.py +0 -117
- langroid/agent/tools/formatted_model_xml8.py +0 -164
- langroid/agent/tools/generator_tool.py +0 -20
- langroid/agent/tools/generic_tool.py +0 -165
- langroid/agent/tools/generic_tool_tatsu.py +0 -275
- langroid/agent/tools/grammar_based_model.py +0 -132
- langroid/agent/tools/grammar_based_model.py-e +0 -128
- langroid/agent/tools/grammar_based_model_lark.py +0 -156
- langroid/agent/tools/grammar_based_model_lark.py-e +0 -153
- langroid/agent/tools/grammar_based_model_parse.py +0 -86
- langroid/agent/tools/grammar_based_model_parse.py-e +0 -80
- langroid/agent/tools/grammar_based_model_parsimonious.py +0 -129
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +0 -120
- langroid/agent/tools/grammar_based_model_pyparsing.py +0 -105
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +0 -103
- langroid/agent/tools/grammar_based_model_regex.py +0 -139
- langroid/agent/tools/grammar_based_model_regex.py-e +0 -130
- langroid/agent/tools/grammar_based_model_regex2.py +0 -124
- langroid/agent/tools/grammar_based_model_regex2.py-e +0 -116
- langroid/agent/tools/grammar_based_model_tatsu.py +0 -80
- langroid/agent/tools/grammar_based_model_tatsu.py-e +0 -77
- langroid/agent/tools/lark_earley_example.py +0 -135
- langroid/agent/tools/lark_earley_example.py-e +0 -117
- langroid/agent/tools/lark_example.py +0 -72
- langroid/agent/tools/note_tool.py +0 -0
- langroid/agent/tools/parse_example.py +0 -76
- langroid/agent/tools/parse_example2.py +0 -87
- langroid/agent/tools/parse_example3.py +0 -42
- langroid/agent/tools/parse_test.py +0 -791
- langroid/agent/tools/run_python_code.py +0 -60
- {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/LICENSE +0 -0
- {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/WHEEL +0 -0
@@ -1,150 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Non-JSON Tool for LLM to specify contents of a code file.
|
3
|
-
|
4
|
-
Why Non-JSON? Because there are numerous issues with even the best LLMs trying
|
5
|
-
to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
|
6
|
-
and the problem is even worse with weak LLMs. Json repair methods exist, but
|
7
|
-
can't deal with all possible cases.
|
8
|
-
|
9
|
-
E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
|
10
|
-
|
11
|
-
Note: We express the formatting rules with a template since it has several benefits:
|
12
|
-
- all of the formatting rules are in one place,
|
13
|
-
- we get a parser for free, and don't have to write parsing code,
|
14
|
-
- we get a formatting example generator for free, and don't have to write
|
15
|
-
example generation code.
|
16
|
-
- consistency between the parser and the example generator is guaranteed.
|
17
|
-
"""
|
18
|
-
|
19
|
-
from typing import Any, Callable, Dict, List, Tuple, Type
|
20
|
-
|
21
|
-
from parse import Parser, compile
|
22
|
-
|
23
|
-
from langroid.agent.tool_message import ToolMessage
|
24
|
-
from langroid.utils.constants import TOOL, TOOL_END
|
25
|
-
|
26
|
-
CODE_FENCE_START = "`" * 3
|
27
|
-
CODE_FENCE_END = "`" * 3
|
28
|
-
|
29
|
-
|
30
|
-
class CodeFileTool(ToolMessage):
    """
    Used by LLM to specify contents of a code file.

    A single text template (see `get_template`) describes the non-JSON wire
    format. The same template is used to parse LLM output (`parse`,
    `from_string`) and to render an instance back to text (`format`,
    `to_string`, `__str__`).
    """

    request: str = "code_file_tool"
    purpose: str = """
To specify the contents of a code file.
"""
    file_path: str
    contents: str
    language: str

    @classmethod
    def get_template(cls) -> str:
        """
        Return the format template for this tool.

        `TOOL`, `TOOL_END`, the code fences and the request name are
        substituted immediately; `{ws}`, `{file_path}`, `{language}` and
        `{contents}` are left as placeholders for `parse` / `str.format`.
        """
        # presumably the declared default of the `request` field -- the helper
        # is defined on ToolMessage, outside this file
        request = cls.default_value("request")

        return f"""
{{ws}}{TOOL}:{{ws}}{request}
{{ws}}{{file_path}}
{CODE_FENCE_START}{{ws}}{{language}}
{{contents}}
{CODE_FENCE_END}
{{ws}}{TOOL_END}{{ws}}
"""

    @classmethod
    def parse(cls, string) -> Dict[str, Any]:
        """
        Parse `string` against the template.

        Args:
            string: candidate text for a single tool message.

        Returns:
            A dict with keys `request`, `file_path`, `language`, `contents`
            (the last three stripped of surrounding whitespace), or an empty
            dict when the text does not match the template.
        """
        # Local import so this block does not depend on a file-level import.
        import logging

        logger = logging.getLogger(__name__)

        def parse_ws(string):
            return (
                string  # This allows matching any amount of whitespace, including none
            )

        template = cls.get_template()
        # NOTE(review): the `parse` library applies extra types via
        # `{name:type}` specs, while the template uses `{ws}` as a field
        # *name* -- confirm that `parse_ws` is actually consulted.
        parser = Parser(template, dict(ws=parse_ws))

        # Diagnostics go to the logger instead of being printed on every call.
        logger.debug("Parsing string:\n%s", string)
        result = parser.parse(string)
        logger.debug("Parse result: %s", result)

        if result:
            return {
                "request": cls.default_value("request"),
                "file_path": result["file_path"].strip(),
                "language": result["language"].strip(),
                "contents": result["contents"].strip(),
            }
        return {}

    @classmethod
    def format(cls, instance):
        """
        Render `instance` to text by filling in the template placeholders.

        Args:
            instance: object exposing `request`, `file_path`, `language`
                and `contents` attributes.

        Returns:
            The template with all placeholders substituted.
        """
        return cls.get_template().format(
            request=instance.request,
            file_path=instance.file_path,
            language=instance.language,
            contents=instance.contents,
            # The template's whitespace placeholder is named `ws`; supplying
            # any other keyword leaves `{ws}` unfilled and raises KeyError.
            ws="",  # no extra spaces
        )

    @classmethod
    def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
        """
        Create a subclass of CodeFileTool with a static method get_directory,
        which returns the current directory path, so that all file paths are
        interpreted as relative to the current directory.
        """

        class SubCodeFileTool(cls):
            get_directory: Callable[[], str] = staticmethod(get_directory)

        return SubCodeFileTool

    @classmethod
    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
        """Return a single example instance (a small Rust source file)."""
        return [
            cls(
                file_path="src/lib.rs",
                language="rust",
                contents="""
// function to add two numbers
pub fn add(a: i32, b: i32) -> i32 {
a + b
}
""",
            )
        ]

    @classmethod
    def find_candidates(cls, input_str: str) -> List[str]:
        """
        Find all possible (top-level) candidates for
        CodeFileTool in the input string.
        """
        # Use parse.findall to find all instances of the CodeFileTool pattern
        parser = compile(cls.get_template())
        matches = list(parser.findall(input_str))
        # NOTE(review): every template field is named, so `match.fixed`
        # (positional fields) looks like it would be empty rather than the
        # matched text -- confirm what callers expect before relying on this.
        candidates = [match.fixed for match in matches]
        return candidates

    @classmethod
    def from_string(cls, input_string: str) -> "CodeFileTool":
        """
        Parse a string into a CodeFileTool object, using the TEMPLATE.

        Raises:
            ValueError: if `input_string` does not match the template.
        """
        parsed_data = cls.parse(input_string)
        if parsed_data:
            return cls(**parsed_data)
        raise ValueError("Invalid input string format")

    @classmethod
    def to_string(cls, instance) -> str:
        """Convert a CodeFileTool object to a string, using the TEMPLATE."""
        return cls.format(instance)

    def __str__(self):
        # `to_string` is a classmethod taking the instance explicitly, so the
        # instance must be passed; calling it with no argument raises TypeError.
        return self.to_string(self)

    def __repr__(self):
        return f"""CodeFileTool(
file_path='{self.file_path}',
language='{self.language}',
contents='{self.contents}')
"""
|
@@ -1,194 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Non-JSON Tool for LLM to specify contents of a code file.
|
3
|
-
|
4
|
-
Why Non-JSON? Because there are numerous issues with even the best LLMs trying
|
5
|
-
to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
|
6
|
-
and the problem is even worse with weak LLMs. Json repair methods exist, but
|
7
|
-
can't deal with all possible cases.
|
8
|
-
|
9
|
-
E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
|
10
|
-
|
11
|
-
Note: We express the formatting rules with a template since it has several benefits:
|
12
|
-
- all of the formatting rules are in one place,
|
13
|
-
- we get a parser for free, and don't have to write parsing code,
|
14
|
-
- we get a formatting example generator for free, and don't have to write
|
15
|
-
example generation code.
|
16
|
-
- consistency between the parser and the example generator is guaranteed.
|
17
|
-
"""
|
18
|
-
|
19
|
-
from typing import Any, Callable, Dict, List, Tuple, Type
|
20
|
-
|
21
|
-
from pyparsing import (
|
22
|
-
LineEnd,
|
23
|
-
Literal,
|
24
|
-
Optional,
|
25
|
-
ParserElement,
|
26
|
-
SkipTo,
|
27
|
-
White,
|
28
|
-
Word,
|
29
|
-
alphanums,
|
30
|
-
lineEnd,
|
31
|
-
printables,
|
32
|
-
)
|
33
|
-
|
34
|
-
from langroid.agent.tool_message import ToolMessage
|
35
|
-
from langroid.utils.constants import TOOL, TOOL_END
|
36
|
-
|
37
|
-
CODE_FENCE_START = "`" * 3
|
38
|
-
CODE_FENCE_END = "`" * 3
|
39
|
-
|
40
|
-
|
41
|
-
class CodeFileTool(ToolMessage):
    """
    Used by LLM to specify contents of a code file.

    The non-JSON wire format is defined once, as a pyparsing grammar
    (`create_parser`). The same grammar is used to parse LLM output
    (`parse`, `from_string`, `find_candidates`) and, by walking its
    elements, to render an instance back to text (`format`, `to_string`).
    """

    request: str = "code_file_tool"
    purpose: str = """
To specify the contents of a code file.
"""
    file_path: str
    contents: str
    language: str

    @classmethod
    def create_parser(cls):
        """
        Build the pyparsing grammar for one tool message.

        Layout: `TOOL:` plus an optional request word, a file-path line, an
        opening code fence followed by the language, the contents up to the
        closing fence, and finally `TOOL_END`.
        """
        TOOL_START = Literal(TOOL + ":")
        CODE_FENCE = Literal("```")

        file_path = SkipTo(lineEnd)("file_path")
        language = Word(alphanums)("language")
        contents = SkipTo(CODE_FENCE)("contents")

        parser = (
            TOOL_START
            + Optional(Word(printables), default=cls.default_value("request"))(
                "request"
            )
            + lineEnd
            + file_path
            + lineEnd
            + CODE_FENCE
            + Optional(White())  # Allow space after ```
            + language
            + lineEnd
            + contents
            + CODE_FENCE
            + lineEnd  # Add this line to expect a newline after the closing fence
            + Literal(TOOL_END)
        )
        return parser

    @classmethod
    def parse(cls, string) -> Dict[str, Any]:
        """
        Parse `string`, which must match the grammar in full.

        Returns:
            A dict with keys `request`, `file_path`, `language`, `contents`,
            or an empty dict if parsing fails for any reason.
        """
        parser = cls.create_parser()
        try:
            result = parser.parseString(string, parseAll=True)
            return {
                "request": result["request"],
                "file_path": result["file_path"].strip(),
                "language": result["language"],
                "contents": result["contents"].strip(),
            }
        except Exception as e:
            # Deliberate best-effort: any failure is reported and mapped to
            # "no match" so that `from_string` can raise its own ValueError.
            print(f"Parsing failed: {e}")
            return {}

    @classmethod
    def format(cls, instance) -> str:
        """
        Render `instance` to text by walking the grammar elements.

        Literals emit their text, named elements emit the matching attribute
        of `instance`, line ends emit a newline, and everything else (e.g.
        optional whitespace) emits nothing.

        Returns:
            The rendered text with leading/trailing whitespace stripped.
        """
        parser = cls.create_parser()

        def format_element(element):
            if isinstance(element, Literal):
                return element.match
            elif element.resultsName:
                if element.resultsName == "request":
                    return instance.request
                elif element.resultsName == "file_path":
                    return instance.file_path
                elif element.resultsName == "language":
                    return instance.language
                elif element.resultsName == "contents":
                    return f"{instance.contents}\n"  # Add newline after contents
            elif isinstance(element, LineEnd):
                return "\n"
            return ""

        def traverse_parser(parser_element):
            if isinstance(parser_element, ParserElement):
                if isinstance(parser_element, SkipTo):
                    return format_element(parser_element)
                elif hasattr(parser_element, "exprs"):
                    # Composite element: render the children in order.
                    return "".join(
                        traverse_parser(expr) for expr in parser_element.exprs
                    )
                else:
                    return format_element(parser_element)
            return str(parser_element)

        formatted_string = traverse_parser(parser)

        return formatted_string.strip()

    @classmethod
    def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
        """
        Create a subclass of CodeFileTool with a static method get_directory,
        which returns the current directory path, so that all file paths are
        interpreted as relative to the current directory.
        """

        class SubCodeFileTool(cls):
            get_directory: Callable[[], str] = staticmethod(get_directory)

        return SubCodeFileTool

    @classmethod
    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
        """Return a single example instance (a small Rust source file)."""
        return [
            cls(
                file_path="src/lib.rs",
                language="rust",
                contents="""
// function to add two numbers
pub fn add(a: i32, b: i32) -> i32 {
a + b
}
""",
            )
        ]

    @classmethod
    def find_candidates(cls, input_str: str) -> List[str]:
        """
        Find all possible (top-level) candidates for
        CodeFileTool in the input string.

        Returns:
            The substrings of `input_str` that match the grammar, in order
            of appearance.
        """
        # Scan with this class's own grammar. The earlier implementation
        # called `compile(cls.get_template())`, but this module imports no
        # template `compile` and the class defines no `get_template`.
        grammar = cls.create_parser()
        return [
            input_str[start:end]
            for _tokens, start, end in grammar.scanString(input_str)
        ]

    @classmethod
    def from_string(cls, input_string: str) -> "CodeFileTool":
        """
        Parse a string into a CodeFileTool object, using the grammar.

        Raises:
            ValueError: if `input_string` does not match the grammar.
        """
        parsed_data = cls.parse(input_string)
        if parsed_data:
            return cls(**parsed_data)
        raise ValueError("Invalid input string format")

    @classmethod
    def to_string(cls, instance) -> str:
        """Convert a CodeFileTool object to a string, using the grammar."""
        return cls.format(instance)

    def __str__(self):
        # `to_string` is a classmethod taking the instance explicitly, so the
        # instance must be passed; calling it with no argument raises TypeError.
        return self.to_string(self)

    def __repr__(self):
        return f"""CodeFileTool(
file_path='{self.file_path}',
language='{self.language}',
contents='{self.contents}')
"""
|
@@ -1,199 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Non-JSON Tool for LLM to specify contents of a code file.
|
3
|
-
|
4
|
-
Why Non-JSON? Because there are numerous issues with even the best LLMs trying
|
5
|
-
to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
|
6
|
-
and the problem is even worse with weak LLMs. Json repair methods exist, but
|
7
|
-
can't deal with all possible cases.
|
8
|
-
|
9
|
-
E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
|
10
|
-
|
11
|
-
Note: We express the formatting rules with a template since it has several benefits:
|
12
|
-
- all of the formatting rules are in one place,
|
13
|
-
- we get a parser for free, and don't have to write parsing code,
|
14
|
-
- we get a formatting example generator for free, and don't have to write
|
15
|
-
example generation code.
|
16
|
-
- consistency between the parser and the example generator is guaranteed.
|
17
|
-
"""
|
18
|
-
|
19
|
-
from typing import Callable, List, Tuple, Type
|
20
|
-
|
21
|
-
from pyparsing import (
|
22
|
-
Literal,
|
23
|
-
Optional,
|
24
|
-
SkipTo,
|
25
|
-
White,
|
26
|
-
Word,
|
27
|
-
alphanums,
|
28
|
-
lineEnd,
|
29
|
-
printables,
|
30
|
-
)
|
31
|
-
|
32
|
-
from langroid.agent.tool_message import ToolMessage
|
33
|
-
from langroid.agent.tools.generic_tool import GenericTool
|
34
|
-
from langroid.utils.constants import TOOL, TOOL_END
|
35
|
-
|
36
|
-
CODE_FENCE_START = "`" * 3
|
37
|
-
CODE_FENCE_END = "`" * 3
|
38
|
-
|
39
|
-
|
40
|
-
class CodeFileTool(GenericTool):
    """
    Tool through which the LLM states the full contents of one code file.

    Only the grammar is declared here; no parsing or formatting code is
    defined in this class.
    """

    request: str = "code_file_tool"
    purpose: str = """
To specify the <contents> of a code file at <file_path>,
containing code in a specific <language>.
"""
    file_path: str
    contents: str
    language: str

    @classmethod
    def define_grammar(cls):
        """
        Build the pyparsing grammar for a single tool message.

        Layout: `TOOL:` and an optional request word, a file-path line, an
        opening fence with the language, the contents up to a line end
        followed by the closing fence, then `TOOL_END`. Optional whitespace
        is tolerated at several points.
        """
        opener = Literal(TOOL + ":")
        fence = Literal("```")

        path_field = SkipTo(lineEnd)("file_path")
        language_field = Word(alphanums)("language")
        contents_field = SkipTo(lineEnd + fence)("contents")

        # Elements in match order; combined below with left-to-right `+`,
        # exactly as a single chained expression would be.
        sequence = [
            opener,
            Optional(White()),
            Optional(Word(printables), default=cls.default_value("request"))(
                "request"
            ),
            lineEnd,
            Optional(White()),
            path_field,
            lineEnd,
            fence,
            Optional(White()),
            language_field,
            lineEnd,
            contents_field,
            lineEnd,
            fence,
            lineEnd,
            Optional(White()),
            Literal(TOOL_END),
            Optional(White()),
        ]

        grammar = sequence[0]
        for piece in sequence[1:]:
            grammar = grammar + piece
        return grammar

    @classmethod
    def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
        """
        Return a subclass carrying `get_directory` as a static method.

        `get_directory` is expected to return the current directory path, so
        that file paths can be interpreted relative to it.
        """

        class SubCodeFileTool(cls):
            get_directory: Callable[[], str] = staticmethod(get_directory)

        return SubCodeFileTool

    @classmethod
    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
        """Return one example instance: a small Rust source file."""
        rust_example = cls(
            file_path="src/lib.rs",
            language="rust",
            contents="""
// function to add two numbers
pub fn add(a: i32, b: i32) -> i32 {
a + b
}
""",
        )
        return [rust_example]

    def __repr__(self):
        # Multi-line representation listing the three payload fields.
        return f"""CodeFileTool(
file_path='{self.file_path}',
language='{self.language}',
contents='{self.contents}')
"""
|
121
|
-
|
122
|
-
|
123
|
-
if __name__ == "__main__":
    # Informal smoke tests for CodeFileTool: print the instructions, parse a
    # sample message, format an instance, then check a parse/format round-trip.
    rule = "-" * 50

    # --- instructions -----------------------------------------------------
    print("Testing CodeFileTool instructions:")
    print(rule)

    print(CodeFileTool.instructions())

    print(rule)
    print("End of instructions test")

    # --- parse ------------------------------------------------------------
    print("\nTesting CodeFileTool parse method:")
    print(rule)

    test_input = """TOOL: code_file_tool
src/main.py
```python
def hello_world():
print("Hello, World!")

if __name__ == "__main__":
hello_world()
```
TOOL_END"""

    parsed_result = CodeFileTool.parse(test_input)
    print("Parsed result:")
    for field_name, field_value in parsed_result.items():
        print(f"{field_name}: {field_value}")

    print(rule)
    print("End of parse test")

    # --- format -----------------------------------------------------------
    print("\nTesting CodeFileTool format method:")
    print(rule)
    test_instance = CodeFileTool(
        request="code_file_tool",
        file_path="tests/test_file.py",
        language="python",
        contents="""
def test_function():
assert 1 + 1 == 2

if __name__ == "__main__":
test_function()
""",
    )
    formatted_output = CodeFileTool.format(test_instance)
    print("Formatted output:")
    print(formatted_output)
    print(rule)
    print("End of format test")

    # --- round-trip (parse -> format -> parse) ------------------------------
    print("\nTesting CodeFileTool round-trip (parse -> format -> parse):")
    print(rule)
    initial_parse = CodeFileTool.parse(test_input)
    formatted_output = CodeFileTool.format(CodeFileTool(**initial_parse))
    final_parse = CodeFileTool.parse(formatted_output)

    print("Initial parse:")
    print(initial_parse)
    print("\nFormatted output:")
    print(formatted_output)
    print("\nFinal parse:")
    print(final_parse)

    verdict = (
        "\nRound-trip test passed: Initial and final parses match."
        if initial_parse == final_parse
        else "\nRound-trip test failed: Initial and final parses do not match."
    )
    print(verdict)
    print(rule)
    print("End of round-trip test")
|
@@ -1,96 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
A tool to extract portions from previous (numbered) messages in
|
3
|
-
the chat history.
|
4
|
-
The idea is that when an LLM wants to (or is asked to) simply extract
|
5
|
-
portions of a message verbatim, it should use this tool/function to
|
6
|
-
SPECIFY what should be extracted, rather than actually extracting it.
|
7
|
-
This will usually be much cheaper and faster than actually writing out the extracted
|
8
|
-
text. The handler of this tool/function will then extract the text and send it back.
|
9
|
-
"""
|
10
|
-
|
11
|
-
from typing import Protocol, no_type_check
|
12
|
-
|
13
|
-
from jinja2 import DictLoader, Environment
|
14
|
-
|
15
|
-
from langroid.agent.tool_message import ToolMessage
|
16
|
-
from langroid.language_models.base import LLMMessage
|
17
|
-
|
18
|
-
|
19
|
-
def extract_between(value: str, start_word: str, end_word: str) -> str:
    """
    Return the text found between two marker words, stripped of whitespace.

    The first occurrence of `start_word` is used, together with the first
    occurrence of `end_word` that follows it. Nested or repeated
    start/end pairs are not handled.

    Args:
        value (str): the string to search
        start_word (str): marker that precedes the wanted text
        end_word (str): marker that follows the wanted text

    Returns:
        str: the stripped text between the markers, or "" when either
            marker cannot be found
    """
    opening = value.find(start_word)
    if opening < 0:
        return ""

    # The wanted text starts right after the opening marker.
    begin = opening + len(start_word)
    stop = value.find(end_word, begin)
    if stop < 0:
        return ""

    return value[begin:stop].strip()
|
43
|
-
|
44
|
-
|
45
|
-
class HasMessageHistory(Protocol):
    """
    Defines the fields expected in a class that enables this tool.

    Structural (duck-typed) interface: any object exposing a
    `message_history` attribute satisfies it.
    """

    # Chat messages; `ExtractTool.handle` passes this list to the Jinja
    # template under the name `msg`.
    message_history: list[LLMMessage]
|
51
|
-
|
52
|
-
|
53
|
-
class ExtractTool(ToolMessage):
    """
    Tool with which the LLM writes its reply as a Jinja template that refers
    to earlier messages (e.g. `msg[2][13:45]`) instead of repeating their
    text verbatim. `handle` renders the template against the message history.
    """

    request: str = "extract"
    purpose: str = """
To generate a message in the form of a <jinja_template>,
using the Jinja templating language, where the
the i'th message is referred to as msg[i], and integer indices are used
to specify which part of the message to extract, e.g. msg[2][13:45].
"""
    # Jinja template text authored by the LLM.
    jinja_template: str

    @classmethod
    def instructions(cls) -> str:
        """Return the instructions that tell the LLM how to use this tool."""
        # The second example reference was missing its closing `]`, which
        # made it invalid Jinja; it is fixed here.
        return """
In a conversation with the user, your responses may sometimes use verbatim
extracts of previous messages in the conversation. You are an expert at
Jinja templating syntax, and you will rely on this syntax whenever you find
yourself wanting to repeat verbatim text from earlier parts of the
conversation.
In your Jinja templates you can use references like {{msg[3][3:100]}} to
indicate that the user should substitute the content of message number 3
(assume first msg is number 1), starting at position 3 and ending at 100.

For example you may respond with something like:

The story started like this: {{msg[5][45:89]}}. Then John came home and
{{msg[2][4:19]}}.

VERY IMPORTANT --
(a) your FIRST priority is to generate messages that would sound natural
when the jinja templates are rendered.

(b) your NEXT priority is to ALWAYS RELY on the above JINJA scheme when
your intended message would contain verbatim text from previous messages.

(c) Do not simply use large verbatim parts of previous messages, when doing
so may not result in natural responses.
"""

    @no_type_check
    def handle(self: HasMessageHistory) -> str:
        """
        Render `jinja_template` with the message history bound to `msg`.

        Returns:
            The rendered text.
        """
        # Local import: the template text comes from the LLM, so it is
        # rendered in Jinja's sandbox rather than a plain Environment.
        from jinja2.sandbox import SandboxedEnvironment

        # NOTE(review): the instructions tell the LLM that the first message
        # is number 1, but this list is indexed from 0 -- confirm the
        # intended numbering.
        # NOTE(review): slicing such as msg[2][13:45] assumes each entry
        # supports string-style slicing -- verify against LLMMessage.
        msg = self.message_history
        env = SandboxedEnvironment(loader=DictLoader({"base": self.jinja_template}))
        template = env.get_template("base")
        return template.render(msg=msg)
|