langroid 0.16.6__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/md_tool_message_grammar.py +455 -0
- langroid/agent/tools/code_file_tool_parse.py +150 -0
- langroid/agent/tools/code_file_tool_pyparsing.py +194 -0
- langroid/agent/tools/code_file_tool_pyparsing2.py +199 -0
- langroid/agent/tools/formatted_model_custom.py +150 -0
- langroid/agent/tools/formatted_model_custom2.py +168 -0
- langroid/agent/tools/formatted_model_custom3.py +279 -0
- langroid/agent/tools/formatted_model_custom4.py +395 -0
- langroid/agent/tools/formatted_model_jinja.py +133 -0
- langroid/agent/tools/formatted_model_jinja.py-e +122 -0
- langroid/agent/tools/formatted_model_jinja2.py +145 -0
- langroid/agent/tools/formatted_model_jinja2.py-e +135 -0
- langroid/agent/tools/formatted_model_lark.py +0 -0
- langroid/agent/tools/formatted_model_lark2.py +168 -0
- langroid/agent/tools/formatted_model_parse.py +105 -0
- langroid/agent/tools/formatted_model_parse.py-e +98 -0
- langroid/agent/tools/formatted_model_parse2.py +113 -0
- langroid/agent/tools/formatted_model_parse2.py-e +109 -0
- langroid/agent/tools/formatted_model_parse3.py +114 -0
- langroid/agent/tools/formatted_model_parse3.py-e +110 -0
- langroid/agent/tools/formatted_model_parsimon.py +194 -0
- langroid/agent/tools/formatted_model_parsimon.py-e +186 -0
- langroid/agent/tools/formatted_model_pyparsing.py +169 -0
- langroid/agent/tools/formatted_model_pyparsing.py-e +149 -0
- langroid/agent/tools/formatted_model_pyparsing2.py +159 -0
- langroid/agent/tools/formatted_model_pyparsing2.py-e +143 -0
- langroid/agent/tools/formatted_model_pyparsing3.py +133 -0
- langroid/agent/tools/formatted_model_pyparsing3.py-e +121 -0
- langroid/agent/tools/formatted_model_pyparsing4.py +213 -0
- langroid/agent/tools/formatted_model_pyparsing4.py-e +176 -0
- langroid/agent/tools/formatted_model_pyparsing5.py +173 -0
- langroid/agent/tools/formatted_model_pyparsing5.py-e +142 -0
- langroid/agent/tools/formatted_model_regex.py +246 -0
- langroid/agent/tools/formatted_model_regex.py-e +248 -0
- langroid/agent/tools/formatted_model_regex2.py +250 -0
- langroid/agent/tools/formatted_model_regex2.py-e +253 -0
- langroid/agent/tools/formatted_model_tatsu.py +172 -0
- langroid/agent/tools/formatted_model_tatsu.py-e +160 -0
- langroid/agent/tools/formatted_model_template.py +217 -0
- langroid/agent/tools/formatted_model_template.py-e +200 -0
- langroid/agent/tools/formatted_model_xml.py +178 -0
- langroid/agent/tools/formatted_model_xml2.py +178 -0
- langroid/agent/tools/formatted_model_xml3.py +132 -0
- langroid/agent/tools/formatted_model_xml4.py +130 -0
- langroid/agent/tools/formatted_model_xml5.py +130 -0
- langroid/agent/tools/formatted_model_xml6.py +113 -0
- langroid/agent/tools/formatted_model_xml7.py +117 -0
- langroid/agent/tools/formatted_model_xml8.py +164 -0
- langroid/agent/tools/generic_tool.py +165 -0
- langroid/agent/tools/generic_tool_tatsu.py +275 -0
- langroid/agent/tools/grammar_based_model.py +132 -0
- langroid/agent/tools/grammar_based_model.py-e +128 -0
- langroid/agent/tools/grammar_based_model_lark.py +156 -0
- langroid/agent/tools/grammar_based_model_lark.py-e +153 -0
- langroid/agent/tools/grammar_based_model_parse.py +86 -0
- langroid/agent/tools/grammar_based_model_parse.py-e +80 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py +129 -0
- langroid/agent/tools/grammar_based_model_parsimonious.py-e +120 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py +105 -0
- langroid/agent/tools/grammar_based_model_pyparsing.py-e +103 -0
- langroid/agent/tools/grammar_based_model_regex.py +139 -0
- langroid/agent/tools/grammar_based_model_regex.py-e +130 -0
- langroid/agent/tools/grammar_based_model_regex2.py +124 -0
- langroid/agent/tools/grammar_based_model_regex2.py-e +116 -0
- langroid/agent/tools/grammar_based_model_tatsu.py +80 -0
- langroid/agent/tools/grammar_based_model_tatsu.py-e +77 -0
- langroid/agent/tools/lark_earley_example.py +135 -0
- langroid/agent/tools/lark_earley_example.py-e +117 -0
- langroid/agent/tools/lark_example.py +72 -0
- langroid/agent/tools/parse_example.py +76 -0
- langroid/agent/tools/parse_example2.py +87 -0
- langroid/agent/tools/parse_example3.py +42 -0
- langroid/agent/tools/parse_test.py +791 -0
- langroid/agent/xml_tool_message.py +106 -0
- langroid/language_models/openai_gpt.py +4 -2
- {langroid-0.16.6.dist-info → langroid-0.16.7.dist-info}/METADATA +1 -1
- {langroid-0.16.6.dist-info → langroid-0.16.7.dist-info}/RECORD +80 -6
- pyproject.toml +1 -1
- {langroid-0.16.6.dist-info → langroid-0.16.7.dist-info}/LICENSE +0 -0
- {langroid-0.16.6.dist-info → langroid-0.16.7.dist-info}/WHEEL +0 -0
@@ -0,0 +1,455 @@
|
|
1
|
+
"""
|
2
|
+
Subclass of `ToolMessage`, specialized for markdown-formatted structured messages.
|
3
|
+
Helpful when LLM is producing code as part of a tool-message -- code within JSON
|
4
|
+
tends to cause all kinds of issues, especially with weaker LLMs.
|
5
|
+
An LLM more reliably generates code within fenced blocks in a markdown doc.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import re
|
9
|
+
import textwrap
|
10
|
+
from random import choice
|
11
|
+
from typing import Any, Dict, List, Tuple, Type, TypeVar, Union
|
12
|
+
|
13
|
+
from langroid.agent.tool_message import ToolMessage
|
14
|
+
from langroid.language_models.base import LLMFunctionSpec
|
15
|
+
from langroid.pydantic_v1 import BaseModel, Extra
|
16
|
+
|
17
|
+
|
18
|
+
class FileContents(BaseModel):
    # Pairs a file path with the text of that file; both fields are required.
    file_path: str  # path of the file
    contents: str  # text of the file
|
21
|
+
|
22
|
+
|
23
|
+
T = TypeVar("T", bound="MdToolMessage")
|
24
|
+
|
25
|
+
from lark import Lark
|
26
|
+
|
27
|
+
md_tool_message_grammar = """
|
28
|
+
start: section+
|
29
|
+
|
30
|
+
section: header content
|
31
|
+
|
32
|
+
header: "#" WS WORD
|
33
|
+
|
34
|
+
content: primitive
|
35
|
+
| list
|
36
|
+
| file_contents
|
37
|
+
| list_file_contents
|
38
|
+
|
39
|
+
primitive: value
|
40
|
+
|
41
|
+
list: list_item+
|
42
|
+
list_item: "-" WS value
|
43
|
+
|
44
|
+
file_contents: "file_path:" WS FILEPATH NEWLINE CODE_BLOCK
|
45
|
+
list_file_contents: file_contents+
|
46
|
+
|
47
|
+
CODE_BLOCK: /```[^`]*```/
|
48
|
+
FILEPATH: /[^\\n]+/
|
49
|
+
WORD: /[a-zA-Z_]+/
|
50
|
+
value: /[^\\n]+/
|
51
|
+
|
52
|
+
%import common.WS
|
53
|
+
%import common.NEWLINE
|
54
|
+
"""
|
55
|
+
|
56
|
+
parser = Lark(md_tool_message_grammar, start="start", parser="lalr")
|
57
|
+
|
58
|
+
|
59
|
+
def apply_grammar(markdown_content: str):
    """Parse `markdown_content` with the module-level Lark `parser`.

    Returns:
        Whatever `parser.parse` produces for the given text.
    """
    return parser.parse(markdown_content)
|
62
|
+
|
63
|
+
|
64
|
+
@classmethod
def from_markdown(cls: Type[T], markdown_content: str) -> T:
    """
    Build an instance of `cls` from markdown text.

    The text is parsed with `apply_grammar`, the resulting tree is turned into
    a dict of field values, and that dict is passed to the constructor.
    """
    field_values = cls._extract_data_from_tree(apply_grammar(markdown_content))
    return cls(**field_values)
|
72
|
+
|
73
|
+
|
74
|
+
@classmethod
def _extract_data_from_tree(cls, tree):
    """
    Turn a parse tree into a dict of field values.

    For every section under `tree`, the key is the lower-cased `value` of the
    second child of the section's first child, and the value is the section's
    second child passed through `cls._parse_content`.
    """
    return {
        node.children[0].children[1].value.lower(): cls._parse_content(
            node.children[1]
        )
        for node in tree.children
    }
|
82
|
+
|
83
|
+
|
84
|
+
@classmethod
def _parse_content(cls, content):
    """
    Convert one content node into a Python value, dispatching on `content.data`.

    - "primitive": first child's `value`, passed through `cls._parse_primitive`
    - "list": the `value` of each item's second child
    - "file_contents": `cls._parse_file_contents(content)`
    - "list_file_contents": `cls._parse_file_contents` applied to each child

    Returns None for any other `content.data`.
    """
    kind = content.data
    if kind == "primitive":
        return cls._parse_primitive(content.children[0].value)
    if kind == "list":
        return [entry.children[1].value for entry in content.children]
    if kind == "file_contents":
        return cls._parse_file_contents(content)
    if kind == "list_file_contents":
        return [cls._parse_file_contents(child) for child in content.children]
    return None
|
94
|
+
|
95
|
+
|
96
|
+
@classmethod
def _parse_file_contents(cls, file_contents):
    """
    Build a `FileContents` from a file-contents node.

    The path is the `value` of the node's second child; the contents are the
    `value` of its third child with surrounding backticks and whitespace removed.
    """
    children = file_contents.children
    path = children[1].value
    body = children[2].value.strip("`").strip()
    return FileContents(file_path=path, contents=body)
|
101
|
+
|
102
|
+
|
103
|
+
class MdToolMessage(ToolMessage):
|
104
|
+
"""
|
105
|
+
Subclass of ToolMessage, with LLM instructions to generate markdown rather than
|
106
|
+
json format.
|
107
|
+
|
108
|
+
Limited to simple tool messages where each field is:
|
109
|
+
- of type str, int, float, bool, FileContents, or list of these
|
110
|
+
|
111
|
+
The corresponding markdown format would look like:
|
112
|
+
|
113
|
+
```md
|
114
|
+
# request
|
115
|
+
<request>
|
116
|
+
|
117
|
+
# purpose
|
118
|
+
<purpose>
|
119
|
+
|
120
|
+
# <field1>
|
121
|
+
<value1>
|
122
|
+
|
123
|
+
# <field2>
|
124
|
+
<value2>
|
125
|
+
|
126
|
+
# <list_field>
|
127
|
+
- item1
|
128
|
+
- item2
|
129
|
+
- item3
|
130
|
+
|
131
|
+
|
132
|
+
# <list_of_file_contents_field_name>
|
133
|
+
file_path:<file_path1>
|
134
|
+
<contents1> (ensure code is in a fenced block, e.g. ```rust ... ```)
|
135
|
+
file_path:<file_path2>
|
136
|
+
<contents2>
|
137
|
+
...
|
138
|
+
```
|
139
|
+
|
140
|
+
Attributes:
|
141
|
+
request (str): name of agent method to map to. This is the method that
|
142
|
+
would handle the LLM's generated tool call.
|
143
|
+
purpose (str): purpose of agent method, expressed in general terms.
|
144
|
+
(This is used when auto-generating the tool instruction to the LLM)
|
145
|
+
"""
|
146
|
+
|
147
|
+
request: str
|
148
|
+
purpose: str
|
149
|
+
id: str = "" # placeholder for OpenAI-API tool_call_id
|
150
|
+
|
151
|
+
_allow_llm_use: bool = True # allow an LLM to use (i.e. generate) this tool?
|
152
|
+
|
153
|
+
# model_config = ConfigDict(extra=Extra.allow)
|
154
|
+
|
155
|
+
class Config:
    # Model configuration for this tool-message class.
    # This is NOT inherited from ToolMessage.Config, so we do it here again
    extra = Extra.allow
    arbitrary_types_allowed = False
    validate_all = True
    validate_assignment = True
    # do not include these fields in the generated schema
    # since we don't require the LLM to specify them
    # (format_instructions reads the "exclude" entry to skip these fields)
    schema_extra = {"exclude": {"purpose", "id"}}
|
164
|
+
|
165
|
+
@classmethod
def __init_subclass__(cls, **kwargs: Any) -> None:
    """
    Check field types when a subclass is defined.

    Every entry of `cls.__fields__` other than `request`, `purpose` and `id`
    must have a `type_` that appears in the allow-list below.

    Raises:
        ValueError: if a field's `type_` is not in the allow-list.
    """
    # NOTE(review): Python already treats __init_subclass__ as a classmethod,
    # so the explicit decorator looks redundant -- confirm before removing.
    super().__init_subclass__(**kwargs)
    for name, field in cls.__fields__.items():
        if name not in ["request", "purpose", "id"]:
            # NOTE(review): the check uses `type_`, while `_field_instruction`
            # uses `outer_type_` -- confirm which one is intended for lists.
            if field.type_ not in [
                str,
                int,
                float,
                bool,
                FileContents,
                List[str],
                List[int],
                List[float],
                List[bool],
                List[FileContents],
            ]:
                raise ValueError(
                    f"""
Field '{name}' must be of type str, int, float, bool,
or list of these,
FileContents, or list of these
"""
                )
|
189
|
+
|
190
|
+
@classmethod
def examples(cls) -> List[Union["ToolMessage", Tuple[str, "ToolMessage"]]]:
    """
    Examples to use in few-shot demos with markdown formatting instructions.

    Each example can be either:
    - just an MdToolMessage instance, e.g. MyTool(param1=1, param2="hello"), or
    - a tuple (description, MdToolMessage instance), where the description is
      a natural language "thought" that leads to the tool usage,
      e.g. ("I want to find the square of 5", SquareTool(num=5)).
      In some scenarios, including such a description can significantly
      enhance reliability of tool use.

    Returns:
        The list of examples; this base implementation returns an empty list.
    """
    # `Union[...]` is used instead of `"ToolMessage" | Tuple[...]`: the latter
    # applies `|` to a plain string when the annotation is evaluated.
    return []
|
204
|
+
|
205
|
+
@classmethod
def usage_examples(cls, random: bool = False) -> str:
    """
    Render the tool's examples as text for the LLM.

    Args:
        random (bool): if true, render a single randomly chosen example
            (useful when illustrating a dialog between LLM and user);
            if false, render ALL examples.

    Returns:
        str: the rendered examples separated by blank lines, or "" when
            `cls.examples()` is empty.
    """
    if len(cls.examples()) == 0:
        return ""
    chosen = [choice(cls.examples())] if random else cls.examples()

    rendered = []
    for idx, example in enumerate(chosen, 1):
        if isinstance(example, tuple):
            # (thought, tool) pair: show the thought before the formatted tool
            rendered.append(
                f"EXAMPLE {idx}: (THOUGHT: {example[0]}) => \n"
                f"{example[1].format_example()}"
            )
        else:
            rendered.append(f"EXAMPLE {idx}:\n {example.format_example()}")
    return "\n\n".join(rendered)
|
234
|
+
|
235
|
+
@classmethod
def tree_to_markdown(cls, tree):
    """
    Render a parse tree back to markdown text.

    Each section contributes a `# <header>` entry followed by its content as
    rendered by `cls._content_to_markdown`; entries are joined by blank lines.

    Declared as a classmethod because the body needs `cls`; without it every
    call failed with NameError.

    Args:
        tree: parse tree whose `children` are the sections.

    Returns:
        str: the markdown text.
    """
    parts = []
    for section in tree.children:
        header = section.children[0].children[1].value
        parts.append(f"# {header}")
        parts.append(cls._content_to_markdown(section.children[1]))
    return "\n\n".join(parts)
|
243
|
+
|
244
|
+
@classmethod
def _content_to_markdown(cls, content):
    """
    Render one content node as markdown, dispatching on `content.data`.

    - "primitive": the first child's `value`
    - "list": one `- <value>` line per item
    - "file_contents": `cls._file_contents_to_markdown(content)`
    - "list_file_contents": each child rendered that way, blank-line separated

    Returns None for any other `content.data`.
    """
    kind = content.data
    if kind == "primitive":
        return content.children[0].value
    if kind == "list":
        bullets = [f"- {entry.children[1].value}" for entry in content.children]
        return "\n".join(bullets)
    if kind == "file_contents":
        return cls._file_contents_to_markdown(content)
    if kind == "list_file_contents":
        blocks = [cls._file_contents_to_markdown(child) for child in content.children]
        return "\n\n".join(blocks)
    return None
|
256
|
+
|
257
|
+
@classmethod
def _file_contents_to_markdown(cls, file_contents):
    """
    Render a file-contents node as `file_path: <path>` followed by its body.

    The body is the third child's `value` with surrounding backticks and
    whitespace removed.
    """
    children = file_contents.children
    path = children[1].value
    body = children[2].value.strip("`").strip()
    return f"file_path: {path}\n{body}"
|
262
|
+
|
263
|
+
def to_markdown(self) -> str:
    """
    Render this message as markdown, one `# <field>` section per field.

    Fields named in `Config.schema_extra["exclude"]` are skipped. The output
    is built directly from the instance's field values; previously this
    method and `format_example` called each other without end.

    Returns:
        str: the sections, separated by blank lines.
    """
    hidden = self.Config.schema_extra.get("exclude", set())
    sections = [
        f"# {name}\n{self._value_to_markdown(getattr(self, name))}"
        for name in self.__fields__
        if name not in hidden
    ]
    return "\n\n".join(sections)


@staticmethod
def _value_to_markdown(value: Any) -> str:
    """Render one field value in the layout read back by `_parse_section_value`."""

    def is_primitive(item: Any) -> bool:
        return isinstance(item, (str, int, float, bool))

    def file_block(item: Any) -> str:
        # Anything with `file_path` and `contents` attributes is rendered as
        # a path line followed by a fenced block.
        return f"file_path: {item.file_path}\n```\n{item.contents}\n```"

    if is_primitive(value):
        return str(value)
    if isinstance(value, list):
        if all(is_primitive(item) for item in value):
            return "\n".join(f"- {item}" for item in value)
        return "\n\n".join(file_block(item) for item in value)
    return file_block(value)


def format_example(self) -> str:
    """
    Return the markdown rendering of this instance, for use in few-shot examples.
    """
    return self.to_markdown()
|
272
|
+
|
273
|
+
@classmethod
def format_instructions(cls) -> str:
    """
    Build the formatting instructions shown to the LLM for this tool.

    One instruction per field (skipping those named in
    `Config.schema_extra["exclude"]`), joined by blank lines, followed by an
    "# Examples" part when `cls.examples()` is non-empty.
    """
    skipped = cls.Config.schema_extra.get("exclude", set())
    per_field = []
    for field_name, field in cls.__fields__.items():
        if field_name in skipped:
            continue
        per_field.append(cls._field_instruction(field_name, field))
    body = "\n\n".join(per_field)

    examples_part = "# Examples\n\n" + cls.usage_examples() if cls.examples() else ""
    return f"{body}\n\n{examples_part}"
|
290
|
+
|
291
|
+
@classmethod
def _field_instruction(cls, name: str, field: Any) -> str:
    """
    Return the instruction text for one field, chosen by `field.outer_type_`.

    Raises:
        ValueError: if the type is not a supported primitive, `FileContents`,
            or a list of one of those.
    """
    declared = field.outer_type_
    primitives = (str, int, float, bool)

    if declared in primitives:
        return cls._primitive_instruction(name, declared)
    if declared == FileContents:
        return cls._file_contents_instruction(name)
    for item_type in primitives:
        if declared == List[item_type]:
            return cls._list_primitive_instruction(name, item_type)
    if declared == List[FileContents]:
        return cls._list_file_contents_instruction(name)

    raise ValueError(f"Unsupported field type: {declared}")
|
311
|
+
|
312
|
+
@staticmethod
def _primitive_instruction(name: str, field_type: Type[Any]) -> str:
    """Return the `# <name>` header and placeholder line for a primitive field."""
    return f"# {name}\n<{name}> ({field_type.__name__}, required)"
|
316
|
+
|
317
|
+
@staticmethod
def _list_primitive_instruction(name: str, item_type: Type[Any]) -> str:
    """Return the header plus two sample bullet lines for a list-of-primitive field."""
    label = item_type.__name__
    rows = [
        f"# {name}",
        f"- <{name}1> ({label})",
        f"- <{name}2> ({label})",
        "...",
    ]
    return "\n".join(rows)
|
321
|
+
|
322
|
+
@staticmethod
def _file_contents_instruction(name: str) -> str:
    """Return the header, path line and contents placeholder for a file field."""
    fence_hint = "(string, ensure code is within code-fence, e.g. ```python ... ```)"
    rows = [
        f"# {name}",
        "file_path: <file_path> (string)",
        f"<contents> {fence_hint}",
    ]
    return "\n".join(rows)
|
330
|
+
|
331
|
+
@staticmethod
def _list_file_contents_instruction(name: str) -> str:
    """Return the header plus two sample file entries for a list-of-files field."""
    fence_hint = "(string, ensure code is within code-fence, e.g. ```python ... ```)"
    entries = [
        f"file_path: <file_path{i}> (string)\n<contents{i}> {fence_hint}"
        for i in (1, 2)
    ]
    return f"# {name}\n" + "\n\n".join(entries) + "\n..."
|
343
|
+
|
344
|
+
@staticmethod
def json_group_instructions() -> str:
    """Template for instructions for a group of tools.
    Works with GPT4 but override this for weaker LLMs if needed.

    Returns:
        str: the dedented template text; it contains a `{json_instructions}`
            placeholder for the caller to fill in.
    """
    # NOTE(review): the wording still says "JSON format" although this class
    # describes a markdown format -- confirm whether that is intended.
    return textwrap.dedent(
        """
        === ALL AVAILABLE TOOLS and THEIR JSON FORMAT INSTRUCTIONS ===
        You have access to the following TOOLS to accomplish your task:

        {json_instructions}

        When one of the above TOOLs is applicable, you must express your
        request as "TOOL:" followed by the request in the above JSON format.
        """
    )
|
360
|
+
|
361
|
+
@classmethod
def llm_function_schema(
    cls,
    request: bool = False,
    defaults: bool = True,
) -> LLMFunctionSpec:
    """
    Not supported for this class; always raises.

    Raises:
        NotImplementedError: unconditionally. The message tells the user to
            set `use_functions_api=False` and `use_tools=True`.
    """
    raise NotImplementedError(
        """
The MdToolMessage class cannot be used with OpenAI function/tools.
In your ChatAgentConfig, set `use_functions_api=False` and `use_tools=True`
"""
    )
|
373
|
+
|
374
|
+
@classmethod
def from_markdown(cls: Type[T], markdown_content: str) -> T:
    """
    Create an instance of this MdToolMessage subclass from markdown text.

    The text is converted to a dict of field values by `cls._parse_markdown`,
    and that dict is passed to the constructor as keyword arguments.
    """
    return cls(**cls._parse_markdown(markdown_content))
|
381
|
+
|
382
|
+
@classmethod
def _parse_markdown(cls, markdown_content: str) -> Dict[str, Any]:
    """
    Split markdown text into `# <name>` sections and parse each section's body.

    A header is a line that starts with "# " and lies outside a fenced code
    block. Scanning line by line (rather than splitting on "\\n# ") means that:
    - a header on the very first line is recognised without a leading newline,
    - "# ..." comment lines inside fenced code do not start a new section.

    Args:
        markdown_content: the markdown text produced by the LLM.

    Returns:
        Dict mapping each lower-cased header name to the value returned by
        `cls._parse_section_value` for that section's lines. Text before the
        first header is ignored; a repeated header keeps the last value.
    """
    sections: List[Tuple[str, List[str]]] = []
    in_fence = False

    for line in markdown_content.splitlines():
        if line.lstrip().startswith("```"):
            # entering or leaving a fenced block; the fence line itself is body
            in_fence = not in_fence
        elif not in_fence and line.startswith("# "):
            sections.append((line[2:].strip().lower(), []))
            continue
        if sections:
            sections[-1][1].append(line)

    parsed_data: Dict[str, Any] = {}
    for key, body in sections:
        # drop blank lines around the body before handing it on
        text = "\n".join(body).strip()
        parsed_data[key] = cls._parse_section_value(text.split("\n") if text else [])
    return parsed_data
|
396
|
+
|
397
|
+
@classmethod
def _parse_section_value(cls, lines: List[str]) -> Any:
    """
    Convert the body lines of one section into a Python value.

    - no lines: None
    - first line starts with "- ": list of item strings
    - first line starts with "file_path:": result of `cls._parse_file_contents`
    - a single line: result of `cls._parse_primitive`
    - otherwise: the lines joined with newlines

    Args:
        lines: body lines of the section (header line excluded).
    """
    if not lines:
        return None

    first = lines[0]
    if first.startswith("- "):
        items = []
        for line in lines:
            # Remove only the bullet marker: `strip("- ")` would also eat
            # dashes that belong to the item (e.g. the sign of "-5").
            text = (line[2:] if line.startswith("- ") else line).strip()
            if text:
                items.append(text)
        return items

    # Same test as `_parse_file_contents` uses to detect a path line, so a
    # section accepted here always yields at least one file there.
    if first.startswith("file_path:"):
        return cls._parse_file_contents(lines)

    if len(lines) == 1:
        return cls._parse_primitive(first)

    return "\n".join(lines)
|
412
|
+
|
413
|
+
@staticmethod
def _parse_file_contents(lines: List[str]) -> Union[FileContents, List[FileContents]]:
    """
    Parse one or more `file_path:` entries from the body lines of a section.

    Each line starting with "file_path:" opens a new entry; the lines that
    follow, up to the next such line, are that entry's contents. Contents are
    assembled once per entry (so line breaks are kept), surrounding blank
    lines are dropped, and a first/last line containing a code fence is removed.

    Args:
        lines: body lines of the section.

    Returns:
        A single `FileContents` when exactly one entry is found, otherwise a
        list of them.

    Raises:
        ValueError: if no line starts with "file_path:".
    """
    entries: List[Tuple[str, List[str]]] = []
    for line in lines:
        if line.startswith("file_path:"):
            entries.append((line.split(":", 1)[1].strip(), []))
        elif entries:
            entries[-1][1].append(line)

    if not entries:
        raise ValueError("No 'file_path:' line found in file-contents section")

    file_contents_list = []
    for path, body in entries:
        body_lines = "\n".join(body).strip().split("\n")
        # if first, last line contain backticks, discard them
        if body_lines and "```" in body_lines[0]:
            body_lines = body_lines[1:]
        if body_lines and "```" in body_lines[-1]:
            body_lines = body_lines[:-1]
        file_contents_list.append(
            FileContents(file_path=path, contents="\n".join(body_lines))
        )

    return file_contents_list if len(file_contents_list) > 1 else file_contents_list[0]
|
442
|
+
|
443
|
+
@staticmethod
def _parse_primitive(value: str) -> Union[str, int, float, bool]:
    """
    Interpret a string as bool, int or float where possible.

    "true"/"false" (any letter case) become booleans; otherwise `int(value)`
    is tried, then `float(value)`; if both fail the string is returned as is.
    """
    lowered = value.lower()
    if lowered in ("true", "false"):
        return lowered == "true"
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue
    return value
|
@@ -0,0 +1,150 @@
|
|
1
|
+
"""
|
2
|
+
Non-JSON Tool for LLM to specify contents of a code file.
|
3
|
+
|
4
|
+
Why Non-JSON? Because there are numerous issues with even the best LLMs trying
|
5
|
+
to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
|
6
|
+
and the problem is even worse with weak LLMs. Json repair methods exist, but
|
7
|
+
can't deal with all possible cases.
|
8
|
+
|
9
|
+
E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
|
10
|
+
|
11
|
+
Note: We express the formatting rules with a template since it has several benefits:
|
12
|
+
- all of the formatting rules are in one place,
|
13
|
+
- we get a parser for free, and don't have to write parsing code,
|
14
|
+
- we get a formatting example generator for free, and don't have to write
|
15
|
+
example generation code.
|
16
|
+
- consistency between the parser and the example generator is guaranteed.
|
17
|
+
"""
|
18
|
+
|
19
|
+
from typing import Any, Callable, Dict, List, Tuple, Type
|
20
|
+
|
21
|
+
from parse import Parser, compile
|
22
|
+
|
23
|
+
from langroid.agent.tool_message import ToolMessage
|
24
|
+
from langroid.utils.constants import TOOL, TOOL_END
|
25
|
+
|
26
|
+
CODE_FENCE_START = "`" * 3
|
27
|
+
CODE_FENCE_END = "`" * 3
|
28
|
+
|
29
|
+
|
30
|
+
class CodeFileTool(ToolMessage):
|
31
|
+
"""
|
32
|
+
Used by LLM to specify contents of a code file.
|
33
|
+
"""
|
34
|
+
|
35
|
+
request: str = "code_file_tool"
|
36
|
+
purpose: str = """
|
37
|
+
To specify the contents of a code file.
|
38
|
+
"""
|
39
|
+
file_path: str
|
40
|
+
contents: str
|
41
|
+
language: str
|
42
|
+
|
43
|
+
@classmethod
def get_template(cls) -> str:
    """
    Return the text template shared by `parse`, `format` and `find_candidates`.

    `TOOL`, `TOOL_END`, the code-fence constants and the class's default
    `request` are interpolated here; the doubled braces are left as literal
    `{ws}`, `{file_path}`, `{language}` and `{contents}` placeholders.
    """
    # NOTE(review): `parse` registers `ws` as a custom *type*, but here it is
    # used as a field *name* -- confirm this is the intended template syntax.
    request = cls.default_value("request")

    return f"""
{{ws}}{TOOL}:{{ws}}{request}
{{ws}}{{file_path}}
{CODE_FENCE_START}{{ws}}{{language}}
{{contents}}
{CODE_FENCE_END}
{{ws}}{TOOL_END}{{ws}}
"""
|
55
|
+
|
56
|
+
@classmethod
def parse(cls, string) -> Dict[str, Any]:
    """
    Parse `string` against the class template.

    Args:
        string: text expected to follow the layout from `get_template`.

    Returns:
        Dict with `request` (the class default) and the stripped `file_path`,
        `language` and `contents` fields, or an empty dict when the text does
        not match the template.
    """
    import logging

    def parse_ws(text):
        # identity converter registered for the `ws` type
        return text

    parser = Parser(cls.get_template(), dict(ws=parse_ws))
    result = parser.parse(string)
    # Diagnostics go to the logger at debug level instead of stdout, so
    # library users do not get unsolicited prints.
    logging.getLogger(__name__).debug("CodeFileTool parse result: %r", result)

    if not result:
        return {}
    return {
        "request": cls.default_value("request"),
        "file_path": result["file_path"].strip(),
        "language": result["language"].strip(),
        "contents": result["contents"].strip(),
    }
|
78
|
+
|
79
|
+
@classmethod
def format(cls, instance):
    """
    Render `instance` as text by filling in the class template.

    Args:
        instance: object providing `request`, `file_path`, `language` and
            `contents` attributes.

    Returns:
        str: the template with every placeholder substituted.
    """
    return cls.get_template().format(
        request=instance.request,
        file_path=instance.file_path,
        language=instance.language,
        contents=instance.contents,
        # The template's whitespace placeholder is named `ws`; it was being
        # supplied as `s`, which made `.format` raise KeyError.
        ws="",  # no extra spaces
    )
|
88
|
+
|
89
|
+
@classmethod
def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
    """
    Create a subclass of CodeFileTool with a static method get_directory,
    which returns the current directory path, so that all file paths are
    interpreted as relative to the current directory.

    Args:
        get_directory: zero-argument callable returning a directory path.

    Returns:
        The newly defined subclass of `cls`.
    """

    class SubCodeFileTool(cls):
        # attach the supplied callable to the subclass as a static method
        get_directory: Callable[[], str] = staticmethod(get_directory)

    return SubCodeFileTool
|
101
|
+
|
102
|
+
@classmethod
def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
    # Returns a one-element list holding a sample instance of this tool:
    # a small Rust file at src/lib.rs.
    return [
        cls(
            file_path="src/lib.rs",
            language="rust",
            contents="""
// function to add two numbers
pub fn add(a: i32, b: i32) -> i32 {
a + b
}
""",
        )
    ]
|
116
|
+
|
117
|
+
@classmethod
def find_candidates(cls, input_str: str) -> List[str]:
    """
    Find all possible (top-level) candidates for
    CodeFileTool in the input string.

    Args:
        input_str: text that may contain zero or more tool blocks.

    Returns:
        List[str]: the substrings of `input_str` that match the template.
    """
    parser = compile(cls.get_template())
    # The template has only named fields, so `result.fixed` is always an
    # empty tuple. Ask for the raw matches and return the matched text.
    matches = parser.findall(input_str, evaluate_result=False)
    return [found.match.group(0) for found in matches]
|
128
|
+
|
129
|
+
@classmethod
def from_string(cls, input_string: str) -> "CodeFileTool":
    """
    Build a CodeFileTool from text laid out according to the template.

    Raises:
        ValueError: if `cls.parse` returns an empty result for the text.
    """
    fields = cls.parse(input_string)
    if not fields:
        raise ValueError("Invalid input string format")
    return cls(**fields)
|
136
|
+
|
137
|
+
@classmethod
def to_string(cls, instance) -> str:
    """Convert a CodeFileTool object to a string, using the TEMPLATE.

    Args:
        instance: the object to render; it is passed straight to `cls.format`.
    """
    return cls.format(instance)
|
141
|
+
|
142
|
+
def __str__(self):
    """Return the template-formatted text for this instance."""
    # `to_string` is a classmethod that takes the instance as an explicit
    # argument; calling it with no argument raised TypeError.
    return self.to_string(self)
|
144
|
+
|
145
|
+
def __repr__(self):
    # Multi-line summary showing file_path, language and contents.
    # NOTE(review): values are interpolated as-is inside single quotes, so
    # quotes or newlines in `contents` are not escaped -- confirm acceptable.
    return f"""CodeFileTool(
file_path='{self.file_path}',
language='{self.language}',
contents='{self.contents}')
"""
|