langroid-0.16.7-py3-none-any.whl → langroid-0.17.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. langroid/agent/base.py +45 -21
  2. langroid/agent/chat_agent.py +22 -14
  3. langroid/agent/chat_document.py +22 -13
  4. langroid/agent/tool_message.py +11 -11
  5. langroid/agent/tools/file_tools.py +234 -0
  6. langroid/agent/xml_tool_message.py +178 -45
  7. langroid/utils/constants.py +2 -0
  8. langroid/utils/git_utils.py +251 -0
  9. langroid/utils/system.py +78 -0
  10. {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/METADATA +6 -3
  11. {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/RECORD +14 -89
  12. pyproject.toml +3 -2
  13. langroid/agent/md_tool_message_grammar.py +0 -455
  14. langroid/agent/tools/code_file_tool_parse.py +0 -150
  15. langroid/agent/tools/code_file_tool_pyparsing.py +0 -194
  16. langroid/agent/tools/code_file_tool_pyparsing2.py +0 -199
  17. langroid/agent/tools/extract_tool.py +0 -96
  18. langroid/agent/tools/formatted_model_custom.py +0 -150
  19. langroid/agent/tools/formatted_model_custom2.py +0 -168
  20. langroid/agent/tools/formatted_model_custom3.py +0 -279
  21. langroid/agent/tools/formatted_model_custom4.py +0 -395
  22. langroid/agent/tools/formatted_model_jinja.py +0 -133
  23. langroid/agent/tools/formatted_model_jinja.py-e +0 -122
  24. langroid/agent/tools/formatted_model_jinja2.py +0 -145
  25. langroid/agent/tools/formatted_model_jinja2.py-e +0 -135
  26. langroid/agent/tools/formatted_model_lark.py +0 -0
  27. langroid/agent/tools/formatted_model_lark2.py +0 -168
  28. langroid/agent/tools/formatted_model_parse.py +0 -105
  29. langroid/agent/tools/formatted_model_parse.py-e +0 -98
  30. langroid/agent/tools/formatted_model_parse2.py +0 -113
  31. langroid/agent/tools/formatted_model_parse2.py-e +0 -109
  32. langroid/agent/tools/formatted_model_parse3.py +0 -114
  33. langroid/agent/tools/formatted_model_parse3.py-e +0 -110
  34. langroid/agent/tools/formatted_model_parsimon.py +0 -194
  35. langroid/agent/tools/formatted_model_parsimon.py-e +0 -186
  36. langroid/agent/tools/formatted_model_pyparsing.py +0 -169
  37. langroid/agent/tools/formatted_model_pyparsing.py-e +0 -149
  38. langroid/agent/tools/formatted_model_pyparsing2.py +0 -159
  39. langroid/agent/tools/formatted_model_pyparsing2.py-e +0 -143
  40. langroid/agent/tools/formatted_model_pyparsing3.py +0 -133
  41. langroid/agent/tools/formatted_model_pyparsing3.py-e +0 -121
  42. langroid/agent/tools/formatted_model_pyparsing4.py +0 -213
  43. langroid/agent/tools/formatted_model_pyparsing4.py-e +0 -176
  44. langroid/agent/tools/formatted_model_pyparsing5.py +0 -173
  45. langroid/agent/tools/formatted_model_pyparsing5.py-e +0 -142
  46. langroid/agent/tools/formatted_model_regex.py +0 -246
  47. langroid/agent/tools/formatted_model_regex.py-e +0 -248
  48. langroid/agent/tools/formatted_model_regex2.py +0 -250
  49. langroid/agent/tools/formatted_model_regex2.py-e +0 -253
  50. langroid/agent/tools/formatted_model_tatsu.py +0 -172
  51. langroid/agent/tools/formatted_model_tatsu.py-e +0 -160
  52. langroid/agent/tools/formatted_model_template.py +0 -217
  53. langroid/agent/tools/formatted_model_template.py-e +0 -200
  54. langroid/agent/tools/formatted_model_xml.py +0 -178
  55. langroid/agent/tools/formatted_model_xml2.py +0 -178
  56. langroid/agent/tools/formatted_model_xml3.py +0 -132
  57. langroid/agent/tools/formatted_model_xml4.py +0 -130
  58. langroid/agent/tools/formatted_model_xml5.py +0 -130
  59. langroid/agent/tools/formatted_model_xml6.py +0 -113
  60. langroid/agent/tools/formatted_model_xml7.py +0 -117
  61. langroid/agent/tools/formatted_model_xml8.py +0 -164
  62. langroid/agent/tools/generator_tool.py +0 -20
  63. langroid/agent/tools/generic_tool.py +0 -165
  64. langroid/agent/tools/generic_tool_tatsu.py +0 -275
  65. langroid/agent/tools/grammar_based_model.py +0 -132
  66. langroid/agent/tools/grammar_based_model.py-e +0 -128
  67. langroid/agent/tools/grammar_based_model_lark.py +0 -156
  68. langroid/agent/tools/grammar_based_model_lark.py-e +0 -153
  69. langroid/agent/tools/grammar_based_model_parse.py +0 -86
  70. langroid/agent/tools/grammar_based_model_parse.py-e +0 -80
  71. langroid/agent/tools/grammar_based_model_parsimonious.py +0 -129
  72. langroid/agent/tools/grammar_based_model_parsimonious.py-e +0 -120
  73. langroid/agent/tools/grammar_based_model_pyparsing.py +0 -105
  74. langroid/agent/tools/grammar_based_model_pyparsing.py-e +0 -103
  75. langroid/agent/tools/grammar_based_model_regex.py +0 -139
  76. langroid/agent/tools/grammar_based_model_regex.py-e +0 -130
  77. langroid/agent/tools/grammar_based_model_regex2.py +0 -124
  78. langroid/agent/tools/grammar_based_model_regex2.py-e +0 -116
  79. langroid/agent/tools/grammar_based_model_tatsu.py +0 -80
  80. langroid/agent/tools/grammar_based_model_tatsu.py-e +0 -77
  81. langroid/agent/tools/lark_earley_example.py +0 -135
  82. langroid/agent/tools/lark_earley_example.py-e +0 -117
  83. langroid/agent/tools/lark_example.py +0 -72
  84. langroid/agent/tools/note_tool.py +0 -0
  85. langroid/agent/tools/parse_example.py +0 -76
  86. langroid/agent/tools/parse_example2.py +0 -87
  87. langroid/agent/tools/parse_example3.py +0 -42
  88. langroid/agent/tools/parse_test.py +0 -791
  89. langroid/agent/tools/run_python_code.py +0 -60
  90. {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/LICENSE +0 -0
  91. {langroid-0.16.7.dist-info → langroid-0.17.0.dist-info}/WHEEL +0 -0
langroid/agent/tools/code_file_tool_parse.py (file removed)
@@ -1,150 +0,0 @@
1
- """"
2
- Non-JSON Tool for LLM to specify contents of a code file.
3
-
4
- Why Non-JSON? Because there are numerous issues with even the best LLMs trying
5
- to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
6
- and the problem is even worse with weak LLMs. Json repair methods exist, but
7
- can't deal with all possible cases.
8
-
9
- E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
10
-
11
- Note: We express the formatting rules with a template since it has several benefits:
12
- - all of the formatting rules are in one place,
13
- - we get a parser for free, and don't have to write parsing code,
14
- - we get a formatting example generator for free, and don't have to write
15
- example generation code.
16
- - consistency between the parser and the example generator is guaranteed.
17
- """
18
-
19
- from typing import Any, Callable, Dict, List, Tuple, Type
20
-
21
- from parse import Parser, compile
22
-
23
- from langroid.agent.tool_message import ToolMessage
24
- from langroid.utils.constants import TOOL, TOOL_END
25
-
26
- CODE_FENCE_START = "`" * 3
27
- CODE_FENCE_END = "`" * 3
28
-
29
-
30
- class CodeFileTool(ToolMessage):
31
- """
32
- Used by LLM to specify contents of a code file.
33
- """
34
-
35
- request: str = "code_file_tool"
36
- purpose: str = """
37
- To specify the contents of a code file.
38
- """
39
- file_path: str
40
- contents: str
41
- language: str
42
-
43
- @classmethod
44
- def get_template(cls) -> str:
45
- request = cls.default_value("request")
46
-
47
- return f"""
48
- {{ws}}{TOOL}:{{ws}}{request}
49
- {{ws}}{{file_path}}
50
- {CODE_FENCE_START}{{ws}}{{language}}
51
- {{contents}}
52
- {CODE_FENCE_END}
53
- {{ws}}{TOOL_END}{{ws}}
54
- """
55
-
56
- @classmethod
57
- def parse(cls, string) -> Dict[str, Any]:
58
- def parse_ws(string):
59
- return (
60
- string # This allows matching any amount of whitespace, including none
61
- )
62
-
63
- template = cls.get_template()
64
- parser = Parser(template, dict(ws=parse_ws))
65
-
66
- print(f"Parsing string:\n{string}") # Debug print
67
- result = parser.parse(string)
68
- print(f"Parse result: {result}") # Debug print
69
-
70
- if result:
71
- return {
72
- "request": cls.default_value("request"),
73
- "file_path": result["file_path"].strip(),
74
- "language": result["language"].strip(),
75
- "contents": result["contents"].strip(),
76
- }
77
- return {}
78
-
79
- @classmethod
80
- def format(cls, instance):
81
- return cls.get_template().format(
82
- request=instance.request,
83
- file_path=instance.file_path,
84
- language=instance.language,
85
- contents=instance.contents,
86
- s="", # no extra spaces
87
- )
88
-
89
- @classmethod
90
- def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
91
- """
92
- Create a subclass of CodeFileTool with a static method get_directory,
93
- which returns the current directory path, so that all file paths are
94
- interpreted as relative to the current directory.
95
- """
96
-
97
- class SubCodeFileTool(cls):
98
- get_directory: Callable[[], str] = staticmethod(get_directory)
99
-
100
- return SubCodeFileTool
101
-
102
- @classmethod
103
- def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
104
- return [
105
- cls(
106
- file_path="src/lib.rs",
107
- language="rust",
108
- contents="""
109
- // function to add two numbers
110
- pub fn add(a: i32, b: i32) -> i32 {
111
- a + b
112
- }
113
- """,
114
- )
115
- ]
116
-
117
- @classmethod
118
- def find_candidates(cls, input_str: str) -> List[str]:
119
- """
120
- Find all possible (top-level) candidates for
121
- CodeFileTool in the input string.
122
- """
123
- # Use parse.findall to find all instances of the CodeFileTool pattern
124
- parser = compile(cls.get_template())
125
- matches = list(parser.findall(input_str))
126
- candidates = [match.fixed for match in matches]
127
- return candidates
128
-
129
- @classmethod
130
- def from_string(cls, input_string: str) -> "CodeFileTool":
131
- """Parse a string into a CodeFileTool object, using the TEMPLATE."""
132
- parsed_data = cls.parse(input_string)
133
- if parsed_data:
134
- return cls(**parsed_data)
135
- raise ValueError("Invalid input string format")
136
-
137
- @classmethod
138
- def to_string(cls, instance) -> str:
139
- """Convert a CodeFileTool object to a string, using the TEMPLATE."""
140
- return cls.format(instance)
141
-
142
- def __str__(self):
143
- return self.to_string()
144
-
145
- def __repr__(self):
146
- return f"""CodeFileTool(
147
- file_path='{self.file_path}',
148
- language='{self.language}',
149
- contents='{self.contents}')
150
- """
langroid/agent/tools/code_file_tool_pyparsing.py (file removed)
@@ -1,194 +0,0 @@
1
- """"
2
- Non-JSON Tool for LLM to specify contents of a code file.
3
-
4
- Why Non-JSON? Because there are numerous issues with even the best LLMs trying
5
- to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
6
- and the problem is even worse with weak LLMs. Json repair methods exist, but
7
- can't deal with all possible cases.
8
-
9
- E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
10
-
11
- Note: We express the formatting rules with a template since it has several benefits:
12
- - all of the formatting rules are in one place,
13
- - we get a parser for free, and don't have to write parsing code,
14
- - we get a formatting example generator for free, and don't have to write
15
- example generation code.
16
- - consistency between the parser and the example generator is guaranteed.
17
- """
18
-
19
- from typing import Any, Callable, Dict, List, Tuple, Type
20
-
21
- from pyparsing import (
22
- LineEnd,
23
- Literal,
24
- Optional,
25
- ParserElement,
26
- SkipTo,
27
- White,
28
- Word,
29
- alphanums,
30
- lineEnd,
31
- printables,
32
- )
33
-
34
- from langroid.agent.tool_message import ToolMessage
35
- from langroid.utils.constants import TOOL, TOOL_END
36
-
37
- CODE_FENCE_START = "`" * 3
38
- CODE_FENCE_END = "`" * 3
39
-
40
-
41
- class CodeFileTool(ToolMessage):
42
- """
43
- Used by LLM to specify contents of a code file.
44
- """
45
-
46
- request: str = "code_file_tool"
47
- purpose: str = """
48
- To specify the contents of a code file.
49
- """
50
- file_path: str
51
- contents: str
52
- language: str
53
-
54
- @classmethod
55
- def create_parser(cls):
56
- TOOL_START = Literal(TOOL + ":")
57
- CODE_FENCE = Literal("```")
58
-
59
- file_path = SkipTo(lineEnd)("file_path")
60
- language = Word(alphanums)("language")
61
- contents = SkipTo(CODE_FENCE)("contents")
62
-
63
- parser = (
64
- TOOL_START
65
- + Optional(Word(printables), default=cls.default_value("request"))(
66
- "request"
67
- )
68
- + lineEnd
69
- + file_path
70
- + lineEnd
71
- + CODE_FENCE
72
- + Optional(White()) # Allow space after ```
73
- + language
74
- + lineEnd
75
- + contents
76
- + CODE_FENCE
77
- + lineEnd # Add this line to expect a newline after the closing fence
78
- + Literal(TOOL_END)
79
- )
80
- return parser
81
-
82
- @classmethod
83
- def parse(cls, string) -> Dict[str, Any]:
84
- parser = cls.create_parser()
85
- try:
86
- result = parser.parseString(string, parseAll=True)
87
- return {
88
- "request": result["request"],
89
- "file_path": result["file_path"].strip(),
90
- "language": result["language"],
91
- "contents": result["contents"].strip(),
92
- }
93
- except Exception as e:
94
- print(f"Parsing failed: {e}")
95
- return {}
96
-
97
- @classmethod
98
- def format(cls, instance) -> str:
99
- parser = cls.create_parser()
100
-
101
- def format_element(element):
102
- if isinstance(element, Literal):
103
- return element.match
104
- elif element.resultsName:
105
- if element.resultsName == "request":
106
- return instance.request
107
- elif element.resultsName == "file_path":
108
- return instance.file_path
109
- elif element.resultsName == "language":
110
- return instance.language
111
- elif element.resultsName == "contents":
112
- return f"{instance.contents}\n" # Add newline after contents
113
- elif isinstance(element, LineEnd):
114
- return "\n"
115
- return ""
116
-
117
- def traverse_parser(parser_element):
118
- if isinstance(parser_element, ParserElement):
119
- if isinstance(parser_element, SkipTo):
120
- return format_element(parser_element)
121
- elif hasattr(parser_element, "exprs"):
122
- return "".join(
123
- traverse_parser(expr) for expr in parser_element.exprs
124
- )
125
- else:
126
- return format_element(parser_element)
127
- return str(parser_element)
128
-
129
- formatted_string = traverse_parser(parser)
130
-
131
- return formatted_string.strip()
132
-
133
- @classmethod
134
- def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
135
- """
136
- Create a subclass of CodeFileTool with a static method get_directory,
137
- which returns the current directory path, so that all file paths are
138
- interpreted as relative to the current directory.
139
- """
140
-
141
- class SubCodeFileTool(cls):
142
- get_directory: Callable[[], str] = staticmethod(get_directory)
143
-
144
- return SubCodeFileTool
145
-
146
- @classmethod
147
- def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
148
- return [
149
- cls(
150
- file_path="src/lib.rs",
151
- language="rust",
152
- contents="""
153
- // function to add two numbers
154
- pub fn add(a: i32, b: i32) -> i32 {
155
- a + b
156
- }
157
- """,
158
- )
159
- ]
160
-
161
- @classmethod
162
- def find_candidates(cls, input_str: str) -> List[str]:
163
- """
164
- Find all possible (top-level) candidates for
165
- CodeFileTool in the input string.
166
- """
167
- # Use parse.findall to find all instances of the CodeFileTool pattern
168
- parser = compile(cls.get_template())
169
- matches = list(parser.findall(input_str))
170
- candidates = [match.fixed for match in matches]
171
- return candidates
172
-
173
- @classmethod
174
- def from_string(cls, input_string: str) -> "CodeFileTool":
175
- """Parse a string into a CodeFileTool object, using the TEMPLATE."""
176
- parsed_data = cls.parse(input_string)
177
- if parsed_data:
178
- return cls(**parsed_data)
179
- raise ValueError("Invalid input string format")
180
-
181
- @classmethod
182
- def to_string(cls, instance) -> str:
183
- """Convert a CodeFileTool object to a string, using the TEMPLATE."""
184
- return cls.format(instance)
185
-
186
- def __str__(self):
187
- return self.to_string()
188
-
189
- def __repr__(self):
190
- return f"""CodeFileTool(
191
- file_path='{self.file_path}',
192
- language='{self.language}',
193
- contents='{self.contents}')
194
- """
langroid/agent/tools/code_file_tool_pyparsing2.py (file removed)
@@ -1,199 +0,0 @@
1
- """"
2
- Non-JSON Tool for LLM to specify contents of a code file.
3
-
4
- Why Non-JSON? Because there are numerous issues with even the best LLMs trying
5
- to return code within JSON strings (e.g. unescaped newlines, quotes, etc.),
6
- and the problem is even worse with weak LLMs. Json repair methods exist, but
7
- can't deal with all possible cases.
8
-
9
- E.g. see this study from Aider: https://aider.chat/2024/08/14/code-in-json.html
10
-
11
- Note: We express the formatting rules with a template since it has several benefits:
12
- - all of the formatting rules are in one place,
13
- - we get a parser for free, and don't have to write parsing code,
14
- - we get a formatting example generator for free, and don't have to write
15
- example generation code.
16
- - consistency between the parser and the example generator is guaranteed.
17
- """
18
-
19
- from typing import Callable, List, Tuple, Type
20
-
21
- from pyparsing import (
22
- Literal,
23
- Optional,
24
- SkipTo,
25
- White,
26
- Word,
27
- alphanums,
28
- lineEnd,
29
- printables,
30
- )
31
-
32
- from langroid.agent.tool_message import ToolMessage
33
- from langroid.agent.tools.generic_tool import GenericTool
34
- from langroid.utils.constants import TOOL, TOOL_END
35
-
36
- CODE_FENCE_START = "`" * 3
37
- CODE_FENCE_END = "`" * 3
38
-
39
-
40
- class CodeFileTool(GenericTool):
41
- """
42
- Used by LLM to specify contents of a code file.
43
- """
44
-
45
- request: str = "code_file_tool"
46
- purpose: str = """
47
- To specify the <contents> of a code file at <file_path>,
48
- containing code in a specific <language>.
49
- """
50
- file_path: str
51
- contents: str
52
- language: str
53
-
54
- @classmethod
55
- def define_grammar(cls):
56
- TOOL_START = Literal(TOOL + ":")
57
- CODE_FENCE = Literal("```")
58
-
59
- file_path = SkipTo(lineEnd)("file_path")
60
- language = Word(alphanums)("language")
61
- contents = SkipTo(lineEnd + CODE_FENCE)("contents")
62
-
63
- grammar = (
64
- TOOL_START
65
- + Optional(White())
66
- + Optional(Word(printables), default=cls.default_value("request"))(
67
- "request"
68
- )
69
- + lineEnd
70
- + Optional(White())
71
- + file_path
72
- + lineEnd
73
- + CODE_FENCE
74
- + Optional(White())
75
- + language
76
- + lineEnd
77
- + contents
78
- + lineEnd
79
- + CODE_FENCE
80
- + lineEnd
81
- + Optional(White())
82
- + Literal(TOOL_END)
83
- + Optional(White())
84
- )
85
- return grammar
86
-
87
- @classmethod
88
- def create(cls, get_directory: Callable[[], str]) -> Type["CodeFileTool"]:
89
- """
90
- Create a subclass of CodeFileTool with a static method get_directory,
91
- which returns the current directory path, so that all file paths are
92
- interpreted as relative to the current directory.
93
- """
94
-
95
- class SubCodeFileTool(cls):
96
- get_directory: Callable[[], str] = staticmethod(get_directory)
97
-
98
- return SubCodeFileTool
99
-
100
- @classmethod
101
- def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]:
102
- return [
103
- cls(
104
- file_path="src/lib.rs",
105
- language="rust",
106
- contents="""
107
- // function to add two numbers
108
- pub fn add(a: i32, b: i32) -> i32 {
109
- a + b
110
- }
111
- """,
112
- )
113
- ]
114
-
115
- def __repr__(self):
116
- return f"""CodeFileTool(
117
- file_path='{self.file_path}',
118
- language='{self.language}',
119
- contents='{self.contents}')
120
- """
121
-
122
-
123
- if __name__ == "__main__":
124
- # Informal test to print instructions for CodeFileTool
125
- print("Testing CodeFileTool instructions:")
126
- print("-" * 50)
127
-
128
- instructions = CodeFileTool.instructions()
129
- print(instructions)
130
-
131
- print("-" * 50)
132
- print("End of instructions test")
133
-
134
- # You can add more informal tests here if needed
135
- # For example, testing the parse method:
136
- print("\nTesting CodeFileTool parse method:")
137
- print("-" * 50)
138
-
139
- test_input = """TOOL: code_file_tool
140
- src/main.py
141
- ```python
142
- def hello_world():
143
- print("Hello, World!")
144
-
145
- if __name__ == "__main__":
146
- hello_world()
147
- ```
148
- TOOL_END"""
149
-
150
- parsed_result = CodeFileTool.parse(test_input)
151
- print("Parsed result:")
152
- for key, value in parsed_result.items():
153
- print(f"{key}: {value}")
154
-
155
- print("-" * 50)
156
- print("End of parse test")
157
-
158
- # Test format method
159
- print("\nTesting CodeFileTool format method:")
160
- print("-" * 50)
161
- test_instance = CodeFileTool(
162
- request="code_file_tool",
163
- file_path="tests/test_file.py",
164
- language="python",
165
- contents="""
166
- def test_function():
167
- assert 1 + 1 == 2
168
-
169
- if __name__ == "__main__":
170
- test_function()
171
- """,
172
- )
173
- formatted_output = CodeFileTool.format(test_instance)
174
- print("Formatted output:")
175
- print(formatted_output)
176
- print("-" * 50)
177
- print("End of format test")
178
-
179
- # Additional test: Round-trip (parse -> format -> parse)
180
- print("\nTesting CodeFileTool round-trip (parse -> format -> parse):")
181
- print("-" * 50)
182
- initial_parse = CodeFileTool.parse(test_input)
183
- initial_instance = CodeFileTool(**initial_parse)
184
- formatted_output = CodeFileTool.format(initial_instance)
185
- final_parse = CodeFileTool.parse(formatted_output)
186
-
187
- print("Initial parse:")
188
- print(initial_parse)
189
- print("\nFormatted output:")
190
- print(formatted_output)
191
- print("\nFinal parse:")
192
- print(final_parse)
193
-
194
- if initial_parse == final_parse:
195
- print("\nRound-trip test passed: Initial and final parses match.")
196
- else:
197
- print("\nRound-trip test failed: Initial and final parses do not match.")
198
- print("-" * 50)
199
- print("End of round-trip test")
langroid/agent/tools/extract_tool.py (file removed)
@@ -1,96 +0,0 @@
1
- """
2
- A tool to extract portions from previous (numbered) messages in
3
- the chat history.
4
- The idea is that when an LLM wants to (or is asked to) simply extract
5
- portions of a message verbatim, it should use this tool/function to
6
- SPECIFY what should be extracted, rather than actually extracting it.
7
- This will usually be much cheaper and faster than actually writing out the extracted
8
- text. The handler of this tool/function will then extract the text and send it back.
9
- """
10
-
11
- from typing import Protocol, no_type_check
12
-
13
- from jinja2 import DictLoader, Environment
14
-
15
- from langroid.agent.tool_message import ToolMessage
16
- from langroid.language_models.base import LLMMessage
17
-
18
-
19
- def extract_between(value: str, start_word: str, end_word: str) -> str:
20
- """
21
- Extract the substring between two words in a string.
22
- NOTE: If there are multiple occurrences of the start_word, the first one is used,
23
- and if there are multiple occurrences of the end_word, the first one after the
24
- start_word is used.
25
-
26
- We do not handle the case of multiple occurrences of the start_word, followed by
27
- multiple occurrences of the end_word
28
-
29
- Args:
30
- value (str): the string to extract from
31
- start_word (str): the word that starts the substring
32
- end_word (str): the word that ends the substring
33
-
34
- Returns:
35
- str: the substring between the two words
36
- """
37
- try:
38
- start_index = value.index(start_word) + len(start_word)
39
- end_index = value.index(end_word, start_index)
40
- return value[start_index:end_index].strip()
41
- except ValueError:
42
- return ""
43
-
44
-
45
- class HasMessageHistory(Protocol):
46
- """
47
- Defines the fields expected in a class that enables this tool.
48
- """
49
-
50
- message_history: list[LLMMessage]
51
-
52
-
53
- class ExtractTool(ToolMessage):
54
- request: str = "extract"
55
- purpose: str = """
56
- To generate a message in the form of a <jinja_template>,
57
- using the Jinja templating language, where the
58
- the i'th message is referred to as msg[i], and integer indices are used
59
- to specify which part of the message to extract, e.g. msg[2][13:45].
60
- """
61
- jinja_template: str
62
-
63
- @classmethod
64
- def instructions(cls) -> str:
65
- return """
66
- In a conversation with the user, your responses may sometimes use verbatim
67
- extracts of previous messages in the conversation. You are an expert at
68
- Jinja templating syntax, and you will rely on this syntax whenever you find
69
- yourself wanting to repeat verbatim text from earlier parts of the
70
- conversation.
71
- In your Jinja templates you can use references like {{msg[3][3:100]}} to
72
- indicate that the user should substitute the content of message number 3
73
- (assume first msg is number 1), starting at position 3 and ending at 100.
74
-
75
- For example you may respond with something like:
76
-
77
- The story started like this: {{msg[5][45:89]}}. Then John came home and
78
- {{msg[2][4:19}}.
79
-
80
- VERY IMPORTANT --
81
- (a) your FIRST priority is to generate messages that would sound natural
82
- when the jinja templates are rendered.
83
-
84
- (b) your NEXT priority is to ALWAYS RELY on the above JINJA scheme when
85
- your intended message would contain verbatim text from previous messages.
86
-
87
- (c) Do not simply use large verbatim parts of previous messages, when doing
88
- so may not result in natural responses.
89
- """
90
-
91
- @no_type_check
92
- def handle(self: HasMessageHistory) -> str:
93
- msg = self.message_history
94
- env = Environment(loader=DictLoader({"base": self.jinja_template}))
95
- template = env.get_template("base")
96
- return template.render(msg=msg)