chatterer 0.1.24__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +97 -93
- chatterer/common_types/__init__.py +21 -21
- chatterer/common_types/io.py +19 -19
- chatterer/examples/__main__.py +75 -75
- chatterer/examples/any2md.py +85 -85
- chatterer/examples/pdf2md.py +338 -338
- chatterer/examples/pdf2txt.py +54 -54
- chatterer/examples/ppt.py +486 -486
- chatterer/examples/pw.py +143 -137
- chatterer/examples/snippet.py +56 -55
- chatterer/examples/transcribe.py +192 -112
- chatterer/examples/upstage.py +89 -89
- chatterer/examples/web2md.py +80 -66
- chatterer/interactive.py +354 -354
- chatterer/language_model.py +536 -536
- chatterer/messages.py +21 -21
- chatterer/strategies/__init__.py +13 -13
- chatterer/strategies/atom_of_thoughts.py +975 -975
- chatterer/strategies/base.py +14 -14
- chatterer/tools/__init__.py +46 -46
- chatterer/tools/caption_markdown_images.py +384 -384
- chatterer/tools/citation_chunking/__init__.py +3 -3
- chatterer/tools/citation_chunking/chunks.py +53 -53
- chatterer/tools/citation_chunking/citation_chunker.py +118 -118
- chatterer/tools/citation_chunking/citations.py +285 -285
- chatterer/tools/citation_chunking/prompt.py +157 -157
- chatterer/tools/citation_chunking/reference.py +26 -26
- chatterer/tools/citation_chunking/utils.py +138 -138
- chatterer/tools/convert_pdf_to_markdown.py +645 -625
- chatterer/tools/convert_to_text.py +446 -446
- chatterer/tools/upstage_document_parser.py +705 -705
- chatterer/tools/webpage_to_markdown.py +739 -739
- chatterer/tools/youtube.py +146 -146
- chatterer/utils/__init__.py +15 -15
- chatterer/utils/base64_image.py +293 -285
- chatterer/utils/bytesio.py +59 -59
- chatterer/utils/code_agent.py +237 -237
- chatterer/utils/imghdr.py +148 -148
- {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/METADATA +390 -389
- chatterer-0.1.25.dist-info/RECORD +45 -0
- chatterer-0.1.24.dist-info/RECORD +0 -45
- {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/WHEEL +0 -0
- {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/entry_points.txt +0 -0
- {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/top_level.txt +0 -0
chatterer/utils/bytesio.py
CHANGED
@@ -1,59 +1,59 @@
|
|
1
|
-
import os
|
2
|
-
from contextlib import contextmanager, suppress
|
3
|
-
from io import BytesIO
|
4
|
-
from typing import Iterator, Optional
|
5
|
-
|
6
|
-
from ..common_types.io import BytesReadable, PathOrReadable, StringReadable
|
7
|
-
|
8
|
-
|
9
|
-
@contextmanager
|
10
|
-
def read_bytes_stream(
|
11
|
-
path_or_file: PathOrReadable,
|
12
|
-
assume_pathlike_bytes_as_path: bool = False,
|
13
|
-
assume_pathlike_string_as_path: bool = True,
|
14
|
-
) -> Iterator[Optional[BytesReadable]]:
|
15
|
-
"""
|
16
|
-
Context manager for opening a file or using an existing stream.
|
17
|
-
|
18
|
-
Handles different types of input (file paths, byte streams, string streams)
|
19
|
-
and yields a BytesReadable object that can be used to read binary data.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
path_or_file: File path or readable object.
|
23
|
-
assume_pathlike_bytes_as_path: If True, assume bytes-like objects are file paths. Else, treat as data itself.
|
24
|
-
assume_pathlike_string_as_path: If True, assume string-like objects are file paths. Else, treat as data itself.
|
25
|
-
|
26
|
-
Yields:
|
27
|
-
Optional[BytesReadable]: A readable binary stream or None if opening fails.
|
28
|
-
"""
|
29
|
-
stream: Optional[BytesReadable] = None
|
30
|
-
should_close: bool = True # Whether the stream should be closed after use
|
31
|
-
try:
|
32
|
-
with suppress(BaseException):
|
33
|
-
if isinstance(path_or_file, BytesReadable):
|
34
|
-
# Assume the input is already a bytes stream
|
35
|
-
# NOTE: Delivers itself, so shouldn't be closed.
|
36
|
-
stream = path_or_file
|
37
|
-
should_close = False
|
38
|
-
elif isinstance(path_or_file, StringReadable):
|
39
|
-
# Convert the string stream to bytes stream
|
40
|
-
stream = BytesIO(path_or_file.read().encode("utf-8"))
|
41
|
-
elif isinstance(path_or_file, bytes):
|
42
|
-
# Convert the bytes-like object to bytes stream
|
43
|
-
if assume_pathlike_bytes_as_path and os.path.exists(path_or_file):
|
44
|
-
stream = open(path_or_file, "rb")
|
45
|
-
else:
|
46
|
-
stream = BytesIO(path_or_file)
|
47
|
-
elif isinstance(path_or_file, str):
|
48
|
-
# Convert the file path to bytes stream
|
49
|
-
if assume_pathlike_string_as_path and os.path.exists(path_or_file):
|
50
|
-
stream = open(path_or_file, "rb")
|
51
|
-
else:
|
52
|
-
stream = BytesIO(path_or_file.encode("utf-8"))
|
53
|
-
else:
|
54
|
-
# Assume the input is a file descriptor or path
|
55
|
-
stream = open(path_or_file, "rb")
|
56
|
-
yield stream
|
57
|
-
finally:
|
58
|
-
if stream is not None and should_close:
|
59
|
-
stream.close()
|
1
|
+
import os
|
2
|
+
from contextlib import contextmanager, suppress
|
3
|
+
from io import BytesIO
|
4
|
+
from typing import Iterator, Optional
|
5
|
+
|
6
|
+
from ..common_types.io import BytesReadable, PathOrReadable, StringReadable
|
7
|
+
|
8
|
+
|
9
|
+
@contextmanager
def read_bytes_stream(
    path_or_file: PathOrReadable,
    assume_pathlike_bytes_as_path: bool = False,
    assume_pathlike_string_as_path: bool = True,
) -> Iterator[Optional[BytesReadable]]:
    """
    Context manager that exposes ``path_or_file`` as a readable binary stream.

    Resolution order:

    1. A ``BytesReadable`` object is yielded as-is and is NOT closed on exit,
       because the caller still owns it.
    2. A ``StringReadable`` object is read in full (``.read()``) and its text is
       re-wrapped as a UTF-8 encoded ``BytesIO``.
    3. ``bytes`` is opened as a file when ``assume_pathlike_bytes_as_path`` is
       true and the path exists; otherwise the bytes are the data itself.
    4. ``str`` is opened as a file when ``assume_pathlike_string_as_path`` is
       true and the path exists; otherwise the text is UTF-8 encoded and used
       as the data itself.
    5. Anything else is passed straight to ``open(..., "rb")``.

    Args:
        path_or_file: File path or readable object.
        assume_pathlike_bytes_as_path: If True, treat ``bytes`` input as a file
            path when that path exists. Otherwise treat it as the data itself.
        assume_pathlike_string_as_path: If True, treat ``str`` input as a file
            path when that path exists. Otherwise treat it as the data itself.

    Yields:
        Optional[BytesReadable]: A readable binary stream, or None if an
        ordinary error occurred while preparing the stream.
    """
    stream: Optional[BytesReadable] = None
    should_close: bool = True  # Streams created here are closed on exit.
    try:
        # Best effort: an ordinary failure leaves ``stream`` as None.  Only
        # ``Exception`` is suppressed so that KeyboardInterrupt and SystemExit
        # raised while opening are not silently swallowed.
        with suppress(Exception):
            if isinstance(path_or_file, BytesReadable):
                # Already a bytes stream; hand it back untouched and leave
                # closing to its owner.
                stream = path_or_file
                should_close = False
            elif isinstance(path_or_file, StringReadable):
                # Text stream: consume it and re-expose the content as bytes.
                stream = BytesIO(path_or_file.read().encode("utf-8"))
            elif isinstance(path_or_file, bytes):
                if assume_pathlike_bytes_as_path and os.path.exists(path_or_file):
                    stream = open(path_or_file, "rb")
                else:
                    stream = BytesIO(path_or_file)
            elif isinstance(path_or_file, str):
                if assume_pathlike_string_as_path and os.path.exists(path_or_file):
                    stream = open(path_or_file, "rb")
                else:
                    stream = BytesIO(path_or_file.encode("utf-8"))
            else:
                # Assume the input is a file descriptor or path.
                stream = open(path_or_file, "rb")
        # The yield sits outside the suppress block so that exceptions raised
        # by the caller's ``with`` body propagate normally.
        yield stream
    finally:
        if stream is not None and should_close:
            stream.close()
|
chatterer/utils/code_agent.py
CHANGED
@@ -1,237 +1,237 @@
|
|
1
|
-
import inspect
|
2
|
-
import textwrap
|
3
|
-
from typing import (
|
4
|
-
TYPE_CHECKING,
|
5
|
-
Callable,
|
6
|
-
Iterable,
|
7
|
-
NamedTuple,
|
8
|
-
Optional,
|
9
|
-
Self,
|
10
|
-
Sequence,
|
11
|
-
)
|
12
|
-
|
13
|
-
from langchain_core.runnables.config import RunnableConfig
|
14
|
-
|
15
|
-
from ..messages import LanguageModelInput, SystemMessage
|
16
|
-
|
17
|
-
if TYPE_CHECKING:
|
18
|
-
from langchain_experimental.tools import PythonAstREPLTool
|
19
|
-
|
20
|
-
# --- Constants ---
|
21
|
-
DEFAULT_CODE_GENERATION_PROMPT = (
|
22
|
-
"You are equipped with a Python code execution tool.\n"
|
23
|
-
"Your primary goal is to generate Python code that effectively solves the *specific, immediate sub-task* required to progress towards the overall user request. The generated code and its resulting output will be automatically added to our conversation history.\n"
|
24
|
-
"\n"
|
25
|
-
"Guidelines for Optimal Tool Use:\n"
|
26
|
-
"- Conciseness and Efficiency: Write code that directly addresses the current need. Avoid unnecessary complexity, computations, or data loading. Tool execution has resource limits.\n"
|
27
|
-
"- Targeted Action: Focus only on the code required for the *next logical step*. Do not attempt to solve the entire problem in one code block if it involves multiple steps.\n"
|
28
|
-
"- Error Handling: Implement basic error handling (e.g., `try-except`) for operations that might fail (like file access or network requests, if applicable).\n"
|
29
|
-
"- Context Awareness: Assume the code runs in a stateful environment where variables and imports might persist from previous executions (unless explicitly cleared).\n"
|
30
|
-
"- Self-Contained Execution: Ensure the code block is runnable as provided. Define necessary variables within the block if they aren't guaranteed to exist from prior context.\n"
|
31
|
-
"\n"
|
32
|
-
"Output Format:\n"
|
33
|
-
"Return *only* a JSON object containing the Python code:\n"
|
34
|
-
'{\n "code": "<your_python_code_here>"\n}\n\n'
|
35
|
-
)
|
36
|
-
|
37
|
-
DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = (
|
38
|
-
"The following Python functions are available in the global scope for you to use directly in your code.\n"
|
39
|
-
"You do not need to define these functions; simply call them as needed.\n"
|
40
|
-
"Use these functions only when they directly help in solving the current task. You are not obligated to use them.\n"
|
41
|
-
)
|
42
|
-
|
43
|
-
DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n" # Separator to distinguish different function references
|
44
|
-
|
45
|
-
|
46
|
-
# --- Helper Classes and Functions ---
|
47
|
-
|
48
|
-
|
49
|
-
class FunctionSignature(NamedTuple):
|
50
|
-
name: str
|
51
|
-
callable: Callable[..., object]
|
52
|
-
signature: str
|
53
|
-
|
54
|
-
@classmethod
|
55
|
-
def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
|
56
|
-
if callables is None:
|
57
|
-
return []
|
58
|
-
# Correctly handle single callable case
|
59
|
-
if isinstance(callables, Callable) and not isinstance(callables, type): # Exclude classes if not intended
|
60
|
-
return [cls._from_callable(callables)]
|
61
|
-
# Handle iterables
|
62
|
-
if isinstance(callables, Iterable):
|
63
|
-
return [cls._from_callable(c) for c in callables]
|
64
|
-
# If it's neither a callable nor an iterable of callables, return empty
|
65
|
-
return []
|
66
|
-
|
67
|
-
@classmethod
|
68
|
-
def _from_callable(cls, callable_obj: Callable[..., object]) -> Self:
|
69
|
-
"""
|
70
|
-
Get the name and signature of a function as a string.
|
71
|
-
"""
|
72
|
-
is_async_func = inspect.iscoroutinefunction(callable_obj)
|
73
|
-
function_def = "async def" if is_async_func else "def"
|
74
|
-
|
75
|
-
if inspect.isfunction(callable_obj):
|
76
|
-
function_name = callable_obj.__code__.co_name
|
77
|
-
elif hasattr(callable_obj, "name") and isinstance(getattr(callable_obj, "name"), str):
|
78
|
-
function_name = getattr(callable_obj, "name")
|
79
|
-
elif hasattr(callable_obj, "__name__"):
|
80
|
-
function_name = callable_obj.__name__
|
81
|
-
else:
|
82
|
-
function_name = type(callable_obj).__name__
|
83
|
-
|
84
|
-
try:
|
85
|
-
signature_str = str(inspect.signature(callable_obj))
|
86
|
-
except ValueError: # Handles built-ins or others without inspectable signatures
|
87
|
-
signature_str = "(...)" # Placeholder signature
|
88
|
-
|
89
|
-
signature = f"{function_def} {function_name}{signature_str}:"
|
90
|
-
docstring = inspect.getdoc(callable_obj)
|
91
|
-
|
92
|
-
if docstring:
|
93
|
-
docstring = f'"""{docstring.strip()}"""'
|
94
|
-
full_signature = f"{signature}\n{textwrap.indent(docstring, ' ')}"
|
95
|
-
else:
|
96
|
-
full_signature = signature
|
97
|
-
|
98
|
-
return cls(name=function_name, callable=callable_obj, signature=full_signature)
|
99
|
-
|
100
|
-
@classmethod
|
101
|
-
def as_prompt(
|
102
|
-
cls,
|
103
|
-
function_signatures: Iterable[Self],
|
104
|
-
prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT, # Use constant
|
105
|
-
sep: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR, # Use constant
|
106
|
-
) -> str:
|
107
|
-
"""
|
108
|
-
Generate a prompt string from a list of function signatures.
|
109
|
-
"""
|
110
|
-
if not function_signatures:
|
111
|
-
return ""
|
112
|
-
body: str = sep.join(fsig.signature for fsig in function_signatures)
|
113
|
-
if prefix:
|
114
|
-
return f"{prefix}\n{body}" # Add newline for clarity
|
115
|
-
return body
|
116
|
-
|
117
|
-
|
118
|
-
class CodeExecutionResult(NamedTuple):
|
119
|
-
code: str
|
120
|
-
output: str
|
121
|
-
|
122
|
-
@classmethod
|
123
|
-
def from_code(
|
124
|
-
cls,
|
125
|
-
code: str,
|
126
|
-
repl_tool: Optional["PythonAstREPLTool"] = None,
|
127
|
-
config: Optional[RunnableConfig] = None,
|
128
|
-
function_signatures: Optional[Iterable[FunctionSignature]] = None,
|
129
|
-
**kwargs: object,
|
130
|
-
) -> Self:
|
131
|
-
"""
|
132
|
-
Execute code using the Python REPL tool.
|
133
|
-
"""
|
134
|
-
if repl_tool is None:
|
135
|
-
repl_tool = get_default_repl_tool()
|
136
|
-
if function_signatures:
|
137
|
-
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
138
|
-
# Ensure kwargs are passed correctly if needed by invoke
|
139
|
-
output = str(repl_tool.invoke(code, config=config)) # pyright: ignore[reportUnknownMemberType]
|
140
|
-
return cls(code=code, output=output)
|
141
|
-
|
142
|
-
@classmethod
|
143
|
-
async def afrom_code(
|
144
|
-
cls,
|
145
|
-
code: str,
|
146
|
-
repl_tool: Optional["PythonAstREPLTool"] = None,
|
147
|
-
config: Optional[RunnableConfig] = None,
|
148
|
-
function_signatures: Optional[Iterable[FunctionSignature]] = None,
|
149
|
-
**kwargs: object,
|
150
|
-
) -> Self:
|
151
|
-
"""
|
152
|
-
Execute code using the Python REPL tool asynchronously.
|
153
|
-
"""
|
154
|
-
if repl_tool is None:
|
155
|
-
repl_tool = get_default_repl_tool()
|
156
|
-
if function_signatures:
|
157
|
-
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
158
|
-
# Ensure kwargs are passed correctly if needed by ainvoke
|
159
|
-
output = str(await repl_tool.ainvoke(code, config=config)) # pyright: ignore[reportUnknownMemberType]if not needed by base ainvoke
|
160
|
-
return cls(code=code, output=output)
|
161
|
-
|
162
|
-
|
163
|
-
def get_default_repl_tool() -> "PythonAstREPLTool":
|
164
|
-
"""Initializes and returns a default PythonAstREPLTool instance."""
|
165
|
-
try:
|
166
|
-
from langchain_experimental.tools import PythonAstREPLTool
|
167
|
-
|
168
|
-
# You might want to configure specific globals/locals here if needed
|
169
|
-
return PythonAstREPLTool()
|
170
|
-
except ImportError:
|
171
|
-
raise ImportError(
|
172
|
-
"PythonAstREPLTool requires langchain_experimental. Install with: pip install langchain-experimental"
|
173
|
-
)
|
174
|
-
|
175
|
-
|
176
|
-
def insert_callables_into_global(
|
177
|
-
function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
|
178
|
-
) -> None:
|
179
|
-
"""Insert callables into the REPL tool's globals."""
|
180
|
-
# Accessing globals might depend on the specific REPL tool implementation.
|
181
|
-
# This assumes a .globals attribute exists and is a dict.
|
182
|
-
if not hasattr(repl_tool, "globals") or not isinstance(repl_tool.globals, dict): # pyright: ignore[reportUnknownMemberType]
|
183
|
-
# Handle cases where .globals is not available or not a dict
|
184
|
-
# Maybe initialize it or log a warning/error
|
185
|
-
repl_tool.globals = {} # Or handle appropriately
|
186
|
-
|
187
|
-
# Safely update globals
|
188
|
-
current_globals: dict[object, object] = repl_tool.globals # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
|
189
|
-
for fsig in function_signatures:
|
190
|
-
current_globals[fsig.name] = fsig.callable
|
191
|
-
# No need to reassign if globals is mutable (dict)
|
192
|
-
# repl_tool.globals = current_globals
|
193
|
-
|
194
|
-
|
195
|
-
def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
|
196
|
-
"""Prepends a SystemMessage to the beginning of the message list/string."""
|
197
|
-
if not prompt_to_add: # Don't add empty prompts
|
198
|
-
return messages
|
199
|
-
|
200
|
-
if isinstance(messages, str):
|
201
|
-
# Prepend with a newline for separation
|
202
|
-
return f"{prompt_to_add}\n\n{messages}"
|
203
|
-
elif isinstance(messages, Sequence):
|
204
|
-
# Create a mutable copy if it's a tuple
|
205
|
-
msg_list = list(messages)
|
206
|
-
msg_list.insert(0, SystemMessage(content=prompt_to_add))
|
207
|
-
return msg_list
|
208
|
-
# Handle LangChain Core BaseMessagePromptTemplate or similar if needed
|
209
|
-
# elif hasattr(messages, 'to_messages'):
|
210
|
-
# msg_list = messages.to_messages()
|
211
|
-
# msg_list.insert(0, SystemMessage(content=prompt_to_add))
|
212
|
-
# return msg_list # Or return a new prompt template if required
|
213
|
-
else:
|
214
|
-
# Fallback or raise error for unsupported types
|
215
|
-
raise TypeError(f"Unsupported message input type: {type(messages)}")
|
216
|
-
|
217
|
-
|
218
|
-
def augment_prompt_for_toolcall(
|
219
|
-
function_signatures: Iterable[FunctionSignature],
|
220
|
-
messages: LanguageModelInput,
|
221
|
-
prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
|
222
|
-
function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
|
223
|
-
function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
|
224
|
-
) -> LanguageModelInput:
|
225
|
-
"""Adds function references and code invocation prompts to the messages."""
|
226
|
-
# Add function references first (if any)
|
227
|
-
func_prompt = FunctionSignature.as_prompt(
|
228
|
-
function_signatures, function_reference_prefix, function_reference_seperator
|
229
|
-
)
|
230
|
-
if func_prompt:
|
231
|
-
messages = _add_message_first(messages=messages, prompt_to_add=func_prompt)
|
232
|
-
|
233
|
-
# Then add the main code invocation prompt (if provided)
|
234
|
-
if prompt_for_code_invoke:
|
235
|
-
messages = _add_message_first(messages=messages, prompt_to_add=prompt_for_code_invoke)
|
236
|
-
|
237
|
-
return messages
|
1
|
+
import inspect
|
2
|
+
import textwrap
|
3
|
+
from typing import (
|
4
|
+
TYPE_CHECKING,
|
5
|
+
Callable,
|
6
|
+
Iterable,
|
7
|
+
NamedTuple,
|
8
|
+
Optional,
|
9
|
+
Self,
|
10
|
+
Sequence,
|
11
|
+
)
|
12
|
+
|
13
|
+
from langchain_core.runnables.config import RunnableConfig
|
14
|
+
|
15
|
+
from ..messages import LanguageModelInput, SystemMessage
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
from langchain_experimental.tools import PythonAstREPLTool
|
19
|
+
|
20
|
+
# --- Constants ---
# Default instruction text used as ``prompt_for_code_invoke`` by
# ``augment_prompt_for_toolcall``.  It tells the model to answer with a JSON
# object whose "code" key holds the Python code to run.
DEFAULT_CODE_GENERATION_PROMPT = (
    "You are equipped with a Python code execution tool.\n"
    "Your primary goal is to generate Python code that effectively solves the *specific, immediate sub-task* required to progress towards the overall user request. The generated code and its resulting output will be automatically added to our conversation history.\n"
    "\n"
    "Guidelines for Optimal Tool Use:\n"
    "- Conciseness and Efficiency: Write code that directly addresses the current need. Avoid unnecessary complexity, computations, or data loading. Tool execution has resource limits.\n"
    "- Targeted Action: Focus only on the code required for the *next logical step*. Do not attempt to solve the entire problem in one code block if it involves multiple steps.\n"
    "- Error Handling: Implement basic error handling (e.g., `try-except`) for operations that might fail (like file access or network requests, if applicable).\n"
    "- Context Awareness: Assume the code runs in a stateful environment where variables and imports might persist from previous executions (unless explicitly cleared).\n"
    "- Self-Contained Execution: Ensure the code block is runnable as provided. Define necessary variables within the block if they aren't guaranteed to exist from prior context.\n"
    "\n"
    "Output Format:\n"
    "Return *only* a JSON object containing the Python code:\n"
    '{\n "code": "<your_python_code_here>"\n}\n\n'
)

# Default text placed in front of the rendered function signatures by
# ``FunctionSignature.as_prompt``.
DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = (
    "The following Python functions are available in the global scope for you to use directly in your code.\n"
    "You do not need to define these functions; simply call them as needed.\n"
    "Use these functions only when they directly help in solving the current task. You are not obligated to use them.\n"
)

DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n"  # Separator to distinguish different function references
|
44
|
+
|
45
|
+
|
46
|
+
# --- Helper Classes and Functions ---
|
47
|
+
|
48
|
+
|
49
|
+
class FunctionSignature(NamedTuple):
    """A callable bundled with its name and a prompt-ready rendering of its signature."""

    # Name under which the callable is referenced.
    name: str
    # The callable object itself.
    callable: Callable[..., object]
    # ``def`` / ``async def`` header line, followed by the docstring when one exists.
    signature: str

    @classmethod
    def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
        """
        Build signatures from a single callable or an iterable of callables.

        Args:
            callables: ``None``, one callable, or an iterable of callables.

        Returns:
            One entry per callable. ``None``, and any input that is neither a
            non-class callable nor an iterable, give an empty list.
        """
        if callables is None:
            return []
        # A single callable; classes are deliberately not treated as one.
        if isinstance(callables, Callable) and not isinstance(callables, type):
            return [cls._from_callable(callables)]
        if isinstance(callables, Iterable):
            return [cls._from_callable(c) for c in callables]
        # Neither a callable nor an iterable of callables.
        return []

    @classmethod
    def _from_callable(cls, callable_obj: Callable[..., object]) -> Self:
        """
        Describe one callable as a ``FunctionSignature``.

        The rendered signature is a ``def`` (or ``async def`` for coroutine
        functions) header line; when the callable has a docstring, it is
        appended on the following lines as an indented triple-quoted string.
        """
        is_async_func = inspect.iscoroutinefunction(callable_obj)
        function_def = "async def" if is_async_func else "def"

        # Name lookup order: plain function -> string ``name`` attribute ->
        # ``__name__`` -> name of the object's type.
        if inspect.isfunction(callable_obj):
            function_name = callable_obj.__code__.co_name
        elif hasattr(callable_obj, "name") and isinstance(getattr(callable_obj, "name"), str):
            function_name = getattr(callable_obj, "name")
        elif hasattr(callable_obj, "__name__"):
            function_name = callable_obj.__name__
        else:
            function_name = type(callable_obj).__name__

        try:
            signature_str = str(inspect.signature(callable_obj))
        except ValueError:  # Handles built-ins or others without inspectable signatures
            signature_str = "(...)"  # Placeholder signature

        signature = f"{function_def} {function_name}{signature_str}:"
        docstring = inspect.getdoc(callable_obj)

        if docstring:
            docstring = f'"""{docstring.strip()}"""'
            full_signature = f"{signature}\n{textwrap.indent(docstring, ' ')}"
        else:
            full_signature = signature

        return cls(name=function_name, callable=callable_obj, signature=full_signature)

    @classmethod
    def as_prompt(
        cls,
        function_signatures: Iterable[Self],
        prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
        sep: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
    ) -> str:
        """
        Render function signatures as one prompt string.

        Args:
            function_signatures: Signatures to render; any iterable, including
                a one-shot generator.
            prefix: Text placed before the signatures, followed by a newline.
                Skipped when empty or ``None``.
            sep: Separator inserted between consecutive signatures.

        Returns:
            The rendered prompt, or ``""`` when there are no signatures.
        """
        # Materialize before testing for emptiness: a generator object is
        # always truthy, so checking the iterable itself would let an empty
        # generator produce a prompt that contains only the prefix.
        rendered = [fsig.signature for fsig in function_signatures]
        if not rendered:
            return ""
        body: str = sep.join(rendered)
        if prefix:
            return f"{prefix}\n{body}"  # Newline keeps prefix and body apart
        return body
|
116
|
+
|
117
|
+
|
118
|
+
class CodeExecutionResult(NamedTuple):
|
119
|
+
code: str
|
120
|
+
output: str
|
121
|
+
|
122
|
+
@classmethod
|
123
|
+
def from_code(
|
124
|
+
cls,
|
125
|
+
code: str,
|
126
|
+
repl_tool: Optional["PythonAstREPLTool"] = None,
|
127
|
+
config: Optional[RunnableConfig] = None,
|
128
|
+
function_signatures: Optional[Iterable[FunctionSignature]] = None,
|
129
|
+
**kwargs: object,
|
130
|
+
) -> Self:
|
131
|
+
"""
|
132
|
+
Execute code using the Python REPL tool.
|
133
|
+
"""
|
134
|
+
if repl_tool is None:
|
135
|
+
repl_tool = get_default_repl_tool()
|
136
|
+
if function_signatures:
|
137
|
+
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
138
|
+
# Ensure kwargs are passed correctly if needed by invoke
|
139
|
+
output = str(repl_tool.invoke(code, config=config)) # pyright: ignore[reportUnknownMemberType]
|
140
|
+
return cls(code=code, output=output)
|
141
|
+
|
142
|
+
@classmethod
|
143
|
+
async def afrom_code(
|
144
|
+
cls,
|
145
|
+
code: str,
|
146
|
+
repl_tool: Optional["PythonAstREPLTool"] = None,
|
147
|
+
config: Optional[RunnableConfig] = None,
|
148
|
+
function_signatures: Optional[Iterable[FunctionSignature]] = None,
|
149
|
+
**kwargs: object,
|
150
|
+
) -> Self:
|
151
|
+
"""
|
152
|
+
Execute code using the Python REPL tool asynchronously.
|
153
|
+
"""
|
154
|
+
if repl_tool is None:
|
155
|
+
repl_tool = get_default_repl_tool()
|
156
|
+
if function_signatures:
|
157
|
+
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
158
|
+
# Ensure kwargs are passed correctly if needed by ainvoke
|
159
|
+
output = str(await repl_tool.ainvoke(code, config=config)) # pyright: ignore[reportUnknownMemberType]if not needed by base ainvoke
|
160
|
+
return cls(code=code, output=output)
|
161
|
+
|
162
|
+
|
163
|
+
def get_default_repl_tool() -> "PythonAstREPLTool":
    """
    Create a ``PythonAstREPLTool`` with default settings.

    The import is done lazily so this module can be loaded without
    ``langchain_experimental`` installed.

    Returns:
        A freshly constructed ``PythonAstREPLTool``.

    Raises:
        ImportError: If ``langchain_experimental`` cannot be imported. The
            original import failure is attached as ``__cause__``.
    """
    # Only the import is guarded, so an error raised while constructing the
    # tool is not mislabeled as a missing dependency.
    try:
        from langchain_experimental.tools import PythonAstREPLTool
    except ImportError as exc:
        raise ImportError(
            "PythonAstREPLTool requires langchain_experimental. Install with: pip install langchain-experimental"
        ) from exc

    # You might want to configure specific globals/locals here if needed
    return PythonAstREPLTool()
|
174
|
+
|
175
|
+
|
176
|
+
def insert_callables_into_global(
    function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
) -> None:
    """
    Register each signature's callable in the REPL tool's ``globals`` under its name.

    When the tool has no ``globals`` attribute, or that attribute is not a
    dict, it is first replaced by an empty dict. Existing entries with the
    same name are overwritten.

    Args:
        function_signatures: Entries providing ``name`` and ``callable``.
        repl_tool: Tool whose ``globals`` mapping is updated in place.
    """
    # Start from a fresh dict when there is no usable globals mapping.
    if not isinstance(getattr(repl_tool, "globals", None), dict):  # pyright: ignore[reportUnknownMemberType]
        repl_tool.globals = {}

    # Read the attribute back and mutate that object directly, so no
    # reassignment is needed afterwards.
    namespace: dict[object, object] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    for entry in function_signatures:
        namespace[entry.name] = entry.callable
|
193
|
+
|
194
|
+
|
195
|
+
def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
|
196
|
+
"""Prepends a SystemMessage to the beginning of the message list/string."""
|
197
|
+
if not prompt_to_add: # Don't add empty prompts
|
198
|
+
return messages
|
199
|
+
|
200
|
+
if isinstance(messages, str):
|
201
|
+
# Prepend with a newline for separation
|
202
|
+
return f"{prompt_to_add}\n\n{messages}"
|
203
|
+
elif isinstance(messages, Sequence):
|
204
|
+
# Create a mutable copy if it's a tuple
|
205
|
+
msg_list = list(messages)
|
206
|
+
msg_list.insert(0, SystemMessage(content=prompt_to_add))
|
207
|
+
return msg_list
|
208
|
+
# Handle LangChain Core BaseMessagePromptTemplate or similar if needed
|
209
|
+
# elif hasattr(messages, 'to_messages'):
|
210
|
+
# msg_list = messages.to_messages()
|
211
|
+
# msg_list.insert(0, SystemMessage(content=prompt_to_add))
|
212
|
+
# return msg_list # Or return a new prompt template if required
|
213
|
+
else:
|
214
|
+
# Fallback or raise error for unsupported types
|
215
|
+
raise TypeError(f"Unsupported message input type: {type(messages)}")
|
216
|
+
|
217
|
+
|
218
|
+
def augment_prompt_for_toolcall(
    function_signatures: Iterable[FunctionSignature],
    messages: LanguageModelInput,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
) -> LanguageModelInput:
    """
    Prepend the function reference and the code-generation prompt to ``messages``.

    The function reference is prepended first and the code-generation prompt
    second, so in the result the code-generation prompt comes first, then the
    function reference, then the original input.

    Args:
        function_signatures: Functions to describe in the reference section.
        messages: Original model input.
        prompt_for_code_invoke: Code-generation instructions; skipped when empty or ``None``.
        function_reference_prefix: Text placed before the rendered signatures.
        function_reference_seperator: Separator between rendered signatures.

    Returns:
        The model input with the non-empty prompts prepended.
    """
    reference_block = FunctionSignature.as_prompt(
        function_signatures, function_reference_prefix, function_reference_seperator
    )

    # Applied in this order; each prompt lands in front of the previous result.
    for prompt in (reference_block, prompt_for_code_invoke):
        if prompt:
            messages = _add_message_first(messages=messages, prompt_to_add=prompt)

    return messages
|