chatterer 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +60 -60
- chatterer/language_model.py +577 -581
- chatterer/messages.py +9 -9
- chatterer/strategies/__init__.py +13 -13
- chatterer/strategies/atom_of_thoughts.py +975 -975
- chatterer/strategies/base.py +14 -14
- chatterer/tools/__init__.py +28 -28
- chatterer/tools/citation_chunking/__init__.py +3 -3
- chatterer/tools/citation_chunking/chunks.py +53 -53
- chatterer/tools/citation_chunking/citation_chunker.py +118 -118
- chatterer/tools/citation_chunking/citations.py +285 -285
- chatterer/tools/citation_chunking/prompt.py +157 -157
- chatterer/tools/citation_chunking/reference.py +26 -26
- chatterer/tools/citation_chunking/utils.py +138 -138
- chatterer/tools/convert_to_text.py +463 -463
- chatterer/tools/webpage_to_markdown/__init__.py +4 -4
- chatterer/tools/webpage_to_markdown/playwright_bot.py +649 -649
- chatterer/tools/webpage_to_markdown/utils.py +334 -329
- chatterer/tools/youtube.py +146 -146
- chatterer/utils/__init__.py +15 -15
- chatterer/utils/code_agent.py +138 -138
- chatterer/utils/image.py +291 -288
- {chatterer-0.1.11.dist-info → chatterer-0.1.12.dist-info}/METADATA +170 -170
- chatterer-0.1.12.dist-info/RECORD +27 -0
- chatterer-0.1.11.dist-info/RECORD +0 -27
- {chatterer-0.1.11.dist-info → chatterer-0.1.12.dist-info}/WHEEL +0 -0
- {chatterer-0.1.11.dist-info → chatterer-0.1.12.dist-info}/top_level.txt +0 -0
chatterer/tools/youtube.py
CHANGED
@@ -1,146 +1,146 @@
|
|
1
|
-
import json
|
2
|
-
import unicodedata
|
3
|
-
import urllib.parse
|
4
|
-
from dataclasses import dataclass
|
5
|
-
from typing import Any, Optional, Self, cast
|
6
|
-
|
7
|
-
import requests
|
8
|
-
|
9
|
-
|
10
|
-
def get_youtube_video_details(
|
11
|
-
query: str,
|
12
|
-
) -> list[dict[str, Optional[str]]]:
|
13
|
-
"""Search for video metadata on YouTube using the given query. Returns a list of dictionaries containing `video_id`, `title`, `channel`, `duration`, `views`, `publish_time`, and `long_desc`."""
|
14
|
-
return [
|
15
|
-
{
|
16
|
-
"video_id": video_id,
|
17
|
-
"title": video.title,
|
18
|
-
"channel": video.channel,
|
19
|
-
"duration": video.duration,
|
20
|
-
"views": video.views,
|
21
|
-
"publish_time": video.publish_time,
|
22
|
-
"long_desc": video.long_desc,
|
23
|
-
}
|
24
|
-
for video in YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
|
25
|
-
if (video_id := _get_video_id(video.url_suffix))
|
26
|
-
]
|
27
|
-
|
28
|
-
|
29
|
-
def get_youtube_video_subtitle(video_id: str) -> str:
|
30
|
-
"""Get the transcript of a YouTube video using the given video ID."""
|
31
|
-
|
32
|
-
from youtube_transcript_api._api import YouTubeTranscriptApi
|
33
|
-
|
34
|
-
get_transcript = YouTubeTranscriptApi.get_transcript # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
|
35
|
-
list_transcripts = YouTubeTranscriptApi.list_transcripts # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
|
36
|
-
|
37
|
-
result: str = ""
|
38
|
-
buffer_timestamp: str = "0s"
|
39
|
-
buffer_texts: list[str] = []
|
40
|
-
for entry in get_transcript(video_id, languages=(next(iter(list_transcripts(video_id))).language_code,)): # pyright: ignore[reportUnknownVariableType]
|
41
|
-
entry = cast(dict[object, object], entry)
|
42
|
-
text: str = str(entry.get("text", "")).strip().replace("\n", " ")
|
43
|
-
if not text:
|
44
|
-
continue
|
45
|
-
if len(buffer_texts) >= 10 or _is_special_char(text) or (buffer_texts and _is_special_char(buffer_texts[-1])):
|
46
|
-
result += f"[{buffer_timestamp}] {'. '.join(buffer_texts)}\n"
|
47
|
-
start = entry.get("start", 0)
|
48
|
-
if start:
|
49
|
-
buffer_timestamp = f"{start:.0f}s"
|
50
|
-
buffer_texts = [text]
|
51
|
-
else:
|
52
|
-
buffer_texts.append(text)
|
53
|
-
|
54
|
-
if buffer_texts:
|
55
|
-
result += f"[{buffer_timestamp}] {' '.join(buffer_texts)}"
|
56
|
-
return result
|
57
|
-
|
58
|
-
|
59
|
-
def _get_video_id(suffix: str) -> str:
|
60
|
-
urllib_parse_result = urllib.parse.urlparse(suffix)
|
61
|
-
if urllib_parse_result.path.startswith("/shorts/"):
|
62
|
-
# Fore shorts (/shorts/...) the video ID is in the path
|
63
|
-
parts = urllib_parse_result.path.split("/")
|
64
|
-
if len(parts) < 3:
|
65
|
-
print(f"Failed to get video ID from {suffix}")
|
66
|
-
return ""
|
67
|
-
return parts[2]
|
68
|
-
|
69
|
-
query: str = urllib.parse.urlparse(suffix).query
|
70
|
-
query_strings = urllib.parse.parse_qs(query)
|
71
|
-
if "v" not in query_strings:
|
72
|
-
print(f"Failed to get video ID from {suffix}")
|
73
|
-
return ""
|
74
|
-
return next(iter(query_strings["v"]), "")
|
75
|
-
|
76
|
-
|
77
|
-
def _is_special_char(text: str) -> bool:
|
78
|
-
if not text:
|
79
|
-
return False
|
80
|
-
return not unicodedata.category(text[0]).startswith("L")
|
81
|
-
|
82
|
-
|
83
|
-
@dataclass
|
84
|
-
class YoutubeSearchResult:
|
85
|
-
url_suffix: str
|
86
|
-
id: Optional[str]
|
87
|
-
thumbnails: list[str]
|
88
|
-
title: Optional[str]
|
89
|
-
long_desc: Optional[str]
|
90
|
-
channel: Optional[str]
|
91
|
-
duration: Optional[str]
|
92
|
-
views: Optional[str]
|
93
|
-
publish_time: Optional[str]
|
94
|
-
|
95
|
-
@classmethod
|
96
|
-
def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
|
97
|
-
url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
|
98
|
-
response: str = requests.get(url).text
|
99
|
-
while "ytInitialData" not in response:
|
100
|
-
response = requests.get(url).text
|
101
|
-
results: list[Self] = cls.parse_html(response)
|
102
|
-
return results[:max_results]
|
103
|
-
|
104
|
-
@classmethod
|
105
|
-
def parse_html(cls, html: str) -> list[Self]:
|
106
|
-
results: list[Self] = []
|
107
|
-
start: int = html.index("ytInitialData") + len("ytInitialData") + 3
|
108
|
-
end: int = html.index("};", start) + 1
|
109
|
-
data: Any = json.loads(html[start:end])
|
110
|
-
for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
|
111
|
-
"contents"
|
112
|
-
]:
|
113
|
-
for video in contents["itemSectionRenderer"]["contents"]:
|
114
|
-
if "videoRenderer" in video.keys():
|
115
|
-
video_data = video.get("videoRenderer", {})
|
116
|
-
suffix = (
|
117
|
-
video_data.get("navigationEndpoint", {})
|
118
|
-
.get("commandMetadata", {})
|
119
|
-
.get("webCommandMetadata", {})
|
120
|
-
.get("url", None)
|
121
|
-
)
|
122
|
-
if not suffix:
|
123
|
-
continue
|
124
|
-
res = cls(
|
125
|
-
id=video_data.get("videoId", None),
|
126
|
-
thumbnails=[
|
127
|
-
thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
|
128
|
-
],
|
129
|
-
title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
|
130
|
-
long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
|
131
|
-
channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
|
132
|
-
duration=video_data.get("lengthText", {}).get("simpleText", 0),
|
133
|
-
views=video_data.get("viewCountText", {}).get("simpleText", 0),
|
134
|
-
publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
|
135
|
-
url_suffix=suffix,
|
136
|
-
)
|
137
|
-
results.append(res)
|
138
|
-
|
139
|
-
if results:
|
140
|
-
break
|
141
|
-
return results
|
142
|
-
|
143
|
-
|
144
|
-
if __name__ == "__main__":
|
145
|
-
print(get_youtube_video_details("BTS"))
|
146
|
-
# print(get_youtube_transcript("y7jrpS8GHxs"))
|
1
|
+
import json
|
2
|
+
import unicodedata
|
3
|
+
import urllib.parse
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from typing import Any, Optional, Self, cast
|
6
|
+
|
7
|
+
import requests
|
8
|
+
|
9
|
+
|
10
|
+
def get_youtube_video_details(
    query: str,
) -> list[dict[str, Optional[str]]]:
    """Search for video metadata on YouTube using the given query.

    Up to 10 search results are requested from ``https://youtube.com``. A result is
    dropped when no video ID can be extracted from its URL suffix.

    Args:
        query: Free-text search query.

    Returns:
        A list of dictionaries containing `video_id`, `title`, `channel`, `duration`,
        `views`, `publish_time`, and `long_desc`.
    """
    details: list[dict[str, Optional[str]]] = []
    hits = YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
    for hit in hits:
        video_id = _get_video_id(hit.url_suffix)
        if not video_id:
            # An empty ID means the suffix was neither a /shorts/ path nor carried a "v" parameter.
            continue
        details.append(
            {
                "video_id": video_id,
                "title": hit.title,
                "channel": hit.channel,
                "duration": hit.duration,
                "views": hit.views,
                "publish_time": hit.publish_time,
                "long_desc": hit.long_desc,
            }
        )
    return details
|
27
|
+
|
28
|
+
|
29
|
+
def get_youtube_video_subtitle(video_id: str) -> str:
    """Get the transcript of a YouTube video using the given video ID.

    The transcript is requested in the language of the first transcript listed for the
    video. Entries are folded into lines of the form ``[<start>s] <text>``. A new line is
    started once 10 entries are buffered, or when the current entry or the last buffered
    entry begins with a non-letter character (see ``_is_special_char``).

    Args:
        video_id: ID of the YouTube video.

    Returns:
        The formatted transcript. An empty string is returned when the video lists no
        transcript or when every entry is empty.
    """

    # NOTE(review): relies on the static `get_transcript` / `list_transcripts` helpers and on
    # dict-shaped entries; confirm these still exist in the installed youtube_transcript_api.
    from youtube_transcript_api._api import YouTubeTranscriptApi

    get_transcript = YouTubeTranscriptApi.get_transcript  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    list_transcripts = YouTubeTranscriptApi.list_transcripts  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]

    # Use a default so an empty listing yields "" instead of leaking a bare StopIteration.
    first_transcript = next(iter(list_transcripts(video_id)), None)  # pyright: ignore[reportUnknownVariableType]
    if first_transcript is None:
        return ""

    lines: list[str] = []
    buffer_timestamp: str = "0s"
    buffer_texts: list[str] = []
    for entry in get_transcript(video_id, languages=(first_transcript.language_code,)):  # pyright: ignore[reportUnknownVariableType]
        entry = cast(dict[object, object], entry)
        text: str = str(entry.get("text", "")).strip().replace("\n", " ")
        if not text:
            continue

        starts_new_line = (
            len(buffer_texts) >= 10
            or _is_special_char(text)
            or (bool(buffer_texts) and _is_special_char(buffer_texts[-1]))
        )
        if not starts_new_line:
            buffer_texts.append(text)
            continue

        # Only flush when something is buffered; otherwise a transcript whose first entry
        # starts with a non-letter would begin with an empty "[0s] " line.
        if buffer_texts:
            lines.append(f"[{buffer_timestamp}] {'. '.join(buffer_texts)}")
        start = entry.get("start", 0)
        if start:
            buffer_timestamp = f"{start:.0f}s"
        buffer_texts = [text]

    if buffer_texts:
        # NOTE(review): the trailing line is joined with " " while flushed lines use ". ";
        # kept as-is because the intended separator is not evident from the code.
        lines.append(f"[{buffer_timestamp}] {' '.join(buffer_texts)}")
    return "\n".join(lines)
|
57
|
+
|
58
|
+
|
59
|
+
def _get_video_id(suffix: str) -> str:
    """Extract the video ID from a YouTube URL or URL suffix.

    For shorts (``/shorts/<id>``) the video ID is the path segment after ``/shorts/``.
    For every other URL the first value of the ``v`` query parameter is used.

    Args:
        suffix: Full URL or path-and-query suffix such as ``/watch?v=<id>``.

    Returns:
        The video ID, or ``""`` when none is present. A diagnostic is printed in that case.
    """
    parsed = urllib.parse.urlparse(suffix)
    if parsed.path.startswith("/shorts/"):
        # The prefix check guarantees at least three segments: "", "shorts", "<id>".
        video_id = parsed.path.split("/")[2]
    else:
        video_id = next(iter(urllib.parse.parse_qs(parsed.query).get("v", [])), "")

    if not video_id:
        print(f"Failed to get video ID from {suffix}")
    return video_id
|
75
|
+
|
76
|
+
|
77
|
+
def _is_special_char(text: str) -> bool:
    """Report whether ``text`` begins with a non-letter character.

    A character counts as a letter when its Unicode general category starts with ``"L"``.
    Empty text is never considered special.
    """
    # unicodedata.category always returns a two-character code, so testing its first
    # character is the same as testing the "L" prefix.
    return bool(text) and unicodedata.category(text[0])[0] != "L"
|
81
|
+
|
82
|
+
|
83
|
+
@dataclass
|
84
|
+
class YoutubeSearchResult:
|
85
|
+
url_suffix: str
|
86
|
+
id: Optional[str]
|
87
|
+
thumbnails: list[str]
|
88
|
+
title: Optional[str]
|
89
|
+
long_desc: Optional[str]
|
90
|
+
channel: Optional[str]
|
91
|
+
duration: Optional[str]
|
92
|
+
views: Optional[str]
|
93
|
+
publish_time: Optional[str]
|
94
|
+
|
95
|
+
@classmethod
|
96
|
+
def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
|
97
|
+
url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
|
98
|
+
response: str = requests.get(url).text
|
99
|
+
while "ytInitialData" not in response:
|
100
|
+
response = requests.get(url).text
|
101
|
+
results: list[Self] = cls.parse_html(response)
|
102
|
+
return results[:max_results]
|
103
|
+
|
104
|
+
@classmethod
|
105
|
+
def parse_html(cls, html: str) -> list[Self]:
|
106
|
+
results: list[Self] = []
|
107
|
+
start: int = html.index("ytInitialData") + len("ytInitialData") + 3
|
108
|
+
end: int = html.index("};", start) + 1
|
109
|
+
data: Any = json.loads(html[start:end])
|
110
|
+
for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
|
111
|
+
"contents"
|
112
|
+
]:
|
113
|
+
for video in contents["itemSectionRenderer"]["contents"]:
|
114
|
+
if "videoRenderer" in video.keys():
|
115
|
+
video_data = video.get("videoRenderer", {})
|
116
|
+
suffix = (
|
117
|
+
video_data.get("navigationEndpoint", {})
|
118
|
+
.get("commandMetadata", {})
|
119
|
+
.get("webCommandMetadata", {})
|
120
|
+
.get("url", None)
|
121
|
+
)
|
122
|
+
if not suffix:
|
123
|
+
continue
|
124
|
+
res = cls(
|
125
|
+
id=video_data.get("videoId", None),
|
126
|
+
thumbnails=[
|
127
|
+
thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
|
128
|
+
],
|
129
|
+
title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
|
130
|
+
long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
|
131
|
+
channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
|
132
|
+
duration=video_data.get("lengthText", {}).get("simpleText", 0),
|
133
|
+
views=video_data.get("viewCountText", {}).get("simpleText", 0),
|
134
|
+
publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
|
135
|
+
url_suffix=suffix,
|
136
|
+
)
|
137
|
+
results.append(res)
|
138
|
+
|
139
|
+
if results:
|
140
|
+
break
|
141
|
+
return results
|
142
|
+
|
143
|
+
|
144
|
+
if __name__ == "__main__":
    # Manual smoke test: performs a live YouTube search and prints the parsed results.
    print(get_youtube_video_details("BTS"))
    # print(get_youtube_video_subtitle("y7jrpS8GHxs"))
|
chatterer/utils/__init__.py
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
from .code_agent import (
|
2
|
-
CodeExecutionResult,
|
3
|
-
FunctionSignature,
|
4
|
-
get_default_repl_tool,
|
5
|
-
insert_callables_into_global,
|
6
|
-
)
|
7
|
-
from .image import Base64Image
|
8
|
-
|
9
|
-
__all__ = [
|
10
|
-
"Base64Image",
|
11
|
-
"FunctionSignature",
|
12
|
-
"CodeExecutionResult",
|
13
|
-
"get_default_repl_tool",
|
14
|
-
"insert_callables_into_global",
|
15
|
-
]
|
1
|
+
from .code_agent import (
|
2
|
+
CodeExecutionResult,
|
3
|
+
FunctionSignature,
|
4
|
+
get_default_repl_tool,
|
5
|
+
insert_callables_into_global,
|
6
|
+
)
|
7
|
+
from .image import Base64Image
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"Base64Image",
|
11
|
+
"FunctionSignature",
|
12
|
+
"CodeExecutionResult",
|
13
|
+
"get_default_repl_tool",
|
14
|
+
"insert_callables_into_global",
|
15
|
+
]
|
chatterer/utils/code_agent.py
CHANGED
@@ -1,138 +1,138 @@
|
|
1
|
-
import inspect
|
2
|
-
import textwrap
|
3
|
-
from typing import (
|
4
|
-
TYPE_CHECKING,
|
5
|
-
Callable,
|
6
|
-
Iterable,
|
7
|
-
NamedTuple,
|
8
|
-
Optional,
|
9
|
-
Self,
|
10
|
-
)
|
11
|
-
|
12
|
-
from langchain_core.runnables.config import RunnableConfig
|
13
|
-
|
14
|
-
if TYPE_CHECKING:
|
15
|
-
from langchain_experimental.tools import PythonAstREPLTool
|
16
|
-
|
17
|
-
|
18
|
-
class FunctionSignature(NamedTuple):
|
19
|
-
name: str
|
20
|
-
callable: Callable[..., object]
|
21
|
-
signature: str
|
22
|
-
|
23
|
-
@classmethod
|
24
|
-
def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
|
25
|
-
if callables is None:
|
26
|
-
return []
|
27
|
-
if callable(callables):
|
28
|
-
return [cls._from_callable(callables)]
|
29
|
-
return [cls._from_callable(callable) for callable in callables]
|
30
|
-
|
31
|
-
@classmethod
|
32
|
-
def _from_callable(cls, callable: Callable[..., object]) -> Self:
|
33
|
-
"""
|
34
|
-
Get the name and signature of a function as a string.
|
35
|
-
"""
|
36
|
-
# Determine if the function is async
|
37
|
-
is_async_func = inspect.iscoroutinefunction(callable)
|
38
|
-
function_def = "async def" if is_async_func else "def"
|
39
|
-
|
40
|
-
# Determine the function name based on the type of callable
|
41
|
-
if inspect.isfunction(callable):
|
42
|
-
# For regular Python functions, use __code__.co_name
|
43
|
-
function_name = callable.__code__.co_name
|
44
|
-
elif hasattr(callable, "name"):
|
45
|
-
# For StructuredTool or similar objects with a 'name' attribute
|
46
|
-
function_name = callable.name # type: ignore
|
47
|
-
elif hasattr(callable, "__name__"):
|
48
|
-
# For other callables with a __name__ attribute
|
49
|
-
function_name = callable.__name__
|
50
|
-
else:
|
51
|
-
# Fallback to the class name if no name is found
|
52
|
-
function_name = type(callable).__name__
|
53
|
-
|
54
|
-
# Build the signature string
|
55
|
-
signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
|
56
|
-
docstring = inspect.getdoc(callable)
|
57
|
-
if docstring:
|
58
|
-
docstring = f'"""{docstring.strip()}"""'
|
59
|
-
return cls(
|
60
|
-
name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
|
61
|
-
)
|
62
|
-
else:
|
63
|
-
return cls(name=function_name, callable=callable, signature=signature)
|
64
|
-
|
65
|
-
@classmethod
|
66
|
-
def as_prompt(
|
67
|
-
cls,
|
68
|
-
function_signatures: Iterable[Self],
|
69
|
-
prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
|
70
|
-
sep: str = "\n---\n",
|
71
|
-
) -> str:
|
72
|
-
"""
|
73
|
-
Generate a prompt string from a list of callables.
|
74
|
-
"""
|
75
|
-
body: str = sep.join(fsig.signature for fsig in function_signatures)
|
76
|
-
if prefix:
|
77
|
-
return f"{prefix}{body}"
|
78
|
-
return body
|
79
|
-
|
80
|
-
|
81
|
-
class CodeExecutionResult(NamedTuple):
|
82
|
-
code: str
|
83
|
-
output: str
|
84
|
-
|
85
|
-
@classmethod
|
86
|
-
def from_code(
|
87
|
-
cls,
|
88
|
-
code: str,
|
89
|
-
repl_tool: Optional["PythonAstREPLTool"] = None,
|
90
|
-
config: Optional[RunnableConfig] = None,
|
91
|
-
function_signatures: Optional[Iterable[FunctionSignature]] = None,
|
92
|
-
**kwargs: object,
|
93
|
-
) -> Self:
|
94
|
-
"""
|
95
|
-
Execute code using the Python Code Execution Language Model.
|
96
|
-
"""
|
97
|
-
if repl_tool is None:
|
98
|
-
repl_tool = get_default_repl_tool()
|
99
|
-
if function_signatures:
|
100
|
-
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
101
|
-
output = str(repl_tool.invoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
|
102
|
-
return cls(code=code, output=output)
|
103
|
-
|
104
|
-
@classmethod
|
105
|
-
async def afrom_code(
|
106
|
-
cls,
|
107
|
-
code: str,
|
108
|
-
repl_tool: Optional["PythonAstREPLTool"] = None,
|
109
|
-
config: Optional[RunnableConfig] = None,
|
110
|
-
function_signatures: Optional[Iterable[FunctionSignature]] = None,
|
111
|
-
**kwargs: object,
|
112
|
-
) -> Self:
|
113
|
-
"""
|
114
|
-
Execute code using the Python Code Execution Language Model asynchronously.
|
115
|
-
"""
|
116
|
-
if repl_tool is None:
|
117
|
-
repl_tool = get_default_repl_tool()
|
118
|
-
if function_signatures:
|
119
|
-
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
120
|
-
output = str(await repl_tool.ainvoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
|
121
|
-
return cls(code=code, output=output)
|
122
|
-
|
123
|
-
|
124
|
-
def get_default_repl_tool() -> "PythonAstREPLTool":
|
125
|
-
from langchain_experimental.tools import PythonAstREPLTool
|
126
|
-
|
127
|
-
return PythonAstREPLTool()
|
128
|
-
|
129
|
-
|
130
|
-
def insert_callables_into_global(
|
131
|
-
function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
|
132
|
-
) -> None:
|
133
|
-
"""Insert callables into the REPL tool's globals."""
|
134
|
-
repl_globals: Optional[dict[str, object]] = repl_tool.globals # pyright: ignore[reportUnknownMemberType]
|
135
|
-
if repl_globals is None:
|
136
|
-
repl_tool.globals = {fsig.name: fsig.callable for fsig in function_signatures}
|
137
|
-
else:
|
138
|
-
repl_globals.update({fsig.name: fsig.callable for fsig in function_signatures})
|
1
|
+
import inspect
|
2
|
+
import textwrap
|
3
|
+
from typing import (
|
4
|
+
TYPE_CHECKING,
|
5
|
+
Callable,
|
6
|
+
Iterable,
|
7
|
+
NamedTuple,
|
8
|
+
Optional,
|
9
|
+
Self,
|
10
|
+
)
|
11
|
+
|
12
|
+
from langchain_core.runnables.config import RunnableConfig
|
13
|
+
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from langchain_experimental.tools import PythonAstREPLTool
|
16
|
+
|
17
|
+
|
18
|
+
class FunctionSignature(NamedTuple):
|
19
|
+
name: str
|
20
|
+
callable: Callable[..., object]
|
21
|
+
signature: str
|
22
|
+
|
23
|
+
@classmethod
|
24
|
+
def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
|
25
|
+
if callables is None:
|
26
|
+
return []
|
27
|
+
if callable(callables):
|
28
|
+
return [cls._from_callable(callables)]
|
29
|
+
return [cls._from_callable(callable) for callable in callables]
|
30
|
+
|
31
|
+
@classmethod
|
32
|
+
def _from_callable(cls, callable: Callable[..., object]) -> Self:
|
33
|
+
"""
|
34
|
+
Get the name and signature of a function as a string.
|
35
|
+
"""
|
36
|
+
# Determine if the function is async
|
37
|
+
is_async_func = inspect.iscoroutinefunction(callable)
|
38
|
+
function_def = "async def" if is_async_func else "def"
|
39
|
+
|
40
|
+
# Determine the function name based on the type of callable
|
41
|
+
if inspect.isfunction(callable):
|
42
|
+
# For regular Python functions, use __code__.co_name
|
43
|
+
function_name = callable.__code__.co_name
|
44
|
+
elif hasattr(callable, "name"):
|
45
|
+
# For StructuredTool or similar objects with a 'name' attribute
|
46
|
+
function_name = callable.name # type: ignore
|
47
|
+
elif hasattr(callable, "__name__"):
|
48
|
+
# For other callables with a __name__ attribute
|
49
|
+
function_name = callable.__name__
|
50
|
+
else:
|
51
|
+
# Fallback to the class name if no name is found
|
52
|
+
function_name = type(callable).__name__
|
53
|
+
|
54
|
+
# Build the signature string
|
55
|
+
signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
|
56
|
+
docstring = inspect.getdoc(callable)
|
57
|
+
if docstring:
|
58
|
+
docstring = f'"""{docstring.strip()}"""'
|
59
|
+
return cls(
|
60
|
+
name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
|
61
|
+
)
|
62
|
+
else:
|
63
|
+
return cls(name=function_name, callable=callable, signature=signature)
|
64
|
+
|
65
|
+
@classmethod
|
66
|
+
def as_prompt(
|
67
|
+
cls,
|
68
|
+
function_signatures: Iterable[Self],
|
69
|
+
prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
|
70
|
+
sep: str = "\n---\n",
|
71
|
+
) -> str:
|
72
|
+
"""
|
73
|
+
Generate a prompt string from a list of callables.
|
74
|
+
"""
|
75
|
+
body: str = sep.join(fsig.signature for fsig in function_signatures)
|
76
|
+
if prefix:
|
77
|
+
return f"{prefix}{body}"
|
78
|
+
return body
|
79
|
+
|
80
|
+
|
81
|
+
class CodeExecutionResult(NamedTuple):
    """Source code paired with the stringified output of running it in a REPL tool."""

    code: str  # the code that was handed to the REPL tool
    output: str  # str() of whatever the REPL tool returned

    @classmethod
    def from_code(
        cls,
        code: str,
        repl_tool: Optional["PythonAstREPLTool"] = None,
        config: Optional[RunnableConfig] = None,
        function_signatures: Optional[Iterable[FunctionSignature]] = None,
        **kwargs: object,
    ) -> Self:
        """
        Run ``code`` through a REPL tool and capture its output.

        Args:
            code: Code passed to the tool's ``invoke``.
            repl_tool: Tool to use; a default one is created when ``None``.
            config: Forwarded to ``invoke`` as ``config``.
            function_signatures: When truthy, their callables are inserted into the
                tool's globals before the code runs.
            **kwargs: Forwarded to ``invoke``.

        NOTE(review): ``code`` is executed by the tool as given; confirm callers only
        pass trusted code.
        """
        tool = get_default_repl_tool() if repl_tool is None else repl_tool
        if function_signatures:
            insert_callables_into_global(function_signatures=function_signatures, repl_tool=tool)
        raw_output = tool.invoke(code, config=config, **kwargs)  # pyright: ignore[reportUnknownMemberType]
        return cls(code=code, output=str(raw_output))

    @classmethod
    async def afrom_code(
        cls,
        code: str,
        repl_tool: Optional["PythonAstREPLTool"] = None,
        config: Optional[RunnableConfig] = None,
        function_signatures: Optional[Iterable[FunctionSignature]] = None,
        **kwargs: object,
    ) -> Self:
        """
        Asynchronous counterpart of ``from_code``; awaits the tool's ``ainvoke``.

        Arguments have the same meaning as in ``from_code``.
        """
        tool = get_default_repl_tool() if repl_tool is None else repl_tool
        if function_signatures:
            insert_callables_into_global(function_signatures=function_signatures, repl_tool=tool)
        raw_output = await tool.ainvoke(code, config=config, **kwargs)  # pyright: ignore[reportUnknownMemberType]
        return cls(code=code, output=str(raw_output))
|
122
|
+
|
123
|
+
|
124
|
+
def get_default_repl_tool() -> "PythonAstREPLTool":
    """Create a ``PythonAstREPLTool`` with default arguments."""
    # The import is done inside the function, so langchain_experimental is only imported
    # when this function is called.
    from langchain_experimental.tools import PythonAstREPLTool as _ReplTool

    default_tool = _ReplTool()
    return default_tool
|
128
|
+
|
129
|
+
|
130
|
+
def insert_callables_into_global(
    function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
) -> None:
    """Insert callables into the REPL tool's globals.

    Each entry is stored under its ``name``. If the tool has no globals mapping yet, a
    new one is assigned; otherwise the existing mapping is updated in place, overwriting
    entries with the same name.

    Args:
        function_signatures: Entries providing ``name`` and ``callable``.
        repl_tool: Tool whose ``globals`` attribute is read and modified.
    """
    current_globals: Optional[dict[str, object]] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
    exposed = {entry.name: entry.callable for entry in function_signatures}
    if current_globals is not None:
        current_globals.update(exposed)
        return
    repl_tool.globals = exposed
|