chatterer-0.1.12-py3-none-any.whl → chatterer-0.1.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31):
  1. chatterer/__init__.py +62 -60
  2. chatterer/common_types/__init__.py +21 -0
  3. chatterer/common_types/io.py +19 -0
  4. chatterer/language_model.py +577 -577
  5. chatterer/messages.py +9 -9
  6. chatterer/strategies/__init__.py +13 -13
  7. chatterer/strategies/atom_of_thoughts.py +975 -975
  8. chatterer/strategies/base.py +14 -14
  9. chatterer/tools/__init__.py +35 -28
  10. chatterer/tools/citation_chunking/__init__.py +3 -3
  11. chatterer/tools/citation_chunking/chunks.py +53 -53
  12. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  13. chatterer/tools/citation_chunking/citations.py +285 -285
  14. chatterer/tools/citation_chunking/prompt.py +157 -157
  15. chatterer/tools/citation_chunking/reference.py +26 -26
  16. chatterer/tools/citation_chunking/utils.py +138 -138
  17. chatterer/tools/convert_to_text.py +418 -463
  18. chatterer/tools/upstage_document_parser.py +438 -0
  19. chatterer/tools/webpage_to_markdown/__init__.py +4 -4
  20. chatterer/tools/webpage_to_markdown/playwright_bot.py +649 -649
  21. chatterer/tools/webpage_to_markdown/utils.py +334 -334
  22. chatterer/tools/youtube.py +146 -146
  23. chatterer/utils/__init__.py +15 -15
  24. chatterer/utils/bytesio.py +59 -0
  25. chatterer/utils/code_agent.py +138 -138
  26. chatterer/utils/image.py +291 -291
  27. {chatterer-0.1.12.dist-info → chatterer-0.1.13.dist-info}/METADATA +171 -170
  28. chatterer-0.1.13.dist-info/RECORD +31 -0
  29. chatterer-0.1.12.dist-info/RECORD +0 -27
  30. {chatterer-0.1.12.dist-info → chatterer-0.1.13.dist-info}/WHEEL +0 -0
  31. {chatterer-0.1.12.dist-info → chatterer-0.1.13.dist-info}/top_level.txt +0 -0
--- chatterer/tools/youtube.py (0.1.12)
+++ chatterer/tools/youtube.py (0.1.13)
@@ -1,146 +1,146 @@
1
- import json
2
- import unicodedata
3
- import urllib.parse
4
- from dataclasses import dataclass
5
- from typing import Any, Optional, Self, cast
6
-
7
- import requests
8
-
9
-
10
- def get_youtube_video_details(
11
- query: str,
12
- ) -> list[dict[str, Optional[str]]]:
13
- """Search for video metadata on YouTube using the given query. Returns a list of dictionaries containing `video_id`, `title`, `channel`, `duration`, `views`, `publish_time`, and `long_desc`."""
14
- return [
15
- {
16
- "video_id": video_id,
17
- "title": video.title,
18
- "channel": video.channel,
19
- "duration": video.duration,
20
- "views": video.views,
21
- "publish_time": video.publish_time,
22
- "long_desc": video.long_desc,
23
- }
24
- for video in YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
25
- if (video_id := _get_video_id(video.url_suffix))
26
- ]
27
-
28
-
29
- def get_youtube_video_subtitle(video_id: str) -> str:
30
- """Get the transcript of a YouTube video using the given video ID."""
31
-
32
- from youtube_transcript_api._api import YouTubeTranscriptApi
33
-
34
- get_transcript = YouTubeTranscriptApi.get_transcript # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
35
- list_transcripts = YouTubeTranscriptApi.list_transcripts # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
36
-
37
- result: str = ""
38
- buffer_timestamp: str = "0s"
39
- buffer_texts: list[str] = []
40
- for entry in get_transcript(video_id, languages=(next(iter(list_transcripts(video_id))).language_code,)): # pyright: ignore[reportUnknownVariableType]
41
- entry = cast(dict[object, object], entry)
42
- text: str = str(entry.get("text", "")).strip().replace("\n", " ")
43
- if not text:
44
- continue
45
- if len(buffer_texts) >= 10 or _is_special_char(text) or (buffer_texts and _is_special_char(buffer_texts[-1])):
46
- result += f"[{buffer_timestamp}] {'. '.join(buffer_texts)}\n"
47
- start = entry.get("start", 0)
48
- if start:
49
- buffer_timestamp = f"{start:.0f}s"
50
- buffer_texts = [text]
51
- else:
52
- buffer_texts.append(text)
53
-
54
- if buffer_texts:
55
- result += f"[{buffer_timestamp}] {' '.join(buffer_texts)}"
56
- return result
57
-
58
-
59
- def _get_video_id(suffix: str) -> str:
60
- urllib_parse_result = urllib.parse.urlparse(suffix)
61
- if urllib_parse_result.path.startswith("/shorts/"):
62
- # Fore shorts (/shorts/...) the video ID is in the path
63
- parts = urllib_parse_result.path.split("/")
64
- if len(parts) < 3:
65
- print(f"Failed to get video ID from {suffix}")
66
- return ""
67
- return parts[2]
68
-
69
- query: str = urllib.parse.urlparse(suffix).query
70
- query_strings = urllib.parse.parse_qs(query)
71
- if "v" not in query_strings:
72
- print(f"Failed to get video ID from {suffix}")
73
- return ""
74
- return next(iter(query_strings["v"]), "")
75
-
76
-
77
- def _is_special_char(text: str) -> bool:
78
- if not text:
79
- return False
80
- return not unicodedata.category(text[0]).startswith("L")
81
-
82
-
83
- @dataclass
84
- class YoutubeSearchResult:
85
- url_suffix: str
86
- id: Optional[str]
87
- thumbnails: list[str]
88
- title: Optional[str]
89
- long_desc: Optional[str]
90
- channel: Optional[str]
91
- duration: Optional[str]
92
- views: Optional[str]
93
- publish_time: Optional[str]
94
-
95
- @classmethod
96
- def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
97
- url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
98
- response: str = requests.get(url).text
99
- while "ytInitialData" not in response:
100
- response = requests.get(url).text
101
- results: list[Self] = cls.parse_html(response)
102
- return results[:max_results]
103
-
104
- @classmethod
105
- def parse_html(cls, html: str) -> list[Self]:
106
- results: list[Self] = []
107
- start: int = html.index("ytInitialData") + len("ytInitialData") + 3
108
- end: int = html.index("};", start) + 1
109
- data: Any = json.loads(html[start:end])
110
- for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
111
- "contents"
112
- ]:
113
- for video in contents["itemSectionRenderer"]["contents"]:
114
- if "videoRenderer" in video.keys():
115
- video_data = video.get("videoRenderer", {})
116
- suffix = (
117
- video_data.get("navigationEndpoint", {})
118
- .get("commandMetadata", {})
119
- .get("webCommandMetadata", {})
120
- .get("url", None)
121
- )
122
- if not suffix:
123
- continue
124
- res = cls(
125
- id=video_data.get("videoId", None),
126
- thumbnails=[
127
- thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
128
- ],
129
- title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
130
- long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
131
- channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
132
- duration=video_data.get("lengthText", {}).get("simpleText", 0),
133
- views=video_data.get("viewCountText", {}).get("simpleText", 0),
134
- publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
135
- url_suffix=suffix,
136
- )
137
- results.append(res)
138
-
139
- if results:
140
- break
141
- return results
142
-
143
-
144
- if __name__ == "__main__":
145
- print(get_youtube_video_details("BTS"))
146
- # print(get_youtube_transcript("y7jrpS8GHxs"))
1
+ import json
2
+ import unicodedata
3
+ import urllib.parse
4
+ from dataclasses import dataclass
5
+ from typing import Any, Optional, Self, cast
6
+
7
+ import requests
8
+
9
+
10
+ def get_youtube_video_details(
11
+ query: str,
12
+ ) -> list[dict[str, Optional[str]]]:
13
+ """Search for video metadata on YouTube using the given query. Returns a list of dictionaries containing `video_id`, `title`, `channel`, `duration`, `views`, `publish_time`, and `long_desc`."""
14
+ return [
15
+ {
16
+ "video_id": video_id,
17
+ "title": video.title,
18
+ "channel": video.channel,
19
+ "duration": video.duration,
20
+ "views": video.views,
21
+ "publish_time": video.publish_time,
22
+ "long_desc": video.long_desc,
23
+ }
24
+ for video in YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
25
+ if (video_id := _get_video_id(video.url_suffix))
26
+ ]
27
+
28
+
29
+ def get_youtube_video_subtitle(video_id: str) -> str:
30
+ """Get the transcript of a YouTube video using the given video ID."""
31
+
32
+ from youtube_transcript_api._api import YouTubeTranscriptApi
33
+
34
+ get_transcript = YouTubeTranscriptApi.get_transcript # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
35
+ list_transcripts = YouTubeTranscriptApi.list_transcripts # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
36
+
37
+ result: str = ""
38
+ buffer_timestamp: str = "0s"
39
+ buffer_texts: list[str] = []
40
+ for entry in get_transcript(video_id, languages=(next(iter(list_transcripts(video_id))).language_code,)): # pyright: ignore[reportUnknownVariableType]
41
+ entry = cast(dict[object, object], entry)
42
+ text: str = str(entry.get("text", "")).strip().replace("\n", " ")
43
+ if not text:
44
+ continue
45
+ if len(buffer_texts) >= 10 or _is_special_char(text) or (buffer_texts and _is_special_char(buffer_texts[-1])):
46
+ result += f"[{buffer_timestamp}] {'. '.join(buffer_texts)}\n"
47
+ start = entry.get("start", 0)
48
+ if start:
49
+ buffer_timestamp = f"{start:.0f}s"
50
+ buffer_texts = [text]
51
+ else:
52
+ buffer_texts.append(text)
53
+
54
+ if buffer_texts:
55
+ result += f"[{buffer_timestamp}] {' '.join(buffer_texts)}"
56
+ return result
57
+
58
+
59
+ def _get_video_id(suffix: str) -> str:
60
+ urllib_parse_result = urllib.parse.urlparse(suffix)
61
+ if urllib_parse_result.path.startswith("/shorts/"):
62
+ # Fore shorts (/shorts/...) the video ID is in the path
63
+ parts = urllib_parse_result.path.split("/")
64
+ if len(parts) < 3:
65
+ print(f"Failed to get video ID from {suffix}")
66
+ return ""
67
+ return parts[2]
68
+
69
+ query: str = urllib.parse.urlparse(suffix).query
70
+ query_strings = urllib.parse.parse_qs(query)
71
+ if "v" not in query_strings:
72
+ print(f"Failed to get video ID from {suffix}")
73
+ return ""
74
+ return next(iter(query_strings["v"]), "")
75
+
76
+
77
+ def _is_special_char(text: str) -> bool:
78
+ if not text:
79
+ return False
80
+ return not unicodedata.category(text[0]).startswith("L")
81
+
82
+
83
+ @dataclass
84
+ class YoutubeSearchResult:
85
+ url_suffix: str
86
+ id: Optional[str]
87
+ thumbnails: list[str]
88
+ title: Optional[str]
89
+ long_desc: Optional[str]
90
+ channel: Optional[str]
91
+ duration: Optional[str]
92
+ views: Optional[str]
93
+ publish_time: Optional[str]
94
+
95
+ @classmethod
96
+ def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
97
+ url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
98
+ response: str = requests.get(url).text
99
+ while "ytInitialData" not in response:
100
+ response = requests.get(url).text
101
+ results: list[Self] = cls.parse_html(response)
102
+ return results[:max_results]
103
+
104
+ @classmethod
105
+ def parse_html(cls, html: str) -> list[Self]:
106
+ results: list[Self] = []
107
+ start: int = html.index("ytInitialData") + len("ytInitialData") + 3
108
+ end: int = html.index("};", start) + 1
109
+ data: Any = json.loads(html[start:end])
110
+ for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
111
+ "contents"
112
+ ]:
113
+ for video in contents["itemSectionRenderer"]["contents"]:
114
+ if "videoRenderer" in video.keys():
115
+ video_data = video.get("videoRenderer", {})
116
+ suffix = (
117
+ video_data.get("navigationEndpoint", {})
118
+ .get("commandMetadata", {})
119
+ .get("webCommandMetadata", {})
120
+ .get("url", None)
121
+ )
122
+ if not suffix:
123
+ continue
124
+ res = cls(
125
+ id=video_data.get("videoId", None),
126
+ thumbnails=[
127
+ thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
128
+ ],
129
+ title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
130
+ long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
131
+ channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
132
+ duration=video_data.get("lengthText", {}).get("simpleText", 0),
133
+ views=video_data.get("viewCountText", {}).get("simpleText", 0),
134
+ publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
135
+ url_suffix=suffix,
136
+ )
137
+ results.append(res)
138
+
139
+ if results:
140
+ break
141
+ return results
142
+
143
+
144
+ if __name__ == "__main__":
145
+ print(get_youtube_video_details("BTS"))
146
+ # print(get_youtube_transcript("y7jrpS8GHxs"))
--- chatterer/utils/__init__.py (0.1.12)
+++ chatterer/utils/__init__.py (0.1.13)
@@ -1,15 +1,15 @@
1
- from .code_agent import (
2
- CodeExecutionResult,
3
- FunctionSignature,
4
- get_default_repl_tool,
5
- insert_callables_into_global,
6
- )
7
- from .image import Base64Image
8
-
9
- __all__ = [
10
- "Base64Image",
11
- "FunctionSignature",
12
- "CodeExecutionResult",
13
- "get_default_repl_tool",
14
- "insert_callables_into_global",
15
- ]
1
+ from .code_agent import (
2
+ CodeExecutionResult,
3
+ FunctionSignature,
4
+ get_default_repl_tool,
5
+ insert_callables_into_global,
6
+ )
7
+ from .image import Base64Image
8
+
9
+ __all__ = [
10
+ "Base64Image",
11
+ "FunctionSignature",
12
+ "CodeExecutionResult",
13
+ "get_default_repl_tool",
14
+ "insert_callables_into_global",
15
+ ]
--- /dev/null
+++ chatterer/utils/bytesio.py (0.1.13, new file)
@@ -0,0 +1,59 @@
1
+ import os
2
+ from contextlib import contextmanager, suppress
3
+ from io import BytesIO
4
+ from typing import Iterator, Optional
5
+
6
+ from ..common_types.io import BytesReadable, PathOrReadable, StringReadable
7
+
8
+
9
+ @contextmanager
10
+ def read_bytes_stream(
11
+ path_or_file: PathOrReadable,
12
+ assume_pathlike_bytes_as_path: bool = False,
13
+ assume_pathlike_string_as_path: bool = True,
14
+ ) -> Iterator[Optional[BytesReadable]]:
15
+ """
16
+ Context manager for opening a file or using an existing stream.
17
+
18
+ Handles different types of input (file paths, byte streams, string streams)
19
+ and yields a BytesReadable object that can be used to read binary data.
20
+
21
+ Args:
22
+ path_or_file: File path or readable object.
23
+ assume_pathlike_bytes_as_path: If True, assume bytes-like objects are file paths. Else, treat as data itself.
24
+ assume_pathlike_string_as_path: If True, assume string-like objects are file paths. Else, treat as data itself.
25
+
26
+ Yields:
27
+ Optional[BytesReadable]: A readable binary stream or None if opening fails.
28
+ """
29
+ stream: Optional[BytesReadable] = None
30
+ should_close: bool = True # Whether the stream should be closed after use
31
+ try:
32
+ with suppress(BaseException):
33
+ if isinstance(path_or_file, BytesReadable):
34
+ # Assume the input is already a bytes stream
35
+ # NOTE: Delivers itself, so shouldn't be closed.
36
+ stream = path_or_file
37
+ should_close = False
38
+ elif isinstance(path_or_file, StringReadable):
39
+ # Convert the string stream to bytes stream
40
+ stream = BytesIO(path_or_file.read().encode("utf-8"))
41
+ elif isinstance(path_or_file, bytes):
42
+ # Convert the bytes-like object to bytes stream
43
+ if assume_pathlike_bytes_as_path and os.path.exists(path_or_file):
44
+ stream = open(path_or_file, "rb")
45
+ else:
46
+ stream = BytesIO(path_or_file)
47
+ elif isinstance(path_or_file, str):
48
+ # Convert the file path to bytes stream
49
+ if assume_pathlike_string_as_path and os.path.exists(path_or_file):
50
+ stream = open(path_or_file, "rb")
51
+ else:
52
+ stream = BytesIO(path_or_file.encode("utf-8"))
53
+ else:
54
+ # Assume the input is a file descriptor or path
55
+ stream = open(path_or_file, "rb")
56
+ yield stream
57
+ finally:
58
+ if stream is not None and should_close:
59
+ stream.close()
--- chatterer/utils/code_agent.py (0.1.12)
+++ chatterer/utils/code_agent.py (0.1.13)
@@ -1,138 +1,138 @@
1
- import inspect
2
- import textwrap
3
- from typing import (
4
- TYPE_CHECKING,
5
- Callable,
6
- Iterable,
7
- NamedTuple,
8
- Optional,
9
- Self,
10
- )
11
-
12
- from langchain_core.runnables.config import RunnableConfig
13
-
14
- if TYPE_CHECKING:
15
- from langchain_experimental.tools import PythonAstREPLTool
16
-
17
-
18
- class FunctionSignature(NamedTuple):
19
- name: str
20
- callable: Callable[..., object]
21
- signature: str
22
-
23
- @classmethod
24
- def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
25
- if callables is None:
26
- return []
27
- if callable(callables):
28
- return [cls._from_callable(callables)]
29
- return [cls._from_callable(callable) for callable in callables]
30
-
31
- @classmethod
32
- def _from_callable(cls, callable: Callable[..., object]) -> Self:
33
- """
34
- Get the name and signature of a function as a string.
35
- """
36
- # Determine if the function is async
37
- is_async_func = inspect.iscoroutinefunction(callable)
38
- function_def = "async def" if is_async_func else "def"
39
-
40
- # Determine the function name based on the type of callable
41
- if inspect.isfunction(callable):
42
- # For regular Python functions, use __code__.co_name
43
- function_name = callable.__code__.co_name
44
- elif hasattr(callable, "name"):
45
- # For StructuredTool or similar objects with a 'name' attribute
46
- function_name = callable.name # type: ignore
47
- elif hasattr(callable, "__name__"):
48
- # For other callables with a __name__ attribute
49
- function_name = callable.__name__
50
- else:
51
- # Fallback to the class name if no name is found
52
- function_name = type(callable).__name__
53
-
54
- # Build the signature string
55
- signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
56
- docstring = inspect.getdoc(callable)
57
- if docstring:
58
- docstring = f'"""{docstring.strip()}"""'
59
- return cls(
60
- name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
61
- )
62
- else:
63
- return cls(name=function_name, callable=callable, signature=signature)
64
-
65
- @classmethod
66
- def as_prompt(
67
- cls,
68
- function_signatures: Iterable[Self],
69
- prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
70
- sep: str = "\n---\n",
71
- ) -> str:
72
- """
73
- Generate a prompt string from a list of callables.
74
- """
75
- body: str = sep.join(fsig.signature for fsig in function_signatures)
76
- if prefix:
77
- return f"{prefix}{body}"
78
- return body
79
-
80
-
81
- class CodeExecutionResult(NamedTuple):
82
- code: str
83
- output: str
84
-
85
- @classmethod
86
- def from_code(
87
- cls,
88
- code: str,
89
- repl_tool: Optional["PythonAstREPLTool"] = None,
90
- config: Optional[RunnableConfig] = None,
91
- function_signatures: Optional[Iterable[FunctionSignature]] = None,
92
- **kwargs: object,
93
- ) -> Self:
94
- """
95
- Execute code using the Python Code Execution Language Model.
96
- """
97
- if repl_tool is None:
98
- repl_tool = get_default_repl_tool()
99
- if function_signatures:
100
- insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
101
- output = str(repl_tool.invoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
102
- return cls(code=code, output=output)
103
-
104
- @classmethod
105
- async def afrom_code(
106
- cls,
107
- code: str,
108
- repl_tool: Optional["PythonAstREPLTool"] = None,
109
- config: Optional[RunnableConfig] = None,
110
- function_signatures: Optional[Iterable[FunctionSignature]] = None,
111
- **kwargs: object,
112
- ) -> Self:
113
- """
114
- Execute code using the Python Code Execution Language Model asynchronously.
115
- """
116
- if repl_tool is None:
117
- repl_tool = get_default_repl_tool()
118
- if function_signatures:
119
- insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
120
- output = str(await repl_tool.ainvoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
121
- return cls(code=code, output=output)
122
-
123
-
124
- def get_default_repl_tool() -> "PythonAstREPLTool":
125
- from langchain_experimental.tools import PythonAstREPLTool
126
-
127
- return PythonAstREPLTool()
128
-
129
-
130
- def insert_callables_into_global(
131
- function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
132
- ) -> None:
133
- """Insert callables into the REPL tool's globals."""
134
- repl_globals: Optional[dict[str, object]] = repl_tool.globals # pyright: ignore[reportUnknownMemberType]
135
- if repl_globals is None:
136
- repl_tool.globals = {fsig.name: fsig.callable for fsig in function_signatures}
137
- else:
138
- repl_globals.update({fsig.name: fsig.callable for fsig in function_signatures})
1
+ import inspect
2
+ import textwrap
3
+ from typing import (
4
+ TYPE_CHECKING,
5
+ Callable,
6
+ Iterable,
7
+ NamedTuple,
8
+ Optional,
9
+ Self,
10
+ )
11
+
12
+ from langchain_core.runnables.config import RunnableConfig
13
+
14
+ if TYPE_CHECKING:
15
+ from langchain_experimental.tools import PythonAstREPLTool
16
+
17
+
18
+ class FunctionSignature(NamedTuple):
19
+ name: str
20
+ callable: Callable[..., object]
21
+ signature: str
22
+
23
+ @classmethod
24
+ def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
25
+ if callables is None:
26
+ return []
27
+ if callable(callables):
28
+ return [cls._from_callable(callables)]
29
+ return [cls._from_callable(callable) for callable in callables]
30
+
31
+ @classmethod
32
+ def _from_callable(cls, callable: Callable[..., object]) -> Self:
33
+ """
34
+ Get the name and signature of a function as a string.
35
+ """
36
+ # Determine if the function is async
37
+ is_async_func = inspect.iscoroutinefunction(callable)
38
+ function_def = "async def" if is_async_func else "def"
39
+
40
+ # Determine the function name based on the type of callable
41
+ if inspect.isfunction(callable):
42
+ # For regular Python functions, use __code__.co_name
43
+ function_name = callable.__code__.co_name
44
+ elif hasattr(callable, "name"):
45
+ # For StructuredTool or similar objects with a 'name' attribute
46
+ function_name = callable.name # type: ignore
47
+ elif hasattr(callable, "__name__"):
48
+ # For other callables with a __name__ attribute
49
+ function_name = callable.__name__
50
+ else:
51
+ # Fallback to the class name if no name is found
52
+ function_name = type(callable).__name__
53
+
54
+ # Build the signature string
55
+ signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
56
+ docstring = inspect.getdoc(callable)
57
+ if docstring:
58
+ docstring = f'"""{docstring.strip()}"""'
59
+ return cls(
60
+ name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
61
+ )
62
+ else:
63
+ return cls(name=function_name, callable=callable, signature=signature)
64
+
65
+ @classmethod
66
+ def as_prompt(
67
+ cls,
68
+ function_signatures: Iterable[Self],
69
+ prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
70
+ sep: str = "\n---\n",
71
+ ) -> str:
72
+ """
73
+ Generate a prompt string from a list of callables.
74
+ """
75
+ body: str = sep.join(fsig.signature for fsig in function_signatures)
76
+ if prefix:
77
+ return f"{prefix}{body}"
78
+ return body
79
+
80
+
81
+ class CodeExecutionResult(NamedTuple):
82
+ code: str
83
+ output: str
84
+
85
+ @classmethod
86
+ def from_code(
87
+ cls,
88
+ code: str,
89
+ repl_tool: Optional["PythonAstREPLTool"] = None,
90
+ config: Optional[RunnableConfig] = None,
91
+ function_signatures: Optional[Iterable[FunctionSignature]] = None,
92
+ **kwargs: object,
93
+ ) -> Self:
94
+ """
95
+ Execute code using the Python Code Execution Language Model.
96
+ """
97
+ if repl_tool is None:
98
+ repl_tool = get_default_repl_tool()
99
+ if function_signatures:
100
+ insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
101
+ output = str(repl_tool.invoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
102
+ return cls(code=code, output=output)
103
+
104
+ @classmethod
105
+ async def afrom_code(
106
+ cls,
107
+ code: str,
108
+ repl_tool: Optional["PythonAstREPLTool"] = None,
109
+ config: Optional[RunnableConfig] = None,
110
+ function_signatures: Optional[Iterable[FunctionSignature]] = None,
111
+ **kwargs: object,
112
+ ) -> Self:
113
+ """
114
+ Execute code using the Python Code Execution Language Model asynchronously.
115
+ """
116
+ if repl_tool is None:
117
+ repl_tool = get_default_repl_tool()
118
+ if function_signatures:
119
+ insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
120
+ output = str(await repl_tool.ainvoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
121
+ return cls(code=code, output=output)
122
+
123
+
124
+ def get_default_repl_tool() -> "PythonAstREPLTool":
125
+ from langchain_experimental.tools import PythonAstREPLTool
126
+
127
+ return PythonAstREPLTool()
128
+
129
+
130
+ def insert_callables_into_global(
131
+ function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
132
+ ) -> None:
133
+ """Insert callables into the REPL tool's globals."""
134
+ repl_globals: Optional[dict[str, object]] = repl_tool.globals # pyright: ignore[reportUnknownMemberType]
135
+ if repl_globals is None:
136
+ repl_tool.globals = {fsig.name: fsig.callable for fsig in function_signatures}
137
+ else:
138
+ repl_globals.update({fsig.name: fsig.callable for fsig in function_signatures})