chatterer 0.1.13__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +36 -5
- chatterer/interactive.py +692 -0
- chatterer/language_model.py +217 -261
- chatterer/messages.py +13 -1
- chatterer/tools/__init__.py +26 -15
- chatterer/tools/{webpage_to_markdown/utils.py → caption_markdown_images.py} +158 -108
- chatterer/tools/convert_pdf_to_markdown.py +302 -0
- chatterer/tools/convert_to_text.py +45 -16
- chatterer/tools/upstage_document_parser.py +481 -214
- chatterer/tools/{webpage_to_markdown/playwright_bot.py → webpage_to_markdown.py} +197 -107
- chatterer/tools/youtube.py +2 -1
- chatterer/utils/__init__.py +1 -1
- chatterer/utils/{image.py → base64_image.py} +56 -62
- chatterer/utils/code_agent.py +137 -38
- chatterer/utils/imghdr.py +148 -0
- chatterer-0.1.16.dist-info/METADATA +392 -0
- chatterer-0.1.16.dist-info/RECORD +33 -0
- {chatterer-0.1.13.dist-info → chatterer-0.1.16.dist-info}/WHEEL +1 -1
- chatterer/tools/webpage_to_markdown/__init__.py +0 -4
- chatterer-0.1.13.dist-info/METADATA +0 -171
- chatterer-0.1.13.dist-info/RECORD +0 -31
- {chatterer-0.1.13.dist-info → chatterer-0.1.16.dist-info}/top_level.txt +0 -0
chatterer/utils/code_agent.py
CHANGED
@@ -7,13 +7,44 @@ from typing import (
|
|
7
7
|
NamedTuple,
|
8
8
|
Optional,
|
9
9
|
Self,
|
10
|
+
Sequence,
|
10
11
|
)
|
11
12
|
|
12
13
|
from langchain_core.runnables.config import RunnableConfig
|
13
14
|
|
15
|
+
from ..messages import LanguageModelInput, SystemMessage
|
16
|
+
|
14
17
|
if TYPE_CHECKING:
|
15
18
|
from langchain_experimental.tools import PythonAstREPLTool
|
16
19
|
|
20
|
+
# --- Constants ---
|
21
|
+
# System prompt telling the model how to write code for the execution tool and
# which JSON shape to answer with. Built line by line; the two trailing empty
# entries produce the blank line that ends the prompt.
DEFAULT_CODE_GENERATION_PROMPT = "\n".join(
    (
        "You are equipped with a Python code execution tool.",
        "Your primary goal is to generate Python code that effectively solves the *specific, immediate sub-task* required to progress towards the overall user request. The generated code and its resulting output will be automatically added to our conversation history.",
        "",
        "Guidelines for Optimal Tool Use:",
        "- Conciseness and Efficiency: Write code that directly addresses the current need. Avoid unnecessary complexity, computations, or data loading. Tool execution has resource limits.",
        "- Targeted Action: Focus only on the code required for the *next logical step*. Do not attempt to solve the entire problem in one code block if it involves multiple steps.",
        "- Error Handling: Implement basic error handling (e.g., `try-except`) for operations that might fail (like file access or network requests, if applicable).",
        "- Context Awareness: Assume the code runs in a stateful environment where variables and imports might persist from previous executions (unless explicitly cleared).",
        "- Self-Contained Execution: Ensure the code block is runnable as provided. Define necessary variables within the block if they aren't guaranteed to exist from prior context.",
        "",
        "Output Format:",
        "Return *only* a JSON object containing the Python code:",
        "{",
        ' "code": "<your_python_code_here>"',
        "}",
        "",
        "",
    )
)

# Text placed in front of the rendered function signatures; every line ends
# with a newline, hence the trailing empty entry.
DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = "\n".join(
    (
        "The following Python functions are available in the global scope for you to use directly in your code.",
        "You do not need to define these functions; simply call them as needed.",
        "Use these functions only when they directly help in solving the current task. You are not obligated to use them.",
        "",
    )
)

# Placed between consecutive function references so they stay visually distinct.
DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n"
|
44
|
+
|
45
|
+
|
46
|
+
# --- Helper Classes and Functions ---
|
47
|
+
|
17
48
|
|
18
49
|
class FunctionSignature(NamedTuple):
|
19
50
|
name: str
|
@@ -24,57 +55,63 @@ class FunctionSignature(NamedTuple):
|
|
24
55
|
def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
|
25
56
|
if callables is None:
|
26
57
|
return []
|
27
|
-
|
58
|
+
# Correctly handle single callable case
|
59
|
+
if isinstance(callables, Callable) and not isinstance(callables, type): # Exclude classes if not intended
|
28
60
|
return [cls._from_callable(callables)]
|
29
|
-
|
61
|
+
# Handle iterables
|
62
|
+
if isinstance(callables, Iterable):
|
63
|
+
return [cls._from_callable(c) for c in callables]
|
64
|
+
# If it's neither a callable nor an iterable of callables, return empty
|
65
|
+
return []
|
30
66
|
|
31
67
|
@classmethod
|
32
|
-
def _from_callable(cls,
|
68
|
+
def _from_callable(cls, callable_obj: Callable[..., object]) -> Self:
|
33
69
|
"""
|
34
70
|
Get the name and signature of a function as a string.
|
35
71
|
"""
|
36
|
-
|
37
|
-
is_async_func = inspect.iscoroutinefunction(callable)
|
72
|
+
is_async_func = inspect.iscoroutinefunction(callable_obj)
|
38
73
|
function_def = "async def" if is_async_func else "def"
|
39
74
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
function_name =
|
44
|
-
elif hasattr(
|
45
|
-
|
46
|
-
function_name = callable.name # type: ignore
|
47
|
-
elif hasattr(callable, "__name__"):
|
48
|
-
# For other callables with a __name__ attribute
|
49
|
-
function_name = callable.__name__
|
75
|
+
if inspect.isfunction(callable_obj):
|
76
|
+
function_name = callable_obj.__code__.co_name
|
77
|
+
elif hasattr(callable_obj, "name") and isinstance(getattr(callable_obj, "name"), str):
|
78
|
+
function_name = getattr(callable_obj, "name")
|
79
|
+
elif hasattr(callable_obj, "__name__"):
|
80
|
+
function_name = callable_obj.__name__
|
50
81
|
else:
|
51
|
-
|
52
|
-
|
82
|
+
function_name = type(callable_obj).__name__
|
83
|
+
|
84
|
+
try:
|
85
|
+
signature_str = str(inspect.signature(callable_obj))
|
86
|
+
except ValueError: # Handles built-ins or others without inspectable signatures
|
87
|
+
signature_str = "(...)" # Placeholder signature
|
88
|
+
|
89
|
+
signature = f"{function_def} {function_name}{signature_str}:"
|
90
|
+
docstring = inspect.getdoc(callable_obj)
|
53
91
|
|
54
|
-
# Build the signature string
|
55
|
-
signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
|
56
|
-
docstring = inspect.getdoc(callable)
|
57
92
|
if docstring:
|
58
93
|
docstring = f'"""{docstring.strip()}"""'
|
59
|
-
|
60
|
-
name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
|
61
|
-
)
|
94
|
+
full_signature = f"{signature}\n{textwrap.indent(docstring, ' ')}"
|
62
95
|
else:
|
63
|
-
|
96
|
+
full_signature = signature
|
97
|
+
|
98
|
+
return cls(name=function_name, callable=callable_obj, signature=full_signature)
|
64
99
|
|
65
100
|
@classmethod
def as_prompt(
    cls,
    function_signatures: Iterable[Self],
    prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    sep: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
) -> str:
    """Render function signatures as one prompt string.

    Args:
        function_signatures: Signatures to render; any iterable, including a
            generator.
        prefix: Text placed before the signatures, followed by a newline.
            Omitted when falsy.
        sep: Text placed between consecutive signatures.

    Returns:
        The rendered prompt, or ``""`` when there are no signatures.
    """
    # Materialise first: a generator is always truthy, so emptiness can only
    # be checked reliably on a concrete list.
    rendered = [fsig.signature for fsig in function_signatures]
    if not rendered:
        return ""

    body: str = sep.join(rendered)
    if prefix:
        return f"{prefix}\n{body}"
    return body
|
79
116
|
|
80
117
|
|
@@ -92,13 +129,14 @@ class CodeExecutionResult(NamedTuple):
|
|
92
129
|
**kwargs: object,
|
93
130
|
) -> Self:
|
94
131
|
"""
|
95
|
-
Execute code using the Python
|
132
|
+
Execute code using the Python REPL tool.
|
96
133
|
"""
|
97
134
|
if repl_tool is None:
|
98
135
|
repl_tool = get_default_repl_tool()
|
99
136
|
if function_signatures:
|
100
137
|
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
101
|
-
|
138
|
+
# Ensure kwargs are passed correctly if needed by invoke
|
139
|
+
output = str(repl_tool.invoke(code, config=config)) # pyright: ignore[reportUnknownMemberType]
|
102
140
|
return cls(code=code, output=output)
|
103
141
|
|
104
142
|
@classmethod
|
@@ -111,28 +149,89 @@ class CodeExecutionResult(NamedTuple):
|
|
111
149
|
**kwargs: object,
|
112
150
|
) -> Self:
|
113
151
|
"""
|
114
|
-
Execute code using the Python
|
152
|
+
Execute code using the Python REPL tool asynchronously.
|
115
153
|
"""
|
116
154
|
if repl_tool is None:
|
117
155
|
repl_tool = get_default_repl_tool()
|
118
156
|
if function_signatures:
|
119
157
|
insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
|
120
|
-
|
158
|
+
# Ensure kwargs are passed correctly if needed by ainvoke
|
159
|
+
output = str(await repl_tool.ainvoke(code, config=config)) # pyright: ignore[reportUnknownMemberType]
|
121
160
|
return cls(code=code, output=output)
|
122
161
|
|
123
162
|
|
124
163
|
def get_default_repl_tool() -> "PythonAstREPLTool":
    """Create a ``PythonAstREPLTool`` with default settings.

    The import happens here rather than at module load, so the package is only
    needed when a REPL tool is actually requested.

    Raises:
        ImportError: If ``langchain_experimental`` cannot be imported; the
            original import failure is attached as the cause.
    """
    # Only the import is guarded, so an ImportError raised while constructing
    # the tool is not mislabelled as a missing package.
    try:
        from langchain_experimental.tools import PythonAstREPLTool
    except ImportError as exc:
        raise ImportError(
            "PythonAstREPLTool requires langchain_experimental. Install with: pip install langchain-experimental"
        ) from exc

    return PythonAstREPLTool()
|
128
174
|
|
129
175
|
|
130
176
|
def insert_callables_into_global(
    function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
) -> None:
    """Expose each signature's callable under its name in the REPL tool's globals.

    If the tool has no ``globals`` attribute, or it is not a dict, a fresh dict
    is installed first. Existing entries with the same name are overwritten.
    """
    if not isinstance(getattr(repl_tool, "globals", None), dict):
        repl_tool.globals = {}

    # Read the mapping back from the tool so the writes land in the object the
    # tool actually holds; the dict is mutated in place.
    namespace: dict[object, object] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
    for entry in function_signatures:
        namespace[entry.name] = entry.callable
|
193
|
+
|
194
|
+
|
195
|
+
def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
    """Return ``messages`` with ``prompt_to_add`` placed in front of it.

    * An empty ``prompt_to_add`` returns ``messages`` unchanged.
    * A string gets the prompt prepended, separated by a blank line.
    * A sequence becomes a new list that starts with a ``SystemMessage``;
      the input sequence is not mutated.
    * An object exposing ``to_messages()`` (prompt-value style input) is
      converted to its messages and then handled like a sequence.

    Raises:
        TypeError: If ``messages`` is none of the supported kinds.
    """
    if not prompt_to_add:
        return messages

    if isinstance(messages, str):
        return f"{prompt_to_add}\n\n{messages}"

    if not isinstance(messages, Sequence):
        to_messages = getattr(messages, "to_messages", None)
        if not callable(to_messages):
            raise TypeError(f"Unsupported message input type: {type(messages)}")
        messages = to_messages()

    return [SystemMessage(content=prompt_to_add), *messages]
|
216
|
+
|
217
|
+
|
218
|
+
def augment_prompt_for_toolcall(
    function_signatures: Iterable[FunctionSignature],
    messages: LanguageModelInput,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
) -> LanguageModelInput:
    """Put the code-generation prompt and the function reference in front of ``messages``.

    The function reference is rendered with ``FunctionSignature.as_prompt``.
    Empty prompts are skipped.
    """
    reference_block = FunctionSignature.as_prompt(
        function_signatures, function_reference_prefix, function_reference_seperator
    )

    # Each prompt is pushed to the front, so the one applied last (the code
    # invocation prompt) ends up before the function reference.
    for extra_prompt in (reference_block, prompt_for_code_invoke):
        if extra_prompt:
            messages = _add_message_first(messages=messages, prompt_to_add=extra_prompt)

    return messages
|
@@ -0,0 +1,148 @@
|
|
1
|
+
"""
|
2
|
+
Recognize image file formats based on their first few bytes (base64-encoded).
|
3
|
+
Originally derived from Python's imghdr, modified for base64 inputs.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import base64
|
7
|
+
import math
|
8
|
+
from typing import Callable, List, Literal, Optional
|
9
|
+
|
10
|
+
# Every format name a detector may report.
ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]

# A detector looks at decoded header bytes and names the format, or returns None.
_Detector = Callable[[bytes], Optional[ImageType]]

# Registered detectors, in registration order.
tests: List[_Detector] = []


def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
    """Decorator that appends ``func`` to ``tests`` and returns it unchanged."""
    tests.append(func)
    return func
|
18
|
+
|
19
|
+
|
20
|
+
def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
    """Decode the first ``prefix_bytes`` bytes of a base64 string.

    Only the leading part of ``b64_data`` is decoded, so identifying a large
    image does not require decoding the whole payload.

    Args:
        b64_data: Base64 text.
        prefix_bytes: Maximum number of decoded bytes to return.

    Returns:
        At most ``prefix_bytes`` decoded bytes (fewer if the data is shorter).
    """
    # Base64 maps every 4 characters to 3 bytes. Cutting on a 4-character
    # boundary keeps the truncated text decodable on its own; cutting anywhere
    # else fails with a padding error.
    needed_chars = 4 * math.ceil(prefix_bytes / 3)

    try:
        decoded = base64.b64decode(b64_data[:needed_chars])
    except ValueError:
        # binascii.Error is a ValueError. Characters that the decoder discards
        # can shift the group boundary, so retry with the full input.
        decoded = base64.b64decode(b64_data)
    return decoded[:prefix_bytes]
|
28
|
+
|
29
|
+
|
30
|
+
def what(b64_data: str) -> Optional[ImageType]:
    """
    Return the type of the image contained in a base64-encoded string.

    :param b64_data: base64 string holding the image data.
    :return: image format name (e.g. "jpeg", "png", "gif") or None if it is not recognised.
    """
    header: bytes = decode_prefix(b64_data, prefix_bytes=32)

    # Detectors run lazily in registration order; the first truthy answer wins.
    verdicts = (detect(header) for detect in tests)
    return next((verdict for verdict in verdicts if verdict), None)
|
44
|
+
|
45
|
+
|
46
|
+
# --- 테스트 함수들 --- #
|
47
|
+
|
48
|
+
|
49
|
+
@register_test
def test_jpeg(h: bytes) -> Optional[ImageType]:
    """Detect JPEG data from its leading bytes.

    Returns "jpeg" when ``h`` starts with the start-of-image marker followed
    by the first byte of another marker, or when bytes 6-9 spell ``JFIF`` or
    ``Exif``; otherwise None.
    """
    # SOI (FF D8) followed by the FF that opens the next marker. This covers
    # APPn, DQT and other first segments, not only JFIF/Exif headers.
    if h.startswith(b"\xff\xd8\xff"):
        return "jpeg"
    # Older heuristic kept so inputs it accepted are still accepted.
    if len(h) >= 10 and h[6:10] in (b"JFIF", b"Exif"):
        return "jpeg"
    return None
|
56
|
+
|
57
|
+
|
58
|
+
@register_test
def test_png(h: bytes) -> Optional[ImageType]:
    """Return "png" when ``h`` begins with the 8-byte PNG signature, else None."""
    return "png" if h[:8] == b"\x89PNG\r\n\x1a\n" else None
|
63
|
+
|
64
|
+
|
65
|
+
@register_test
def test_gif(h: bytes) -> Optional[ImageType]:
    """Return "gif" when ``h`` begins with ``GIF87a`` or ``GIF89a``, else None."""
    return "gif" if h.startswith((b"GIF87a", b"GIF89a")) else None
|
70
|
+
|
71
|
+
|
72
|
+
@register_test
def test_tiff(h: bytes) -> Optional[ImageType]:
    """Return "tiff" when ``h`` begins with ``MM`` or ``II``, else None."""
    return "tiff" if h.startswith((b"MM", b"II")) else None
|
77
|
+
|
78
|
+
|
79
|
+
@register_test
def test_rgb(h: bytes) -> Optional[ImageType]:
    """Return "rgb" when ``h`` begins with the bytes 01 DA, else None."""
    return "rgb" if h[:2] == b"\x01\xda" else None
|
84
|
+
|
85
|
+
|
86
|
+
@register_test
def test_pbm(h: bytes) -> Optional[ImageType]:
    """Return "pbm" for a header of ``P``, then ``1`` or ``4``, then whitespace; else None."""
    if len(h) < 3:
        return None
    letter, digit, gap = h[:3]  # indexing bytes yields ints
    if letter == ord(b"P") and digit in b"14" and gap in b" \t\n\r":
        return "pbm"
    return None
|
91
|
+
|
92
|
+
|
93
|
+
@register_test
def test_pgm(h: bytes) -> Optional[ImageType]:
    """Return "pgm" for a header of ``P``, then ``2`` or ``5``, then whitespace; else None."""
    if len(h) < 3:
        return None
    letter, digit, gap = h[:3]  # indexing bytes yields ints
    if letter == ord(b"P") and digit in b"25" and gap in b" \t\n\r":
        return "pgm"
    return None
|
98
|
+
|
99
|
+
|
100
|
+
@register_test
def test_ppm(h: bytes) -> Optional[ImageType]:
    """Return "ppm" for a header of ``P``, then ``3`` or ``6``, then whitespace; else None."""
    if len(h) < 3:
        return None
    letter, digit, gap = h[:3]  # indexing bytes yields ints
    if letter == ord(b"P") and digit in b"36" and gap in b" \t\n\r":
        return "ppm"
    return None
|
105
|
+
|
106
|
+
|
107
|
+
@register_test
def test_rast(h: bytes) -> Optional[ImageType]:
    """Return "rast" when ``h`` begins with the bytes 59 A6 6A 95, else None."""
    return "rast" if h[:4] == b"\x59\xa6\x6a\x95" else None
|
112
|
+
|
113
|
+
|
114
|
+
@register_test
def test_xbm(h: bytes) -> Optional[ImageType]:
    """Return "xbm" when ``h`` begins with ``#define `` (trailing space included), else None."""
    return "xbm" if h[:8] == b"#define " else None
|
119
|
+
|
120
|
+
|
121
|
+
@register_test
def test_bmp(h: bytes) -> Optional[ImageType]:
    """Return "bmp" when ``h`` begins with ``BM``, else None."""
    return "bmp" if h[:2] == b"BM" else None
|
126
|
+
|
127
|
+
|
128
|
+
@register_test
def test_webp(h: bytes) -> Optional[ImageType]:
    """Return "webp" when ``h`` begins with ``RIFF`` and bytes 8-11 are ``WEBP``, else None."""
    # A header shorter than 12 bytes yields a short slice that cannot equal
    # b"WEBP", so no separate length check is needed.
    is_webp = h[:4] == b"RIFF" and h[8:12] == b"WEBP"
    return "webp" if is_webp else None
|
133
|
+
|
134
|
+
|
135
|
+
@register_test
def test_exr(h: bytes) -> Optional[ImageType]:
    """Return "exr" when ``h`` begins with the bytes 76 2F 31 01, else None."""
    return "exr" if h[:4] == b"\x76\x2f\x31\x01" else None
|
140
|
+
|
141
|
+
|
142
|
+
if __name__ == "__main__":
    # Manual smoke check: run the detector on a small base64-encoded PNG.
    sample_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
    fmt = what(sample_b64)
    print(f"Detected format: {fmt}")  # Expected: png
|