chatterer 0.1.25__tar.gz → 0.1.26__tar.gz
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- {chatterer-0.1.25 → chatterer-0.1.26}/PKG-INFO +1 -1
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/__init__.py +0 -10
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/language_model.py +3 -3
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/base64_image.py +75 -18
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/imghdr.py +5 -8
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/PKG-INFO +1 -1
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/SOURCES.txt +0 -3
- {chatterer-0.1.25 → chatterer-0.1.26}/pyproject.toml +4 -1
- chatterer-0.1.25/chatterer/strategies/__init__.py +0 -13
- chatterer-0.1.25/chatterer/strategies/atom_of_thoughts.py +0 -975
- chatterer-0.1.25/chatterer/strategies/base.py +0 -14
- {chatterer-0.1.25 → chatterer-0.1.26}/README.md +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/common_types/__init__.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/common_types/io.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/__init__.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/__main__.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/any2md.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/pdf2md.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/pdf2txt.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/ppt.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/pw.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/snippet.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/transcribe.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/upstage.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/web2md.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/interactive.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/messages.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/py.typed +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/__init__.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/caption_markdown_images.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/__init__.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/chunks.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/citations.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/prompt.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/reference.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/utils.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/convert_pdf_to_markdown.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/convert_to_text.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/upstage_document_parser.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/webpage_to_markdown.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/youtube.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/__init__.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/bytesio.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/code_agent.py +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/dependency_links.txt +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/entry_points.txt +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/requires.txt +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/top_level.txt +0 -0
- {chatterer-0.1.25 → chatterer-0.1.26}/setup.cfg +0 -0
@@ -12,12 +12,6 @@ from .messages import (
|
|
12
12
|
SystemMessage,
|
13
13
|
UsageMetadata,
|
14
14
|
)
|
15
|
-
from .strategies import (
|
16
|
-
AoTPipeline,
|
17
|
-
AoTPrompter,
|
18
|
-
AoTStrategy,
|
19
|
-
BaseStrategy,
|
20
|
-
)
|
21
15
|
from .tools import (
|
22
16
|
CodeSnippets,
|
23
17
|
MarkdownLink,
|
@@ -53,11 +47,7 @@ from .utils import (
|
|
53
47
|
load_dotenv()
|
54
48
|
|
55
49
|
__all__ = [
|
56
|
-
"BaseStrategy",
|
57
50
|
"Chatterer",
|
58
|
-
"AoTStrategy",
|
59
|
-
"AoTPipeline",
|
60
|
-
"AoTPrompter",
|
61
51
|
"html_to_markdown",
|
62
52
|
"anything_to_markdown",
|
63
53
|
"pdf_to_text",
|
@@ -27,7 +27,7 @@ from .messages import AIMessage, BaseMessage, HumanMessage, UsageMetadata
|
|
27
27
|
from .utils.code_agent import CodeExecutionResult, FunctionSignature, augment_prompt_for_toolcall
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
30
|
-
from instructor import Partial
|
30
|
+
from instructor import Partial # pyright: ignore[reportMissingTypeStubs]
|
31
31
|
from langchain_experimental.tools.python.tool import PythonAstREPLTool
|
32
32
|
|
33
33
|
PydanticModelT = TypeVar("PydanticModelT", bound=BaseModel)
|
@@ -339,7 +339,7 @@ class Chatterer(BaseModel):
|
|
339
339
|
**kwargs: Any,
|
340
340
|
) -> Iterator[PydanticModelT]:
|
341
341
|
try:
|
342
|
-
import instructor
|
342
|
+
import instructor # pyright: ignore[reportMissingTypeStubs]
|
343
343
|
except ImportError:
|
344
344
|
raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
|
345
345
|
|
@@ -360,7 +360,7 @@ class Chatterer(BaseModel):
|
|
360
360
|
**kwargs: Any,
|
361
361
|
) -> AsyncIterator[PydanticModelT]:
|
362
362
|
try:
|
363
|
-
import instructor
|
363
|
+
import instructor # pyright: ignore[reportMissingTypeStubs]
|
364
364
|
except ImportError:
|
365
365
|
raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
|
366
366
|
|
@@ -1,5 +1,3 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
1
|
import re
|
4
2
|
from base64 import b64encode
|
5
3
|
from io import BytesIO
|
@@ -18,7 +16,6 @@ from typing import (
|
|
18
16
|
TypeAlias,
|
19
17
|
TypedDict,
|
20
18
|
TypeGuard,
|
21
|
-
cast,
|
22
19
|
get_args,
|
23
20
|
)
|
24
21
|
from urllib.parse import urlparse
|
@@ -29,11 +26,16 @@ from PIL.Image import Resampling
|
|
29
26
|
from PIL.Image import open as image_open
|
30
27
|
from pydantic import BaseModel
|
31
28
|
|
29
|
+
from .imghdr import what
|
30
|
+
|
32
31
|
if TYPE_CHECKING:
|
33
32
|
from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
|
34
33
|
|
35
34
|
logger = getLogger(__name__)
|
36
|
-
|
35
|
+
ImageFormat: TypeAlias = Literal["jpeg", "png", "gif", "webp", "bmp"]
|
36
|
+
ExtendedImageFormat: TypeAlias = ImageFormat | Literal["jpg", "JPG"] | Literal["JPEG", "PNG", "GIF", "WEBP", "BMP"]
|
37
|
+
|
38
|
+
ALLOWED_IMAGE_FORMATS: tuple[ImageFormat, ...] = get_args(ImageFormat)
|
37
39
|
|
38
40
|
|
39
41
|
class ImageProcessingConfig(TypedDict):
|
@@ -46,7 +48,7 @@ class ImageProcessingConfig(TypedDict):
|
|
46
48
|
- resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
|
47
49
|
"""
|
48
50
|
|
49
|
-
formats: Sequence[
|
51
|
+
formats: Sequence[ImageFormat]
|
50
52
|
max_size_mb: NotRequired[float]
|
51
53
|
min_largest_side: NotRequired[int]
|
52
54
|
resize_if_min_side_exceeds: NotRequired[int]
|
@@ -59,16 +61,15 @@ def get_default_image_processing_config() -> ImageProcessingConfig:
|
|
59
61
|
"min_largest_side": 200,
|
60
62
|
"resize_if_min_side_exceeds": 2000,
|
61
63
|
"resize_target_for_min_side": 1000,
|
62
|
-
"formats": ["png", "jpeg", "
|
64
|
+
"formats": ["png", "jpeg", "gif", "bmp", "webp"],
|
63
65
|
}
|
64
66
|
|
65
67
|
|
66
|
-
# image_url: str, headers: dict[str, str]) -> Optional[bytes]:
|
67
68
|
class Base64Image(BaseModel):
|
68
|
-
ext:
|
69
|
+
ext: ImageFormat
|
69
70
|
data: str
|
70
71
|
|
71
|
-
IMAGE_TYPES: ClassVar[tuple[str, ...]] =
|
72
|
+
IMAGE_TYPES: ClassVar[tuple[str, ...]] = ALLOWED_IMAGE_FORMATS
|
72
73
|
IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
|
73
74
|
r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
|
74
75
|
)
|
@@ -76,20 +77,66 @@ class Base64Image(BaseModel):
|
|
76
77
|
def __hash__(self) -> int:
|
77
78
|
return hash((self.ext, self.data))
|
78
79
|
|
79
|
-
|
80
|
-
|
81
|
-
|
80
|
+
@classmethod
|
81
|
+
def new(
|
82
|
+
cls,
|
83
|
+
url_or_path_or_bytes: str | bytes,
|
84
|
+
*,
|
85
|
+
headers: dict[str, str] = {},
|
86
|
+
config: ImageProcessingConfig = get_default_image_processing_config(),
|
87
|
+
img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
|
88
|
+
) -> Self:
|
89
|
+
if isinstance(url_or_path_or_bytes, bytes):
|
90
|
+
ext = what(url_or_path_or_bytes)
|
91
|
+
if ext is None:
|
92
|
+
raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
|
93
|
+
if not cls._verify_ext(ext, config["formats"]):
|
94
|
+
raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
|
95
|
+
return cls.from_bytes(url_or_path_or_bytes, ext=ext)
|
96
|
+
elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
|
97
|
+
return maybe_base64
|
98
|
+
elif maybe_url_or_path := cls.from_url_or_path(
|
99
|
+
url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
|
100
|
+
):
|
101
|
+
return maybe_url_or_path
|
102
|
+
else:
|
103
|
+
raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
|
104
|
+
|
105
|
+
@classmethod
|
106
|
+
async def anew(
|
107
|
+
cls,
|
108
|
+
url_or_path_or_bytes: str | bytes,
|
109
|
+
*,
|
110
|
+
headers: dict[str, str] = {},
|
111
|
+
config: ImageProcessingConfig = get_default_image_processing_config(),
|
112
|
+
img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
|
113
|
+
) -> Self:
|
114
|
+
if isinstance(url_or_path_or_bytes, bytes):
|
115
|
+
ext = what(url_or_path_or_bytes)
|
116
|
+
if ext is None:
|
117
|
+
raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
|
118
|
+
if not cls._verify_ext(ext, config["formats"]):
|
119
|
+
raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
|
120
|
+
return cls.from_bytes(url_or_path_or_bytes, ext=ext)
|
121
|
+
elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
|
122
|
+
return maybe_base64
|
123
|
+
elif maybe_url_or_path := await cls.afrom_url_or_path(
|
124
|
+
url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
|
125
|
+
):
|
126
|
+
return maybe_url_or_path
|
127
|
+
else:
|
128
|
+
raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
|
82
129
|
|
83
130
|
@classmethod
|
84
131
|
def from_string(cls, data: str) -> Optional[Self]:
|
85
132
|
match = cls.IMAGE_PATTERN.fullmatch(data)
|
86
133
|
if not match:
|
87
134
|
return None
|
88
|
-
return cls(ext=
|
135
|
+
return cls(ext=_to_image_format(match.group(1)), data=match.group(2))
|
89
136
|
|
90
137
|
@classmethod
|
91
|
-
def from_bytes(cls, data: bytes, ext:
|
92
|
-
return cls(ext=ext, data=b64encode(data).decode("utf-8"))
|
138
|
+
def from_bytes(cls, data: bytes, ext: ExtendedImageFormat) -> Self:
|
139
|
+
return cls(ext=_to_image_format(ext), data=b64encode(data).decode("utf-8"))
|
93
140
|
|
94
141
|
@classmethod
|
95
142
|
def from_url_or_path(
|
@@ -154,7 +201,7 @@ class Base64Image(BaseModel):
|
|
154
201
|
return {"type": "image_url", "image_url": {"url": self.data_uri}}
|
155
202
|
|
156
203
|
@staticmethod
|
157
|
-
def _verify_ext(ext: str, allowed_types: Sequence[
|
204
|
+
def _verify_ext(ext: str, allowed_types: Sequence[ImageFormat]) -> TypeGuard[ImageFormat]:
|
158
205
|
return ext in allowed_types
|
159
206
|
|
160
207
|
@classmethod
|
@@ -226,7 +273,7 @@ class Base64Image(BaseModel):
|
|
226
273
|
# 포맷 제한
|
227
274
|
# PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
|
228
275
|
pil_format: str = (im.format or "").lower()
|
229
|
-
allowed_formats: Sequence[
|
276
|
+
allowed_formats: Sequence[ImageFormat] = config.get("formats", [])
|
230
277
|
if not cls._verify_ext(pil_format, allowed_formats):
|
231
278
|
logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
|
232
279
|
return None
|
@@ -265,12 +312,22 @@ class Base64Image(BaseModel):
|
|
265
312
|
return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
|
266
313
|
|
267
314
|
|
315
|
+
def _to_image_format(ext: str) -> ImageFormat:
|
316
|
+
lowered = ext.lower()
|
317
|
+
if lowered in ALLOWED_IMAGE_FORMATS:
|
318
|
+
return lowered
|
319
|
+
elif lowered == "jpg":
|
320
|
+
return "jpeg" # jpg -> jpeg
|
321
|
+
else:
|
322
|
+
raise ValueError(f"Invalid image format: {ext}")
|
323
|
+
|
324
|
+
|
268
325
|
def is_remote_url(path: str) -> bool:
|
269
326
|
parsed = urlparse(path)
|
270
327
|
return bool(parsed.scheme and parsed.netloc)
|
271
328
|
|
272
329
|
|
273
|
-
def detect_image_type(image_data: bytes) -> Optional[
|
330
|
+
def detect_image_type(image_data: bytes) -> Optional[ImageFormat]:
|
274
331
|
"""
|
275
332
|
Detect the image format based on the image binary signature (header).
|
276
333
|
Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
|
@@ -27,14 +27,11 @@ def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
|
|
27
27
|
return base64.b64decode(b64_data)
|
28
28
|
|
29
29
|
|
30
|
-
def what(
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
:return: 이미지 포맷 문자열 (예: "jpeg", "png", "gif", 등) 또는 인식되지 않으면 None.
|
36
|
-
"""
|
37
|
-
h: bytes = decode_prefix(b64_data, prefix_bytes=32)
|
30
|
+
def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
|
31
|
+
if isinstance(b64_or_bytes, str):
|
32
|
+
h: bytes = decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes)
|
33
|
+
else:
|
34
|
+
h = b64_or_bytes
|
38
35
|
|
39
36
|
for tf in tests:
|
40
37
|
res = tf(h)
|
@@ -24,9 +24,6 @@ chatterer/examples/snippet.py
|
|
24
24
|
chatterer/examples/transcribe.py
|
25
25
|
chatterer/examples/upstage.py
|
26
26
|
chatterer/examples/web2md.py
|
27
|
-
chatterer/strategies/__init__.py
|
28
|
-
chatterer/strategies/atom_of_thoughts.py
|
29
|
-
chatterer/strategies/base.py
|
30
27
|
chatterer/tools/__init__.py
|
31
28
|
chatterer/tools/caption_markdown_images.py
|
32
29
|
chatterer/tools/convert_pdf_to_markdown.py
|
@@ -11,7 +11,7 @@ dependencies = [
|
|
11
11
|
"dotenv>=0.9.9",
|
12
12
|
]
|
13
13
|
name = "chatterer"
|
14
|
-
version = "0.1.
|
14
|
+
version = "0.1.26"
|
15
15
|
description = "The highest-level interface for various LLM APIs."
|
16
16
|
readme = "README.md"
|
17
17
|
requires-python = ">=3.12"
|
@@ -20,6 +20,9 @@ requires-python = ">=3.12"
|
|
20
20
|
where = ["."]
|
21
21
|
include = ["chatterer", "chatterer.*"]
|
22
22
|
|
23
|
+
[tool.pyright]
|
24
|
+
typeCheckingMode = "strict"
|
25
|
+
|
23
26
|
[project.optional-dependencies]
|
24
27
|
dev = ["pyright>=1.1.401"]
|
25
28
|
|