chatterer 0.1.25__tar.gz → 0.1.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {chatterer-0.1.25 → chatterer-0.1.26}/PKG-INFO +1 -1
  2. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/__init__.py +0 -10
  3. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/language_model.py +3 -3
  4. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/base64_image.py +75 -18
  5. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/imghdr.py +5 -8
  6. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/PKG-INFO +1 -1
  7. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/SOURCES.txt +0 -3
  8. {chatterer-0.1.25 → chatterer-0.1.26}/pyproject.toml +4 -1
  9. chatterer-0.1.25/chatterer/strategies/__init__.py +0 -13
  10. chatterer-0.1.25/chatterer/strategies/atom_of_thoughts.py +0 -975
  11. chatterer-0.1.25/chatterer/strategies/base.py +0 -14
  12. {chatterer-0.1.25 → chatterer-0.1.26}/README.md +0 -0
  13. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/common_types/__init__.py +0 -0
  14. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/common_types/io.py +0 -0
  15. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/__init__.py +0 -0
  16. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/__main__.py +0 -0
  17. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/any2md.py +0 -0
  18. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/pdf2md.py +0 -0
  19. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/pdf2txt.py +0 -0
  20. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/ppt.py +0 -0
  21. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/pw.py +0 -0
  22. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/snippet.py +0 -0
  23. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/transcribe.py +0 -0
  24. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/upstage.py +0 -0
  25. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/examples/web2md.py +0 -0
  26. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/interactive.py +0 -0
  27. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/messages.py +0 -0
  28. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/py.typed +0 -0
  29. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/__init__.py +0 -0
  30. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/caption_markdown_images.py +0 -0
  31. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/__init__.py +0 -0
  32. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/chunks.py +0 -0
  33. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
  34. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/citations.py +0 -0
  35. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/prompt.py +0 -0
  36. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/reference.py +0 -0
  37. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/citation_chunking/utils.py +0 -0
  38. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/convert_pdf_to_markdown.py +0 -0
  39. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/convert_to_text.py +0 -0
  40. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/upstage_document_parser.py +0 -0
  41. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/webpage_to_markdown.py +0 -0
  42. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/tools/youtube.py +0 -0
  43. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/__init__.py +0 -0
  44. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/bytesio.py +0 -0
  45. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer/utils/code_agent.py +0 -0
  46. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/dependency_links.txt +0 -0
  47. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/entry_points.txt +0 -0
  48. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/requires.txt +0 -0
  49. {chatterer-0.1.25 → chatterer-0.1.26}/chatterer.egg-info/top_level.txt +0 -0
  50. {chatterer-0.1.25 → chatterer-0.1.26}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.25
3
+ Version: 0.1.26
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -12,12 +12,6 @@ from .messages import (
12
12
  SystemMessage,
13
13
  UsageMetadata,
14
14
  )
15
- from .strategies import (
16
- AoTPipeline,
17
- AoTPrompter,
18
- AoTStrategy,
19
- BaseStrategy,
20
- )
21
15
  from .tools import (
22
16
  CodeSnippets,
23
17
  MarkdownLink,
@@ -53,11 +47,7 @@ from .utils import (
53
47
  load_dotenv()
54
48
 
55
49
  __all__ = [
56
- "BaseStrategy",
57
50
  "Chatterer",
58
- "AoTStrategy",
59
- "AoTPipeline",
60
- "AoTPrompter",
61
51
  "html_to_markdown",
62
52
  "anything_to_markdown",
63
53
  "pdf_to_text",
@@ -27,7 +27,7 @@ from .messages import AIMessage, BaseMessage, HumanMessage, UsageMetadata
27
27
  from .utils.code_agent import CodeExecutionResult, FunctionSignature, augment_prompt_for_toolcall
28
28
 
29
29
  if TYPE_CHECKING:
30
- from instructor import Partial
30
+ from instructor import Partial # pyright: ignore[reportMissingTypeStubs]
31
31
  from langchain_experimental.tools.python.tool import PythonAstREPLTool
32
32
 
33
33
  PydanticModelT = TypeVar("PydanticModelT", bound=BaseModel)
@@ -339,7 +339,7 @@ class Chatterer(BaseModel):
339
339
  **kwargs: Any,
340
340
  ) -> Iterator[PydanticModelT]:
341
341
  try:
342
- import instructor
342
+ import instructor # pyright: ignore[reportMissingTypeStubs]
343
343
  except ImportError:
344
344
  raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
345
345
 
@@ -360,7 +360,7 @@ class Chatterer(BaseModel):
360
360
  **kwargs: Any,
361
361
  ) -> AsyncIterator[PydanticModelT]:
362
362
  try:
363
- import instructor
363
+ import instructor # pyright: ignore[reportMissingTypeStubs]
364
364
  except ImportError:
365
365
  raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
366
366
 
@@ -1,5 +1,3 @@
1
- from __future__ import annotations
2
-
3
1
  import re
4
2
  from base64 import b64encode
5
3
  from io import BytesIO
@@ -18,7 +16,6 @@ from typing import (
18
16
  TypeAlias,
19
17
  TypedDict,
20
18
  TypeGuard,
21
- cast,
22
19
  get_args,
23
20
  )
24
21
  from urllib.parse import urlparse
@@ -29,11 +26,16 @@ from PIL.Image import Resampling
29
26
  from PIL.Image import open as image_open
30
27
  from pydantic import BaseModel
31
28
 
29
+ from .imghdr import what
30
+
32
31
  if TYPE_CHECKING:
33
32
  from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
34
33
 
35
34
  logger = getLogger(__name__)
36
- ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
35
+ ImageFormat: TypeAlias = Literal["jpeg", "png", "gif", "webp", "bmp"]
36
+ ExtendedImageFormat: TypeAlias = ImageFormat | Literal["jpg", "JPG"] | Literal["JPEG", "PNG", "GIF", "WEBP", "BMP"]
37
+
38
+ ALLOWED_IMAGE_FORMATS: tuple[ImageFormat, ...] = get_args(ImageFormat)
37
39
 
38
40
 
39
41
  class ImageProcessingConfig(TypedDict):
@@ -46,7 +48,7 @@ class ImageProcessingConfig(TypedDict):
46
48
  - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
47
49
  """
48
50
 
49
- formats: Sequence[ImageType]
51
+ formats: Sequence[ImageFormat]
50
52
  max_size_mb: NotRequired[float]
51
53
  min_largest_side: NotRequired[int]
52
54
  resize_if_min_side_exceeds: NotRequired[int]
@@ -59,16 +61,15 @@ def get_default_image_processing_config() -> ImageProcessingConfig:
59
61
  "min_largest_side": 200,
60
62
  "resize_if_min_side_exceeds": 2000,
61
63
  "resize_target_for_min_side": 1000,
62
- "formats": ["png", "jpeg", "jpg", "gif", "bmp", "webp"],
64
+ "formats": ["png", "jpeg", "gif", "bmp", "webp"],
63
65
  }
64
66
 
65
67
 
66
- # image_url: str, headers: dict[str, str]) -> Optional[bytes]:
67
68
  class Base64Image(BaseModel):
68
- ext: ImageType
69
+ ext: ImageFormat
69
70
  data: str
70
71
 
71
- IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
72
+ IMAGE_TYPES: ClassVar[tuple[str, ...]] = ALLOWED_IMAGE_FORMATS
72
73
  IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
73
74
  r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
74
75
  )
@@ -76,20 +77,66 @@ class Base64Image(BaseModel):
76
77
  def __hash__(self) -> int:
77
78
  return hash((self.ext, self.data))
78
79
 
79
- def model_post_init(self, __context: object) -> None:
80
- if self.ext == "jpg":
81
- self.ext = "jpeg"
80
+ @classmethod
81
+ def new(
82
+ cls,
83
+ url_or_path_or_bytes: str | bytes,
84
+ *,
85
+ headers: dict[str, str] = {},
86
+ config: ImageProcessingConfig = get_default_image_processing_config(),
87
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
88
+ ) -> Self:
89
+ if isinstance(url_or_path_or_bytes, bytes):
90
+ ext = what(url_or_path_or_bytes)
91
+ if ext is None:
92
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
93
+ if not cls._verify_ext(ext, config["formats"]):
94
+ raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
95
+ return cls.from_bytes(url_or_path_or_bytes, ext=ext)
96
+ elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
97
+ return maybe_base64
98
+ elif maybe_url_or_path := cls.from_url_or_path(
99
+ url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
100
+ ):
101
+ return maybe_url_or_path
102
+ else:
103
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
104
+
105
+ @classmethod
106
+ async def anew(
107
+ cls,
108
+ url_or_path_or_bytes: str | bytes,
109
+ *,
110
+ headers: dict[str, str] = {},
111
+ config: ImageProcessingConfig = get_default_image_processing_config(),
112
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
113
+ ) -> Self:
114
+ if isinstance(url_or_path_or_bytes, bytes):
115
+ ext = what(url_or_path_or_bytes)
116
+ if ext is None:
117
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
118
+ if not cls._verify_ext(ext, config["formats"]):
119
+ raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
120
+ return cls.from_bytes(url_or_path_or_bytes, ext=ext)
121
+ elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
122
+ return maybe_base64
123
+ elif maybe_url_or_path := await cls.afrom_url_or_path(
124
+ url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
125
+ ):
126
+ return maybe_url_or_path
127
+ else:
128
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
82
129
 
83
130
  @classmethod
84
131
  def from_string(cls, data: str) -> Optional[Self]:
85
132
  match = cls.IMAGE_PATTERN.fullmatch(data)
86
133
  if not match:
87
134
  return None
88
- return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))
135
+ return cls(ext=_to_image_format(match.group(1)), data=match.group(2))
89
136
 
90
137
  @classmethod
91
- def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
92
- return cls(ext=ext, data=b64encode(data).decode("utf-8"))
138
+ def from_bytes(cls, data: bytes, ext: ExtendedImageFormat) -> Self:
139
+ return cls(ext=_to_image_format(ext), data=b64encode(data).decode("utf-8"))
93
140
 
94
141
  @classmethod
95
142
  def from_url_or_path(
@@ -154,7 +201,7 @@ class Base64Image(BaseModel):
154
201
  return {"type": "image_url", "image_url": {"url": self.data_uri}}
155
202
 
156
203
  @staticmethod
157
- def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
204
+ def _verify_ext(ext: str, allowed_types: Sequence[ImageFormat]) -> TypeGuard[ImageFormat]:
158
205
  return ext in allowed_types
159
206
 
160
207
  @classmethod
@@ -226,7 +273,7 @@ class Base64Image(BaseModel):
226
273
  # 포맷 제한
227
274
  # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
228
275
  pil_format: str = (im.format or "").lower()
229
- allowed_formats: Sequence[ImageType] = config.get("formats", [])
276
+ allowed_formats: Sequence[ImageFormat] = config.get("formats", [])
230
277
  if not cls._verify_ext(pil_format, allowed_formats):
231
278
  logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
232
279
  return None
@@ -265,12 +312,22 @@ class Base64Image(BaseModel):
265
312
  return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
266
313
 
267
314
 
315
+ def _to_image_format(ext: str) -> ImageFormat:
316
+ lowered = ext.lower()
317
+ if lowered in ALLOWED_IMAGE_FORMATS:
318
+ return lowered
319
+ elif lowered == "jpg":
320
+ return "jpeg" # jpg -> jpeg
321
+ else:
322
+ raise ValueError(f"Invalid image format: {ext}")
323
+
324
+
268
325
  def is_remote_url(path: str) -> bool:
269
326
  parsed = urlparse(path)
270
327
  return bool(parsed.scheme and parsed.netloc)
271
328
 
272
329
 
273
- def detect_image_type(image_data: bytes) -> Optional[ImageType]:
330
+ def detect_image_type(image_data: bytes) -> Optional[ImageFormat]:
274
331
  """
275
332
  Detect the image format based on the image binary signature (header).
276
333
  Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
@@ -27,14 +27,11 @@ def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
27
27
  return base64.b64decode(b64_data)
28
28
 
29
29
 
30
- def what(b64_data: str) -> Optional[ImageType]:
31
- """
32
- base64 인코딩된 문자열에 포함된 이미지의 타입을 반환한다.
33
-
34
- :param b64_data: 이미지 데이터를 담은 base64 문자열.
35
- :return: 이미지 포맷 문자열 (예: "jpeg", "png", "gif", 등) 또는 인식되지 않으면 None.
36
- """
37
- h: bytes = decode_prefix(b64_data, prefix_bytes=32)
30
+ def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
31
+ if isinstance(b64_or_bytes, str):
32
+ h: bytes = decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes)
33
+ else:
34
+ h = b64_or_bytes
38
35
 
39
36
  for tf in tests:
40
37
  res = tf(h)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.25
3
+ Version: 0.1.26
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -24,9 +24,6 @@ chatterer/examples/snippet.py
24
24
  chatterer/examples/transcribe.py
25
25
  chatterer/examples/upstage.py
26
26
  chatterer/examples/web2md.py
27
- chatterer/strategies/__init__.py
28
- chatterer/strategies/atom_of_thoughts.py
29
- chatterer/strategies/base.py
30
27
  chatterer/tools/__init__.py
31
28
  chatterer/tools/caption_markdown_images.py
32
29
  chatterer/tools/convert_pdf_to_markdown.py
@@ -11,7 +11,7 @@ dependencies = [
11
11
  "dotenv>=0.9.9",
12
12
  ]
13
13
  name = "chatterer"
14
- version = "0.1.25"
14
+ version = "0.1.26"
15
15
  description = "The highest-level interface for various LLM APIs."
16
16
  readme = "README.md"
17
17
  requires-python = ">=3.12"
@@ -20,6 +20,9 @@ requires-python = ">=3.12"
20
20
  where = ["."]
21
21
  include = ["chatterer", "chatterer.*"]
22
22
 
23
+ [tool.pyright]
24
+ typeCheckingMode = "strict"
25
+
23
26
  [project.optional-dependencies]
24
27
  dev = ["pyright>=1.1.401"]
25
28
 
@@ -1,13 +0,0 @@
1
- from .atom_of_thoughts import (
2
- AoTPipeline,
3
- AoTStrategy,
4
- AoTPrompter,
5
- )
6
- from .base import BaseStrategy
7
-
8
- __all__ = [
9
- "BaseStrategy",
10
- "AoTPipeline",
11
- "AoTPrompter",
12
- "AoTStrategy",
13
- ]