chatterer 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. chatterer/__init__.py +87 -87
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/constants.py +5 -0
  5. chatterer/examples/__main__.py +75 -75
  6. chatterer/examples/any2md.py +83 -85
  7. chatterer/examples/pdf2md.py +231 -338
  8. chatterer/examples/pdf2txt.py +52 -54
  9. chatterer/examples/ppt.py +487 -486
  10. chatterer/examples/pw.py +141 -143
  11. chatterer/examples/snippet.py +54 -56
  12. chatterer/examples/transcribe.py +192 -192
  13. chatterer/examples/upstage.py +87 -89
  14. chatterer/examples/web2md.py +80 -80
  15. chatterer/interactive.py +422 -354
  16. chatterer/language_model.py +530 -536
  17. chatterer/messages.py +21 -21
  18. chatterer/tools/__init__.py +46 -46
  19. chatterer/tools/caption_markdown_images.py +388 -384
  20. chatterer/tools/citation_chunking/__init__.py +3 -3
  21. chatterer/tools/citation_chunking/chunks.py +51 -53
  22. chatterer/tools/citation_chunking/citation_chunker.py +117 -118
  23. chatterer/tools/citation_chunking/citations.py +284 -285
  24. chatterer/tools/citation_chunking/prompt.py +157 -157
  25. chatterer/tools/citation_chunking/reference.py +26 -26
  26. chatterer/tools/citation_chunking/utils.py +138 -138
  27. chatterer/tools/convert_pdf_to_markdown.py +634 -645
  28. chatterer/tools/convert_to_text.py +446 -446
  29. chatterer/tools/upstage_document_parser.py +704 -705
  30. chatterer/tools/webpage_to_markdown.py +739 -739
  31. chatterer/tools/youtube.py +146 -147
  32. chatterer/utils/__init__.py +15 -15
  33. chatterer/utils/base64_image.py +349 -350
  34. chatterer/utils/bytesio.py +59 -59
  35. chatterer/utils/code_agent.py +237 -237
  36. chatterer/utils/imghdr.py +145 -145
  37. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/METADATA +377 -390
  38. chatterer-0.1.28.dist-info/RECORD +43 -0
  39. chatterer-0.1.26.dist-info/RECORD +0 -42
  40. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/WHEEL +0 -0
  41. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/entry_points.txt +0 -0
  42. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/top_level.txt +0 -0
@@ -1,350 +1,349 @@
1
- import re
2
- from base64 import b64encode
3
- from io import BytesIO
4
- from logging import getLogger
5
- from pathlib import Path
6
- from typing import (
7
- TYPE_CHECKING,
8
- Awaitable,
9
- Callable,
10
- ClassVar,
11
- Literal,
12
- NotRequired,
13
- Optional,
14
- Self,
15
- Sequence,
16
- TypeAlias,
17
- TypedDict,
18
- TypeGuard,
19
- get_args,
20
- )
21
- from urllib.parse import urlparse
22
-
23
- import requests
24
- from aiohttp import ClientSession
25
- from PIL.Image import Resampling
26
- from PIL.Image import open as image_open
27
- from pydantic import BaseModel
28
-
29
- from .imghdr import what
30
-
31
- if TYPE_CHECKING:
32
- from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
33
-
34
- logger = getLogger(__name__)
35
- ImageFormat: TypeAlias = Literal["jpeg", "png", "gif", "webp", "bmp"]
36
- ExtendedImageFormat: TypeAlias = ImageFormat | Literal["jpg", "JPG"] | Literal["JPEG", "PNG", "GIF", "WEBP", "BMP"]
37
-
38
- ALLOWED_IMAGE_FORMATS: tuple[ImageFormat, ...] = get_args(ImageFormat)
39
-
40
-
41
- class ImageProcessingConfig(TypedDict):
42
- """
43
- 이미지 필터링/변환 시 사용할 설정.
44
- - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
45
- - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
46
- - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
47
- - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
48
- - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
49
- """
50
-
51
- formats: Sequence[ImageFormat]
52
- max_size_mb: NotRequired[float]
53
- min_largest_side: NotRequired[int]
54
- resize_if_min_side_exceeds: NotRequired[int]
55
- resize_target_for_min_side: NotRequired[int]
56
-
57
-
58
- def get_default_image_processing_config() -> ImageProcessingConfig:
59
- return {
60
- "max_size_mb": 5,
61
- "min_largest_side": 200,
62
- "resize_if_min_side_exceeds": 2000,
63
- "resize_target_for_min_side": 1000,
64
- "formats": ["png", "jpeg", "gif", "bmp", "webp"],
65
- }
66
-
67
-
68
- class Base64Image(BaseModel):
69
- ext: ImageFormat
70
- data: str
71
-
72
- IMAGE_TYPES: ClassVar[tuple[str, ...]] = ALLOWED_IMAGE_FORMATS
73
- IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
74
- r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
75
- )
76
-
77
- def __hash__(self) -> int:
78
- return hash((self.ext, self.data))
79
-
80
- @classmethod
81
- def new(
82
- cls,
83
- url_or_path_or_bytes: str | bytes,
84
- *,
85
- headers: dict[str, str] = {},
86
- config: ImageProcessingConfig = get_default_image_processing_config(),
87
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
88
- ) -> Self:
89
- if isinstance(url_or_path_or_bytes, bytes):
90
- ext = what(url_or_path_or_bytes)
91
- if ext is None:
92
- raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
93
- if not cls._verify_ext(ext, config["formats"]):
94
- raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
95
- return cls.from_bytes(url_or_path_or_bytes, ext=ext)
96
- elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
97
- return maybe_base64
98
- elif maybe_url_or_path := cls.from_url_or_path(
99
- url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
100
- ):
101
- return maybe_url_or_path
102
- else:
103
- raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
104
-
105
- @classmethod
106
- async def anew(
107
- cls,
108
- url_or_path_or_bytes: str | bytes,
109
- *,
110
- headers: dict[str, str] = {},
111
- config: ImageProcessingConfig = get_default_image_processing_config(),
112
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
113
- ) -> Self:
114
- if isinstance(url_or_path_or_bytes, bytes):
115
- ext = what(url_or_path_or_bytes)
116
- if ext is None:
117
- raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
118
- if not cls._verify_ext(ext, config["formats"]):
119
- raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
120
- return cls.from_bytes(url_or_path_or_bytes, ext=ext)
121
- elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
122
- return maybe_base64
123
- elif maybe_url_or_path := await cls.afrom_url_or_path(
124
- url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
125
- ):
126
- return maybe_url_or_path
127
- else:
128
- raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
129
-
130
- @classmethod
131
- def from_string(cls, data: str) -> Optional[Self]:
132
- match = cls.IMAGE_PATTERN.fullmatch(data)
133
- if not match:
134
- return None
135
- return cls(ext=_to_image_format(match.group(1)), data=match.group(2))
136
-
137
- @classmethod
138
- def from_bytes(cls, data: bytes, ext: ExtendedImageFormat) -> Self:
139
- return cls(ext=_to_image_format(ext), data=b64encode(data).decode("utf-8"))
140
-
141
- @classmethod
142
- def from_url_or_path(
143
- cls,
144
- url_or_path: str,
145
- *,
146
- headers: dict[str, str] = {},
147
- config: ImageProcessingConfig = get_default_image_processing_config(),
148
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
149
- ) -> Optional[Self]:
150
- """Return a Base64Image instance from a URL or local file path."""
151
- if maybe_base64 := cls.from_string(url_or_path):
152
- return maybe_base64
153
- elif is_remote_url(url_or_path):
154
- if img_bytes_fetcher:
155
- img_bytes = img_bytes_fetcher(url_or_path, headers)
156
- else:
157
- img_bytes = cls._fetch_remote_image(url_or_path, headers)
158
- if not img_bytes:
159
- return None
160
- return cls._convert_image_into_base64(img_bytes, config)
161
- try:
162
- return cls._process_local_image(Path(url_or_path), config)
163
- except Exception:
164
- return None
165
-
166
- @classmethod
167
- async def afrom_url_or_path(
168
- cls,
169
- url_or_path: str,
170
- *,
171
- headers: dict[str, str] = {},
172
- config: ImageProcessingConfig = get_default_image_processing_config(),
173
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
174
- ) -> Optional[Self]:
175
- """Return a Base64Image instance from a URL or local file path."""
176
- if maybe_base64 := cls.from_string(url_or_path):
177
- return maybe_base64
178
- elif is_remote_url(url_or_path):
179
- if img_bytes_fetcher:
180
- img_bytes = await img_bytes_fetcher(url_or_path, headers)
181
- else:
182
- img_bytes = await cls._afetch_remote_image(url_or_path, headers)
183
- if not img_bytes:
184
- return None
185
- return cls._convert_image_into_base64(img_bytes, config)
186
- try:
187
- return cls._process_local_image(Path(url_or_path), config)
188
- except Exception:
189
- return None
190
-
191
- @property
192
- def data_uri(self) -> str:
193
- return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"
194
-
195
- @property
196
- def data_uri_content(self) -> "ChatCompletionContentPartImageParam":
197
- return {"type": "image_url", "image_url": {"url": self.data_uri}}
198
-
199
- @property
200
- def data_uri_content_dict(self) -> dict[str, object]:
201
- return {"type": "image_url", "image_url": {"url": self.data_uri}}
202
-
203
- @staticmethod
204
- def _verify_ext(ext: str, allowed_types: Sequence[ImageFormat]) -> TypeGuard[ImageFormat]:
205
- return ext in allowed_types
206
-
207
- @classmethod
208
- def _fetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
209
- try:
210
- with requests.Session() as session:
211
- response = session.get(url.strip(), headers={k: str(v) for k, v in headers.items()})
212
- response.raise_for_status()
213
- image_bytes = bytes(response.content or b"")
214
- if not image_bytes:
215
- return b""
216
- return image_bytes
217
- except Exception:
218
- return b""
219
-
220
- @classmethod
221
- async def _afetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
222
- try:
223
- async with ClientSession() as session:
224
- async with session.get(url.strip(), headers={k: str(v) for k, v in headers.items()}) as response:
225
- response.raise_for_status()
226
- return await response.read()
227
- except Exception:
228
- return b""
229
-
230
- @classmethod
231
- def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
232
- """
233
- Retrieve an image in bytes and return a base64-encoded data URL,
234
- applying dynamic rules from 'config'.
235
- """
236
-
237
- if not config:
238
- # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
239
- return cls._simple_base64_encode(image_data)
240
-
241
- # 1) 용량 검사
242
- max_size_mb = config.get("max_size_mb", float("inf"))
243
- image_size_mb = len(image_data) / (1024 * 1024)
244
- if image_size_mb > max_size_mb:
245
- logger.error(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
246
- return None
247
-
248
- # 2) Pillow로 이미지 열기
249
- try:
250
- with image_open(BytesIO(image_data)) as im:
251
- w, h = im.size
252
- # 가장 큰 변
253
- largest_side = max(w, h)
254
- # 가장 작은 변
255
- smallest_side = min(w, h)
256
-
257
- # min_largest_side 기준
258
- min_largest_side = config.get("min_largest_side", 1)
259
- if largest_side < min_largest_side:
260
- logger.error(f"Image too small: {largest_side} < {min_largest_side}")
261
- return None
262
-
263
- # resize 로직
264
- resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
265
- if smallest_side >= resize_if_min_side_exceeds:
266
- # resize_target_for_min_side 로 축소
267
- resize_target = config.get("resize_target_for_min_side", 1000)
268
- ratio = resize_target / float(smallest_side)
269
- new_w = int(w * ratio)
270
- new_h = int(h * ratio)
271
- im = im.resize((new_w, new_h), Resampling.LANCZOS)
272
-
273
- # 포맷 제한
274
- # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
275
- pil_format: str = (im.format or "").lower()
276
- allowed_formats: Sequence[ImageFormat] = config.get("formats", [])
277
- if not cls._verify_ext(pil_format, allowed_formats):
278
- logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
279
- return None
280
-
281
- # 다시 bytes 로 저장
282
- output_buffer = BytesIO()
283
- im.save(output_buffer, format=pil_format.upper()) # PIL에 맞춰서 대문자로
284
- output_buffer.seek(0)
285
- final_bytes = output_buffer.read()
286
-
287
- except Exception:
288
- return None
289
-
290
- # 최종 base64 인코딩
291
- encoded_data = b64encode(final_bytes).decode("utf-8")
292
- return cls(ext=pil_format, data=encoded_data)
293
-
294
- @classmethod
295
- def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
296
- """
297
- Retrieve an image URL and return a base64-encoded data URL.
298
- """
299
- ext = detect_image_type(image_data)
300
- if not ext:
301
- return
302
- return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
303
-
304
- @classmethod
305
- def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
306
- """로컬 파일이 존재하고 유효한 이미지 포맷이면 Base64 데이터 URL을 반환, 아니면 None."""
307
- if not path.is_file():
308
- return None
309
- ext = path.suffix.lower().removeprefix(".")
310
- if not cls._verify_ext(ext, config["formats"]):
311
- return None
312
- return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
313
-
314
-
315
- def _to_image_format(ext: str) -> ImageFormat:
316
- lowered = ext.lower()
317
- if lowered in ALLOWED_IMAGE_FORMATS:
318
- return lowered
319
- elif lowered == "jpg":
320
- return "jpeg" # jpg -> jpeg
321
- else:
322
- raise ValueError(f"Invalid image format: {ext}")
323
-
324
-
325
- def is_remote_url(path: str) -> bool:
326
- parsed = urlparse(path)
327
- return bool(parsed.scheme and parsed.netloc)
328
-
329
-
330
- def detect_image_type(image_data: bytes) -> Optional[ImageFormat]:
331
- """
332
- Detect the image format based on the image binary signature (header).
333
- Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
334
- If the format is not recognized, return None.
335
- """
336
- # JPEG: 시작 바이트가 FF D8 FF
337
- if image_data.startswith(b"\xff\xd8\xff"):
338
- return "jpeg"
339
- # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
340
- elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
341
- return "png"
342
- # GIF: 시작 바이트가 GIF87a 또는 GIF89a
343
- elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
344
- return "gif"
345
- # WEBP: 시작 바이트가 RIFF....WEBP
346
- elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
347
- return "webp"
348
- # BMP: 시작 바이트가 BM
349
- elif image_data.startswith(b"BM"):
350
- return "bmp"
1
+ import re
2
+ from base64 import b64encode
3
+ from io import BytesIO
4
+ from pathlib import Path
5
+ from typing import (
6
+ TYPE_CHECKING,
7
+ Awaitable,
8
+ Callable,
9
+ ClassVar,
10
+ Literal,
11
+ NotRequired,
12
+ Optional,
13
+ Self,
14
+ Sequence,
15
+ TypeAlias,
16
+ TypedDict,
17
+ TypeGuard,
18
+ get_args,
19
+ )
20
+ from urllib.parse import urlparse
21
+
22
+ import requests
23
+ from aiohttp import ClientSession
24
+ from loguru import logger
25
+ from PIL.Image import Resampling
26
+ from PIL.Image import open as image_open
27
+ from pydantic import BaseModel
28
+
29
+ from .imghdr import what
30
+
31
+ if TYPE_CHECKING:
32
+ from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
33
+
34
+ ImageFormat: TypeAlias = Literal["jpeg", "png", "gif", "webp", "bmp"]
35
+ ExtendedImageFormat: TypeAlias = ImageFormat | Literal["jpg", "JPG"] | Literal["JPEG", "PNG", "GIF", "WEBP", "BMP"]
36
+
37
+ ALLOWED_IMAGE_FORMATS: tuple[ImageFormat, ...] = get_args(ImageFormat)
38
+
39
+
40
+ class ImageProcessingConfig(TypedDict):
41
+ """
42
+ 이미지 필터링/변환 시 사용할 설정.
43
+ - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
44
+ - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
45
+ - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
46
+ - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
47
+ - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
48
+ """
49
+
50
+ formats: Sequence[ImageFormat]
51
+ max_size_mb: NotRequired[float]
52
+ min_largest_side: NotRequired[int]
53
+ resize_if_min_side_exceeds: NotRequired[int]
54
+ resize_target_for_min_side: NotRequired[int]
55
+
56
+
57
+ def get_default_image_processing_config() -> ImageProcessingConfig:
58
+ return {
59
+ "max_size_mb": 5,
60
+ "min_largest_side": 200,
61
+ "resize_if_min_side_exceeds": 2000,
62
+ "resize_target_for_min_side": 1000,
63
+ "formats": ["png", "jpeg", "gif", "bmp", "webp"],
64
+ }
65
+
66
+
67
+ class Base64Image(BaseModel):
68
+ ext: ImageFormat
69
+ data: str
70
+
71
+ IMAGE_TYPES: ClassVar[tuple[str, ...]] = ALLOWED_IMAGE_FORMATS
72
+ IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
73
+ r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
74
+ )
75
+
76
+ def __hash__(self) -> int:
77
+ return hash((self.ext, self.data))
78
+
79
+ @classmethod
80
+ def new(
81
+ cls,
82
+ url_or_path_or_bytes: str | bytes,
83
+ *,
84
+ headers: dict[str, str] = {},
85
+ config: ImageProcessingConfig = get_default_image_processing_config(),
86
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
87
+ ) -> Self:
88
+ if isinstance(url_or_path_or_bytes, bytes):
89
+ ext = what(url_or_path_or_bytes)
90
+ if ext is None:
91
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
92
+ if not cls._verify_ext(ext, config["formats"]):
93
+ raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
94
+ return cls.from_bytes(url_or_path_or_bytes, ext=ext)
95
+ elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
96
+ return maybe_base64
97
+ elif maybe_url_or_path := cls.from_url_or_path(
98
+ url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
99
+ ):
100
+ return maybe_url_or_path
101
+ else:
102
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
103
+
104
+ @classmethod
105
+ async def anew(
106
+ cls,
107
+ url_or_path_or_bytes: str | bytes,
108
+ *,
109
+ headers: dict[str, str] = {},
110
+ config: ImageProcessingConfig = get_default_image_processing_config(),
111
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
112
+ ) -> Self:
113
+ if isinstance(url_or_path_or_bytes, bytes):
114
+ ext = what(url_or_path_or_bytes)
115
+ if ext is None:
116
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes[:8]} ...")
117
+ if not cls._verify_ext(ext, config["formats"]):
118
+ raise ValueError(f"Invalid image format: {ext} not in {config['formats']}")
119
+ return cls.from_bytes(url_or_path_or_bytes, ext=ext)
120
+ elif maybe_base64 := cls.from_string(url_or_path_or_bytes):
121
+ return maybe_base64
122
+ elif maybe_url_or_path := await cls.afrom_url_or_path(
123
+ url_or_path_or_bytes, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
124
+ ):
125
+ return maybe_url_or_path
126
+ else:
127
+ raise ValueError(f"Invalid image format: {url_or_path_or_bytes}")
128
+
129
+ @classmethod
130
+ def from_string(cls, data: str) -> Optional[Self]:
131
+ match = cls.IMAGE_PATTERN.fullmatch(data)
132
+ if not match:
133
+ return None
134
+ return cls(ext=_to_image_format(match.group(1)), data=match.group(2))
135
+
136
+ @classmethod
137
+ def from_bytes(cls, data: bytes, ext: ExtendedImageFormat) -> Self:
138
+ return cls(ext=_to_image_format(ext), data=b64encode(data).decode("utf-8"))
139
+
140
+ @classmethod
141
+ def from_url_or_path(
142
+ cls,
143
+ url_or_path: str,
144
+ *,
145
+ headers: dict[str, str] = {},
146
+ config: ImageProcessingConfig = get_default_image_processing_config(),
147
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
148
+ ) -> Optional[Self]:
149
+ """Return a Base64Image instance from a URL or local file path."""
150
+ if maybe_base64 := cls.from_string(url_or_path):
151
+ return maybe_base64
152
+ elif is_remote_url(url_or_path):
153
+ if img_bytes_fetcher:
154
+ img_bytes = img_bytes_fetcher(url_or_path, headers)
155
+ else:
156
+ img_bytes = cls._fetch_remote_image(url_or_path, headers)
157
+ if not img_bytes:
158
+ return None
159
+ return cls._convert_image_into_base64(img_bytes, config)
160
+ try:
161
+ return cls._process_local_image(Path(url_or_path), config)
162
+ except Exception:
163
+ return None
164
+
165
+ @classmethod
166
+ async def afrom_url_or_path(
167
+ cls,
168
+ url_or_path: str,
169
+ *,
170
+ headers: dict[str, str] = {},
171
+ config: ImageProcessingConfig = get_default_image_processing_config(),
172
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
173
+ ) -> Optional[Self]:
174
+ """Return a Base64Image instance from a URL or local file path."""
175
+ if maybe_base64 := cls.from_string(url_or_path):
176
+ return maybe_base64
177
+ elif is_remote_url(url_or_path):
178
+ if img_bytes_fetcher:
179
+ img_bytes = await img_bytes_fetcher(url_or_path, headers)
180
+ else:
181
+ img_bytes = await cls._afetch_remote_image(url_or_path, headers)
182
+ if not img_bytes:
183
+ return None
184
+ return cls._convert_image_into_base64(img_bytes, config)
185
+ try:
186
+ return cls._process_local_image(Path(url_or_path), config)
187
+ except Exception:
188
+ return None
189
+
190
+ @property
191
+ def data_uri(self) -> str:
192
+ return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"
193
+
194
+ @property
195
+ def data_uri_content(self) -> "ChatCompletionContentPartImageParam":
196
+ return {"type": "image_url", "image_url": {"url": self.data_uri}}
197
+
198
+ @property
199
+ def data_uri_content_dict(self) -> dict[str, object]:
200
+ return {"type": "image_url", "image_url": {"url": self.data_uri}}
201
+
202
+ @staticmethod
203
+ def _verify_ext(ext: str, allowed_types: Sequence[ImageFormat]) -> TypeGuard[ImageFormat]:
204
+ return ext in allowed_types
205
+
206
+ @classmethod
207
+ def _fetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
208
+ try:
209
+ with requests.Session() as session:
210
+ response = session.get(url.strip(), headers={k: str(v) for k, v in headers.items()})
211
+ response.raise_for_status()
212
+ image_bytes = bytes(response.content or b"")
213
+ if not image_bytes:
214
+ return b""
215
+ return image_bytes
216
+ except Exception:
217
+ return b""
218
+
219
+ @classmethod
220
+ async def _afetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
221
+ try:
222
+ async with ClientSession() as session:
223
+ async with session.get(url.strip(), headers={k: str(v) for k, v in headers.items()}) as response:
224
+ response.raise_for_status()
225
+ return await response.read()
226
+ except Exception:
227
+ return b""
228
+
229
+ @classmethod
230
+ def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
231
+ """
232
+ Retrieve an image in bytes and return a base64-encoded data URL,
233
+ applying dynamic rules from 'config'.
234
+ """
235
+
236
+ if not config:
237
+ # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
238
+ return cls._simple_base64_encode(image_data)
239
+
240
+ # 1) 용량 검사
241
+ max_size_mb = config.get("max_size_mb", float("inf"))
242
+ image_size_mb = len(image_data) / (1024 * 1024)
243
+ if image_size_mb > max_size_mb:
244
+ logger.error(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
245
+ return None
246
+
247
+ # 2) Pillow로 이미지 열기
248
+ try:
249
+ with image_open(BytesIO(image_data)) as im:
250
+ w, h = im.size
251
+ # 가장 큰 변
252
+ largest_side = max(w, h)
253
+ # 가장 작은 변
254
+ smallest_side = min(w, h)
255
+
256
+ # min_largest_side 기준
257
+ min_largest_side = config.get("min_largest_side", 1)
258
+ if largest_side < min_largest_side:
259
+ logger.error(f"Image too small: {largest_side} < {min_largest_side}")
260
+ return None
261
+
262
+ # resize 로직
263
+ resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
264
+ if smallest_side >= resize_if_min_side_exceeds:
265
+ # resize_target_for_min_side 로 축소
266
+ resize_target = config.get("resize_target_for_min_side", 1000)
267
+ ratio = resize_target / float(smallest_side)
268
+ new_w = int(w * ratio)
269
+ new_h = int(h * ratio)
270
+ im = im.resize((new_w, new_h), Resampling.LANCZOS)
271
+
272
+ # 포맷 제한
273
+ # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
274
+ pil_format: str = (im.format or "").lower()
275
+ allowed_formats: Sequence[ImageFormat] = config.get("formats", [])
276
+ if not cls._verify_ext(pil_format, allowed_formats):
277
+ logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
278
+ return None
279
+
280
+ # 다시 bytes 로 저장
281
+ output_buffer = BytesIO()
282
+ im.save(output_buffer, format=pil_format.upper()) # PIL에 맞춰서 대문자로
283
+ output_buffer.seek(0)
284
+ final_bytes = output_buffer.read()
285
+
286
+ except Exception:
287
+ return None
288
+
289
+ # 최종 base64 인코딩
290
+ encoded_data = b64encode(final_bytes).decode("utf-8")
291
+ return cls(ext=pil_format, data=encoded_data)
292
+
293
+ @classmethod
294
+ def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
295
+ """
296
+ Retrieve an image URL and return a base64-encoded data URL.
297
+ """
298
+ ext = detect_image_type(image_data)
299
+ if not ext:
300
+ return
301
+ return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
302
+
303
+ @classmethod
304
+ def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
305
+ """로컬 파일이 존재하고 유효한 이미지 포맷이면 Base64 데이터 URL을 반환, 아니면 None."""
306
+ if not path.is_file():
307
+ return None
308
+ ext = path.suffix.lower().removeprefix(".")
309
+ if not cls._verify_ext(ext, config["formats"]):
310
+ return None
311
+ return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
312
+
313
+
314
+ def _to_image_format(ext: str) -> ImageFormat:
315
+ lowered = ext.lower()
316
+ if lowered in ALLOWED_IMAGE_FORMATS:
317
+ return lowered
318
+ elif lowered == "jpg":
319
+ return "jpeg" # jpg -> jpeg
320
+ else:
321
+ raise ValueError(f"Invalid image format: {ext}")
322
+
323
+
324
+ def is_remote_url(path: str) -> bool:
325
+ parsed = urlparse(path)
326
+ return bool(parsed.scheme and parsed.netloc)
327
+
328
+
329
+ def detect_image_type(image_data: bytes) -> Optional[ImageFormat]:
330
+ """
331
+ Detect the image format based on the image binary signature (header).
332
+ Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
333
+ If the format is not recognized, return None.
334
+ """
335
+ # JPEG: 시작 바이트가 FF D8 FF
336
+ if image_data.startswith(b"\xff\xd8\xff"):
337
+ return "jpeg"
338
+ # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
339
+ elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
340
+ return "png"
341
+ # GIF: 시작 바이트가 GIF87a 또는 GIF89a
342
+ elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
343
+ return "gif"
344
+ # WEBP: 시작 바이트가 RIFF....WEBP
345
+ elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
346
+ return "webp"
347
+ # BMP: 시작 바이트가 BM
348
+ elif image_data.startswith(b"BM"):
349
+ return "bmp"