chatterer 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. chatterer/__init__.py +97 -93
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/examples/__main__.py +75 -0
  5. chatterer/examples/{anything_to_markdown.py → any2md.py} +85 -85
  6. chatterer/examples/{pdf_to_markdown.py → pdf2md.py} +338 -338
  7. chatterer/examples/{pdf_to_text.py → pdf2txt.py} +54 -54
  8. chatterer/examples/{make_ppt.py → ppt.py} +486 -488
  9. chatterer/examples/pw.py +143 -0
  10. chatterer/examples/{get_code_snippets.py → snippet.py} +56 -55
  11. chatterer/examples/transcribe.py +192 -0
  12. chatterer/examples/{upstage_parser.py → upstage.py} +89 -89
  13. chatterer/examples/{webpage_to_markdown.py → web2md.py} +80 -70
  14. chatterer/interactive.py +354 -354
  15. chatterer/language_model.py +536 -536
  16. chatterer/messages.py +21 -21
  17. chatterer/strategies/__init__.py +13 -13
  18. chatterer/strategies/atom_of_thoughts.py +975 -975
  19. chatterer/strategies/base.py +14 -14
  20. chatterer/tools/__init__.py +46 -46
  21. chatterer/tools/caption_markdown_images.py +384 -384
  22. chatterer/tools/citation_chunking/__init__.py +3 -3
  23. chatterer/tools/citation_chunking/chunks.py +53 -53
  24. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  25. chatterer/tools/citation_chunking/citations.py +285 -285
  26. chatterer/tools/citation_chunking/prompt.py +157 -157
  27. chatterer/tools/citation_chunking/reference.py +26 -26
  28. chatterer/tools/citation_chunking/utils.py +138 -138
  29. chatterer/tools/convert_pdf_to_markdown.py +645 -625
  30. chatterer/tools/convert_to_text.py +446 -446
  31. chatterer/tools/upstage_document_parser.py +705 -705
  32. chatterer/tools/webpage_to_markdown.py +739 -739
  33. chatterer/tools/youtube.py +146 -146
  34. chatterer/utils/__init__.py +15 -15
  35. chatterer/utils/base64_image.py +293 -285
  36. chatterer/utils/bytesio.py +59 -59
  37. chatterer/utils/code_agent.py +237 -237
  38. chatterer/utils/imghdr.py +148 -148
  39. {chatterer-0.1.23.dist-info → chatterer-0.1.25.dist-info}/METADATA +390 -392
  40. chatterer-0.1.25.dist-info/RECORD +45 -0
  41. chatterer-0.1.25.dist-info/entry_points.txt +2 -0
  42. chatterer/examples/login_with_playwright.py +0 -156
  43. chatterer/examples/transcription_api.py +0 -112
  44. chatterer-0.1.23.dist-info/RECORD +0 -44
  45. chatterer-0.1.23.dist-info/entry_points.txt +0 -10
  46. {chatterer-0.1.23.dist-info → chatterer-0.1.25.dist-info}/WHEEL +0 -0
  47. {chatterer-0.1.23.dist-info → chatterer-0.1.25.dist-info}/top_level.txt +0 -0
@@ -1,285 +1,293 @@
1
- from __future__ import annotations
2
-
3
- import re
4
- from base64 import b64encode
5
- from io import BytesIO
6
- from logging import getLogger
7
- from pathlib import Path
8
- from typing import (
9
- Awaitable,
10
- Callable,
11
- ClassVar,
12
- Literal,
13
- NotRequired,
14
- Optional,
15
- Self,
16
- Sequence,
17
- TypeAlias,
18
- TypedDict,
19
- TypeGuard,
20
- cast,
21
- get_args,
22
- )
23
- from urllib.parse import urlparse
24
-
25
- import requests
26
- from aiohttp import ClientSession
27
- from PIL.Image import Resampling
28
- from PIL.Image import open as image_open
29
- from pydantic import BaseModel
30
-
31
- logger = getLogger(__name__)
32
- ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
33
-
34
-
35
- class ImageProcessingConfig(TypedDict):
36
- """
37
- 이미지 필터링/변환 시 사용할 설정.
38
- - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
39
- - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
40
- - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
41
- - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
42
- - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
43
- """
44
-
45
- formats: Sequence[ImageType]
46
- max_size_mb: NotRequired[float]
47
- min_largest_side: NotRequired[int]
48
- resize_if_min_side_exceeds: NotRequired[int]
49
- resize_target_for_min_side: NotRequired[int]
50
-
51
-
52
- def get_default_image_processing_config() -> ImageProcessingConfig:
53
- return {
54
- "max_size_mb": 5,
55
- "min_largest_side": 200,
56
- "resize_if_min_side_exceeds": 2000,
57
- "resize_target_for_min_side": 1000,
58
- "formats": ["png", "jpeg", "jpg", "gif", "bmp", "webp"],
59
- }
60
-
61
-
62
- # image_url: str, headers: dict[str, str]) -> Optional[bytes]:
63
- class Base64Image(BaseModel):
64
- ext: ImageType
65
- data: str
66
-
67
- IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
68
- IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
69
- r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
70
- )
71
-
72
- def __hash__(self) -> int:
73
- return hash((self.ext, self.data))
74
-
75
- def model_post_init(self, __context: object) -> None:
76
- if self.ext == "jpg":
77
- self.ext = "jpeg"
78
-
79
- @classmethod
80
- def from_string(cls, data: str) -> Optional[Self]:
81
- match = cls.IMAGE_PATTERN.fullmatch(data)
82
- if not match:
83
- return None
84
- return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))
85
-
86
- @classmethod
87
- def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
88
- return cls(ext=ext, data=b64encode(data).decode("utf-8"))
89
-
90
- @classmethod
91
- def from_url_or_path(
92
- cls,
93
- url_or_path: str,
94
- *,
95
- headers: dict[str, str] = {},
96
- config: ImageProcessingConfig = get_default_image_processing_config(),
97
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
98
- ) -> Optional[Self]:
99
- """Return a Base64Image instance from a URL or local file path."""
100
- if maybe_base64 := cls.from_string(url_or_path):
101
- return maybe_base64
102
- elif is_remote_url(url_or_path):
103
- if img_bytes_fetcher:
104
- img_bytes = img_bytes_fetcher(url_or_path, headers)
105
- else:
106
- img_bytes = cls._fetch_remote_image(url_or_path, headers)
107
- if not img_bytes:
108
- return None
109
- return cls._convert_image_into_base64(img_bytes, config)
110
- try:
111
- return cls._process_local_image(Path(url_or_path), config)
112
- except Exception:
113
- return None
114
-
115
- @classmethod
116
- async def afrom_url_or_path(
117
- cls,
118
- url_or_path: str,
119
- *,
120
- headers: dict[str, str] = {},
121
- config: ImageProcessingConfig = get_default_image_processing_config(),
122
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
123
- ) -> Optional[Self]:
124
- """Return a Base64Image instance from a URL or local file path."""
125
- if maybe_base64 := cls.from_string(url_or_path):
126
- return maybe_base64
127
- elif is_remote_url(url_or_path):
128
- if img_bytes_fetcher:
129
- img_bytes = await img_bytes_fetcher(url_or_path, headers)
130
- else:
131
- img_bytes = await cls._afetch_remote_image(url_or_path, headers)
132
- if not img_bytes:
133
- return None
134
- return cls._convert_image_into_base64(img_bytes, config)
135
- try:
136
- return cls._process_local_image(Path(url_or_path), config)
137
- except Exception:
138
- return None
139
-
140
- @property
141
- def data_uri(self) -> str:
142
- return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"
143
-
144
- @property
145
- def data_uri_content(self) -> dict[Literal["type", "image_url"], Literal["image_url"] | dict[Literal["url"], str]]:
146
- return {"type": "image_url", "image_url": {"url": self.data_uri}}
147
-
148
- @staticmethod
149
- def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
150
- return ext in allowed_types
151
-
152
- @classmethod
153
- def _fetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
154
- try:
155
- with requests.Session() as session:
156
- response = session.get(url.strip(), headers={k: str(v) for k, v in headers.items()})
157
- response.raise_for_status()
158
- image_bytes = bytes(response.content or b"")
159
- if not image_bytes:
160
- return b""
161
- return image_bytes
162
- except Exception:
163
- return b""
164
-
165
- @classmethod
166
- async def _afetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
167
- try:
168
- async with ClientSession() as session:
169
- async with session.get(url.strip(), headers={k: str(v) for k, v in headers.items()}) as response:
170
- response.raise_for_status()
171
- return await response.read()
172
- except Exception:
173
- return b""
174
-
175
- @classmethod
176
- def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
177
- """
178
- Retrieve an image in bytes and return a base64-encoded data URL,
179
- applying dynamic rules from 'config'.
180
- """
181
-
182
- if not config:
183
- # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
184
- return cls._simple_base64_encode(image_data)
185
-
186
- # 1) 용량 검사
187
- max_size_mb = config.get("max_size_mb", float("inf"))
188
- image_size_mb = len(image_data) / (1024 * 1024)
189
- if image_size_mb > max_size_mb:
190
- logger.error(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
191
- return None
192
-
193
- # 2) Pillow로 이미지 열기
194
- try:
195
- with image_open(BytesIO(image_data)) as im:
196
- w, h = im.size
197
- # 가장 큰 변
198
- largest_side = max(w, h)
199
- # 가장 작은 변
200
- smallest_side = min(w, h)
201
-
202
- # min_largest_side 기준
203
- min_largest_side = config.get("min_largest_side", 1)
204
- if largest_side < min_largest_side:
205
- logger.error(f"Image too small: {largest_side} < {min_largest_side}")
206
- return None
207
-
208
- # resize 로직
209
- resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
210
- if smallest_side >= resize_if_min_side_exceeds:
211
- # resize_target_for_min_side 축소
212
- resize_target = config.get("resize_target_for_min_side", 1000)
213
- ratio = resize_target / float(smallest_side)
214
- new_w = int(w * ratio)
215
- new_h = int(h * ratio)
216
- im = im.resize((new_w, new_h), Resampling.LANCZOS)
217
-
218
- # 포맷 제한
219
- # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
220
- pil_format: str = (im.format or "").lower()
221
- allowed_formats: Sequence[ImageType] = config.get("formats", [])
222
- if not cls._verify_ext(pil_format, allowed_formats):
223
- logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
224
- return None
225
-
226
- # 다시 bytes 로 저장
227
- output_buffer = BytesIO()
228
- im.save(output_buffer, format=pil_format.upper()) # PIL에 맞춰서 대문자로
229
- output_buffer.seek(0)
230
- final_bytes = output_buffer.read()
231
-
232
- except Exception:
233
- return None
234
-
235
- # 최종 base64 인코딩
236
- encoded_data = b64encode(final_bytes).decode("utf-8")
237
- return cls(ext=pil_format, data=encoded_data)
238
-
239
- @classmethod
240
- def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
241
- """
242
- Retrieve an image URL and return a base64-encoded data URL.
243
- """
244
- ext = detect_image_type(image_data)
245
- if not ext:
246
- return
247
- return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
248
-
249
- @classmethod
250
- def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
251
- """로컬 파일이 존재하고 유효한 이미지 포맷이면 Base64 데이터 URL을 반환, 아니면 None."""
252
- if not path.is_file():
253
- return None
254
- ext = path.suffix.lower().removeprefix(".")
255
- if not cls._verify_ext(ext, config["formats"]):
256
- return None
257
- return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
258
-
259
-
260
- def is_remote_url(path: str) -> bool:
261
- parsed = urlparse(path)
262
- return bool(parsed.scheme and parsed.netloc)
263
-
264
-
265
- def detect_image_type(image_data: bytes) -> Optional[ImageType]:
266
- """
267
- Detect the image format based on the image binary signature (header).
268
- Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
269
- If the format is not recognized, return None.
270
- """
271
- # JPEG: 시작 바이트가 FF D8 FF
272
- if image_data.startswith(b"\xff\xd8\xff"):
273
- return "jpeg"
274
- # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
275
- elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
276
- return "png"
277
- # GIF: 시작 바이트가 GIF87a 또는 GIF89a
278
- elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
279
- return "gif"
280
- # WEBP: 시작 바이트가 RIFF....WEBP
281
- elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
282
- return "webp"
283
- # BMP: 시작 바이트가 BM
284
- elif image_data.startswith(b"BM"):
285
- return "bmp"
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from base64 import b64encode
5
+ from io import BytesIO
6
+ from logging import getLogger
7
+ from pathlib import Path
8
+ from typing import (
9
+ TYPE_CHECKING,
10
+ Awaitable,
11
+ Callable,
12
+ ClassVar,
13
+ Literal,
14
+ NotRequired,
15
+ Optional,
16
+ Self,
17
+ Sequence,
18
+ TypeAlias,
19
+ TypedDict,
20
+ TypeGuard,
21
+ cast,
22
+ get_args,
23
+ )
24
+ from urllib.parse import urlparse
25
+
26
+ import requests
27
+ from aiohttp import ClientSession
28
+ from PIL.Image import Resampling
29
+ from PIL.Image import open as image_open
30
+ from pydantic import BaseModel
31
+
32
+ if TYPE_CHECKING:
33
+ from openai.types.chat.chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
34
+
35
+ logger = getLogger(__name__)
36
+ ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
37
+
38
+
39
+ class ImageProcessingConfig(TypedDict):
40
+ """
41
+ 이미지 필터링/변환 시 사용할 설정.
42
+ - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
43
+ - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
44
+ - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
45
+ - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
46
+ - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
47
+ """
48
+
49
+ formats: Sequence[ImageType]
50
+ max_size_mb: NotRequired[float]
51
+ min_largest_side: NotRequired[int]
52
+ resize_if_min_side_exceeds: NotRequired[int]
53
+ resize_target_for_min_side: NotRequired[int]
54
+
55
+
56
+ def get_default_image_processing_config() -> ImageProcessingConfig:
57
+ return {
58
+ "max_size_mb": 5,
59
+ "min_largest_side": 200,
60
+ "resize_if_min_side_exceeds": 2000,
61
+ "resize_target_for_min_side": 1000,
62
+ "formats": ["png", "jpeg", "jpg", "gif", "bmp", "webp"],
63
+ }
64
+
65
+
66
+ # image_url: str, headers: dict[str, str]) -> Optional[bytes]:
67
+ class Base64Image(BaseModel):
68
+ ext: ImageType
69
+ data: str
70
+
71
+ IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
72
+ IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
73
+ r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
74
+ )
75
+
76
+ def __hash__(self) -> int:
77
+ return hash((self.ext, self.data))
78
+
79
+ def model_post_init(self, __context: object) -> None:
80
+ if self.ext == "jpg":
81
+ self.ext = "jpeg"
82
+
83
+ @classmethod
84
+ def from_string(cls, data: str) -> Optional[Self]:
85
+ match = cls.IMAGE_PATTERN.fullmatch(data)
86
+ if not match:
87
+ return None
88
+ return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))
89
+
90
+ @classmethod
91
+ def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
92
+ return cls(ext=ext, data=b64encode(data).decode("utf-8"))
93
+
94
+ @classmethod
95
+ def from_url_or_path(
96
+ cls,
97
+ url_or_path: str,
98
+ *,
99
+ headers: dict[str, str] = {},
100
+ config: ImageProcessingConfig = get_default_image_processing_config(),
101
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
102
+ ) -> Optional[Self]:
103
+ """Return a Base64Image instance from a URL or local file path."""
104
+ if maybe_base64 := cls.from_string(url_or_path):
105
+ return maybe_base64
106
+ elif is_remote_url(url_or_path):
107
+ if img_bytes_fetcher:
108
+ img_bytes = img_bytes_fetcher(url_or_path, headers)
109
+ else:
110
+ img_bytes = cls._fetch_remote_image(url_or_path, headers)
111
+ if not img_bytes:
112
+ return None
113
+ return cls._convert_image_into_base64(img_bytes, config)
114
+ try:
115
+ return cls._process_local_image(Path(url_or_path), config)
116
+ except Exception:
117
+ return None
118
+
119
+ @classmethod
120
+ async def afrom_url_or_path(
121
+ cls,
122
+ url_or_path: str,
123
+ *,
124
+ headers: dict[str, str] = {},
125
+ config: ImageProcessingConfig = get_default_image_processing_config(),
126
+ img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
127
+ ) -> Optional[Self]:
128
+ """Return a Base64Image instance from a URL or local file path."""
129
+ if maybe_base64 := cls.from_string(url_or_path):
130
+ return maybe_base64
131
+ elif is_remote_url(url_or_path):
132
+ if img_bytes_fetcher:
133
+ img_bytes = await img_bytes_fetcher(url_or_path, headers)
134
+ else:
135
+ img_bytes = await cls._afetch_remote_image(url_or_path, headers)
136
+ if not img_bytes:
137
+ return None
138
+ return cls._convert_image_into_base64(img_bytes, config)
139
+ try:
140
+ return cls._process_local_image(Path(url_or_path), config)
141
+ except Exception:
142
+ return None
143
+
144
+ @property
145
+ def data_uri(self) -> str:
146
+ return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"
147
+
148
+ @property
149
+ def data_uri_content(self) -> "ChatCompletionContentPartImageParam":
150
+ return {"type": "image_url", "image_url": {"url": self.data_uri}}
151
+
152
+ @property
153
+ def data_uri_content_dict(self) -> dict[str, object]:
154
+ return {"type": "image_url", "image_url": {"url": self.data_uri}}
155
+
156
+ @staticmethod
157
+ def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
158
+ return ext in allowed_types
159
+
160
+ @classmethod
161
+ def _fetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
162
+ try:
163
+ with requests.Session() as session:
164
+ response = session.get(url.strip(), headers={k: str(v) for k, v in headers.items()})
165
+ response.raise_for_status()
166
+ image_bytes = bytes(response.content or b"")
167
+ if not image_bytes:
168
+ return b""
169
+ return image_bytes
170
+ except Exception:
171
+ return b""
172
+
173
+ @classmethod
174
+ async def _afetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
175
+ try:
176
+ async with ClientSession() as session:
177
+ async with session.get(url.strip(), headers={k: str(v) for k, v in headers.items()}) as response:
178
+ response.raise_for_status()
179
+ return await response.read()
180
+ except Exception:
181
+ return b""
182
+
183
+ @classmethod
184
+ def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
185
+ """
186
+ Retrieve an image in bytes and return a base64-encoded data URL,
187
+ applying dynamic rules from 'config'.
188
+ """
189
+
190
+ if not config:
191
+ # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
192
+ return cls._simple_base64_encode(image_data)
193
+
194
+ # 1) 용량 검사
195
+ max_size_mb = config.get("max_size_mb", float("inf"))
196
+ image_size_mb = len(image_data) / (1024 * 1024)
197
+ if image_size_mb > max_size_mb:
198
+ logger.error(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
199
+ return None
200
+
201
+ # 2) Pillow로 이미지 열기
202
+ try:
203
+ with image_open(BytesIO(image_data)) as im:
204
+ w, h = im.size
205
+ # 가장 큰 변
206
+ largest_side = max(w, h)
207
+ # 가장 작은 변
208
+ smallest_side = min(w, h)
209
+
210
+ # min_largest_side 기준
211
+ min_largest_side = config.get("min_largest_side", 1)
212
+ if largest_side < min_largest_side:
213
+ logger.error(f"Image too small: {largest_side} < {min_largest_side}")
214
+ return None
215
+
216
+ # resize 로직
217
+ resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
218
+ if smallest_side >= resize_if_min_side_exceeds:
219
+ # resize_target_for_min_side 축소
220
+ resize_target = config.get("resize_target_for_min_side", 1000)
221
+ ratio = resize_target / float(smallest_side)
222
+ new_w = int(w * ratio)
223
+ new_h = int(h * ratio)
224
+ im = im.resize((new_w, new_h), Resampling.LANCZOS)
225
+
226
+ # 포맷 제한
227
+ # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
228
+ pil_format: str = (im.format or "").lower()
229
+ allowed_formats: Sequence[ImageType] = config.get("formats", [])
230
+ if not cls._verify_ext(pil_format, allowed_formats):
231
+ logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
232
+ return None
233
+
234
+ # 다시 bytes 로 저장
235
+ output_buffer = BytesIO()
236
+ im.save(output_buffer, format=pil_format.upper()) # PIL에 맞춰서 대문자로
237
+ output_buffer.seek(0)
238
+ final_bytes = output_buffer.read()
239
+
240
+ except Exception:
241
+ return None
242
+
243
+ # 최종 base64 인코딩
244
+ encoded_data = b64encode(final_bytes).decode("utf-8")
245
+ return cls(ext=pil_format, data=encoded_data)
246
+
247
+ @classmethod
248
+ def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
249
+ """
250
+ Retrieve an image URL and return a base64-encoded data URL.
251
+ """
252
+ ext = detect_image_type(image_data)
253
+ if not ext:
254
+ return
255
+ return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
256
+
257
+ @classmethod
258
+ def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
259
+ """로컬 파일이 존재하고 유효한 이미지 포맷이면 Base64 데이터 URL을 반환, 아니면 None."""
260
+ if not path.is_file():
261
+ return None
262
+ ext = path.suffix.lower().removeprefix(".")
263
+ if not cls._verify_ext(ext, config["formats"]):
264
+ return None
265
+ return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
266
+
267
+
268
+ def is_remote_url(path: str) -> bool:
269
+ parsed = urlparse(path)
270
+ return bool(parsed.scheme and parsed.netloc)
271
+
272
+
273
+ def detect_image_type(image_data: bytes) -> Optional[ImageType]:
274
+ """
275
+ Detect the image format based on the image binary signature (header).
276
+ Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
277
+ If the format is not recognized, return None.
278
+ """
279
+ # JPEG: 시작 바이트가 FF D8 FF
280
+ if image_data.startswith(b"\xff\xd8\xff"):
281
+ return "jpeg"
282
+ # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
283
+ elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
284
+ return "png"
285
+ # GIF: 시작 바이트가 GIF87a 또는 GIF89a
286
+ elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
287
+ return "gif"
288
+ # WEBP: 시작 바이트가 RIFF....WEBP
289
+ elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
290
+ return "webp"
291
+ # BMP: 시작 바이트가 BM
292
+ elif image_data.startswith(b"BM"):
293
+ return "bmp"