chatterer 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. chatterer/__init__.py +62 -60
  2. chatterer/common_types/__init__.py +21 -0
  3. chatterer/common_types/io.py +19 -0
  4. chatterer/language_model.py +577 -577
  5. chatterer/messages.py +9 -9
  6. chatterer/strategies/__init__.py +13 -13
  7. chatterer/strategies/atom_of_thoughts.py +975 -975
  8. chatterer/strategies/base.py +14 -14
  9. chatterer/tools/__init__.py +35 -28
  10. chatterer/tools/citation_chunking/__init__.py +3 -3
  11. chatterer/tools/citation_chunking/chunks.py +53 -53
  12. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  13. chatterer/tools/citation_chunking/citations.py +285 -285
  14. chatterer/tools/citation_chunking/prompt.py +157 -157
  15. chatterer/tools/citation_chunking/reference.py +26 -26
  16. chatterer/tools/citation_chunking/utils.py +138 -138
  17. chatterer/tools/convert_to_text.py +418 -463
  18. chatterer/tools/upstage_document_parser.py +438 -0
  19. chatterer/tools/webpage_to_markdown/__init__.py +4 -4
  20. chatterer/tools/webpage_to_markdown/playwright_bot.py +649 -649
  21. chatterer/tools/webpage_to_markdown/utils.py +334 -334
  22. chatterer/tools/youtube.py +146 -146
  23. chatterer/utils/__init__.py +15 -15
  24. chatterer/utils/bytesio.py +59 -0
  25. chatterer/utils/code_agent.py +138 -138
  26. chatterer/utils/image.py +291 -291
  27. {chatterer-0.1.12.dist-info → chatterer-0.1.13.dist-info}/METADATA +171 -170
  28. chatterer-0.1.13.dist-info/RECORD +31 -0
  29. chatterer-0.1.12.dist-info/RECORD +0 -27
  30. {chatterer-0.1.12.dist-info → chatterer-0.1.13.dist-info}/WHEEL +0 -0
  31. {chatterer-0.1.12.dist-info → chatterer-0.1.13.dist-info}/top_level.txt +0 -0
chatterer/utils/image.py CHANGED
@@ -1,291 +1,291 @@
1
- from __future__ import annotations
2
-
3
- import re
4
- from base64 import b64encode
5
- from io import BytesIO
6
- from logging import getLogger
7
- from pathlib import Path
8
- from typing import (
9
- Awaitable,
10
- ClassVar,
11
- Literal,
12
- NotRequired,
13
- Optional,
14
- Self,
15
- Sequence,
16
- TypeAlias,
17
- TypedDict,
18
- TypeGuard,
19
- cast,
20
- get_args,
21
- overload,
22
- )
23
- from urllib.parse import urlparse
24
-
25
- import requests
26
- from aiohttp import ClientSession
27
- from PIL.Image import Resampling
28
- from PIL.Image import open as image_open
29
- from pydantic import BaseModel
30
-
31
- logger = getLogger(__name__)
32
- ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
33
-
34
-
35
- class ImageProcessingConfig(TypedDict):
36
- """
37
- 이미지 필터링/변환 시 사용할 설정.
38
- - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
39
- - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
40
- - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
41
- - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
42
- - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
43
- """
44
-
45
- formats: Sequence[ImageType]
46
- max_size_mb: NotRequired[float]
47
- min_largest_side: NotRequired[int]
48
- resize_if_min_side_exceeds: NotRequired[int]
49
- resize_target_for_min_side: NotRequired[int]
50
-
51
-
52
- def get_default_image_processing_config() -> ImageProcessingConfig:
53
- return {
54
- "max_size_mb": 5,
55
- "min_largest_side": 200,
56
- "resize_if_min_side_exceeds": 2000,
57
- "resize_target_for_min_side": 1000,
58
- "formats": ["png", "jpeg", "gif", "bmp", "webp"],
59
- }
60
-
61
-
62
- class Base64Image(BaseModel):
63
- ext: ImageType
64
- data: str
65
-
66
- IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
67
- IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
68
- r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
69
- )
70
-
71
- def __hash__(self) -> int:
72
- return hash((self.ext, self.data))
73
-
74
- def model_post_init(self, __context: object) -> None:
75
- if self.ext == "jpg":
76
- self.ext = "jpeg"
77
-
78
- @classmethod
79
- def from_string(cls, data: str) -> Optional[Self]:
80
- match = cls.IMAGE_PATTERN.fullmatch(data)
81
- if not match:
82
- return None
83
- return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))
84
-
85
- @classmethod
86
- def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
87
- return cls(ext=ext, data=b64encode(data).decode("utf-8"))
88
-
89
- @overload
90
- @classmethod
91
- def from_url_or_path(
92
- cls,
93
- url_or_path: str,
94
- *,
95
- headers: dict[str, str] = ...,
96
- config: ImageProcessingConfig = ...,
97
- return_coro: Literal[True],
98
- ) -> Awaitable[Optional[Self]]: ...
99
-
100
- @overload
101
- @classmethod
102
- def from_url_or_path(
103
- cls,
104
- url_or_path: str,
105
- *,
106
- headers: dict[str, str] = ...,
107
- config: ImageProcessingConfig = ...,
108
- return_coro: Literal[False] = False,
109
- ) -> Optional[Self]: ...
110
-
111
- @classmethod
112
- def from_url_or_path(
113
- cls,
114
- url_or_path: str,
115
- *,
116
- headers: dict[str, str] = {},
117
- config: ImageProcessingConfig = get_default_image_processing_config(),
118
- return_coro: bool = False,
119
- ) -> Optional[Self] | Awaitable[Optional[Self]]:
120
- """Return a Base64Image instance from a URL or local file path."""
121
- if maybe_base64 := cls.from_string(url_or_path):
122
- return maybe_base64
123
- elif _is_remote_url(url_or_path):
124
- if return_coro:
125
- return cls._afetch_remote_image(url_or_path, headers, config)
126
- return cls._fetch_remote_image(url_or_path, headers, config)
127
- try:
128
- return cls._process_local_image(Path(url_or_path), config)
129
- except Exception:
130
- return None
131
-
132
- @property
133
- def data_uri(self) -> str:
134
- return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"
135
-
136
- @property
137
- def data_uri_content(self) -> dict[Literal["type", "image_url"], Literal["image_url"] | dict[Literal["url"], str]]:
138
- return {"type": "image_url", "image_url": {"url": self.data_uri}}
139
-
140
- @staticmethod
141
- def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
142
- return ext in allowed_types
143
-
144
- @classmethod
145
- def _fetch_remote_image(cls, url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[Self]:
146
- image_bytes = _get_image_bytes(image_url=url.strip(), headers=headers)
147
- if not image_bytes:
148
- return None
149
- return cls._convert_image_into_base64(image_bytes, config)
150
-
151
- @classmethod
152
- async def _afetch_remote_image(
153
- cls, url: str, headers: dict[str, str], config: ImageProcessingConfig
154
- ) -> Optional[Self]:
155
- image_bytes = await _aget_image_bytes(image_url=url.strip(), headers=headers)
156
- if not image_bytes:
157
- return None
158
- return cls._convert_image_into_base64(image_bytes, config)
159
-
160
- @classmethod
161
- def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
162
- """
163
- Retrieve an image in bytes and return a base64-encoded data URL,
164
- applying dynamic rules from 'config'.
165
- """
166
- if not config:
167
- # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
168
- return cls._simple_base64_encode(image_data)
169
-
170
- # 1) 용량 검사
171
- max_size_mb = config.get("max_size_mb", float("inf"))
172
- image_size_mb = len(image_data) / (1024 * 1024)
173
- if image_size_mb > max_size_mb:
174
- logger.error(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
175
- return None
176
-
177
- # 2) Pillow로 이미지 열기
178
- try:
179
- with image_open(BytesIO(image_data)) as im:
180
- w, h = im.size
181
- # 가장 큰 변
182
- largest_side = max(w, h)
183
- # 가장 작은 변
184
- smallest_side = min(w, h)
185
-
186
- # min_largest_side 기준
187
- min_largest_side = config.get("min_largest_side", 1)
188
- if largest_side < min_largest_side:
189
- logger.error(f"Image too small: {largest_side} < {min_largest_side}")
190
- return None
191
-
192
- # resize 로직
193
- resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
194
- if smallest_side >= resize_if_min_side_exceeds:
195
- # resize_target_for_min_side 로 축소
196
- resize_target = config.get("resize_target_for_min_side", 1000)
197
- ratio = resize_target / float(smallest_side)
198
- new_w = int(w * ratio)
199
- new_h = int(h * ratio)
200
- im = im.resize((new_w, new_h), Resampling.LANCZOS)
201
-
202
- # 포맷 제한
203
- # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
204
- pil_format: str = (im.format or "").lower()
205
- allowed_formats: Sequence[ImageType] = config.get("formats", [])
206
- if not cls._verify_ext(pil_format, allowed_formats):
207
- logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
208
- return None
209
-
210
- # 다시 bytes 로 저장
211
- output_buffer = BytesIO()
212
- im.save(output_buffer, format=pil_format.upper()) # PIL에 맞춰서 대문자로
213
- output_buffer.seek(0)
214
- final_bytes = output_buffer.read()
215
-
216
- except Exception:
217
- return None
218
-
219
- # 최종 base64 인코딩
220
- encoded_data = b64encode(final_bytes).decode("utf-8")
221
- return cls(ext=pil_format, data=encoded_data)
222
-
223
- @classmethod
224
- def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
225
- """
226
- Retrieve an image URL and return a base64-encoded data URL.
227
- """
228
- ext = _detect_image_type(image_data)
229
- if not ext:
230
- return
231
- return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
232
-
233
- @classmethod
234
- def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
235
- """로컬 파일이 존재하고 유효한 이미지 포맷이면 Base64 데이터 URL을 반환, 아니면 None."""
236
- if not path.is_file():
237
- return None
238
- ext = path.suffix.lower().removeprefix(".")
239
- if not cls._verify_ext(ext, config["formats"]):
240
- return None
241
- return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
242
-
243
-
244
- def _is_remote_url(path: str) -> bool:
245
- parsed = urlparse(path)
246
- return bool(parsed.scheme and parsed.netloc)
247
-
248
-
249
- def _detect_image_type(image_data: bytes) -> Optional[ImageType]:
250
- """
251
- Detect the image format based on the image binary signature (header).
252
- Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
253
- If the format is not recognized, return None.
254
- """
255
- # JPEG: 시작 바이트가 FF D8 FF
256
- if image_data.startswith(b"\xff\xd8\xff"):
257
- return "jpeg"
258
- # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
259
- elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
260
- return "png"
261
- # GIF: 시작 바이트가 GIF87a 또는 GIF89a
262
- elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
263
- return "gif"
264
- # WEBP: 시작 바이트가 RIFF....WEBP
265
- elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
266
- return "webp"
267
- # BMP: 시작 바이트가 BM
268
- elif image_data.startswith(b"BM"):
269
- return "bmp"
270
-
271
-
272
- def _get_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
273
- try:
274
- with requests.Session() as session:
275
- response = session.get(image_url, headers={k: str(v) for k, v in headers.items()})
276
- if not response.ok:
277
- return
278
- return bytes(response.content or b"")
279
- except Exception:
280
- return
281
-
282
-
283
- async def _aget_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
284
- try:
285
- async with ClientSession() as session:
286
- async with session.get(image_url, headers={k: str(v) for k, v in headers.items()}) as response:
287
- if not response.ok:
288
- return
289
- return await response.read()
290
- except Exception:
291
- return
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from base64 import b64encode
5
+ from io import BytesIO
6
+ from logging import getLogger
7
+ from pathlib import Path
8
+ from typing import (
9
+ Awaitable,
10
+ ClassVar,
11
+ Literal,
12
+ NotRequired,
13
+ Optional,
14
+ Self,
15
+ Sequence,
16
+ TypeAlias,
17
+ TypedDict,
18
+ TypeGuard,
19
+ cast,
20
+ get_args,
21
+ overload,
22
+ )
23
+ from urllib.parse import urlparse
24
+
25
+ import requests
26
+ from aiohttp import ClientSession
27
+ from PIL.Image import Resampling
28
+ from PIL.Image import open as image_open
29
+ from pydantic import BaseModel
30
+
31
+ logger = getLogger(__name__)
32
+ ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
33
+
34
+
35
+ class ImageProcessingConfig(TypedDict):
36
+ """
37
+ 이미지 필터링/변환 시 사용할 설정.
38
+ - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
39
+ - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
40
+ - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
41
+ - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
42
+ - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
43
+ """
44
+
45
+ formats: Sequence[ImageType]
46
+ max_size_mb: NotRequired[float]
47
+ min_largest_side: NotRequired[int]
48
+ resize_if_min_side_exceeds: NotRequired[int]
49
+ resize_target_for_min_side: NotRequired[int]
50
+
51
+
52
def get_default_image_processing_config() -> ImageProcessingConfig:
    """Build the default image processing configuration.

    Returns:
        A newly created dict on every call: 5 MB size cap, larger side of at
        least 200 px, rescale to a 1000 px smaller side once the smaller side
        reaches 2000 px, and PNG/JPEG/GIF/BMP/WebP as allowed formats.
    """
    defaults: ImageProcessingConfig = {
        "max_size_mb": 5,
        "min_largest_side": 200,
        "resize_if_min_side_exceeds": 2000,
        "resize_target_for_min_side": 1000,
        "formats": ["png", "jpeg", "gif", "bmp", "webp"],
    }
    return defaults
60
+
61
+
62
class Base64Image(BaseModel):
    """Base64-encoded image payload together with its format.

    Instances can be built from a ``data:image/...;base64,...`` string, from
    raw bytes, from a remote URL, or from a local file path.

    Attributes:
        ext: Image format name. ``"jpg"`` is rewritten to ``"jpeg"`` right
            after initialization.
        data: Base64 text of the image bytes (without the ``data:`` prefix).
    """

    ext: ImageType
    data: str

    IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
    IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
        r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
    )

    def __hash__(self) -> int:
        """Hash on the (format, payload) pair."""
        return hash((self.ext, self.data))

    def model_post_init(self, __context: object) -> None:
        """Normalize the ``"jpg"`` alias to ``"jpeg"`` after initialization."""
        if self.ext == "jpg":
            self.ext = "jpeg"

    @classmethod
    def from_string(cls, data: str) -> Optional[Self]:
        """Parse a complete ``data:image/<ext>;base64,<payload>`` string.

        Returns:
            The parsed image, or ``None`` when the whole string does not match
            ``IMAGE_PATTERN``.
        """
        match = cls.IMAGE_PATTERN.fullmatch(data)
        if not match:
            return None
        return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))

    @classmethod
    def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
        """Base64-encode raw image bytes under the given format name."""
        return cls(ext=ext, data=b64encode(data).decode("utf-8"))

    @overload
    @classmethod
    def from_url_or_path(
        cls,
        url_or_path: str,
        *,
        headers: dict[str, str] = ...,
        config: ImageProcessingConfig = ...,
        return_coro: Literal[True],
    ) -> Awaitable[Optional[Self]]: ...

    @overload
    @classmethod
    def from_url_or_path(
        cls,
        url_or_path: str,
        *,
        headers: dict[str, str] = ...,
        config: ImageProcessingConfig = ...,
        return_coro: Literal[False] = False,
    ) -> Optional[Self]: ...

    @classmethod
    def from_url_or_path(
        cls,
        url_or_path: str,
        *,
        # NOTE: both defaults are created once, when the class body is
        # evaluated. That is safe here only because this class reads them and
        # never mutates them.
        headers: dict[str, str] = {},
        config: ImageProcessingConfig = get_default_image_processing_config(),
        return_coro: bool = False,
    ) -> Optional[Self] | Awaitable[Optional[Self]]:
        """Return a Base64Image instance from a data URI, URL or local file path.

        The input is tried, in order, as a base64 data URI, as a remote URL
        (scheme and host both present), and finally as a local file path.

        Args:
            url_or_path: Data URI, remote URL, or local path.
            headers: HTTP headers sent when fetching a remote URL.
            config: Filtering/resizing rules. Remote images go through every
                rule; local files are only checked against ``formats``.
            return_coro: When true, an awaitable is returned for every kind of
                input; otherwise the result is returned directly and remote
                URLs are fetched synchronously.

        Returns:
            The image (or an awaitable resolving to it), or ``None`` when the
            input cannot be loaded or is rejected by ``config``.
        """
        parsed = cls.from_string(url_or_path)
        if parsed is None and _is_remote_url(url_or_path):
            if return_coro:
                return cls._afetch_remote_image(url_or_path, headers, config)
            return cls._fetch_remote_image(url_or_path, headers, config)

        result: Optional[Self] = parsed
        if result is None:
            try:
                result = cls._process_local_image(Path(url_or_path), config)
            except Exception:
                # Best effort: an unreadable local file yields None, but the
                # reason is logged instead of being dropped silently.
                logger.exception(f"Failed to load local image: {url_or_path}")
                result = None

        if return_coro:
            # Keep the promise of the ``return_coro=True`` overload: callers
            # await the result, so a plain value must be wrapped.
            return cls._as_coroutine(result)
        return result

    @property
    def data_uri(self) -> str:
        """The image as a ``data:image/<ext>;base64,<payload>`` URI."""
        return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"

    @property
    def data_uri_content(self) -> dict[Literal["type", "image_url"], Literal["image_url"] | dict[Literal["url"], str]]:
        """The image as an ``image_url`` content part wrapping ``data_uri``."""
        return {"type": "image_url", "image_url": {"url": self.data_uri}}

    @classmethod
    async def _as_coroutine(cls, value: Optional[Self]) -> Optional[Self]:
        """Wrap an already computed result so that it can be awaited."""
        return value

    @staticmethod
    def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
        """Tell whether ``ext`` is allowed, treating "jpg" and "jpeg" as aliases.

        Without the alias rule a local ``.jpg`` file is rejected by a config
        that lists only ``"jpeg"`` (as the default config does), and a JPEG
        decoded by Pillow is rejected by a config that lists only ``"jpg"``.
        """
        if ext in allowed_types:
            return True
        jpeg_aliases = ("jpg", "jpeg")
        return ext in jpeg_aliases and any(alias in allowed_types for alias in jpeg_aliases)

    @classmethod
    def _fetch_remote_image(cls, url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[Self]:
        """Download ``url`` synchronously and convert the bytes per ``config``."""
        image_bytes = _get_image_bytes(image_url=url.strip(), headers=headers)
        if not image_bytes:
            return None
        return cls._convert_image_into_base64(image_bytes, config)

    @classmethod
    async def _afetch_remote_image(
        cls, url: str, headers: dict[str, str], config: ImageProcessingConfig
    ) -> Optional[Self]:
        """Download ``url`` asynchronously and convert the bytes per ``config``."""
        image_bytes = await _aget_image_bytes(image_url=url.strip(), headers=headers)
        if not image_bytes:
            return None
        return cls._convert_image_into_base64(image_bytes, config)

    @classmethod
    def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
        """Apply the rules in ``config`` to raw image bytes and base64-encode them.

        Args:
            image_data: Encoded image file content.
            config: Filtering/resizing rules. When falsy, the bytes are encoded
                unchanged after detecting the format from their header.

        Returns:
            The processed image, or ``None`` when it is too large, too small,
            in a disallowed format, or cannot be decoded/re-encoded.
        """
        if not config:
            # Without a config, fall back to plain header-based detection.
            return cls._simple_base64_encode(image_data)

        # 1) File size check.
        max_size_mb = config.get("max_size_mb", float("inf"))
        image_size_mb = len(image_data) / (1024 * 1024)
        if image_size_mb > max_size_mb:
            logger.error(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
            return None

        # 2) Decode with Pillow.
        try:
            with image_open(BytesIO(image_data)) as source:
                # Pillow reports upper-case names such as "JPEG". Read the
                # format now: images returned by resize() carry format=None,
                # so reading it after resizing rejected every resized image.
                pil_format: str = (source.format or "").lower()
                width, height = source.size
                largest_side = max(width, height)
                smallest_side = min(width, height)

                min_largest_side = config.get("min_largest_side", 1)
                if largest_side < min_largest_side:
                    logger.error(f"Image too small: {largest_side} < {min_largest_side}")
                    return None

                allowed_formats: Sequence[ImageType] = config.get("formats", [])
                if not cls._verify_ext(pil_format, allowed_formats):
                    logger.error(f"Invalid format: {pil_format} not in {allowed_formats}")
                    return None

                image = source
                resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
                if smallest_side >= resize_if_min_side_exceeds:
                    # Scale so that the smaller side becomes the target while
                    # keeping the aspect ratio. Rounding (instead of
                    # truncating) avoids landing one pixel short, and the
                    # lower bound of 1 avoids a zero-sized image.
                    resize_target = config.get("resize_target_for_min_side", 1000)
                    ratio = resize_target / float(smallest_side)
                    new_size = (max(1, round(width * ratio)), max(1, round(height * ratio)))
                    image = source.resize(new_size, Resampling.LANCZOS)

                # Re-encode; Pillow expects the upper-case format name.
                output_buffer = BytesIO()
                image.save(output_buffer, format=pil_format.upper())
                final_bytes = output_buffer.getvalue()
        except Exception:
            # Best effort: undecodable input yields None, with the cause logged.
            logger.exception("Failed to decode or re-encode image")
            return None

        encoded_data = b64encode(final_bytes).decode("utf-8")
        return cls(ext=pil_format, data=encoded_data)

    @classmethod
    def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
        """Base64-encode image bytes unchanged, detecting the format from the header.

        Returns:
            The encoded image, or ``None`` when the signature is not recognized.
        """
        ext = _detect_image_type(image_data)
        if not ext:
            return None
        return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))

    @classmethod
    def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
        """Encode a local file when it exists and its suffix is an allowed format.

        Only the file suffix is checked against ``config["formats"]``; the
        size and dimension rules of ``config`` are not applied to local files.

        Returns:
            The encoded image, or ``None`` when ``path`` is not a file or its
            suffix is not allowed.
        """
        if not path.is_file():
            return None
        ext = path.suffix.lower().removeprefix(".")
        if not cls._verify_ext(ext, config["formats"]):
            return None
        return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
242
+
243
+
244
+ def _is_remote_url(path: str) -> bool:
245
+ parsed = urlparse(path)
246
+ return bool(parsed.scheme and parsed.netloc)
247
+
248
+
249
+ def _detect_image_type(image_data: bytes) -> Optional[ImageType]:
250
+ """
251
+ Detect the image format based on the image binary signature (header).
252
+ Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
253
+ If the format is not recognized, return None.
254
+ """
255
+ # JPEG: 시작 바이트가 FF D8 FF
256
+ if image_data.startswith(b"\xff\xd8\xff"):
257
+ return "jpeg"
258
+ # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
259
+ elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
260
+ return "png"
261
+ # GIF: 시작 바이트가 GIF87a 또는 GIF89a
262
+ elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
263
+ return "gif"
264
+ # WEBP: 시작 바이트가 RIFF....WEBP
265
+ elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
266
+ return "webp"
267
+ # BMP: 시작 바이트가 BM
268
+ elif image_data.startswith(b"BM"):
269
+ return "bmp"
270
+
271
+
272
def _get_image_bytes(image_url: str, headers: dict[str, str], timeout: float = 30.0) -> Optional[bytes]:
    """Download an image synchronously with ``requests``.

    Args:
        image_url: URL to fetch.
        headers: HTTP headers to send; values are converted to ``str``.
        timeout: Seconds passed to ``requests`` as the request timeout.
            Without one, a stalled server can block the caller indefinitely.

    Returns:
        The response body, or ``None`` when the response status is not OK or
        the request fails for any reason (best effort, never raises).
    """
    try:
        with requests.Session() as session:
            response = session.get(
                image_url,
                headers={k: str(v) for k, v in headers.items()},
                timeout=timeout,
            )
            if not response.ok:
                return None
            return bytes(response.content or b"")
    except Exception:
        # Deliberately best effort; keep the cause available at debug level.
        logger.debug("Failed to download image from %s", image_url, exc_info=True)
        return None
281
+
282
+
283
async def _aget_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
    """Download an image asynchronously with ``aiohttp``.

    Args:
        image_url: URL to fetch.
        headers: HTTP headers to send; values are converted to ``str``.

    Returns:
        The response body, or ``None`` when the response status is not OK or
        the request fails for any reason (best effort, never raises).
    """
    try:
        request_headers = {name: str(value) for name, value in headers.items()}
        async with ClientSession() as session, session.get(image_url, headers=request_headers) as response:
            if response.ok:
                return await response.read()
            return None
    except Exception:
        return None