chatterer 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import os.path
4
4
  import re
5
- from base64 import b64encode
6
- from io import BytesIO
7
5
  from pathlib import Path
8
- from traceback import print_exc
9
6
  from typing import (
10
7
  ClassVar,
11
8
  Literal,
@@ -24,14 +21,11 @@ from urllib.parse import urljoin, urlparse
24
21
 
25
22
  import mistune
26
23
  import playwright.sync_api
27
- import requests
28
- from aiohttp import ClientSession
29
- from PIL.Image import Resampling
30
- from PIL.Image import open as image_open
31
24
  from pydantic import BaseModel, Field
32
25
 
26
+ from ...utils.image import Base64Image, ImageProcessingConfig
27
+
33
28
 
34
- # Define a Pydantic model for the selected line ranges returned by the LLM.
35
29
  class SelectedLineRanges(BaseModel):
36
30
  line_ranges: list[str] = Field(description="List of inclusive line ranges, e.g., ['1-3', '5-5', '7-10']")
37
31
 
@@ -68,33 +62,6 @@ def get_default_playwright_launch_options() -> PlaywrightLaunchOptions:
68
62
  return {"headless": True}
69
63
 
70
64
 
71
- class ImageProcessingConfig(TypedDict):
72
- """
73
- 이미지 필터링/변환 시 사용할 설정.
74
- - formats: (Sequence[str]) 허용할 이미지 포맷(소문자, 예: ["jpeg", "png", "webp"]).
75
- - max_size_mb: (float) 이미지 용량 상한(MB). 초과 시 제외.
76
- - min_largest_side: (int) 가로나 세로 중 가장 큰 변의 최소 크기. 미만 시 제외.
77
- - resize_if_min_side_exceeds: (int) 가로나 세로 중 작은 변이 이 값 이상이면 리스케일.
78
- - resize_target_for_min_side: (int) 리스케일시, '가장 작은 변'을 이 값으로 줄임(비율 유지는 Lanczos).
79
- """
80
-
81
- formats: Sequence[str]
82
- max_size_mb: NotRequired[float]
83
- min_largest_side: NotRequired[int]
84
- resize_if_min_side_exceeds: NotRequired[int]
85
- resize_target_for_min_side: NotRequired[int]
86
-
87
-
88
- def get_default_image_processing_config() -> ImageProcessingConfig:
89
- return {
90
- "max_size_mb": 5,
91
- "min_largest_side": 200,
92
- "resize_if_min_side_exceeds": 2000,
93
- "resize_target_for_min_side": 1000,
94
- "formats": ["png", "jpg", "jpeg", "gif", "bmp", "webp"],
95
- }
96
-
97
-
98
65
  class _TrackingInlineState(mistune.InlineState):
99
66
  meta_offset: int = 0 # Where in the original text does self.src start?
100
67
 
@@ -261,15 +228,6 @@ def _extract_text(tokens: list[dict[str, object]]) -> str:
261
228
  return "".join(parts)
262
229
 
263
230
 
264
- def _is_url(path: str) -> bool:
265
- """
266
- path가 절대 URL 형태인지 여부를 bool로 반환
267
- (scheme과 netloc이 모두 존재하면 URL로 간주)
268
- """
269
- parsed = urlparse(path)
270
- return bool(parsed.scheme and parsed.netloc)
271
-
272
-
273
231
  def _to_absolute_path(path: str, referer: str) -> str:
274
232
  """
275
233
  path : 변환할 경로(상대/절대 경로 혹은 URL일 수도 있음)
@@ -308,88 +266,16 @@ def _to_absolute_path(path: str, referer: str) -> str:
308
266
  # =======================
309
267
 
310
268
 
311
- def _get_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
312
- try:
313
- with requests.Session() as session:
314
- response = session.get(image_url, headers={k: str(v) for k, v in headers.items()})
315
- if not response.ok:
316
- return
317
- return bytes(response.content or b"")
318
- except Exception:
319
- return
320
-
321
-
322
- async def _aget_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
323
- try:
324
- async with ClientSession() as session:
325
- async with session.get(image_url, headers={k: str(v) for k, v in headers.items()}) as response:
326
- if not response.ok:
327
- return
328
- return await response.read()
329
- except Exception:
330
- return
331
-
332
-
333
- # =======================
334
-
335
-
336
- def _fetch_remote_image(url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[str]:
337
- image_bytes = _get_image_bytes(image_url=url.strip(), headers=headers)
338
- if not image_bytes:
339
- return None
340
- return _convert_image_into_base64(image_bytes, config)
341
-
342
-
343
- async def _afetch_remote_image(url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[str]:
344
- image_bytes = await _aget_image_bytes(image_url=url.strip(), headers=headers)
345
- if not image_bytes:
346
- return None
347
- return _convert_image_into_base64(image_bytes, config)
348
-
349
-
350
- # =======================
351
-
352
-
353
- def _process_markdown_image(
354
- markdown_link: MarkdownLink, headers: dict[str, str], config: ImageProcessingConfig
355
- ) -> Optional[str]:
356
- """마크다운 이미지 패턴에 매칭된 하나의 이미지를 처리해 Base64 URL을 반환(동기)."""
357
- if markdown_link.type != "image":
358
- return
359
- url: str = markdown_link.url
360
- if url.startswith("data:image/"):
361
- return url
362
- elif _is_url(url):
363
- return _fetch_remote_image(url, headers, config)
364
- return _process_local_image(Path(url), config)
365
-
366
-
367
- async def _aprocess_markdown_image(
368
- markdown_link: MarkdownLink, headers: dict[str, str], config: ImageProcessingConfig
369
- ) -> Optional[str]:
370
- """마크다운 이미지 패턴에 매칭된 하나의 이미지를 처리해 Base64 URL을 반환(비동기)."""
371
- if markdown_link.type != "image":
372
- return
373
- url: str = markdown_link.url
374
- if url.startswith("data:image/"):
375
- return url
376
- elif _is_url(url):
377
- return await _afetch_remote_image(url, headers, config)
378
- return _process_local_image(Path(url), config)
379
-
380
-
381
- # =======================
382
-
383
-
384
269
  def get_image_url_and_markdown_links(
385
270
  markdown_text: str, headers: dict[str, str], config: ImageProcessingConfig
386
- ) -> dict[Optional[str], list[MarkdownLink]]:
387
- image_matches: dict[Optional[str], list[MarkdownLink]] = {}
271
+ ) -> dict[Optional[Base64Image], list[MarkdownLink]]:
272
+ image_matches: dict[Optional[Base64Image], list[MarkdownLink]] = {}
388
273
  for markdown_link in MarkdownLink.from_markdown(markdown_text=markdown_text, referer_url=headers.get("Referer")):
389
274
  if markdown_link.type == "link":
390
275
  image_matches.setdefault(None, []).append(markdown_link)
391
276
  continue
392
- image_data = _process_markdown_image(markdown_link, headers, config)
277
+
278
+ image_data = Base64Image.from_url_or_path(markdown_link.url, headers=headers, config=config)
393
279
  if not image_data:
394
280
  continue
395
281
  image_matches.setdefault(image_data, []).append(markdown_link)
@@ -398,134 +284,21 @@ def get_image_url_and_markdown_links(
398
284
 
399
285
  async def aget_image_url_and_markdown_links(
400
286
  markdown_text: str, headers: dict[str, str], config: ImageProcessingConfig
401
- ) -> dict[Optional[str], list[MarkdownLink]]:
402
- image_matches: dict[Optional[str], list[MarkdownLink]] = {}
287
+ ) -> dict[Optional[Base64Image], list[MarkdownLink]]:
288
+ image_matches: dict[Optional[Base64Image], list[MarkdownLink]] = {}
403
289
  for markdown_link in MarkdownLink.from_markdown(markdown_text=markdown_text, referer_url=headers.get("Referer")):
404
290
  if markdown_link.type == "link":
405
291
  image_matches.setdefault(None, []).append(markdown_link)
406
292
  continue
407
- image_data = await _aprocess_markdown_image(markdown_link, headers, config)
293
+ image_data = await Base64Image.from_url_or_path(
294
+ markdown_link.url, headers=headers, config=config, return_coro=True
295
+ )
408
296
  if not image_data:
409
297
  continue
410
298
  image_matches.setdefault(image_data, []).append(markdown_link)
411
299
  return image_matches
412
300
 
413
301
 
414
- # =======================
415
-
416
-
417
- def _simple_base64_encode(image_data: bytes) -> Optional[str]:
418
- """
419
- Retrieve an image URL and return a base64-encoded data URL.
420
- """
421
- image_type = _detect_image_type(image_data)
422
- if not image_type:
423
- return
424
- encoded_data = b64encode(image_data).decode("utf-8")
425
- return f"data:image/{image_type};base64,{encoded_data}"
426
-
427
-
428
- def _convert_image_into_base64(image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[str]:
429
- """
430
- Retrieve an image in bytes and return a base64-encoded data URL,
431
- applying dynamic rules from 'config'.
432
- """
433
- if not config:
434
- # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
435
- return _simple_base64_encode(image_data)
436
-
437
- # 1) 용량 검사
438
- max_size_mb = config.get("max_size_mb", float("inf"))
439
- image_size_mb = len(image_data) / (1024 * 1024)
440
- if image_size_mb > max_size_mb:
441
- print(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
442
- return None
443
-
444
- # 2) Pillow로 이미지 열기
445
- try:
446
- with image_open(BytesIO(image_data)) as im:
447
- w, h = im.size
448
- # 가장 큰 변
449
- largest_side = max(w, h)
450
- # 가장 작은 변
451
- smallest_side = min(w, h)
452
-
453
- # min_largest_side 기준
454
- min_largest_side = config.get("min_largest_side", 1)
455
- if largest_side < min_largest_side:
456
- print(f"Image too small: {largest_side} < {min_largest_side}")
457
- return None
458
-
459
- # resize 로직
460
- resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
461
- if smallest_side >= resize_if_min_side_exceeds:
462
- # resize_target_for_min_side 로 축소
463
- resize_target = config.get("resize_target_for_min_side", 1000)
464
- ratio = resize_target / float(smallest_side)
465
- new_w = int(w * ratio)
466
- new_h = int(h * ratio)
467
- im = im.resize((new_w, new_h), Resampling.LANCZOS)
468
-
469
- # 포맷 제한
470
- # PIL이 인식한 포맷이 대문자(JPEG)일 수 있으므로 소문자로
471
- pil_format = (im.format or "").lower()
472
- allowed_formats = config.get("formats", [])
473
- if pil_format not in allowed_formats:
474
- print(f"Invalid format: {pil_format} not in {allowed_formats}")
475
- return None
476
-
477
- # JPG -> JPEG 로 포맷명 정리
478
- if pil_format == "jpg":
479
- pil_format = "jpeg"
480
-
481
- # 다시 bytes 로 저장
482
- output_buffer = BytesIO()
483
- im.save(output_buffer, format=pil_format.upper()) # PIL에 맞춰서 대문자로
484
- output_buffer.seek(0)
485
- final_bytes = output_buffer.read()
486
-
487
- except Exception:
488
- print_exc()
489
- return None
490
-
491
- # 최종 base64 인코딩
492
- encoded_data = b64encode(final_bytes).decode("utf-8")
493
- return f"data:image/{pil_format};base64,{encoded_data}"
494
-
495
-
496
- def _detect_image_type(image_data: bytes) -> Optional[str]:
497
- """
498
- Detect the image format based on the image binary signature (header).
499
- Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
500
- If the format is not recognized, return None.
501
- """
502
- # JPEG: 시작 바이트가 FF D8 FF
503
- if image_data.startswith(b"\xff\xd8\xff"):
504
- return "jpeg"
505
- # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
506
- elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
507
- return "png"
508
- # GIF: 시작 바이트가 GIF87a 또는 GIF89a
509
- elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
510
- return "gif"
511
- # WEBP: 시작 바이트가 RIFF....WEBP
512
- elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
513
- return "webp"
514
- # BMP: 시작 바이트가 BM
515
- elif image_data.startswith(b"BM"):
516
- return "bmp"
517
-
518
-
519
- def _process_local_image(path: Path, config: ImageProcessingConfig) -> Optional[str]:
520
- """로컬 파일이 존재하고 유효한 이미지 포맷이면 Base64 데이터 URL을 반환, 아니면 None."""
521
- if not path.is_file():
522
- return None
523
- lowered_suffix = path.suffix.lower()
524
- if not lowered_suffix or (lowered_suffix_without_dot := lowered_suffix[1:]) not in config["formats"]:
525
- return None
526
- return f"data:image/{lowered_suffix_without_dot};base64,{path.read_bytes().hex()}"
527
-
528
-
529
302
  def replace_images(
530
303
  markdown_text: str, image_description_and_references: ImageDescriptionAndReferences, description_format: str
531
304
  ) -> str:
@@ -0,0 +1,288 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from base64 import b64encode
5
+ from io import BytesIO
6
+ from pathlib import Path
7
+ from traceback import print_exc
8
+ from typing import (
9
+ Awaitable,
10
+ ClassVar,
11
+ Literal,
12
+ NotRequired,
13
+ Optional,
14
+ Self,
15
+ Sequence,
16
+ TypeAlias,
17
+ TypedDict,
18
+ TypeGuard,
19
+ cast,
20
+ get_args,
21
+ overload,
22
+ )
23
+ from urllib.parse import urlparse
24
+
25
+ import requests
26
+ from aiohttp import ClientSession
27
+ from PIL.Image import Resampling
28
+ from PIL.Image import open as image_open
29
+ from pydantic import BaseModel
30
+
31
+ ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
32
+
33
+
34
class ImageProcessingConfig(TypedDict):
    """
    Settings used when filtering/converting images.
    - formats: (Sequence[ImageType]) Allowed image formats (lowercase, e.g. ["jpeg", "png", "webp"]).
    - max_size_mb: (float) Upper bound on the image size in MB. Larger images are excluded.
    - min_largest_side: (int) Minimum size of the larger of width/height. Smaller images are excluded.
    - resize_if_min_side_exceeds: (int) Rescale when the smaller of width/height is at least this value.
    - resize_target_for_min_side: (int) When rescaling, shrink the smallest side to this value
      (aspect ratio is kept; Lanczos resampling).
    """

    # The only required key; every other key is optional (NotRequired).
    formats: Sequence[ImageType]
    max_size_mb: NotRequired[float]
    min_largest_side: NotRequired[int]
    resize_if_min_side_exceeds: NotRequired[int]
    resize_target_for_min_side: NotRequired[int]
49
+
50
+
51
def get_default_image_processing_config() -> ImageProcessingConfig:
    """Return a freshly built default image processing configuration.

    A new dict (with a new ``formats`` list) is created on every call, so the
    caller may mutate the result freely.
    """
    defaults: ImageProcessingConfig = {
        "max_size_mb": 5,
        "min_largest_side": 200,
        "resize_if_min_side_exceeds": 2000,
        "resize_target_for_min_side": 1000,
        "formats": ["png", "jpeg", "gif", "bmp", "webp"],
    }
    return defaults
59
+
60
+
61
class Base64Image(BaseModel):
    """Base64-encoded image payload tagged with its image type.

    The hash combines ``ext`` and ``data``, which makes instances usable as
    dictionary keys.
    """

    # Image type of the payload; "jpg" is normalised to "jpeg" in model_post_init.
    ext: ImageType
    # Base64 text of the image bytes, without the "data:image/...;base64," prefix.
    data: str

    IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
    # This is an f-string, so the braces of the ``{0,2}`` quantifier must be
    # doubled; a single pair would be interpolated as the tuple ``(0, 2)``.
    # Group 1 is the image type, group 2 the base64 payload.
    IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
        rf"data:image/({'|'.join(IMAGE_TYPES)});base64,([A-Za-z0-9+/]+={{0,2}})$"
    )

    def __hash__(self) -> int:
        return hash((self.ext, self.data))

    def model_post_init(self, __context: object) -> None:
        # Keep a single canonical spelling for JPEG images.
        if self.ext == "jpg":
            self.ext = "jpeg"

    @classmethod
    def from_string(cls, data: str) -> Optional[Self]:
        """Parse a ``data:image/<type>;base64,<payload>`` URI.

        Returns None when ``data`` is not a data URI of a supported image type.
        """
        match = cls.IMAGE_PATTERN.fullmatch(data)
        if match is None:
            return None
        return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))

    @classmethod
    def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
        """Base64-encode raw image bytes and tag them with ``ext``."""
        return cls(ext=ext, data=b64encode(data).decode("utf-8"))

    @overload
    @classmethod
    def from_url_or_path(
        cls,
        url_or_path: str,
        *,
        headers: Optional[dict[str, str]] = ...,
        config: Optional[ImageProcessingConfig] = ...,
        return_coro: Literal[True],
    ) -> Awaitable[Optional[Self]]: ...

    @overload
    @classmethod
    def from_url_or_path(
        cls,
        url_or_path: str,
        *,
        headers: Optional[dict[str, str]] = ...,
        config: Optional[ImageProcessingConfig] = ...,
        return_coro: Literal[False] = False,
    ) -> Optional[Self]: ...

    @classmethod
    def from_url_or_path(
        cls,
        url_or_path: str,
        *,
        headers: Optional[dict[str, str]] = None,
        config: Optional[ImageProcessingConfig] = None,
        return_coro: bool = False,
    ) -> Optional[Self] | Awaitable[Optional[Self]]:
        """Return a Base64Image instance from a data URI, URL or local file path.

        Args:
            url_or_path: A ``data:image/...`` URI, a remote URL, or a local path.
            headers: HTTP headers sent when fetching a remote URL. Defaults to
                no extra headers.
            config: Image processing rules. Defaults to a fresh
                ``get_default_image_processing_config()``.
            return_coro: When True, an awaitable is returned for every kind of
                input, so the result can always be awaited.

        Returns:
            The image, or None when it cannot be loaded or is filtered out;
            wrapped in an awaitable when ``return_coro`` is True.
        """
        # Build the defaults per call instead of sharing mutable default arguments.
        request_headers: dict[str, str] = {} if headers is None else headers
        effective_config = get_default_image_processing_config() if config is None else config

        if return_coro:
            return cls._afrom_url_or_path(url_or_path, request_headers, effective_config)

        maybe_base64 = cls.from_string(url_or_path)
        if maybe_base64 is not None:
            return maybe_base64
        if _is_remote_url(url_or_path):
            return cls._fetch_remote_image(url_or_path, request_headers, effective_config)
        return cls._process_local_image(Path(url_or_path), effective_config)

    @classmethod
    async def _afrom_url_or_path(
        cls, url_or_path: str, headers: dict[str, str], config: ImageProcessingConfig
    ) -> Optional[Self]:
        """Async counterpart of ``from_url_or_path``; only the remote fetch awaits."""
        maybe_base64 = cls.from_string(url_or_path)
        if maybe_base64 is not None:
            return maybe_base64
        if _is_remote_url(url_or_path):
            return await cls._afetch_remote_image(url_or_path, headers, config)
        return cls._process_local_image(Path(url_or_path), config)

    @property
    def data_uri(self) -> str:
        """The image as a ``data:image/<type>;base64,<payload>`` URI."""
        return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"

    @property
    def data_uri_content(self) -> dict[Literal["type", "image_url"], Literal["image_url"] | dict[Literal["url"], str]]:
        """The data URI wrapped as ``{"type": "image_url", "image_url": {"url": ...}}``."""
        return {"type": "image_url", "image_url": {"url": self.data_uri}}

    @staticmethod
    def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
        """Return True when ``ext`` is one of ``allowed_types``."""
        return ext in allowed_types

    @classmethod
    def _fetch_remote_image(cls, url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[Self]:
        """Download ``url`` synchronously and convert it; None when nothing was downloaded."""
        image_bytes = _get_image_bytes(image_url=url.strip(), headers=headers)
        if not image_bytes:
            return None
        return cls._convert_image_into_base64(image_bytes, config)

    @classmethod
    async def _afetch_remote_image(
        cls, url: str, headers: dict[str, str], config: ImageProcessingConfig
    ) -> Optional[Self]:
        """Download ``url`` asynchronously and convert it; None when nothing was downloaded."""
        image_bytes = await _aget_image_bytes(image_url=url.strip(), headers=headers)
        if not image_bytes:
            return None
        return cls._convert_image_into_base64(image_bytes, config)

    @classmethod
    def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
        """
        Convert raw image bytes into a Base64Image, applying the rules in 'config'.

        Returns None when the image exceeds ``max_size_mb``, is smaller than
        ``min_largest_side``, is not in an allowed format, or cannot be decoded
        or re-encoded by Pillow.
        """
        if not config:
            # Without a config, only sniff the header bytes and encode as-is.
            return cls._simple_base64_encode(image_data)

        # 1) Size check
        max_size_mb = config.get("max_size_mb", float("inf"))
        image_size_mb = len(image_data) / (1024 * 1024)
        if image_size_mb > max_size_mb:
            print(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
            return None

        # 2) Open the image with Pillow
        try:
            with image_open(BytesIO(image_data)) as source:
                # Capture the format from the decoded source image: the image
                # returned by resize() is not file-backed and has format None.
                # Pillow reports upper-case names (e.g. "JPEG"), hence lower().
                pil_format: str = (source.format or "").lower()

                w, h = source.size
                largest_side = max(w, h)
                smallest_side = min(w, h)

                # Reject images whose largest side is below the minimum.
                min_largest_side = config.get("min_largest_side", 1)
                if largest_side < min_largest_side:
                    print(f"Image too small: {largest_side} < {min_largest_side}")
                    return None

                # Downscale so that the smallest side equals the target.
                output_image = source
                resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
                if smallest_side >= resize_if_min_side_exceeds:
                    resize_target = config.get("resize_target_for_min_side", 1000)
                    ratio = resize_target / float(smallest_side)
                    new_w = int(w * ratio)
                    new_h = int(h * ratio)
                    output_image = source.resize((new_w, new_h), Resampling.LANCZOS)

                # Format restriction
                allowed_formats: Sequence[ImageType] = config.get("formats", [])
                if not cls._verify_ext(pil_format, allowed_formats):
                    print(f"Invalid format: {pil_format} not in {allowed_formats}")
                    return None

                # Re-encode into bytes; Pillow expects the upper-case format name.
                output_buffer = BytesIO()
                output_image.save(output_buffer, format=pil_format.upper())
                final_bytes = output_buffer.getvalue()

        except Exception:
            # Best effort: report the failure and skip this image.
            print_exc()
            return None

        # Final base64 encoding
        encoded_data = b64encode(final_bytes).decode("utf-8")
        return cls(ext=pil_format, data=encoded_data)

    @classmethod
    def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
        """
        Base64-encode raw image bytes without any processing.

        The image type is detected from the header bytes; None is returned
        when the type is not recognised.
        """
        ext = _detect_image_type(image_data)
        if not ext:
            return None
        return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))

    @classmethod
    def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
        """Return the local file as a Base64Image if it exists and its suffix is an allowed format, else None."""
        if not path.is_file():
            return None
        allowed_formats = config["formats"]
        ext = path.suffix.lower().removeprefix(".")
        # ".jpg" files are JPEG images: fall back to the canonical "jpeg"
        # spelling when the config does not list "jpg" itself.
        if ext == "jpg" and "jpg" not in allowed_formats:
            ext = "jpeg"
        if not cls._verify_ext(ext, allowed_formats):
            return None
        return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
239
+
240
+
241
+ def _is_remote_url(path: str) -> bool:
242
+ parsed = urlparse(path)
243
+ return bool(parsed.scheme and parsed.netloc)
244
+
245
+
246
+ def _detect_image_type(image_data: bytes) -> Optional[ImageType]:
247
+ """
248
+ Detect the image format based on the image binary signature (header).
249
+ Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
250
+ If the format is not recognized, return None.
251
+ """
252
+ # JPEG: 시작 바이트가 FF D8 FF
253
+ if image_data.startswith(b"\xff\xd8\xff"):
254
+ return "jpeg"
255
+ # PNG: 시작 바이트가 89 50 4E 47 0D 0A 1A 0A
256
+ elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
257
+ return "png"
258
+ # GIF: 시작 바이트가 GIF87a 또는 GIF89a
259
+ elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
260
+ return "gif"
261
+ # WEBP: 시작 바이트가 RIFF....WEBP
262
+ elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
263
+ return "webp"
264
+ # BMP: 시작 바이트가 BM
265
+ elif image_data.startswith(b"BM"):
266
+ return "bmp"
267
+
268
+
269
def _get_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
    """Download ``image_url`` synchronously and return the response body.

    Header values are converted to ``str`` before sending. Returns None when
    the response is not OK or when any exception is raised.
    """
    try:
        with requests.Session() as http:
            reply = http.get(image_url, headers={name: str(value) for name, value in headers.items()})
            if reply.ok:
                return bytes(reply.content or b"")
            return None
    except Exception:
        # Best effort: any failure is reported to the caller as "no image".
        return None
278
+
279
+
280
async def _aget_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
    """Download ``image_url`` asynchronously and return the response body.

    Header values are converted to ``str`` before sending. Returns None when
    the response is not OK or when any exception is raised.
    """
    try:
        async with (
            ClientSession() as http,
            http.get(image_url, headers={name: str(value) for name, value in headers.items()}) as reply,
        ):
            if reply.ok:
                return await reply.read()
            return None
    except Exception:
        # Best effort: any failure is reported to the caller as "no image".
        return None
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -0,0 +1,24 @@
1
+ chatterer/__init__.py,sha256=kl8VWiDJIt5IQjaBpQu13n0GrzP3qzaNXyA68B1xHTE,802
2
+ chatterer/language_model.py,sha256=S8x2IbzZBi1mAKSKrGuoB4-gfKBz73RCNXt_H-fiDzc,13826
3
+ chatterer/messages.py,sha256=-NyOIK7wJI1uVD8qaJPeLA0LqirFEsZ1mOYoO1F2wLc,188
4
+ chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
6
+ chatterer/strategies/atom_of_thoughts.py,sha256=CygOCLu5vLk-fzY9O-iE3qLShfjD7iY40ks9jH4ULBM,40872
7
+ chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
8
+ chatterer/tools/__init__.py,sha256=yA4RcHIAO33xsmWXQTmtSm9bk1p80yJKSadtMa3X-aY,415
9
+ chatterer/tools/convert_to_text.py,sha256=kBqxCJ0IoiAw2eiPYqep_SPZm-TtYKF7mdACLsWQUuI,15915
10
+ chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
11
+ chatterer/tools/citation_chunking/chunks.py,sha256=50Dpa43RaYftlNox8tM1qI8htZ3_AJ9Uyyn02WsmxYk,2173
12
+ chatterer/tools/citation_chunking/citation_chunker.py,sha256=yx5O9pUkowlNcFyyNf7f3sbq7-CV8AXOzFnviDldPR8,4894
13
+ chatterer/tools/citation_chunking/citations.py,sha256=RWVJA38yvlER9PhLDPZnqaRsbQ334W4FDQXBqGpdi08,12593
14
+ chatterer/tools/citation_chunking/prompt.py,sha256=S0Z6v8R23_Vknt3qYyjoDE1_gBsb0fCEx7LIw7BFXmA,7714
15
+ chatterer/tools/citation_chunking/reference.py,sha256=uRKufkU41Zedz6MQUCy-aCk4Rwxg94m4b332zKDpXAs,919
16
+ chatterer/tools/citation_chunking/utils.py,sha256=M4pH2-UIE1VLzQLXDqjEe4L3Xcy0e0KhAP3I2U2BNms,6348
17
+ chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1sh9njvNBJmhBVtcpjsA,123
18
+ chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=yP0KixYZNQ4Kn_ZCFDI3mVyBD_DpUGfqgklpaGJUTCU,27496
19
+ chatterer/tools/webpage_to_markdown/utils.py,sha256=ZLUU94imYciEdynD2K7Dmcsbt8BVQTaOP56Ba6DAFvk,12593
20
+ chatterer/utils/image.py,sha256=F3_D1677UDFlgp-UQBS_ChkNODzf_VOfjYNSUi02MaI,10852
21
+ chatterer-0.1.8.dist-info/METADATA,sha256=01CGNp0oae5VdHM5gzqPKYFtlSqufE0h5XFMdn2E_6c,4234
22
+ chatterer-0.1.8.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
23
+ chatterer-0.1.8.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
24
+ chatterer-0.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (77.0.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,15 +0,0 @@
1
- chatterer/__init__.py,sha256=9mpj_kaqaGPvaAng2Ol1fzWftYvIs4y97v9umXWPWJg,572
2
- chatterer/language_model.py,sha256=dHHjXPje9FjRpEA0cV1G9LJwkiOSfMyVMOTXvrTo69A,12275
3
- chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
5
- chatterer/strategies/atom_of_thoughts.py,sha256=S_j4R26Drr8qJjAA9HYtWRF1_F4_ldo11_A2Z1ObYOI,40936
6
- chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
7
- chatterer/tools/__init__.py,sha256=YJc0xaDGwcdHvVlz-xFRjzrek2Q8icxc4Xsq2nOQXQA,341
8
- chatterer/tools/convert_to_text.py,sha256=5bOlo9hkUJtJhrB5hmEl4VM-_3Qoh3c2CxUrGmVOoQ4,16188
9
- chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1sh9njvNBJmhBVtcpjsA,123
10
- chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=9k5e4jy6QrekEg8J7ZuJ_E_akDHyZ6yQI-AhTaclEfc,26687
11
- chatterer/tools/webpage_to_markdown/utils.py,sha256=0_LnrU7WLM0TbEXcNSND9xEDW4geS0OTCpQszHq68zE,21282
12
- chatterer-0.1.6.dist-info/METADATA,sha256=O-5dA1okHEZJanGLLKwDoZR04aQDSj7_1MNjykVbcws,4234
13
- chatterer-0.1.6.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
14
- chatterer-0.1.6.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
15
- chatterer-0.1.6.dist-info/RECORD,,