chatterer 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. chatterer/__init__.py +93 -93
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/examples/__init__.py +0 -0
  5. chatterer/examples/anything_to_markdown.py +85 -91
  6. chatterer/examples/get_code_snippets.py +55 -62
  7. chatterer/examples/login_with_playwright.py +156 -167
  8. chatterer/examples/make_ppt.py +488 -497
  9. chatterer/examples/pdf_to_markdown.py +100 -107
  10. chatterer/examples/pdf_to_text.py +54 -56
  11. chatterer/examples/transcription_api.py +112 -123
  12. chatterer/examples/upstage_parser.py +89 -100
  13. chatterer/examples/webpage_to_markdown.py +70 -79
  14. chatterer/interactive.py +354 -354
  15. chatterer/language_model.py +533 -533
  16. chatterer/messages.py +21 -21
  17. chatterer/strategies/__init__.py +13 -13
  18. chatterer/strategies/atom_of_thoughts.py +975 -975
  19. chatterer/strategies/base.py +14 -14
  20. chatterer/tools/__init__.py +46 -46
  21. chatterer/tools/caption_markdown_images.py +384 -384
  22. chatterer/tools/citation_chunking/__init__.py +3 -3
  23. chatterer/tools/citation_chunking/chunks.py +53 -53
  24. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  25. chatterer/tools/citation_chunking/citations.py +285 -285
  26. chatterer/tools/citation_chunking/prompt.py +157 -157
  27. chatterer/tools/citation_chunking/reference.py +26 -26
  28. chatterer/tools/citation_chunking/utils.py +138 -138
  29. chatterer/tools/convert_pdf_to_markdown.py +393 -302
  30. chatterer/tools/convert_to_text.py +446 -447
  31. chatterer/tools/upstage_document_parser.py +705 -705
  32. chatterer/tools/webpage_to_markdown.py +739 -739
  33. chatterer/tools/youtube.py +146 -146
  34. chatterer/utils/__init__.py +15 -15
  35. chatterer/utils/base64_image.py +285 -285
  36. chatterer/utils/bytesio.py +59 -59
  37. chatterer/utils/code_agent.py +237 -237
  38. chatterer/utils/imghdr.py +148 -148
  39. {chatterer-0.1.18.dist-info → chatterer-0.1.20.dist-info}/METADATA +392 -392
  40. chatterer-0.1.20.dist-info/RECORD +44 -0
  41. {chatterer-0.1.18.dist-info → chatterer-0.1.20.dist-info}/WHEEL +1 -1
  42. chatterer-0.1.20.dist-info/entry_points.txt +10 -0
  43. chatterer-0.1.18.dist-info/RECORD +0 -42
  44. {chatterer-0.1.18.dist-info → chatterer-0.1.20.dist-info}/top_level.txt +0 -0
@@ -1,384 +1,384 @@
1
- import os.path
2
- import re
3
- from asyncio import gather
4
- from traceback import format_exception_only, print_exc
5
- from typing import (
6
- Awaitable,
7
- Callable,
8
- ClassVar,
9
- Literal,
10
- NamedTuple,
11
- NewType,
12
- Optional,
13
- Self,
14
- TypeGuard,
15
- cast,
16
- )
17
- from urllib.parse import urljoin, urlparse
18
-
19
- from chatterer.language_model import Chatterer
20
-
21
- from ..utils.base64_image import Base64Image, ImageProcessingConfig
22
-
23
-
24
- class MarkdownLink(NamedTuple):
25
- type: Literal["link", "image"]
26
- url: str
27
- text: str
28
- title: Optional[str]
29
- pos: int
30
- end_pos: int
31
-
32
- @classmethod
33
- def from_markdown(cls, markdown_text: str, referer_url: Optional[str]) -> list[Self]:
34
- """
35
- The main function that returns the list of MarkdownLink for the input text.
36
- For simplicity, we do a "pure inline parse" of the entire text
37
- instead of letting the block parser break it up. That ensures that
38
- link tokens cover the global positions of the entire input.
39
- """
40
-
41
- from mistune import InlineParser, InlineState, Markdown
42
-
43
- class _TrackingInlineState(InlineState):
44
- meta_offset: int = 0 # Where in the original text does self.src start?
45
-
46
- def copy(self) -> Self:
47
- new_state = self.__class__(self.env)
48
- new_state.src = self.src
49
- new_state.tokens = []
50
- new_state.in_image = self.in_image
51
- new_state.in_link = self.in_link
52
- new_state.in_emphasis = self.in_emphasis
53
- new_state.in_strong = self.in_strong
54
- new_state.meta_offset = self.meta_offset
55
- return new_state
56
-
57
- class _TrackingInlineParser(InlineParser):
58
- state_cls: ClassVar = _TrackingInlineState
59
-
60
- def parse_link( # pyright: ignore[reportIncompatibleMethodOverride]
61
- self, m: re.Match[str], state: _TrackingInlineState
62
- ) -> Optional[int]:
63
- """
64
- Mistune calls parse_link with a match object for the link syntax
65
- and the current inline state. If we successfully parse the link,
66
- super().parse_link(...) returns the new position *within self.src*.
67
- We add that to state.meta_offset for the global position.
68
-
69
- Because parse_link in mistune might return None or an int, we only
70
- record positions if we get an int back (meaning success).
71
- """
72
- offset = state.meta_offset
73
- new_pos: int | None = super().parse_link(m, state)
74
- if new_pos is not None:
75
- # We have successfully parsed a link.
76
- # The link token we just added should be the last token in state.tokens:
77
- if state.tokens:
78
- token = state.tokens[-1]
79
- # The local end is new_pos in the substring.
80
- # So the global start/end in the *original* text is offset + local positions.
81
- token["global_pos"] = (offset + m.start(), offset + new_pos)
82
- return new_pos
83
-
84
- md = Markdown(inline=_TrackingInlineParser())
85
- # Create an inline state that references the full text.
86
- state = _TrackingInlineState({})
87
- state.src = markdown_text
88
-
89
- # Instead of calling md.parse, we can directly run the inline parser on
90
- # the entire text, so that positions match the entire input:
91
- md.inline.parse(state)
92
-
93
- # Now gather all the link info from the tokens.
94
- return cls._extract_links(tokens=state.tokens, referer_url=referer_url)
95
-
96
- @property
97
- def inline_text(self) -> str:
98
- return self.text.replace("\n", " ").strip()
99
-
100
- @property
101
- def inline_title(self) -> str:
102
- return self.title.replace("\n", " ").strip() if self.title else ""
103
-
104
- @property
105
- def link_markdown(self) -> str:
106
- if self.title:
107
- return f'[{self.inline_text}]({self.url} "{self.inline_title}")'
108
- return f"[{self.inline_text}]({self.url})"
109
-
110
- @classmethod
111
- def replace(cls, text: str, replacements: list[tuple[Self, str]]) -> str:
112
- for self, replacement in sorted(replacements, key=lambda x: x[0].pos, reverse=True):
113
- text = text[: self.pos] + replacement + text[self.end_pos :]
114
- return text
115
-
116
- @classmethod
117
- def _extract_links(cls, tokens: list[dict[str, object]], referer_url: Optional[str]) -> list[Self]:
118
- results: list[Self] = []
119
- for token in tokens:
120
- if (
121
- (type := token.get("type")) in ("link", "image")
122
- and "global_pos" in token
123
- and "attrs" in token
124
- and _attrs_typeguard(attrs := token["attrs"])
125
- and "url" in attrs
126
- and _url_typeguard(url := attrs["url"])
127
- and _global_pos_typeguard(global_pos := token["global_pos"])
128
- ):
129
- if referer_url:
130
- url = _to_absolute_path(path=url, referer=referer_url)
131
- children: object | None = token.get("children")
132
- if _children_typeguard(children):
133
- text = _extract_text(children)
134
- else:
135
- text = ""
136
-
137
- if "title" in attrs:
138
- title = str(attrs["title"])
139
- else:
140
- title = None
141
-
142
- start, end = global_pos
143
- results.append(cls(type, url, text, title, start, end))
144
- if "children" in token and _children_typeguard(children := token["children"]):
145
- results.extend(cls._extract_links(children, referer_url))
146
- return results
147
-
148
-
149
- ImageDataAndReferences = dict[Optional[str], list[MarkdownLink]]
150
- ImageDescriptionAndReferences = NewType("ImageDescriptionAndReferences", ImageDataAndReferences)
151
-
152
-
153
- def caption_markdown_images(
154
- markdown_text: str,
155
- headers: dict[str, str],
156
- image_processing_config: ImageProcessingConfig,
157
- description_format: str,
158
- image_description_instruction: str,
159
- chatterer: Chatterer,
160
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
161
- ) -> str:
162
- """
163
- Replace image URLs in Markdown text with their alt text and generate descriptions using a language model.
164
- """
165
- image_url_and_markdown_links: dict[Optional[Base64Image], list[MarkdownLink]] = _get_image_url_and_markdown_links(
166
- markdown_text=markdown_text,
167
- headers=headers,
168
- config=image_processing_config,
169
- img_bytes_fetcher=img_bytes_fetcher,
170
- )
171
-
172
- image_description_and_references: ImageDescriptionAndReferences = ImageDescriptionAndReferences({})
173
- for image_url, markdown_links in image_url_and_markdown_links.items():
174
- if image_url is not None:
175
- try:
176
- image_summary: str = chatterer.describe_image(
177
- image_url=image_url.data_uri,
178
- instruction=image_description_instruction,
179
- )
180
- except Exception:
181
- print_exc()
182
- continue
183
- image_description_and_references[image_summary] = markdown_links
184
- else:
185
- image_description_and_references[None] = markdown_links
186
-
187
- return _replace_images(
188
- markdown_text=markdown_text,
189
- image_description_and_references=image_description_and_references,
190
- description_format=description_format,
191
- )
192
-
193
-
194
- async def acaption_markdown_images(
195
- markdown_text: str,
196
- headers: dict[str, str],
197
- image_processing_config: ImageProcessingConfig,
198
- description_format: str,
199
- image_description_instruction: str,
200
- chatterer: Chatterer,
201
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
202
- ) -> str:
203
- """
204
- Replace image URLs in Markdown text with their alt text and generate descriptions using a language model.
205
- """
206
- image_url_and_markdown_links: dict[
207
- Optional[Base64Image], list[MarkdownLink]
208
- ] = await _aget_image_url_and_markdown_links(
209
- markdown_text=markdown_text,
210
- headers=headers,
211
- config=image_processing_config,
212
- img_bytes_fetcher=img_bytes_fetcher,
213
- )
214
-
215
- async def dummy() -> None:
216
- pass
217
-
218
- def _handle_exception(e: Optional[str | BaseException]) -> TypeGuard[Optional[str]]:
219
- if isinstance(e, BaseException):
220
- print(format_exception_only(type(e), e))
221
- return False
222
- return True
223
-
224
- coros: list[Awaitable[Optional[str]]] = [
225
- chatterer.adescribe_image(image_url=image_url.data_uri, instruction=image_description_instruction)
226
- if image_url is not None
227
- else dummy()
228
- for image_url in image_url_and_markdown_links.keys()
229
- ]
230
-
231
- return _replace_images(
232
- markdown_text=markdown_text,
233
- image_description_and_references=ImageDescriptionAndReferences({
234
- image_summary: markdown_links
235
- for markdown_links, image_summary in zip(
236
- image_url_and_markdown_links.values(), await gather(*coros, return_exceptions=True)
237
- )
238
- if _handle_exception(image_summary)
239
- }),
240
- description_format=description_format,
241
- )
242
-
243
-
244
- # --------------------------------------------------------------------
245
- # Type Guards & Helper to gather plain text from nested tokens (for the link text).
246
- # --------------------------------------------------------------------
247
- def _children_typeguard(obj: object) -> TypeGuard[list[dict[str, object]]]:
248
- if not isinstance(obj, list):
249
- return False
250
- return all(isinstance(i, dict) for i in cast(list[object], obj))
251
-
252
-
253
- def _attrs_typeguard(obj: object) -> TypeGuard[dict[str, object]]:
254
- if not isinstance(obj, dict):
255
- return False
256
- return all(isinstance(k, str) for k in cast(dict[object, object], obj))
257
-
258
-
259
- def _global_pos_typeguard(obj: object) -> TypeGuard[tuple[int, int]]:
260
- if not isinstance(obj, tuple):
261
- return False
262
- obj = cast(tuple[object, ...], obj)
263
- if len(obj) != 2:
264
- return False
265
- return all(isinstance(i, int) for i in obj)
266
-
267
-
268
- def _url_typeguard(obj: object) -> TypeGuard[str]:
269
- return isinstance(obj, str)
270
-
271
-
272
- def _extract_text(tokens: list[dict[str, object]]) -> str:
273
- parts: list[str] = []
274
- for t in tokens:
275
- if t.get("type") == "text":
276
- parts.append(str(t.get("raw", "")))
277
- elif "children" in t:
278
- children: object = t["children"]
279
- if not _children_typeguard(children):
280
- continue
281
- parts.append(_extract_text(children))
282
- return "".join(parts)
283
-
284
-
285
- def _to_absolute_path(path: str, referer: str) -> str:
286
- """
287
- path : 변환할 경로(상대/절대 경로 혹은 URL일 수도 있음)
288
- referer : 기준이 되는 절대경로(혹은 URL)
289
- """
290
- # referer가 URL인지 파일 경로인지 먼저 판별
291
- ref_parsed = urlparse(referer)
292
- is_referer_url = bool(ref_parsed.scheme and ref_parsed.netloc)
293
-
294
- if is_referer_url:
295
- # referer가 URL이라면,
296
- # 1) path 자체가 이미 절대 URL인지 확인
297
- parsed = urlparse(path)
298
- if parsed.scheme and parsed.netloc:
299
- # path가 이미 완전한 URL (예: http://, https:// 등)
300
- return path
301
- else:
302
- # 그렇지 않다면(슬래시로 시작 포함), urljoin을 써서 referer + path 로 합침
303
- return urljoin(referer, path)
304
- else:
305
- # referer가 로컬 경로라면,
306
- # path가 로컬 파일 시스템에서의 절대경로인지 판단
307
- if os.path.isabs(path):
308
- return path
309
- else:
310
- # 파일이면 referer의 디렉토리만 추출
311
- if not os.path.isdir(referer):
312
- referer_dir = os.path.dirname(referer)
313
- else:
314
- referer_dir = referer
315
-
316
- combined = os.path.join(referer_dir, path)
317
- return os.path.abspath(combined)
318
-
319
-
320
- def _get_image_url_and_markdown_links(
321
- markdown_text: str,
322
- headers: dict[str, str],
323
- config: ImageProcessingConfig,
324
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
325
- ) -> dict[Optional[Base64Image], list[MarkdownLink]]:
326
- image_matches: dict[Optional[Base64Image], list[MarkdownLink]] = {}
327
- for markdown_link in MarkdownLink.from_markdown(markdown_text=markdown_text, referer_url=headers.get("Referer")):
328
- if markdown_link.type == "link":
329
- image_matches.setdefault(None, []).append(markdown_link)
330
- continue
331
-
332
- image_data = Base64Image.from_url_or_path(
333
- markdown_link.url, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
334
- )
335
- if not image_data:
336
- image_matches.setdefault(None, []).append(markdown_link)
337
- continue
338
- image_matches.setdefault(image_data, []).append(markdown_link)
339
- return image_matches
340
-
341
-
342
- async def _aget_image_url_and_markdown_links(
343
- markdown_text: str,
344
- headers: dict[str, str],
345
- config: ImageProcessingConfig,
346
- img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
347
- ) -> dict[Optional[Base64Image], list[MarkdownLink]]:
348
- image_matches: dict[Optional[Base64Image], list[MarkdownLink]] = {}
349
- for markdown_link in MarkdownLink.from_markdown(markdown_text=markdown_text, referer_url=headers.get("Referer")):
350
- if markdown_link.type == "link":
351
- image_matches.setdefault(None, []).append(markdown_link)
352
- continue
353
- image_data = await Base64Image.afrom_url_or_path(
354
- markdown_link.url, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
355
- )
356
- if not image_data:
357
- image_matches.setdefault(None, []).append(markdown_link)
358
- continue
359
- image_matches.setdefault(image_data, []).append(markdown_link)
360
- return image_matches
361
-
362
-
363
- def _replace_images(
364
- markdown_text: str, image_description_and_references: ImageDescriptionAndReferences, description_format: str
365
- ) -> str:
366
- replacements: list[tuple[MarkdownLink, str]] = []
367
- for image_description, markdown_links in image_description_and_references.items():
368
- for markdown_link in markdown_links:
369
- if image_description is None:
370
- if markdown_link.type == "link":
371
- replacements.append((markdown_link, markdown_link.link_markdown))
372
- elif markdown_link.type == "image":
373
- replacements.append((markdown_link, f"![{markdown_link.inline_text}](...)"))
374
- else:
375
- replacements.append((
376
- markdown_link,
377
- description_format.format(
378
- image_summary=image_description.replace("\n", " "),
379
- inline_text=markdown_link.inline_text,
380
- **markdown_link._asdict(),
381
- ),
382
- ))
383
-
384
- return MarkdownLink.replace(markdown_text, replacements)
1
+ import os.path
2
+ import re
3
+ from asyncio import gather
4
+ from traceback import format_exception_only, print_exc
5
+ from typing import (
6
+ Awaitable,
7
+ Callable,
8
+ ClassVar,
9
+ Literal,
10
+ NamedTuple,
11
+ NewType,
12
+ Optional,
13
+ Self,
14
+ TypeGuard,
15
+ cast,
16
+ )
17
+ from urllib.parse import urljoin, urlparse
18
+
19
+ from chatterer.language_model import Chatterer
20
+
21
+ from ..utils.base64_image import Base64Image, ImageProcessingConfig
22
+
23
+
24
+ class MarkdownLink(NamedTuple):
25
+ type: Literal["link", "image"]
26
+ url: str
27
+ text: str
28
+ title: Optional[str]
29
+ pos: int
30
+ end_pos: int
31
+
32
+ @classmethod
33
+ def from_markdown(cls, markdown_text: str, referer_url: Optional[str]) -> list[Self]:
34
+ """
35
+ The main function that returns the list of MarkdownLink for the input text.
36
+ For simplicity, we do a "pure inline parse" of the entire text
37
+ instead of letting the block parser break it up. That ensures that
38
+ link tokens cover the global positions of the entire input.
39
+ """
40
+
41
+ from mistune import InlineParser, InlineState, Markdown
42
+
43
+ class _TrackingInlineState(InlineState):
44
+ meta_offset: int = 0 # Where in the original text does self.src start?
45
+
46
+ def copy(self) -> Self:
47
+ new_state = self.__class__(self.env)
48
+ new_state.src = self.src
49
+ new_state.tokens = []
50
+ new_state.in_image = self.in_image
51
+ new_state.in_link = self.in_link
52
+ new_state.in_emphasis = self.in_emphasis
53
+ new_state.in_strong = self.in_strong
54
+ new_state.meta_offset = self.meta_offset
55
+ return new_state
56
+
57
+ class _TrackingInlineParser(InlineParser):
58
+ state_cls: ClassVar = _TrackingInlineState
59
+
60
+ def parse_link( # pyright: ignore[reportIncompatibleMethodOverride]
61
+ self, m: re.Match[str], state: _TrackingInlineState
62
+ ) -> Optional[int]:
63
+ """
64
+ Mistune calls parse_link with a match object for the link syntax
65
+ and the current inline state. If we successfully parse the link,
66
+ super().parse_link(...) returns the new position *within self.src*.
67
+ We add that to state.meta_offset for the global position.
68
+
69
+ Because parse_link in mistune might return None or an int, we only
70
+ record positions if we get an int back (meaning success).
71
+ """
72
+ offset = state.meta_offset
73
+ new_pos: int | None = super().parse_link(m, state)
74
+ if new_pos is not None:
75
+ # We have successfully parsed a link.
76
+ # The link token we just added should be the last token in state.tokens:
77
+ if state.tokens:
78
+ token = state.tokens[-1]
79
+ # The local end is new_pos in the substring.
80
+ # So the global start/end in the *original* text is offset + local positions.
81
+ token["global_pos"] = (offset + m.start(), offset + new_pos)
82
+ return new_pos
83
+
84
+ md = Markdown(inline=_TrackingInlineParser())
85
+ # Create an inline state that references the full text.
86
+ state = _TrackingInlineState({})
87
+ state.src = markdown_text
88
+
89
+ # Instead of calling md.parse, we can directly run the inline parser on
90
+ # the entire text, so that positions match the entire input:
91
+ md.inline.parse(state)
92
+
93
+ # Now gather all the link info from the tokens.
94
+ return cls._extract_links(tokens=state.tokens, referer_url=referer_url)
95
+
96
+ @property
97
+ def inline_text(self) -> str:
98
+ return self.text.replace("\n", " ").strip()
99
+
100
+ @property
101
+ def inline_title(self) -> str:
102
+ return self.title.replace("\n", " ").strip() if self.title else ""
103
+
104
+ @property
105
+ def link_markdown(self) -> str:
106
+ if self.title:
107
+ return f'[{self.inline_text}]({self.url} "{self.inline_title}")'
108
+ return f"[{self.inline_text}]({self.url})"
109
+
110
+ @classmethod
111
+ def replace(cls, text: str, replacements: list[tuple[Self, str]]) -> str:
112
+ for self, replacement in sorted(replacements, key=lambda x: x[0].pos, reverse=True):
113
+ text = text[: self.pos] + replacement + text[self.end_pos :]
114
+ return text
115
+
116
+ @classmethod
117
+ def _extract_links(cls, tokens: list[dict[str, object]], referer_url: Optional[str]) -> list[Self]:
118
+ results: list[Self] = []
119
+ for token in tokens:
120
+ if (
121
+ (type := token.get("type")) in ("link", "image")
122
+ and "global_pos" in token
123
+ and "attrs" in token
124
+ and _attrs_typeguard(attrs := token["attrs"])
125
+ and "url" in attrs
126
+ and _url_typeguard(url := attrs["url"])
127
+ and _global_pos_typeguard(global_pos := token["global_pos"])
128
+ ):
129
+ if referer_url:
130
+ url = _to_absolute_path(path=url, referer=referer_url)
131
+ children: object | None = token.get("children")
132
+ if _children_typeguard(children):
133
+ text = _extract_text(children)
134
+ else:
135
+ text = ""
136
+
137
+ if "title" in attrs:
138
+ title = str(attrs["title"])
139
+ else:
140
+ title = None
141
+
142
+ start, end = global_pos
143
+ results.append(cls(type, url, text, title, start, end))
144
+ if "children" in token and _children_typeguard(children := token["children"]):
145
+ results.extend(cls._extract_links(children, referer_url))
146
+ return results
147
+
148
+
149
# Maps an image description (``None`` when there is none) to the Markdown
# links that the description applies to.
ImageDataAndReferences = dict[Optional[str], list[MarkdownLink]]
# Distinct static type for the mapping that ``_replace_images`` consumes.
ImageDescriptionAndReferences = NewType("ImageDescriptionAndReferences", ImageDataAndReferences)
151
+
152
+
153
def caption_markdown_images(
    markdown_text: str,
    headers: dict[str, str],
    image_processing_config: ImageProcessingConfig,
    description_format: str,
    image_description_instruction: str,
    chatterer: Chatterer,
    img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
) -> str:
    """
    Replace the images in Markdown text with descriptions generated by a language model.

    Links and images are collected with ``_get_image_url_and_markdown_links``,
    each loaded image is described through ``chatterer.describe_image``, and
    the text is rewritten by ``_replace_images``.

    Args:
        markdown_text: Markdown source to rewrite.
        headers: Passed to the image loader; its ``"Referer"`` entry, when
            present, is used to resolve relative targets.
        image_processing_config: Passed to the image loader.
        description_format: ``str.format`` template used for each described image.
        image_description_instruction: Instruction given to the model for each image.
        chatterer: Model wrapper providing ``describe_image``.
        img_bytes_fetcher: Optional callable forwarded to the image loader.

    Returns:
        The rewritten Markdown text. When describing an image fails, the
        traceback is printed and that image's Markdown is left unchanged.
    """
    image_url_and_markdown_links: dict[Optional[Base64Image], list[MarkdownLink]] = _get_image_url_and_markdown_links(
        markdown_text=markdown_text,
        headers=headers,
        config=image_processing_config,
        img_bytes_fetcher=img_bytes_fetcher,
    )

    image_description_and_references: ImageDescriptionAndReferences = ImageDescriptionAndReferences({})
    for image_url, markdown_links in image_url_and_markdown_links.items():
        if image_url is None:
            image_description_and_references.setdefault(None, []).extend(markdown_links)
            continue

        try:
            image_summary: str = chatterer.describe_image(
                image_url=image_url.data_uri,
                instruction=image_description_instruction,
            )
        except Exception:
            # Best effort: report the failure and leave this image as it is.
            print_exc()
            continue

        # The mapping is keyed by description text, so two different images
        # that receive the same description must share one entry; assigning
        # with "=" would silently drop the links of the earlier image.
        image_description_and_references.setdefault(image_summary, []).extend(markdown_links)

    return _replace_images(
        markdown_text=markdown_text,
        image_description_and_references=image_description_and_references,
        description_format=description_format,
    )
192
+
193
+
194
async def acaption_markdown_images(
    markdown_text: str,
    headers: dict[str, str],
    image_processing_config: ImageProcessingConfig,
    description_format: str,
    image_description_instruction: str,
    chatterer: Chatterer,
    img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
) -> str:
    """
    Asynchronously replace the images in Markdown text with model-generated descriptions.

    Links and images are collected with ``_aget_image_url_and_markdown_links``,
    all loaded images are described concurrently through
    ``chatterer.adescribe_image``, and the text is rewritten by
    ``_replace_images``.

    Args:
        markdown_text: Markdown source to rewrite.
        headers: Passed to the image loader; its ``"Referer"`` entry, when
            present, is used to resolve relative targets.
        image_processing_config: Passed to the image loader.
        description_format: ``str.format`` template used for each described image.
        image_description_instruction: Instruction given to the model for each image.
        chatterer: Model wrapper providing ``adescribe_image``.
        img_bytes_fetcher: Optional awaitable-returning callable forwarded to the loader.

    Returns:
        The rewritten Markdown text. When describing an image fails, the
        exception is printed and that image's Markdown is left unchanged.
    """
    image_url_and_markdown_links: dict[
        Optional[Base64Image], list[MarkdownLink]
    ] = await _aget_image_url_and_markdown_links(
        markdown_text=markdown_text,
        headers=headers,
        config=image_processing_config,
        img_bytes_fetcher=img_bytes_fetcher,
    )

    async def _no_description() -> None:
        # Placeholder for the ``None`` group so results stay aligned with the groups.
        return None

    groups = list(image_url_and_markdown_links.items())
    coros: list[Awaitable[Optional[str]]] = [
        chatterer.adescribe_image(image_url=image_url.data_uri, instruction=image_description_instruction)
        if image_url is not None
        else _no_description()
        for image_url, _ in groups
    ]
    outcomes: list[Optional[str] | BaseException] = await gather(*coros, return_exceptions=True)

    image_description_and_references: ImageDescriptionAndReferences = ImageDescriptionAndReferences({})
    for (_, markdown_links), outcome in zip(groups, outcomes):
        if isinstance(outcome, BaseException):
            # format_exception_only returns a list of lines; join them so the
            # message is printed as text rather than as a list repr.
            print("".join(format_exception_only(type(outcome), outcome)), end="")
            continue
        # The mapping is keyed by description text, so different images with
        # the same description must be merged instead of overwriting each other.
        image_description_and_references.setdefault(outcome, []).extend(markdown_links)

    return _replace_images(
        markdown_text=markdown_text,
        image_description_and_references=image_description_and_references,
        description_format=description_format,
    )
242
+
243
+
244
+ # --------------------------------------------------------------------
245
+ # Type Guards & Helper to gather plain text from nested tokens (for the link text).
246
+ # --------------------------------------------------------------------
247
+ def _children_typeguard(obj: object) -> TypeGuard[list[dict[str, object]]]:
248
+ if not isinstance(obj, list):
249
+ return False
250
+ return all(isinstance(i, dict) for i in cast(list[object], obj))
251
+
252
+
253
+ def _attrs_typeguard(obj: object) -> TypeGuard[dict[str, object]]:
254
+ if not isinstance(obj, dict):
255
+ return False
256
+ return all(isinstance(k, str) for k in cast(dict[object, object], obj))
257
+
258
+
259
+ def _global_pos_typeguard(obj: object) -> TypeGuard[tuple[int, int]]:
260
+ if not isinstance(obj, tuple):
261
+ return False
262
+ obj = cast(tuple[object, ...], obj)
263
+ if len(obj) != 2:
264
+ return False
265
+ return all(isinstance(i, int) for i in obj)
266
+
267
+
268
+ def _url_typeguard(obj: object) -> TypeGuard[str]:
269
+ return isinstance(obj, str)
270
+
271
+
272
+ def _extract_text(tokens: list[dict[str, object]]) -> str:
273
+ parts: list[str] = []
274
+ for t in tokens:
275
+ if t.get("type") == "text":
276
+ parts.append(str(t.get("raw", "")))
277
+ elif "children" in t:
278
+ children: object = t["children"]
279
+ if not _children_typeguard(children):
280
+ continue
281
+ parts.append(_extract_text(children))
282
+ return "".join(parts)
283
+
284
+
285
+ def _to_absolute_path(path: str, referer: str) -> str:
286
+ """
287
+ path : 변환할 경로(상대/절대 경로 혹은 URL일 수도 있음)
288
+ referer : 기준이 되는 절대경로(혹은 URL)
289
+ """
290
+ # referer가 URL인지 파일 경로인지 먼저 판별
291
+ ref_parsed = urlparse(referer)
292
+ is_referer_url = bool(ref_parsed.scheme and ref_parsed.netloc)
293
+
294
+ if is_referer_url:
295
+ # referer가 URL이라면,
296
+ # 1) path 자체가 이미 절대 URL인지 확인
297
+ parsed = urlparse(path)
298
+ if parsed.scheme and parsed.netloc:
299
+ # path가 이미 완전한 URL (예: http://, https:// 등)
300
+ return path
301
+ else:
302
+ # 그렇지 않다면(슬래시로 시작 포함), urljoin을 써서 referer + path 로 합침
303
+ return urljoin(referer, path)
304
+ else:
305
+ # referer가 로컬 경로라면,
306
+ # path가 로컬 파일 시스템에서의 절대경로인지 판단
307
+ if os.path.isabs(path):
308
+ return path
309
+ else:
310
+ # 파일이면 referer의 디렉토리만 추출
311
+ if not os.path.isdir(referer):
312
+ referer_dir = os.path.dirname(referer)
313
+ else:
314
+ referer_dir = referer
315
+
316
+ combined = os.path.join(referer_dir, path)
317
+ return os.path.abspath(combined)
318
+
319
+
320
def _get_image_url_and_markdown_links(
    markdown_text: str,
    headers: dict[str, str],
    config: ImageProcessingConfig,
    img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
) -> dict[Optional[Base64Image], list[MarkdownLink]]:
    """
    Group the links and images of ``markdown_text`` by the image they load.

    Plain links, and images for which ``Base64Image.from_url_or_path`` returns
    a falsy value, are collected under the ``None`` key; every other image is
    collected under its loaded ``Base64Image``. The ``"Referer"`` header, when
    present, is used as the base for resolving targets.
    """
    grouped: dict[Optional[Base64Image], list[MarkdownLink]] = {}
    found = MarkdownLink.from_markdown(markdown_text=markdown_text, referer_url=headers.get("Referer"))
    for link in found:
        key: Optional[Base64Image] = None
        if link.type != "link":
            loaded = Base64Image.from_url_or_path(
                link.url, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
            )
            if loaded:
                key = loaded
        grouped.setdefault(key, []).append(link)
    return grouped
340
+
341
+
342
async def _aget_image_url_and_markdown_links(
    markdown_text: str,
    headers: dict[str, str],
    config: ImageProcessingConfig,
    img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
) -> dict[Optional[Base64Image], list[MarkdownLink]]:
    """
    Asynchronously group the links and images of ``markdown_text`` by the image they load.

    Plain links, and images for which ``Base64Image.afrom_url_or_path``
    returns a falsy value, are collected under the ``None`` key; every other
    image is collected under its loaded ``Base64Image``. Images are awaited
    one at a time, in document order.
    """
    grouped: dict[Optional[Base64Image], list[MarkdownLink]] = {}
    found = MarkdownLink.from_markdown(markdown_text=markdown_text, referer_url=headers.get("Referer"))
    for link in found:
        key: Optional[Base64Image] = None
        if link.type != "link":
            loaded = await Base64Image.afrom_url_or_path(
                link.url, headers=headers, config=config, img_bytes_fetcher=img_bytes_fetcher
            )
            if loaded:
                key = loaded
        grouped.setdefault(key, []).append(link)
    return grouped
361
+
362
+
363
def _replace_images(
    markdown_text: str, image_description_and_references: ImageDescriptionAndReferences, description_format: str
) -> str:
    """
    Build the replacement text for every collected link and apply them all.

    Entries under the ``None`` key have no description: links are re-rendered
    from their ``link_markdown`` and images are reduced to their alt text with
    a ``...`` placeholder target. Entries under a description are rendered
    with ``description_format``, which receives ``image_summary``,
    ``inline_text`` and every field of the link as keyword arguments.
    """
    pending: list[tuple[MarkdownLink, str]] = []
    for description, links in image_description_and_references.items():
        if description is None:
            for link in links:
                if link.type == "link":
                    pending.append((link, link.link_markdown))
                elif link.type == "image":
                    pending.append((link, f"![{link.inline_text}](...)"))
            continue

        # Keep the summary on one line inside the formatted output.
        summary = description.replace("\n", " ")
        for link in links:
            rendered = description_format.format(
                image_summary=summary,
                inline_text=link.inline_text,
                **link._asdict(),
            )
            pending.append((link, rendered))

    return MarkdownLink.replace(markdown_text, pending)