PyPI - chatterer - Versions diffs - 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl - Mend

chatterer 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

chatterer/__init__.py +41 -4
chatterer/common_types/__init__.py +21 -0
chatterer/common_types/io.py +19 -0
chatterer/interactive.py +353 -0
chatterer/language_model.py +129 -252
chatterer/messages.py +13 -1
chatterer/tools/__init__.py +27 -9
chatterer/tools/{webpage_to_markdown/utils.py → caption_markdown_images.py} +158 -108
chatterer/tools/convert_pdf_to_markdown.py +302 -0
chatterer/tools/convert_to_text.py +49 -65
chatterer/tools/upstage_document_parser.py +705 -0
chatterer/tools/{webpage_to_markdown/playwright_bot.py → webpage_to_markdown.py} +197 -107
chatterer/tools/youtube.py +2 -1
chatterer/utils/__init__.py +4 -1
chatterer/utils/{image.py → base64_image.py} +56 -62
chatterer/utils/bytesio.py +59 -0
chatterer/utils/cli.py +476 -0
chatterer/utils/code_agent.py +137 -38
chatterer/utils/imghdr.py +148 -0
chatterer-0.1.14.dist-info/METADATA +387 -0
chatterer-0.1.14.dist-info/RECORD +34 -0
chatterer/tools/webpage_to_markdown/__init__.py +0 -4
chatterer-0.1.12.dist-info/METADATA +0 -170
chatterer-0.1.12.dist-info/RECORD +0 -27
{chatterer-0.1.12.dist-info → chatterer-0.1.14.dist-info}/WHEEL +0 -0
{chatterer-0.1.12.dist-info → chatterer-0.1.14.dist-info}/top_level.txt +0 -0

chatterer/tools/{webpage_to_markdown/playwright_bot.py → webpage_to_markdown.py} RENAMED Viewed

@@ -13,41 +13,77 @@ Use the synchronous methods (without the "a" prefix) in a normal context manager
 or use the asynchronous methods (prefixed with "a") within an async context manager.
 """
-import asyncio
+from __future__ import annotations
 from dataclasses import dataclass, field
-from traceback import format_exception_only, print_exc
+from pathlib import Path
 from types import TracebackType
 from typing import (
-    Awaitable,
+    TYPE_CHECKING,
+    Literal,
+    NotRequired,
     Optional,
     Self,
+    Sequence,
     Type,
-    TypeGuard,
+    TypeAlias,
+    TypedDict,
     Union,
 )
-import playwright.async_api
-import playwright.sync_api
-from ...language_model import DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION, Chatterer
-from ...utils.image import Base64Image, get_default_image_processing_config
-from ..convert_to_text import HtmlToMarkdownOptions, get_default_html_to_markdown_options, html_to_markdown
-from .utils import (
-    DEFAULT_UA,
-    ImageDescriptionAndReferences,
-    ImageProcessingConfig,
-    MarkdownLink,
-    PlaywrightLaunchOptions,
-    PlaywrightPersistencyOptions,
-    SelectedLineRanges,
-    WaitUntil,
-    aget_image_url_and_markdown_links,
-    get_default_playwright_launch_options,
-    get_image_url_and_markdown_links,
-    replace_images,
+from pydantic import BaseModel, Field
+from ..language_model import DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION, Chatterer
+from ..utils.base64_image import ImageProcessingConfig, get_default_image_processing_config, is_remote_url
+from .caption_markdown_images import acaption_markdown_images, caption_markdown_images
+from .convert_to_text import HtmlToMarkdownOptions, get_default_html_to_markdown_options, html_to_markdown
+if TYPE_CHECKING:
+    import playwright.async_api
+    import playwright.sync_api
+WaitUntil: TypeAlias = Literal["commit", "domcontentloaded", "load", "networkidle"]
+DEFAULT_UA: str = (
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
 )
+class SelectedLineRanges(BaseModel):
+    line_ranges: list[str] = Field(description="List of inclusive line ranges, e.g., ['1-3', '5-5', '7-10']")
+class PlaywrightLaunchOptions(TypedDict):
+    executable_path: NotRequired[str | Path]
+    channel: NotRequired[str]
+    args: NotRequired[Sequence[str]]
+    ignore_default_args: NotRequired[bool | Sequence[str]]
+    handle_sigint: NotRequired[bool]
+    handle_sigterm: NotRequired[bool]
+    handle_sighup: NotRequired[bool]
+    timeout: NotRequired[float]
+    env: NotRequired[dict[str, str | float | bool]]
+    headless: NotRequired[bool]
+    devtools: NotRequired[bool]
+    proxy: NotRequired[playwright.sync_api.ProxySettings]
+    downloads_path: NotRequired[str | Path]
+    slow_mo: NotRequired[float]
+    traces_dir: NotRequired[str | Path]
+    chromium_sandbox: NotRequired[bool]
+    firefox_user_prefs: NotRequired[dict[str, str | float | bool]]
+class PlaywrightPersistencyOptions(TypedDict):
+    user_data_dir: NotRequired[str | Path]
+    storage_state: NotRequired[playwright.sync_api.StorageState]
+class PlaywrightOptions(PlaywrightLaunchOptions, PlaywrightPersistencyOptions): ...
+def get_default_playwright_launch_options() -> PlaywrightLaunchOptions:
+    return {"headless": True}
 @dataclass
 class PlayWrightBot:
     """
@@ -73,7 +109,8 @@ class PlayWrightBot:
         chatterer (Chatterer): An instance of the language model interface for processing text.
     """
-    chatterer: Chatterer = field(default_factory=Chatterer.openai)
+    engine: Literal["firefox", "chromium", "webkit"] = "firefox"
+    chatterer: Optional[Chatterer] = field(default_factory=Chatterer.openai)
     playwright_launch_options: PlaywrightLaunchOptions = field(default_factory=get_default_playwright_launch_options)
     playwright_persistency_options: PlaywrightPersistencyOptions = field(default_factory=PlaywrightPersistencyOptions)
     html_to_markdown_options: HtmlToMarkdownOptions = field(default_factory=get_default_html_to_markdown_options)
@@ -109,28 +146,43 @@ Markdown-formatted webpage content is provided below for your reference:
     def get_sync_playwright(self) -> playwright.sync_api.Playwright:
         if self.sync_playwright is None:
-            self.sync_playwright = playwright.sync_api.sync_playwright().start()
+            from playwright.sync_api import sync_playwright
+            self.sync_playwright = sync_playwright().start()
         return self.sync_playwright
     async def get_async_playwright(self) -> playwright.async_api.Playwright:
         if self.async_playwright is None:
-            self.async_playwright = await playwright.async_api.async_playwright().start()
+            from playwright.async_api import async_playwright
+            self.async_playwright = await async_playwright().start()
         return self.async_playwright
     def get_sync_browser(self) -> playwright.sync_api.BrowserContext:
         if self.sync_browser_context is not None:
             return self.sync_browser_context
+        def get_browser() -> playwright.sync_api.BrowserType:
+            playwright = self.get_sync_playwright()
+            if self.engine == "firefox":
+                return playwright.firefox
+            elif self.engine == "chromium":
+                return playwright.chromium
+            elif self.engine == "webkit":
+                return playwright.webkit
+            else:
+                raise ValueError(f"Unsupported engine: {self.engine}")
         user_data_dir = self.playwright_persistency_options.get("user_data_dir")
         if user_data_dir:
             # Use persistent context if user_data_dir is provided
-            self.sync_browser_context = self.get_sync_playwright().chromium.launch_persistent_context(
+            self.sync_browser_context = get_browser().launch_persistent_context(
                 user_data_dir=user_data_dir, **self.playwright_launch_options
             )
             return self.sync_browser_context
         # Otherwise, launch a new context
-        browser = self.get_sync_playwright().chromium.launch(**self.playwright_launch_options)
+        browser = get_browser().launch(**self.playwright_launch_options)
         storage_state = self.playwright_persistency_options.get("storage_state")
         if storage_state:
             self.sync_browser_context = browser.new_context(storage_state=storage_state)
@@ -142,16 +194,27 @@ Markdown-formatted webpage content is provided below for your reference:
         if self.async_browser_context is not None:
             return self.async_browser_context
+        async def get_browser() -> playwright.async_api.BrowserType:
+            playwright = await self.get_async_playwright()
+            if self.engine == "firefox":
+                return playwright.firefox
+            elif self.engine == "chromium":
+                return playwright.chromium
+            elif self.engine == "webkit":
+                return playwright.webkit
+            else:
+                raise ValueError(f"Unsupported engine: {self.engine}")
         user_data_dir = self.playwright_persistency_options.get("user_data_dir")
         if user_data_dir:
             # Use persistent context if user_data_dir is provided
-            self.async_browser_context = await (await self.get_async_playwright()).chromium.launch_persistent_context(
+            self.async_browser_context = await (await get_browser()).launch_persistent_context(
                 user_data_dir=user_data_dir, **self.playwright_launch_options
             )
             return self.async_browser_context
         # Otherwise, launch a new context
-        browser = await (await self.get_async_playwright()).chromium.launch(**self.playwright_launch_options)
+        browser = await (await get_browser()).launch(**self.playwright_launch_options)
         storage_state = self.playwright_persistency_options.get("storage_state")
         if storage_state:
             self.async_browser_context = await browser.new_context(storage_state=storage_state)
@@ -232,18 +295,24 @@ Markdown-formatted webpage content is provided below for your reference:
         Returns:
             str: The page content converted to Markdown.
         """
-        page = self.get_page(url, timeout=timeout, referer=referer)
-        if wait:
-            page.wait_for_timeout(wait * 1000)
-        if scrolldown:
-            page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
-            if sleep:
-                page.wait_for_timeout(sleep * 1000)
-        if reload:
-            page.reload(timeout=int(timeout * 1000))
-        html = page.content()
+        page: Optional[playwright.sync_api.Page] = None
+        if not is_remote_url(url) and Path(url).is_file() and Path(url).suffix.lower() == ".html":
+            with open(url, "r", encoding="utf-8") as f:
+                html = f.read()
+        else:
+            page = self.get_page(url, timeout=timeout, referer=referer)
+            if wait:
+                page.wait_for_timeout(wait * 1000)
+            if scrolldown:
+                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+                if sleep:
+                    page.wait_for_timeout(sleep * 1000)
+            if reload:
+                page.reload(timeout=int(timeout * 1000))
+            html = page.content()
         md = html_to_markdown(html=html, options=self.html_to_markdown_options)
-        if not keep_page:
+        if not keep_page and page is not None:
             page.close()
         return md
@@ -275,18 +344,23 @@ Markdown-formatted webpage content is provided below for your reference:
         Returns:
             str: The page content converted to Markdown.
         """
-        page = await self.aget_page(url, timeout=timeout, referer=referer)
-        if wait:
-            await page.wait_for_timeout(wait * 1000)
-        if scrolldown:
-            await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
-            if sleep:
-                await page.wait_for_timeout(sleep * 1000)
-        if reload:
-            await page.reload(timeout=int(timeout * 1000))
-        html = await page.content()
+        page: Optional[playwright.async_api.Page] = None
+        if not is_remote_url(url) and Path(url).is_file() and Path(url).suffix.lower() == ".html":
+            with open(url, "r", encoding="utf-8") as f:
+                html = f.read()
+        else:
+            page = await self.aget_page(url, timeout=timeout, referer=referer)
+            if wait:
+                await page.wait_for_timeout(wait * 1000)
+            if scrolldown:
+                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+                if sleep:
+                    await page.wait_for_timeout(sleep * 1000)
+            if reload:
+                await page.reload(timeout=int(timeout * 1000))
+            html = await page.content()
         md = html_to_markdown(html=html, options=self.html_to_markdown_options)
-        if not keep_page:
+        if not keep_page and page is not None:
             await page.close()
         return md
@@ -417,6 +491,8 @@ Markdown-formatted webpage content is provided below for your reference:
         Returns:
             str: Filtered Markdown containing only the important lines.
         """
+        if self.chatterer is None:
+            raise ValueError("Chatterer instance is not set. Please provide a valid Chatterer instance.")
         markdown_content = self.url_to_md(
             url,
             wait=wait,
@@ -498,6 +574,8 @@ Markdown-formatted webpage content is provided below for your reference:
         Returns:
             str: Filtered Markdown containing only the important lines.
         """
+        if self.chatterer is None:
+            raise ValueError("Chatterer instance is not set. Please provide a valid Chatterer instance.")
         markdown_content = await self.aurl_to_md(
             url,
             wait=wait,
@@ -542,75 +620,87 @@ Markdown-formatted webpage content is provided below for your reference:
     def describe_images(self, markdown_text: str, referer_url: str) -> str:
         """
         Replace image URLs in Markdown text with their alt text and generate descriptions using a language model.
+        Using Playwright for fetching images to bypass CDN protections.
         """
-        image_url_and_markdown_links: dict[Optional[Base64Image], list[MarkdownLink]] = (
-            get_image_url_and_markdown_links(
-                markdown_text=markdown_text,
-                headers=self.headers | {"Referer": referer_url},
-                config=self.image_processing_config,
-            )
-        )
-        image_description_and_references: ImageDescriptionAndReferences = ImageDescriptionAndReferences({})
-        for image_url, markdown_links in image_url_and_markdown_links.items():
-            if image_url is not None:
-                try:
-                    image_summary: str = self.chatterer.describe_image(
-                        image_url=image_url.data_uri,
-                        instruction=self.image_description_instruction,
-                    )
-                except Exception:
-                    print_exc()
-                    continue
-                image_description_and_references[image_summary] = markdown_links
-            else:
-                image_description_and_references[None] = markdown_links
-        return replace_images(
+        if self.chatterer is None:
+            raise ValueError("Chatterer instance is not set. Please provide a valid Chatterer instance.")
+        return caption_markdown_images(
             markdown_text=markdown_text,
-            image_description_and_references=image_description_and_references,
+            headers=self.headers | {"Referer": referer_url},
             description_format=self.description_format,
+            image_description_instruction=self.image_description_instruction,
+            chatterer=self.chatterer,
+            image_processing_config=self.image_processing_config,
+            img_bytes_fetcher=self._playwright_fetch_image_bytes,
         )
+    # 기존 adescribe_images 메서드를 다음과 같이 수정합니다.
     async def adescribe_images(self, markdown_text: str, referer_url: str) -> str:
         """
         Replace image URLs in Markdown text with their alt text and generate descriptions using a language model.
+        Using Playwright for fetching images to bypass CDN protections.
         """
-        image_url_and_markdown_links: dict[
-            Optional[Base64Image], list[MarkdownLink]
-        ] = await aget_image_url_and_markdown_links(
+        if self.chatterer is None:
+            raise ValueError("Chatterer instance is not set. Please provide a valid Chatterer instance.")
+        return await acaption_markdown_images(
             markdown_text=markdown_text,
             headers=self.headers | {"Referer": referer_url},
-            config=self.image_processing_config,
+            description_format=self.description_format,
+            image_description_instruction=self.image_description_instruction,
+            chatterer=self.chatterer,
+            image_processing_config=self.image_processing_config,
+            img_bytes_fetcher=self._aplaywright_fetch_image_bytes,
         )
-        async def dummy() -> None:
-            pass
-        def _handle_exception(e: Optional[str | BaseException]) -> TypeGuard[Optional[str]]:
-            if isinstance(e, BaseException):
-                print(format_exception_only(type(e), e))
-                return False
-            return True
-        coros: list[Awaitable[Optional[str]]] = [
-            self.chatterer.adescribe_image(image_url=image_url.data_uri, instruction=self.image_description_instruction)
-            if image_url is not None
-            else dummy()
-            for image_url in image_url_and_markdown_links.keys()
-        ]
-        return replace_images(
-            markdown_text=markdown_text,
-            image_description_and_references=ImageDescriptionAndReferences({
-                image_summary: markdown_links
-                for markdown_links, image_summary in zip(
-                    image_url_and_markdown_links.values(), await asyncio.gather(*coros, return_exceptions=True)
+    def _playwright_fetch_image_bytes(self, image_url: str, headers: dict[str, str]) -> bytes:
+        """Playwright를 사용하여 동기적으로 이미지 바이트를 가져옵니다."""
+        page: Optional[playwright.sync_api.Page] = None
+        try:
+            # Get the existing synchronous browser context.
+            page = self.get_sync_browser().new_page()
+            # Set the provided headers as extra HTTP headers for the page.
+            # This will apply to all subsequent requests made by the page.
+            page.set_extra_http_headers(headers)
+            response = page.goto(image_url, wait_until="load", timeout=15000)
+            if response and response.ok:
+                return response.body()
+            else:
+                return b""
+        except Exception as e:
+            print(f"Playwright exception fetching image: {image_url}, Error: {e}")
+            return b""
+        finally:
+            if page:
+                page.close()
+    async def _aplaywright_fetch_image_bytes(self, image_url: str, headers: dict[str, str]) -> bytes:
+        """Playwright를 사용하여 비동기적으로 이미지 바이트를 가져옵니다."""
+        page: Optional[playwright.async_api.Page] = None
+        try:
+            # Get the existing asynchronous browser context.
+            page = await (await self.get_async_browser()).new_page()
+            # Set the provided headers as extra HTTP headers for the page.
+            # This will apply to all subsequent requests made by the page.
+            await page.set_extra_http_headers(headers)
+            response = await page.goto(image_url, wait_until="load", timeout=15000)
+            if response and response.ok:
+                return await response.body()
+            else:
+                # 실패 시 로그를 남기거나 None을 반환할 수 있습니다.
+                print(
+                    f"Playwright failed to fetch image: {image_url}, Status: {response.status if response else 'No Response'}"
                 )
-                if _handle_exception(image_summary)
-            }),
-            description_format=self.description_format,
-        )
+                return b""
+        except Exception as e:
+            # 예외 발생 시 로그를 남깁니다.
+            print(f"Playwright exception fetching image: {image_url}, Error: {e}")
+            return b""
+        finally:
+            # 페이지를 항상 닫아 리소스를 정리합니다.
+            if page:
+                await page.close()
     def __enter__(self) -> Self:
         return self

chatterer/tools/youtube.py CHANGED Viewed

@@ -29,7 +29,7 @@ def get_youtube_video_details(
 def get_youtube_video_subtitle(video_id: str) -> str:
     """Get the transcript of a YouTube video using the given video ID."""
-    from youtube_transcript_api._api import YouTubeTranscriptApi
+    from youtube_transcript_api import YouTubeTranscriptApi  # pyright: ignore[reportPrivateImportUsage]
     get_transcript = YouTubeTranscriptApi.get_transcript  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
     list_transcripts = YouTubeTranscriptApi.list_transcripts  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
@@ -144,3 +144,4 @@ class YoutubeSearchResult:
 if __name__ == "__main__":
     print(get_youtube_video_details("BTS"))
     # print(get_youtube_transcript("y7jrpS8GHxs"))

chatterer/utils/__init__.py CHANGED Viewed

@@ -1,10 +1,11 @@
+from .base64_image import Base64Image
+from .cli import ArgumentSpec, BaseArguments
 from .code_agent import (
     CodeExecutionResult,
     FunctionSignature,
     get_default_repl_tool,
     insert_callables_into_global,
 )
-from .image import Base64Image
 __all__ = [
     "Base64Image",
@@ -12,4 +13,6 @@ __all__ = [
     "CodeExecutionResult",
     "get_default_repl_tool",
     "insert_callables_into_global",
+    "BaseArguments",
+    "ArgumentSpec",
 ]

chatterer/utils/{image.py → base64_image.py} RENAMED Viewed

@@ -7,6 +7,7 @@ from logging import getLogger
 from pathlib import Path
 from typing import (
     Awaitable,
+    Callable,
     ClassVar,
     Literal,
     NotRequired,
@@ -18,7 +19,6 @@ from typing import (
     TypeGuard,
     cast,
     get_args,
-    overload,
 )
 from urllib.parse import urlparse
@@ -55,10 +55,11 @@ def get_default_image_processing_config() -> ImageProcessingConfig:
         "min_largest_side": 200,
         "resize_if_min_side_exceeds": 2000,
         "resize_target_for_min_side": 1000,
-        "formats": ["png", "jpeg", "gif", "bmp", "webp"],
+        "formats": ["png", "jpeg", "jpg", "gif", "bmp", "webp"],
     }
+# image_url: str, headers: dict[str, str]) -> Optional[bytes]:
 class Base64Image(BaseModel):
     ext: ImageType
     data: str
@@ -86,44 +87,51 @@ class Base64Image(BaseModel):
     def from_bytes(cls, data: bytes, ext: ImageType) -> Self:
         return cls(ext=ext, data=b64encode(data).decode("utf-8"))
-    @overload
     @classmethod
     def from_url_or_path(
         cls,
         url_or_path: str,
         *,
-        headers: dict[str, str] = ...,
-        config: ImageProcessingConfig = ...,
-        return_coro: Literal[True],
-    ) -> Awaitable[Optional[Self]]: ...
-    @overload
-    @classmethod
-    def from_url_or_path(
-        cls,
-        url_or_path: str,
-        *,
-        headers: dict[str, str] = ...,
-        config: ImageProcessingConfig = ...,
-        return_coro: Literal[False] = False,
-    ) -> Optional[Self]: ...
+        headers: dict[str, str] = {},
+        config: ImageProcessingConfig = get_default_image_processing_config(),
+        img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], bytes]] = None,
+    ) -> Optional[Self]:
+        """Return a Base64Image instance from a URL or local file path."""
+        if maybe_base64 := cls.from_string(url_or_path):
+            return maybe_base64
+        elif is_remote_url(url_or_path):
+            if img_bytes_fetcher:
+                img_bytes = img_bytes_fetcher(url_or_path, headers)
+            else:
+                img_bytes = cls._fetch_remote_image(url_or_path, headers)
+            if not img_bytes:
+                return None
+            return cls._convert_image_into_base64(img_bytes, config)
+        try:
+            return cls._process_local_image(Path(url_or_path), config)
+        except Exception:
+            return None
     @classmethod
-    def from_url_or_path(
+    async def afrom_url_or_path(
         cls,
         url_or_path: str,
         *,
         headers: dict[str, str] = {},
         config: ImageProcessingConfig = get_default_image_processing_config(),
-        return_coro: bool = False,
-    ) -> Optional[Self] | Awaitable[Optional[Self]]:
+        img_bytes_fetcher: Optional[Callable[[str, dict[str, str]], Awaitable[bytes]]] = None,
+    ) -> Optional[Self]:
         """Return a Base64Image instance from a URL or local file path."""
         if maybe_base64 := cls.from_string(url_or_path):
             return maybe_base64
-        elif _is_remote_url(url_or_path):
-            if return_coro:
-                return cls._afetch_remote_image(url_or_path, headers, config)
-            return cls._fetch_remote_image(url_or_path, headers, config)
+        elif is_remote_url(url_or_path):
+            if img_bytes_fetcher:
+                img_bytes = await img_bytes_fetcher(url_or_path, headers)
+            else:
+                img_bytes = await cls._afetch_remote_image(url_or_path, headers)
+            if not img_bytes:
+                return None
+            return cls._convert_image_into_base64(img_bytes, config)
         try:
             return cls._process_local_image(Path(url_or_path), config)
         except Exception:
@@ -142,20 +150,27 @@ class Base64Image(BaseModel):
         return ext in allowed_types
     @classmethod
-    def _fetch_remote_image(cls, url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[Self]:
-        image_bytes = _get_image_bytes(image_url=url.strip(), headers=headers)
-        if not image_bytes:
-            return None
-        return cls._convert_image_into_base64(image_bytes, config)
+    def _fetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
+        try:
+            with requests.Session() as session:
+                response = session.get(url.strip(), headers={k: str(v) for k, v in headers.items()})
+                response.raise_for_status()
+                image_bytes = bytes(response.content or b"")
+                if not image_bytes:
+                    return b""
+                return image_bytes
+        except Exception:
+            return b""
     @classmethod
-    async def _afetch_remote_image(
-        cls, url: str, headers: dict[str, str], config: ImageProcessingConfig
-    ) -> Optional[Self]:
-        image_bytes = await _aget_image_bytes(image_url=url.strip(), headers=headers)
-        if not image_bytes:
-            return None
-        return cls._convert_image_into_base64(image_bytes, config)
+    async def _afetch_remote_image(cls, url: str, headers: dict[str, str]) -> bytes:
+        try:
+            async with ClientSession() as session:
+                async with session.get(url.strip(), headers={k: str(v) for k, v in headers.items()}) as response:
+                    response.raise_for_status()
+                    return await response.read()
+        except Exception:
+            return b""
     @classmethod
     def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
@@ -163,6 +178,7 @@ class Base64Image(BaseModel):
         Retrieve an image in bytes and return a base64-encoded data URL,
         applying dynamic rules from 'config'.
         """
         if not config:
             # config 없으면 그냥 기존 헤더만 보고 돌려주는 간단 로직
             return cls._simple_base64_encode(image_data)
@@ -225,7 +241,7 @@ class Base64Image(BaseModel):
         """
         Retrieve an image URL and return a base64-encoded data URL.
         """
-        ext = _detect_image_type(image_data)
+        ext = detect_image_type(image_data)
         if not ext:
             return
         return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
@@ -241,12 +257,12 @@ class Base64Image(BaseModel):
         return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
-def _is_remote_url(path: str) -> bool:
+def is_remote_url(path: str) -> bool:
     parsed = urlparse(path)
     return bool(parsed.scheme and parsed.netloc)
-def _detect_image_type(image_data: bytes) -> Optional[ImageType]:
+def detect_image_type(image_data: bytes) -> Optional[ImageType]:
     """
     Detect the image format based on the image binary signature (header).
     Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
@@ -267,25 +283,3 @@ def _detect_image_type(image_data: bytes) -> Optional[ImageType]:
     # BMP: 시작 바이트가 BM
     elif image_data.startswith(b"BM"):
         return "bmp"
-def _get_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
-    try:
-        with requests.Session() as session:
-            response = session.get(image_url, headers={k: str(v) for k, v in headers.items()})
-            if not response.ok:
-                return
-            return bytes(response.content or b"")
-    except Exception:
-        return
-async def _aget_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
-    try:
-        async with ClientSession() as session:
-            async with session.get(image_url, headers={k: str(v) for k, v in headers.items()}) as response:
-                if not response.ok:
-                    return
-                return await response.read()
-    except Exception:
-        return

chatterer 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

chatterer 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl