chatterer 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/examples/anything_to_markdown.py +21 -31
- chatterer/examples/get_code_snippets.py +18 -27
- chatterer/examples/login_with_playwright.py +68 -83
- chatterer/examples/make_ppt.py +3 -14
- chatterer/examples/pdf_to_markdown.py +29 -59
- chatterer/examples/pdf_to_text.py +14 -20
- chatterer/examples/transcription_api.py +21 -36
- chatterer/examples/upstage_parser.py +41 -47
- chatterer/examples/webpage_to_markdown.py +19 -28
- chatterer/strategies/atom_of_thoughts.py +1 -1
- chatterer/tools/convert_pdf_to_markdown.py +105 -14
- chatterer/tools/convert_to_text.py +3 -4
- chatterer/tools/upstage_document_parser.py +2 -2
- chatterer/utils/code_agent.py +1 -1
- {chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/METADATA +2 -2
- {chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/RECORD +19 -19
- {chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/WHEEL +1 -1
- {chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/entry_points.txt +0 -0
- {chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/top_level.txt +0 -0
chatterer/examples/anything_to_markdown.py
CHANGED
@@ -1,36 +1,27 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
+import logging
 from pathlib import Path
 from typing import Optional, TypedDict

 import openai
-from spargear import
+from spargear import BaseArguments

 from chatterer import anything_to_markdown

+logger = logging.getLogger(__name__)
+

 class AnythingToMarkdownReturns(TypedDict):
-
-
+    input: str
+    output: Optional[str]
     out_text: str


 class AnythingToMarkdownArguments(BaseArguments):
     """Command line arguments for converting various file types to markdown."""

-
-
+    input: str
+    """Input file to convert to markdown. Can be a file path or a URL."""
+    output: Optional[str] = None
     """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
     model: Optional[str] = None
     """OpenAI Model to use for conversion"""
@@ -50,14 +41,13 @@ class AnythingToMarkdownArguments(BaseArguments):
     """Encoding for the output file."""

     def run(self) -> AnythingToMarkdownReturns:
-        in_path = self.in_path.unwrap()
         if not self.prevent_save_file:
-            if not self.
-
+            if not self.output:
+                output = Path(self.input).with_suffix(".md")
             else:
-
+                output = Path(self.output)
         else:
-
+            output = None

         if self.model:
             llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
@@ -67,22 +57,22 @@ class AnythingToMarkdownArguments(BaseArguments):
             llm_model = None

         text: str = anything_to_markdown(
-
+            self.input,
             llm_client=llm_client,
             llm_model=llm_model,
             style_map=self.style_map,
             exiftool_path=self.exiftool_path,
             docintel_endpoint=self.docintel_endpoint,
         )
-        if
-
-
-        logger.info(f"Converted `{
+        if output:
+            output.parent.mkdir(parents=True, exist_ok=True)
+            output.write_text(text, encoding=self.encoding)
+            logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
         else:
-            logger.info(f"Converted `{
+            logger.info(f"Converted `{self.input}` to markdown.")
         return {
-            "
-            "
+            "input": self.input,
+            "output": str(output) if output is not None else None,
             "out_text": text,
         }

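Aside from moving the logger bootstrap to a plain `logging.getLogger(__name__)`, the example scripts in this release replace `ArgumentSpec`-wrapped fields with plain typed class attributes on `BaseArguments`. A minimal sketch of the new declarative style, assuming spargear >= 0.2.0 (the bound this release pins in METADATA) turns annotated attributes into CLI options the way the diff implies; `SketchArgs` is a hypothetical class for illustration:

import logging
from typing import Optional

from spargear import BaseArguments

logger = logging.getLogger(__name__)


class SketchArgs(BaseArguments):  # hypothetical, for illustration only
    input: str
    """Required value, mirroring the new `input` field above."""
    output: Optional[str] = None
    """Optional value with a default, mirroring the new `output` field."""


# Fields are read directly as plain attributes (no `.unwrap()` calls), which is
# exactly how the updated run() methods access self.input and self.output.
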
chatterer/examples/get_code_snippets.py
CHANGED
@@ -1,27 +1,19 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
+import logging
 from pathlib import Path
 from typing import Optional

-from spargear import
+from spargear import BaseArguments

 from chatterer import CodeSnippets

+logger = logging.getLogger(__name__)
+

 class GetCodeSnippetsArgs(BaseArguments):
-
-
+    input: str
+    """Path to the package or file from which to extract code snippets."""
+    output: Optional[str] = None
+    """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
     ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
     """List of file patterns to ignore."""
     glob_patterns: list[str] = ["*.py"]
@@ -32,27 +24,26 @@ class GetCodeSnippetsArgs(BaseArguments):
     """Prevent saving the extracted code snippets to a file."""

     def run(self) -> CodeSnippets:
-        path_or_pkgname = self.path_or_pkgname.unwrap()
         if not self.prevent_save_file:
-            if not self.
-
+            if not self.output:
+                output = Path(__file__).with_suffix(".txt")
             else:
-
+                output = Path(self.output)
         else:
-
+            output = None

         cs = CodeSnippets.from_path_or_pkgname(
-            path_or_pkgname=
+            path_or_pkgname=self.input,
             ban_file_patterns=self.ban_file_patterns,
             glob_patterns=self.glob_patterns,
             case_sensitive=self.case_sensitive,
         )
-        if
-
-
-        logger.info(f"Extracted code snippets from `{
+        if output is not None:
+            output.parent.mkdir(parents=True, exist_ok=True)
+            output.write_text(cs.snippets_text, encoding="utf-8")
+            logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
         else:
-            logger.info(f"Extracted code snippets from `{
+            logger.info(f"Extracted code snippets from `{self.input}`.")
         return cs


chatterer/examples/login_with_playwright.py
CHANGED
@@ -1,17 +1,5 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
 import json
+import logging
 import sys
 from pathlib import Path

@@ -19,76 +7,8 @@ from spargear import BaseArguments, SubcommandSpec

 from chatterer import PlayWrightBot

+logger = logging.getLogger(__name__)

-def read_session(url: str, jsonpath: Path) -> None:
-    """
-    Loads the session state from the specified JSON file, then navigates
-    to a protected_url that normally requires login. If the stored session
-    is valid, it should open without re-entering credentials.
-
-    Correction: Loads the JSON content into a dict first to satisfy type hints.
-    """
-    logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
-
-    if not jsonpath.exists():
-        logger.error(f"Session file not found at {jsonpath}")
-        sys.exit(1)
-
-    # Load the storage state from the JSON file into a dictionary
-    logger.info(f"Reading storage state content from {jsonpath} ...")
-    try:
-        with open(jsonpath, "r", encoding="utf-8") as f:
-            # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
-            storage_state_dict = json.load(f)
-    except json.JSONDecodeError:
-        logger.error(f"Failed to decode JSON from {jsonpath}")
-        sys.exit(1)
-    except Exception as e:
-        logger.error(f"Error reading file {jsonpath}: {e}")
-        sys.exit(1)
-
-    logger.info("Launching browser with loaded session state...")
-    with PlayWrightBot(
-        playwright_launch_options={"headless": False},
-        # Pass the loaded dictionary, which should match the expected 'StorageState' type
-        playwright_persistency_options={"storage_state": storage_state_dict},
-    ) as bot:
-        bot.get_page(url)
-
-        logger.info("Press Enter in the console when you're done checking the protected page.")
-        input(" >> Press Enter to exit: ")
-
-    logger.info("Done! Browser is now closed.")
-
-
-def write_session(url: str, jsonpath: Path) -> None:
-    """
-    Launches a non-headless browser and navigates to the login_url.
-    The user can manually log in, then press Enter in the console
-    to store the current session state into a JSON file.
-    """
-    logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
-
-    # Ensure jsonpath directory exists
-    jsonpath.parent.mkdir(parents=True, exist_ok=True)
-
-    with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
-        bot.get_page(url)
-
-        logger.info("After completing the login in the browser, press Enter here to save the session.")
-        input(" >> Press Enter when ready: ")
-
-        # get_sync_browser() returns the BrowserContext internally
-        context = bot.get_sync_browser()
-
-        # Save the current session (cookies, localStorage) to a JSON file
-        logger.info(f"Saving storage state to {jsonpath} ...")
-        context.storage_state(path=jsonpath)  # Pass Path object directly
-
-    logger.info("Done! Browser is now closed.")
-
-
-# --- Spargear Declarative CLI Definition ---

 # Define the default path location relative to this script file
 DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
@@ -160,7 +80,72 @@ class LoginWithPlaywrightArgs(BaseArguments):
         sys.exit(1)


-
+def read_session(url: str, jsonpath: Path) -> None:
+    """
+    Loads the session state from the specified JSON file, then navigates
+    to a protected_url that normally requires login. If the stored session
+    is valid, it should open without re-entering credentials.
+
+    Correction: Loads the JSON content into a dict first to satisfy type hints.
+    """
+    logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
+
+    if not jsonpath.exists():
+        logger.error(f"Session file not found at {jsonpath}")
+        sys.exit(1)
+
+    # Load the storage state from the JSON file into a dictionary
+    logger.info(f"Reading storage state content from {jsonpath} ...")
+    try:
+        with open(jsonpath, "r", encoding="utf-8") as f:
+            # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
+            storage_state_dict = json.load(f)
+    except json.JSONDecodeError:
+        logger.error(f"Failed to decode JSON from {jsonpath}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Error reading file {jsonpath}: {e}")
+        sys.exit(1)
+
+    logger.info("Launching browser with loaded session state...")
+    with PlayWrightBot(
+        playwright_launch_options={"headless": False},
+        # Pass the loaded dictionary, which should match the expected 'StorageState' type
+        playwright_persistency_options={"storage_state": storage_state_dict},
+    ) as bot:
+        bot.get_page(url)
+
+        logger.info("Press Enter in the console when you're done checking the protected page.")
+        input(" >> Press Enter to exit: ")
+
+    logger.info("Done! Browser is now closed.")
+
+
+def write_session(url: str, jsonpath: Path) -> None:
+    """
+    Launches a non-headless browser and navigates to the login_url.
+    The user can manually log in, then press Enter in the console
+    to store the current session state into a JSON file.
+    """
+    logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
+
+    # Ensure jsonpath directory exists
+    jsonpath.parent.mkdir(parents=True, exist_ok=True)
+
+    with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
+        bot.get_page(url)
+
+        logger.info("After completing the login in the browser, press Enter here to save the session.")
+        input(" >> Press Enter when ready: ")
+
+        # get_sync_browser() returns the BrowserContext internally
+        context = bot.get_sync_browser()
+
+        # Save the current session (cookies, localStorage) to a JSON file
+        logger.info(f"Saving storage state to {jsonpath} ...")
+        context.storage_state(path=jsonpath)  # Pass Path object directly
+
+    logger.info("Done! Browser is now closed.")


 def main() -> None:

chatterer/examples/make_ppt.py
CHANGED
@@ -1,16 +1,3 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
 import re
 import sys
 from pathlib import Path
@@ -192,7 +179,9 @@ class MakePptArguments(BaseArguments):
     """Prompt for organizing slides into a presentation script"""

     # LLM Settings
-    provider: str =
+    provider: str = (
+        "openai:gpt-4.1"  # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
+    )
     """Name of the language model to use (provider:model_name)"""

     # Other settings

chatterer/examples/pdf_to_markdown.py
CHANGED
@@ -1,16 +1,4 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
+import logging
 import sys
 from pathlib import Path
 from typing import Optional
@@ -19,46 +7,50 @@ from spargear import ArgumentSpec, BaseArguments

 from chatterer import Chatterer, PdfToMarkdown

+logger = logging.getLogger(__name__)
+

 class PdfToMarkdownArgs(BaseArguments):
-
-
+    input: str
+    """Input PDF file or directory containing PDF files to convert to markdown."""
+    output: Optional[str] = None
     """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
+    """Chatterer instance for communication."""
+    page: Optional[str] = None
+    """Zero-based page indices to convert (e.g., '0,2,4-8')."""
+    recursive: bool = False
+    """If input is a directory, search for PDFs recursively."""
     chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
         ["--chatterer"],
-
+        default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
         help="Chatterer instance for communication.",
         type=Chatterer.from_provider,
-        required=True,
     )
-    pages: Optional[str] = None
-    """Page indices to convert (e.g., '1,3,5-9')."""
-    recursive: bool = False
-    """If input is a directory, search for PDFs recursively."""

     def run(self) -> list[dict[str, str]]:
-
-        page_indices = parse_page_indices(self.pages) if self.pages else None
+        input = Path(self.input).resolve()
         pdf_files: list[Path] = []
         is_dir = False
-        if
-            if
+        if input.is_file():
+            if input.suffix.lower() != ".pdf":
                 sys.exit(1)
-            pdf_files.append(
-        elif
+            pdf_files.append(input)
+        elif input.is_dir():
             is_dir = True
             pattern = "*.pdf"
-            pdf_files = sorted([
+            pdf_files = sorted([
+                f for f in (input.rglob(pattern) if self.recursive else input.glob(pattern)) if f.is_file()
+            ])
             if not pdf_files:
                 sys.exit(0)
         else:
             sys.exit(1)
-        if self.
-            out_base = Path(self.
+        if self.output:
+            out_base = Path(self.output).resolve()
         elif is_dir:
-            out_base =
+            out_base = input
         else:
-            out_base =
+            out_base = input.with_suffix(".md")

         if is_dir:
             out_base.mkdir(parents=True, exist_ok=True)
@@ -68,37 +60,15 @@ class PdfToMarkdownArgs(BaseArguments):
         converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
         results: list[dict[str, str]] = []
         for pdf in pdf_files:
-
-            md = converter.convert(str(pdf), page_indices)
-
-
-            results.append({"input": pdf.as_posix(), "output":
+            output: Path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
+            md: str = converter.convert(pdf_input=str(pdf), page_indices=self.page)
+            output.parent.mkdir(parents=True, exist_ok=True)
+            output.write_text(md, encoding="utf-8")
+            results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
         logger.info(f"Converted {len(pdf_files)} PDF(s) to markdown and saved to `{out_base}`.")
         return results


-def parse_page_indices(pages_str: str) -> list[int] | None:
-    if not pages_str:
-        return None
-    indices: set[int] = set()
-    for part in pages_str.split(","):
-        part = part.strip()
-        if not part:
-            continue
-        if "-" in part:
-            start_str, end_str = part.split("-", 1)
-            start = int(start_str.strip())
-            end = int(end_str.strip())
-            if start > end:
-                raise ValueError
-            indices.update(range(start, end + 1))
-        else:
-            indices.add(int(part))
-    if not indices:
-        raise ValueError
-    return sorted(indices)
-
-
 def main() -> None:
     PdfToMarkdownArgs().run()

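With this release the `--chatterer` option is optional: a `default_factory` builds a client from a provider string when the flag is omitted, and page-string parsing moved out of the script (the removed `parse_page_indices`) into the library. A short sketch of the two provider paths, reusing only provider strings that appear elsewhere in this diff:

from chatterer import Chatterer

# Default when --chatterer is omitted, per the new default_factory:
default_chatterer = Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20")

# Explicit override, e.g. `--chatterer openai:gpt-4.1` (string format used in make_ppt.py):
override_chatterer = Chatterer.from_provider("openai:gpt-4.1")
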
chatterer/examples/pdf_to_text.py
CHANGED
@@ -1,36 +1,30 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
+import logging
 import sys
 from pathlib import Path
+from typing import Optional

-from spargear import
+from spargear import BaseArguments

 from chatterer.tools.convert_to_text import pdf_to_text

+logger = logging.getLogger(__name__)
+

 class PdfToTextArgs(BaseArguments):
-
-
-
+    input: Path
+    """Path to the PDF file to convert to text."""
+    output: Optional[Path]
+    """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
+    page: Optional[str] = None
+    """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""

     def run(self) -> None:
-        input = self.
-        out = self.
+        input = self.input.resolve()
+        out = self.output or input.with_suffix(".txt")
         if not input.is_file():
             sys.exit(1)
         out.write_text(
-            pdf_to_text(input,
+            pdf_to_text(path_or_file=input, page_indices=self.page),
             encoding="utf-8",
         )
         logger.info(f"Extracted text from `{input}` to `{out}`")

chatterer/examples/transcription_api.py
CHANGED
@@ -2,51 +2,36 @@

 from io import BytesIO
 from pathlib import Path
-from typing import cast
+from typing import Optional, cast

 from openai import OpenAI
 from pydub import AudioSegment
-from spargear import
+from spargear import BaseArguments

 # Maximum chunk length in seconds
 MAX_CHUNK_DURATION = 600


 class TranscriptionApiArguments(BaseArguments):
-
-
-
-
-
-
-
-
-
-
-
-
-        ["--model"],
-        default="gpt-4o-transcribe",
-        help="The model to use for transcription.",
-    )
-    api_key: ArgumentSpec[str] = ArgumentSpec(
-        ["--api-key"],
-        default=None,
-        help="The API key for authentication.",
-    )
-    base_url: ArgumentSpec[str] = ArgumentSpec(
-        ["--base-url"],
-        default="https://api.openai.com/v1",
-        help="The base URL for the API.",
-    )
+    input: Path
+    """The audio file to transcribe."""
+    output: Optional[Path] = None
+    """Path to save the transcription output."""
+    model: str = "gpt-4o-transcribe"
+    """The model to use for transcription."""
+    api_key: Optional[str] = None
+    """The API key for authentication."""
+    base_url: str = "https://api.openai.com/v1"
+    """The base URL for the API."""
+    prompt: str = "Transcribe whole text from audio."
+    """The prompt to use for transcription."""

     def run(self) -> None:
-
-        model = self.model.unwrap()
+        model = self.model

-        client = OpenAI(api_key=self.api_key
+        client = OpenAI(api_key=self.api_key, base_url=self.base_url)

-        audio = load_audio_segment(
+        audio = load_audio_segment(self.input)

         segments = split_audio(audio, MAX_CHUNK_DURATION)
         print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
@@ -54,10 +39,10 @@ class TranscriptionApiArguments(BaseArguments):
         transcripts: list[str] = []
         for idx, seg in enumerate(segments, start=1):
             print(f"[i] Transcribing segment {idx}/{len(segments)}...")
-            transcripts.append(transcribe_segment(seg, client, model))
+            transcripts.append(transcribe_segment(seg, client, model, self.prompt))

         full_transcript = "\n\n".join(transcripts)
-        output_path: Path = self.
+        output_path: Path = self.output or self.input.with_suffix(".txt")
         output_path.write_text(full_transcript, encoding="utf-8")
         print(f"[✓] Transcription saved to: {output_path}")

@@ -94,7 +79,7 @@ def split_audio(audio: AudioSegment, max_duration_s: int) -> list[AudioSegment]:
     return segments


-def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str:
+def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt: str) -> str:
     """
     Transcribe a single AudioSegment chunk and return its text.
     """
@@ -104,7 +89,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str
     mp3_bytes = buffer.read()
     response = client.audio.transcriptions.create(
         model=model,
-        prompt=
+        prompt=prompt,
         file=("audio.mp3", mp3_bytes),
         response_format="text",
         stream=True,

chatterer/examples/upstage_parser.py
CHANGED
@@ -1,17 +1,6 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
+import logging
 from pathlib import Path
+from typing import Optional

 from langchain_core.documents.base import Blob
 from spargear import ArgumentSpec, BaseArguments
@@ -27,28 +16,34 @@ from chatterer.tools.upstage_document_parser import (
     SplitType,
 )

+logger = logging.getLogger(__name__)
+

 class UpstageParserArguments(BaseArguments):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    input: Path
+    """Input file to parse. Can be a PDF, image, or other supported formats."""
+    output: Optional[Path] = None
+    """Output file path for the parsed content. Defaults to input file with .md suffix if not provided."""
+    api_key: Optional[str] = None
+    """API key for the Upstage API."""
+    base_url: str = DOCUMENT_PARSE_BASE_URL
+    """Base URL for the Upstage API."""
+    model: str = DOCUMENT_PARSE_DEFAULT_MODEL
+    """Model to use for parsing."""
+    split: SplitType = "none"
+    """Split type for the parsed content."""
+    ocr: OCR = "auto"
+    """OCR type for parsing."""
+    output_format: OutputFormat = "markdown"
+    """Output format for the parsed content."""
+    coordinates: bool = False
+    """Whether to include coordinates in the output."""
+    base64_encoding: list[Category] = ["figure"]
+    """Base64 encoding for specific categories in the parsed content."""
+    image_description_instruction: str = "Describe the image in detail."
+    """Instruction for generating image descriptions."""
+    image_dir: str = DEFAULT_IMAGE_DIR
+    """Directory to save images extracted from the document."""
     chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
         ["--chatterer"],
         default=None,
@@ -57,26 +52,25 @@ class UpstageParserArguments(BaseArguments):
     )

     def run(self) -> None:
-        input = UpstageParserArguments.
-        out = UpstageParserArguments.
+        input = UpstageParserArguments.input.resolve()
+        out = UpstageParserArguments.output or input.with_suffix(".md")

         parser = UpstageDocumentParseParser(
-            api_key=UpstageParserArguments.api_key
-            base_url=UpstageParserArguments.base_url
-            model=UpstageParserArguments.model
-            split=UpstageParserArguments.split
-            ocr=UpstageParserArguments.ocr
-            output_format=UpstageParserArguments.output_format
-            coordinates=UpstageParserArguments.coordinates
-            base64_encoding=UpstageParserArguments.base64_encoding
-            image_description_instruction=UpstageParserArguments.image_description_instruction
-            image_dir=UpstageParserArguments.image_dir
+            api_key=UpstageParserArguments.api_key,
+            base_url=UpstageParserArguments.base_url,
+            model=UpstageParserArguments.model,
+            split=UpstageParserArguments.split,
+            ocr=UpstageParserArguments.ocr,
+            output_format=UpstageParserArguments.output_format,
+            coordinates=UpstageParserArguments.coordinates,
+            base64_encoding=UpstageParserArguments.base64_encoding,
+            image_description_instruction=UpstageParserArguments.image_description_instruction,
+            image_dir=UpstageParserArguments.image_dir,
             chatterer=UpstageParserArguments.chatterer.value,
         )
-
         docs = parser.parse(Blob.from_path(input))  # pyright: ignore[reportUnknownMemberType]

-        if UpstageParserArguments.image_dir
+        if UpstageParserArguments.image_dir:
             for path, image in parser.image_data.items():
                 (path := Path(path)).parent.mkdir(parents=True, exist_ok=True)
                 path.write_bytes(image)

chatterer/examples/webpage_to_markdown.py
CHANGED
@@ -1,16 +1,3 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-
-    logger = logging.getLogger(__name__)
-    return logger
-
-
-logger = resolve_import_path_and_get_logger()
 from pathlib import Path
 from typing import Literal

@@ -20,49 +7,53 @@ from chatterer import Chatterer, MarkdownLink, PlayWrightBot


 class WebpageToMarkdownArgs(BaseArguments):
-    url:
-
+    url: str
+    """The URL to crawl."""
+    output: str = Path(__file__).with_suffix(".md").as_posix()
     """The output file path for the markdown file."""
     chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
-        ["--
-        default=None,
-        type=Chatterer.from_provider,
+        ["--chatterer"],
         help="The Chatterer backend and model to use for filtering the markdown.",
+        type=Chatterer.from_provider,
     )
     engine: Literal["firefox", "chromium", "webkit"] = "firefox"
     """The browser engine to use."""

     def run(self) -> None:
         chatterer = self.chatterer.value
-        url: str = self.url.
-
+        url: str = self.url.strip()
+        output: Path = Path(self.output).resolve()
         with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
             md = bot.url_to_md(url)
-
+            output.write_text(md, encoding="utf-8")
             if chatterer is not None:
                 md_llm = bot.url_to_md_with_llm(url.strip())
-
+                output.write_text(md_llm, encoding="utf-8")
             links = MarkdownLink.from_markdown(md, referer_url=url)
             for link in links:
                 if link.type == "link":
-                    print(
+                    print(
+                        f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
+                    )
                 elif link.type == "image":
                     print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")

     async def arun(self) -> None:
         chatterer = self.chatterer.value
-        url: str = self.url.
-
+        url: str = self.url.strip()
+        output: Path = Path(self.output).resolve()
         async with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
             md = await bot.aurl_to_md(url)
-
+            output.write_text(md, encoding="utf-8")
             if chatterer is not None:
                 md_llm = await bot.aurl_to_md_with_llm(url.strip())
-
+                output.write_text(md_llm, encoding="utf-8")
             links = MarkdownLink.from_markdown(md, referer_url=url)
             for link in links:
                 if link.type == "link":
-                    print(
+                    print(
+                        f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
+                    )
                 elif link.type == "image":
                     print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")

chatterer/strategies/atom_of_thoughts.py
CHANGED
@@ -379,7 +379,7 @@ class AoTPipeline:
     chatterer: Chatterer
    max_depth: int = 2
    max_retries: int = 2
-    steps_history: list[StepRecord] = field(default_factory=list)
+    steps_history: list[StepRecord] = field(default_factory=list[StepRecord])
    prompter: AoTPrompter = field(default_factory=AoTPrompter)

    # 4.1) Utility for calling the LLM with Pydantic parsing

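The only change here is the default factory: a subscripted generic alias such as `list[StepRecord]` is callable at runtime and returns an empty list exactly like bare `list`, but it carries the element type, so a strict checker (pyright, which this codebase targets per its inline ignores) infers `list[StepRecord]` instead of a list of unknowns. The same pattern is applied to `Field(default_factory=list[Coordinate])` in upstage_document_parser.py below. A self-contained illustration:

from dataclasses import dataclass, field


@dataclass
class Example:
    # list[int] is a callable generic alias; calling it returns [] at runtime,
    # identical to list(), while giving type checkers the element type.
    values: list[int] = field(default_factory=list[int])


assert list[int]() == []
assert Example().values == []
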
chatterer/tools/convert_pdf_to_markdown.py
CHANGED
@@ -4,7 +4,8 @@ import logging
 import re
 from contextlib import contextmanager
 from dataclasses import dataclass
-from
+from types import EllipsisType
+from typing import TYPE_CHECKING, Callable, Iterable, List, Literal, Optional

 from ..language_model import Chatterer, HumanMessage
 from ..utils.base64_image import Base64Image
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
 MARKDOWN_PATTERN: re.Pattern[str] = re.compile(r"```(?:markdown\s*\n)?(.*?)```", re.DOTALL)
+PageIndexType = Iterable[int | tuple[int | EllipsisType, int | EllipsisType]] | int | str


 @dataclass
@@ -107,8 +109,8 @@ class PdfToMarkdown:

     def convert(
         self,
-        pdf_input:
-        page_indices: Optional[
+        pdf_input: "Document | PathOrReadable",
+        page_indices: Optional[PageIndexType] = None,
         progress_callback: Optional[Callable[[int, int], None]] = None,
     ) -> str:
         """
@@ -123,7 +125,9 @@ class PdfToMarkdown:
             A single string containing the concatenated Markdown output for the processed pages.
         """
         with open_pdf(pdf_input) as doc:
-            target_page_indices = list(
+            target_page_indices = list(
+                _get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True)
+            )
             total_pages_to_process = len(target_page_indices)
             if total_pages_to_process == 0:
                 logger.warning("No pages selected for processing.")
@@ -232,7 +236,7 @@ def render_pdf_as_image(

     images_bytes: dict[int, bytes] = {}
     matrix = Matrix(zoom, zoom)  # Control output resolution
-    for page_idx in _get_page_indices(page_indices, len(doc)):
+    for page_idx in _get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True):
         img_bytes = bytes(
             get_pixmap(
                 page=doc[page_idx],
@@ -243,10 +247,7 @@ def render_pdf_as_image(
     return images_bytes


-def extract_text_from_pdf(
-    doc: "Document",
-    page_indices: Iterable[int] | int | None = None,
-) -> dict[int, str]:
+def extract_text_from_pdf(doc: "Document", page_indices: Optional[PageIndexType] = None) -> dict[int, str]:
     """Convert a PDF file to plain text.

     Extracts text from each page of a PDF file and formats it with page markers.
@@ -261,7 +262,11 @@ def extract_text_from_pdf(
     """
     return {
         page_idx: doc[page_idx].get_textpage().extractText().strip()  # pyright: ignore[reportUnknownMemberType]
-        for page_idx in _get_page_indices(
+        for page_idx in _get_page_indices(
+            page_indices=page_indices,
+            max_doc_pages=len(doc),
+            is_input_zero_based=True,
+        )
     }


@@ -292,11 +297,97 @@ def open_pdf(pdf_input: PathOrReadable | Document):
         doc.close()


-def _get_page_indices(
+def _get_page_indices(
+    page_indices: Optional[PageIndexType], max_doc_pages: int, is_input_zero_based: bool
+) -> list[int]:
     """Helper function to handle page indices for PDF conversion."""
+
+    def _to_zero_based_int(idx: int) -> int:
+        """Convert a 1-based index to a 0-based index if necessary."""
+        if is_input_zero_based:
+            return idx
+        else:
+            if idx < 1 or idx > max_doc_pages:
+                raise ValueError(f"Index {idx} is out of bounds for document with {max_doc_pages} pages (1-based).")
+            return idx - 1
+
     if page_indices is None:
-        return range(max_doc_pages)
+        return list(range(max_doc_pages))  # Convert all pages
     elif isinstance(page_indices, int):
-
+        # Handle single integer input for page index
+        return [_to_zero_based_int(page_indices)]
+    elif isinstance(page_indices, str):
+        # Handle string input for page indices
+        return _interpret_index_string(
+            index_str=page_indices, max_doc_pages=max_doc_pages, is_input_zero_based=is_input_zero_based
+        )
     else:
-
+        # Handle iterable input for page indices
+        indices: set[int] = set()
+        for idx in page_indices:
+            if isinstance(idx, int):
+                indices.add(_to_zero_based_int(idx))
+            else:
+                start, end = idx
+                if isinstance(start, EllipsisType):
+                    start = 0
+                else:
+                    start = _to_zero_based_int(start)
+
+                if isinstance(end, EllipsisType):
+                    end = max_doc_pages - 1
+                else:
+                    end = _to_zero_based_int(end)
+
+                if start > end:
+                    raise ValueError(
+                        f"Invalid range: {start} - {end}. Start index must be less than or equal to end index."
+                    )
+                indices.update(range(start, end + 1))
+
+        return sorted(indices)  # Return sorted list of indices
+
+
+def _interpret_index_string(index_str: str, max_doc_pages: int, is_input_zero_based: bool) -> list[int]:
+    """Interpret a string of comma-separated indices and ranges."""
+
+    def _to_zero_based_int(idx_str: str) -> int:
+        i = int(idx_str)
+        if is_input_zero_based:
+            if i < 0 or i >= max_doc_pages:
+                raise ValueError(f"Index {i} is out of bounds for document with {max_doc_pages} pages.")
+            return i
+        else:
+            if i < 1 or i > max_doc_pages:
+                raise ValueError(f"Index {i} is out of bounds for document with {max_doc_pages} pages (1-based).")
+            return i - 1  # Convert to zero-based index
+
+    indices: set[int] = set()
+    for part in index_str.split(","):
+        part: str = part.strip()
+        count_dash: int = part.count("-")
+        if count_dash == 0:
+            indices.add(_to_zero_based_int(part))
+        elif count_dash == 1:
+            idx_dash: int = part.index("-")
+            start = part[:idx_dash].strip()
+            end = part[idx_dash + 1 :].strip()
+            if not start:
+                start = _to_zero_based_int("0")  # Default to 0 if no start index is provided
+            else:
+                start = _to_zero_based_int(start)
+
+            if not end:
+                end = _to_zero_based_int(str(max_doc_pages - 1))  # Default to last page if no end index is provided
+            else:
+                end = _to_zero_based_int(end)
+
+            if start > end:
+                raise ValueError(
+                    f"Invalid range: {start} - {end}. Start index must be less than or equal to end index."
+                )
+            indices.update(range(start, end + 1))
+        else:
+            raise ValueError(f"Invalid page index format: '{part}'. Expected format is '1,2,3' or '1-3'.")
+
+    return sorted(indices)  # Return sorted list of indices, ensuring no duplicates
chatterer/tools/convert_to_text.py
CHANGED
@@ -8,7 +8,6 @@ from pathlib import Path
 from typing import (
     TYPE_CHECKING,
     Callable,
-    Iterable,
     NamedTuple,
     NotRequired,
     Optional,
@@ -20,7 +19,7 @@ from typing import (

 from ..common_types.io import PathOrReadable
 from ..utils.bytesio import read_bytes_stream
-from .convert_pdf_to_markdown import extract_text_from_pdf
+from .convert_pdf_to_markdown import PageIndexType, extract_text_from_pdf

 if TYPE_CHECKING:
     from bs4 import Tag
@@ -222,7 +221,7 @@ def html_to_markdown(html: str, options: Optional[HtmlToMarkdownOptions]) -> str
     return str(markdownify(html, **(options or {})))  # pyright: ignore[reportUnknownArgumentType]


-def pdf_to_text(path_or_file: PathOrReadable, page_indices:
+def pdf_to_text(path_or_file: PathOrReadable, page_indices: Optional[PageIndexType] = None) -> str:
     """
     Convert a PDF file to plain text.

@@ -248,7 +247,7 @@ def pdf_to_text(path_or_file: PathOrReadable, page_indices: Iterable[int] | int
     with Document(stream=stream.read()) as doc:
         return "\n".join(
             f"<!-- Page {page_no} -->\n{text}\n"
-            for page_no, text in extract_text_from_pdf(doc, page_indices).items()
+            for page_no, text in extract_text_from_pdf(doc=doc, page_indices=page_indices).items()
         )

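Because `pdf_to_text` now shares `PageIndexType` with the markdown converter, the same page-selection strings work here. A brief usage sketch; the file name is hypothetical, while the page-marker format comes from the function body above:

from chatterer.tools.convert_to_text import pdf_to_text

text = pdf_to_text("sample.pdf", page_indices="0,2,4-8")
# Each selected page is prefixed with a marker such as "<!-- Page 0 -->"
print(text[:200])
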
chatterer/tools/upstage_document_parser.py
CHANGED
@@ -67,7 +67,7 @@ class Coordinate(BaseModel):
 class Element(BaseModel):
     category: Category
     content: Content
-    coordinates: list[Coordinate] = Field(default_factory=list)
+    coordinates: list[Coordinate] = Field(default_factory=list[Coordinate])
     base64_encoding: str = ""
     id: int
     page: int
@@ -701,5 +701,5 @@ def _get_metadata_from_document(doc: Document) -> dict[object, object]:
     Helper function to extract metadata from a Document object.
     This is a placeholder and should be adjusted based on actual metadata structure.
     """
-    metadata: dict[object, object] = doc.metadata  # pyright: ignore[reportUnknownMemberType]
+    metadata: dict[object, object] = doc.metadata  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
     return metadata

chatterer/utils/code_agent.py
CHANGED
@@ -185,7 +185,7 @@ def insert_callables_into_global(
         repl_tool.globals = {}  # Or handle appropriately

     # Safely update globals
-    current_globals: dict[object, object] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
+    current_globals: dict[object, object] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
     for fsig in function_signatures:
         current_globals[fsig.name] = fsig.callable
     # No need to reassign if globals is mutable (dict)

{chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatterer
-Version: 0.1.19
+Version: 0.1.21
 Summary: The highest-level interface for various LLM APIs.
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -11,7 +11,7 @@ Requires-Dist: pillow>=11.1.0
 Requires-Dist: regex>=2024.11.6
 Requires-Dist: rich>=13.9.4
 Requires-Dist: colorama>=0.4.6
-Requires-Dist: spargear>=0.
+Requires-Dist: spargear>=0.2.0
 Provides-Extra: dev
 Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
 Requires-Dist: ipykernel>=6.29.5; extra == "dev"

{chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/RECORD
CHANGED
@@ -6,23 +6,23 @@ chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chatterer/common_types/__init__.py,sha256=jfS6m5UANSvGjzQ_nzYDpryn5uZqNb06-4xCsQ2C_lw,376
 chatterer/common_types/io.py,sha256=fetiyi1suZ3NF2mj5k5KDLJLGKS1n4J-5UmH7JN36g8,817
 chatterer/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chatterer/examples/anything_to_markdown.py,sha256=
-chatterer/examples/get_code_snippets.py,sha256=
-chatterer/examples/login_with_playwright.py,sha256=
-chatterer/examples/make_ppt.py,sha256=
-chatterer/examples/pdf_to_markdown.py,sha256=
-chatterer/examples/pdf_to_text.py,sha256=
-chatterer/examples/transcription_api.py,sha256=
-chatterer/examples/upstage_parser.py,sha256=
-chatterer/examples/webpage_to_markdown.py,sha256=
+chatterer/examples/anything_to_markdown.py,sha256=4O9ze7AIHcwEzvVmm5JMMKo_rVSFwhPL8MVHtfMLJ5Y,2734
+chatterer/examples/get_code_snippets.py,sha256=pz05JjhKaWAknVKlk1ftEEzpSG4-sqD9oa_gyIQoCAs,1911
+chatterer/examples/login_with_playwright.py,sha256=EhvJLaH5TD7bmDi12uP8YLd0fRhdjR-oyIkBHLi1Jjs,5988
+chatterer/examples/make_ppt.py,sha256=vsT_iL_jS2ami5VYrReLMQcD576FfZUH7913F7_As0A,23278
+chatterer/examples/pdf_to_markdown.py,sha256=ZeGRO5CZxGQxJpScK0iB1lTzUkfSiXtuqoeKEQL1ICA,2787
+chatterer/examples/pdf_to_text.py,sha256=DznTyhu1REv8Wp4RimQWVgEU5j0_BmlwjfJYJvx3dbI,1590
+chatterer/examples/transcription_api.py,sha256=WUs12qHH4616eLMQDHOiyVGxaXstTpgeE47djYyli6c,3897
+chatterer/examples/upstage_parser.py,sha256=TrfeSIiF0xklhFCknop22TIOVibI4CJ_UKj5-lD8c8E,3487
+chatterer/examples/webpage_to_markdown.py,sha256=DnZfQ-trXBiOiszA2tMlgadgKH-ObTi6l4gGloT-cQw,2846
 chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
-chatterer/strategies/atom_of_thoughts.py,sha256=
+chatterer/strategies/atom_of_thoughts.py,sha256=pUhqt47YlzBIVNRh0UebeBwuJ0J94Ge6yZgXxrsiDPE,40884
 chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
 chatterer/tools/__init__.py,sha256=m3PRK9H5vOhk-2gG9W2eg8CYBlEn-K9-eaulOu91bgo,1474
 chatterer/tools/caption_markdown_images.py,sha256=r4QajHYuL4mdyYQXP1vQcNmqKN8lxBf5y0VKELXILOI,15392
-chatterer/tools/convert_pdf_to_markdown.py,sha256=
-chatterer/tools/convert_to_text.py,sha256=
-chatterer/tools/upstage_document_parser.py,sha256=
+chatterer/tools/convert_pdf_to_markdown.py,sha256=Q5ln-_av2eor0A2LkQG7-IgyQKJ79wwrSOvv5Jncfso,18901
+chatterer/tools/convert_to_text.py,sha256=WHQ0Xj4Ri_jYbFjzTx3mjmvJ9U8bAv4wGaKEVC88Nlk,15457
+chatterer/tools/upstage_document_parser.py,sha256=CXslVYAHDK8EV8jtUAUWzf8rxU4qilSnW8_dhAxHOE8,33142
 chatterer/tools/webpage_to_markdown.py,sha256=ADH4sqM6iquJR7HU6umMQ5qO7EvcbNutuchXDpAcxAo,31961
 chatterer/tools/youtube.py,sha256=Hl2MMXJwwZ-i6_YAq0zh0rN4LHpYOb1Rt88P1gMjlLE,6081
 chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
@@ -35,10 +35,10 @@ chatterer/tools/citation_chunking/utils.py,sha256=M4pH2-UIE1VLzQLXDqjEe4L3Xcy0e0
 chatterer/utils/__init__.py,sha256=2v-lB2dqHgBlGcyaKKHc_hcyeH_AVoOddpr0STF7YAw,341
 chatterer/utils/base64_image.py,sha256=m_qAT3ERBiq8D-H4H9Z7rLfL31_BiPmV_m4uQ5XRLs0,11124
 chatterer/utils/bytesio.py,sha256=3MC2atOOFKo5YxuReo_y_t8Wem9p2Y1ahC5M2lGclwI,2618
-chatterer/utils/code_agent.py,sha256=
+chatterer/utils/code_agent.py,sha256=7ka_WRI4TQmZ5H46mjY3hI6RO_pxw6pg3LAxjgW4AbM,10495
 chatterer/utils/imghdr.py,sha256=6JhJMXD4MZ0dQolT2VM87YrRYm3hPf3RTEWnP4lYRVc,3842
-chatterer-0.1.
-chatterer-0.1.
-chatterer-0.1.
-chatterer-0.1.
-chatterer-0.1.
+chatterer-0.1.21.dist-info/METADATA,sha256=j3QGPYik-jm75MHIfAvbvUbv-EaxvlVKdEIc7_dMUjk,11826
+chatterer-0.1.21.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+chatterer-0.1.21.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
+chatterer-0.1.21.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
+chatterer-0.1.21.dist-info/RECORD,,

{chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/entry_points.txt
File without changes

{chatterer-0.1.19.dist-info → chatterer-0.1.21.dist-info}/top_level.txt
File without changes