PyPI - chatterer - Versions diffs - 0.1.19__tar.gz → 0.1.21__tar.gz - Mend

chatterer 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

{chatterer-0.1.19 → chatterer-0.1.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatterer
-Version: 0.1.19
+Version: 0.1.21
 Summary: The highest-level interface for various LLM APIs.
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -11,7 +11,7 @@ Requires-Dist: pillow>=11.1.0
 Requires-Dist: regex>=2024.11.6
 Requires-Dist: rich>=13.9.4
 Requires-Dist: colorama>=0.4.6
-Requires-Dist: spargear>=0.1.6
+Requires-Dist: spargear>=0.2.0
 Provides-Extra: dev
 Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
 Requires-Dist: ipykernel>=6.29.5; extra == "dev"

{chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/anything_to_markdown.py RENAMED Viewed

@@ -1,36 +1,27 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-    logger = logging.getLogger(__name__)
-    return logger
-logger = resolve_import_path_and_get_logger()
+import logging
 from pathlib import Path
 from typing import Optional, TypedDict
 import openai
-from spargear import ArgumentSpec, BaseArguments
+from spargear import BaseArguments
 from chatterer import anything_to_markdown
+logger = logging.getLogger(__name__)
 class AnythingToMarkdownReturns(TypedDict):
-    in_path: str
-    out_path: Optional[str]
+    input: str
+    output: Optional[str]
     out_text: str
 class AnythingToMarkdownArguments(BaseArguments):
     """Command line arguments for converting various file types to markdown."""
-    in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
-    out_path: Optional[str] = None
+    input: str
+    """Input file to convert to markdown. Can be a file path or a URL."""
+    output: Optional[str] = None
     """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
     model: Optional[str] = None
     """OpenAI Model to use for conversion"""
@@ -50,14 +41,13 @@ class AnythingToMarkdownArguments(BaseArguments):
     """Encoding for the output file."""
     def run(self) -> AnythingToMarkdownReturns:
-        in_path = self.in_path.unwrap()
         if not self.prevent_save_file:
-            if not self.out_path:
-                out_path = Path(in_path).with_suffix(".md")
+            if not self.output:
+                output = Path(self.input).with_suffix(".md")
             else:
-                out_path = Path(self.out_path)
+                output = Path(self.output)
         else:
-            out_path = None
+            output = None
         if self.model:
             llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
@@ -67,22 +57,22 @@ class AnythingToMarkdownArguments(BaseArguments):
             llm_model = None
         text: str = anything_to_markdown(
-            in_path,
+            self.input,
             llm_client=llm_client,
             llm_model=llm_model,
             style_map=self.style_map,
             exiftool_path=self.exiftool_path,
             docintel_endpoint=self.docintel_endpoint,
         )
-        if out_path:
-            out_path.parent.mkdir(parents=True, exist_ok=True)
-            out_path.write_text(text, encoding=self.encoding)
-            logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
+        if output:
+            output.parent.mkdir(parents=True, exist_ok=True)
+            output.write_text(text, encoding=self.encoding)
+            logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
         else:
-            logger.info(f"Converted `{in_path}` to markdown.")
+            logger.info(f"Converted `{self.input}` to markdown.")
         return {
-            "in_path": in_path,
-            "out_path": str(out_path) if out_path is not None else None,
+            "input": self.input,
+            "output": str(output) if output is not None else None,
             "out_text": text,
         }

chatterer-0.1.21/chatterer/examples/get_code_snippets.py ADDED Viewed

@@ -0,0 +1,55 @@
+import logging
+from pathlib import Path
+from typing import Optional
+from spargear import BaseArguments
+from chatterer import CodeSnippets
+logger = logging.getLogger(__name__)
+class GetCodeSnippetsArgs(BaseArguments):
+    input: str
+    """Path to the package or file from which to extract code snippets."""
+    output: Optional[str] = None
+    """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
+    ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
+    """List of file patterns to ignore."""
+    glob_patterns: list[str] = ["*.py"]
+    """List of glob patterns to include."""
+    case_sensitive: bool = False
+    """Enable case-sensitive matching for glob patterns."""
+    prevent_save_file: bool = False
+    """Prevent saving the extracted code snippets to a file."""
+    def run(self) -> CodeSnippets:
+        if not self.prevent_save_file:
+            if not self.output:
+                output = Path(__file__).with_suffix(".txt")
+            else:
+                output = Path(self.output)
+        else:
+            output = None
+        cs = CodeSnippets.from_path_or_pkgname(
+            path_or_pkgname=self.input,
+            ban_file_patterns=self.ban_file_patterns,
+            glob_patterns=self.glob_patterns,
+            case_sensitive=self.case_sensitive,
+        )
+        if output is not None:
+            output.parent.mkdir(parents=True, exist_ok=True)
+            output.write_text(cs.snippets_text, encoding="utf-8")
+            logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
+        else:
+            logger.info(f"Extracted code snippets from `{self.input}`.")
+        return cs
+def main() -> None:
+    GetCodeSnippetsArgs().run()
+if __name__ == "__main__":
+    main()

{chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/login_with_playwright.py RENAMED Viewed

@@ -1,17 +1,5 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-    logger = logging.getLogger(__name__)
-    return logger
-logger = resolve_import_path_and_get_logger()
 import json
+import logging
 import sys
 from pathlib import Path
@@ -19,76 +7,8 @@ from spargear import BaseArguments, SubcommandSpec
 from chatterer import PlayWrightBot
+logger = logging.getLogger(__name__)
-def read_session(url: str, jsonpath: Path) -> None:
-    """
-    Loads the session state from the specified JSON file, then navigates
-    to a protected_url that normally requires login. If the stored session
-    is valid, it should open without re-entering credentials.
-    Correction: Loads the JSON content into a dict first to satisfy type hints.
-    """
-    logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
-    if not jsonpath.exists():
-        logger.error(f"Session file not found at {jsonpath}")
-        sys.exit(1)
-    # Load the storage state from the JSON file into a dictionary
-    logger.info(f"Reading storage state content from {jsonpath} ...")
-    try:
-        with open(jsonpath, "r", encoding="utf-8") as f:
-            # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
-            storage_state_dict = json.load(f)
-    except json.JSONDecodeError:
-        logger.error(f"Failed to decode JSON from {jsonpath}")
-        sys.exit(1)
-    except Exception as e:
-        logger.error(f"Error reading file {jsonpath}: {e}")
-        sys.exit(1)
-    logger.info("Launching browser with loaded session state...")
-    with PlayWrightBot(
-        playwright_launch_options={"headless": False},
-        # Pass the loaded dictionary, which should match the expected 'StorageState' type
-        playwright_persistency_options={"storage_state": storage_state_dict},
-    ) as bot:
-        bot.get_page(url)
-        logger.info("Press Enter in the console when you're done checking the protected page.")
-        input("    >> Press Enter to exit: ")
-    logger.info("Done! Browser is now closed.")
-def write_session(url: str, jsonpath: Path) -> None:
-    """
-    Launches a non-headless browser and navigates to the login_url.
-    The user can manually log in, then press Enter in the console
-    to store the current session state into a JSON file.
-    """
-    logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
-    # Ensure jsonpath directory exists
-    jsonpath.parent.mkdir(parents=True, exist_ok=True)
-    with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
-        bot.get_page(url)
-        logger.info("After completing the login in the browser, press Enter here to save the session.")
-        input("    >> Press Enter when ready: ")
-        # get_sync_browser() returns the BrowserContext internally
-        context = bot.get_sync_browser()
-        # Save the current session (cookies, localStorage) to a JSON file
-        logger.info(f"Saving storage state to {jsonpath} ...")
-        context.storage_state(path=jsonpath)  # Pass Path object directly
-    logger.info("Done! Browser is now closed.")
-# --- Spargear Declarative CLI Definition ---
 # Define the default path location relative to this script file
 DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
@@ -160,7 +80,72 @@ class LoginWithPlaywrightArgs(BaseArguments):
             sys.exit(1)
-# --- Main Execution Logic ---
+def read_session(url: str, jsonpath: Path) -> None:
+    """
+    Loads the session state from the specified JSON file, then navigates
+    to a protected_url that normally requires login. If the stored session
+    is valid, it should open without re-entering credentials.
+    Correction: Loads the JSON content into a dict first to satisfy type hints.
+    """
+    logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
+    if not jsonpath.exists():
+        logger.error(f"Session file not found at {jsonpath}")
+        sys.exit(1)
+    # Load the storage state from the JSON file into a dictionary
+    logger.info(f"Reading storage state content from {jsonpath} ...")
+    try:
+        with open(jsonpath, "r", encoding="utf-8") as f:
+            # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
+            storage_state_dict = json.load(f)
+    except json.JSONDecodeError:
+        logger.error(f"Failed to decode JSON from {jsonpath}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Error reading file {jsonpath}: {e}")
+        sys.exit(1)
+    logger.info("Launching browser with loaded session state...")
+    with PlayWrightBot(
+        playwright_launch_options={"headless": False},
+        # Pass the loaded dictionary, which should match the expected 'StorageState' type
+        playwright_persistency_options={"storage_state": storage_state_dict},
+    ) as bot:
+        bot.get_page(url)
+        logger.info("Press Enter in the console when you're done checking the protected page.")
+        input("    >> Press Enter to exit: ")
+    logger.info("Done! Browser is now closed.")
+def write_session(url: str, jsonpath: Path) -> None:
+    """
+    Launches a non-headless browser and navigates to the login_url.
+    The user can manually log in, then press Enter in the console
+    to store the current session state into a JSON file.
+    """
+    logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
+    # Ensure jsonpath directory exists
+    jsonpath.parent.mkdir(parents=True, exist_ok=True)
+    with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
+        bot.get_page(url)
+        logger.info("After completing the login in the browser, press Enter here to save the session.")
+        input("    >> Press Enter when ready: ")
+        # get_sync_browser() returns the BrowserContext internally
+        context = bot.get_sync_browser()
+        # Save the current session (cookies, localStorage) to a JSON file
+        logger.info(f"Saving storage state to {jsonpath} ...")
+        context.storage_state(path=jsonpath)  # Pass Path object directly
+    logger.info("Done! Browser is now closed.")
 def main() -> None:

{chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/make_ppt.py RENAMED Viewed

@@ -1,16 +1,3 @@
-def resolve_import_path_and_get_logger():
-    # ruff: noqa: E402
-    import logging
-    import sys
-    if __name__ == "__main__" and "." not in sys.path:
-        sys.path.append(".")
-    logger = logging.getLogger(__name__)
-    return logger
-logger = resolve_import_path_and_get_logger()
 import re
 import sys
 from pathlib import Path
@@ -192,7 +179,9 @@ class MakePptArguments(BaseArguments):
     """Prompt for organizing slides into a presentation script"""
     # LLM Settings
-    provider: str = "openai:gpt-4.1"  # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
+    provider: str = (
+        "openai:gpt-4.1"  # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
+    )
     """Name of the language model to use (provider:model_name)"""
     # Other settings

chatterer-0.1.21/chatterer/examples/pdf_to_markdown.py ADDED Viewed

@@ -0,0 +1,77 @@
+import logging
+import sys
+from pathlib import Path
+from typing import Optional
+from spargear import ArgumentSpec, BaseArguments
+from chatterer import Chatterer, PdfToMarkdown
+logger = logging.getLogger(__name__)
+class PdfToMarkdownArgs(BaseArguments):
+    input: str
+    """Input PDF file or directory containing PDF files to convert to markdown."""
+    output: Optional[str] = None
+    """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
+    """Chatterer instance for communication."""
+    page: Optional[str] = None
+    """Zero-based page indices to convert (e.g., '0,2,4-8')."""
+    recursive: bool = False
+    """If input is a directory, search for PDFs recursively."""
+    chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
+        ["--chatterer"],
+        default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
+        help="Chatterer instance for communication.",
+        type=Chatterer.from_provider,
+    )
+    def run(self) -> list[dict[str, str]]:
+        input = Path(self.input).resolve()
+        pdf_files: list[Path] = []
+        is_dir = False
+        if input.is_file():
+            if input.suffix.lower() != ".pdf":
+                sys.exit(1)
+            pdf_files.append(input)
+        elif input.is_dir():
+            is_dir = True
+            pattern = "*.pdf"
+            pdf_files = sorted([
+                f for f in (input.rglob(pattern) if self.recursive else input.glob(pattern)) if f.is_file()
+            ])
+            if not pdf_files:
+                sys.exit(0)
+        else:
+            sys.exit(1)
+        if self.output:
+            out_base = Path(self.output).resolve()
+        elif is_dir:
+            out_base = input
+        else:
+            out_base = input.with_suffix(".md")
+        if is_dir:
+            out_base.mkdir(parents=True, exist_ok=True)
+        else:
+            out_base.parent.mkdir(parents=True, exist_ok=True)
+        converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
+        results: list[dict[str, str]] = []
+        for pdf in pdf_files:
+            output: Path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
+            md: str = converter.convert(pdf_input=str(pdf), page_indices=self.page)
+            output.parent.mkdir(parents=True, exist_ok=True)
+            output.write_text(md, encoding="utf-8")
+            results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
+        logger.info(f"Converted {len(pdf_files)} PDF(s) to markdown and saved to `{out_base}`.")
+        return results
+def main() -> None:
+    PdfToMarkdownArgs().run()
+if __name__ == "__main__":
+    main()

chatterer-0.1.21/chatterer/examples/pdf_to_text.py ADDED Viewed

@@ -0,0 +1,54 @@
+import logging
+import sys
+from pathlib import Path
+from typing import Optional
+from spargear import BaseArguments
+from chatterer.tools.convert_to_text import pdf_to_text
+logger = logging.getLogger(__name__)
+class PdfToTextArgs(BaseArguments):
+    input: Path
+    """Path to the PDF file to convert to text."""
+    output: Optional[Path]
+    """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
+    page: Optional[str] = None
+    """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
+    def run(self) -> None:
+        input = self.input.resolve()
+        out = self.output or input.with_suffix(".txt")
+        if not input.is_file():
+            sys.exit(1)
+        out.write_text(
+            pdf_to_text(path_or_file=input, page_indices=self.page),
+            encoding="utf-8",
+        )
+        logger.info(f"Extracted text from `{input}` to `{out}`")
+def parse_page_indices(pages_str: str) -> list[int]:
+    indices: set[int] = set()
+    for part in pages_str.split(","):
+        part = part.strip()
+        if "-" in part:
+            start_str, end_str = part.split("-", 1)
+            start = int(start_str)
+            end = int(end_str)
+            if start > end:
+                raise ValueError
+            indices.update(range(start, end + 1))
+        else:
+            indices.add(int(part))
+    return sorted(indices)
+def main() -> None:
+    PdfToTextArgs().run()
+if __name__ == "__main__":
+    main()

{chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/transcription_api.py RENAMED Viewed

@@ -2,51 +2,36 @@
 from io import BytesIO
 from pathlib import Path
-from typing import cast
+from typing import Optional, cast
 from openai import OpenAI
 from pydub import AudioSegment
-from spargear import ArgumentSpec, BaseArguments
+from spargear import BaseArguments
 # Maximum chunk length in seconds
 MAX_CHUNK_DURATION = 600
 class TranscriptionApiArguments(BaseArguments):
-    in_path = ArgumentSpec(
-        ["in-path"],
-        type=Path,
-        help="The audio file to transcribe.",
-    )
-    out_path = ArgumentSpec(
-        ["--out-path"],
-        type=Path,
-        default=None,
-        help="Path to save the transcription output.",
-    )
-    model: ArgumentSpec[str] = ArgumentSpec(
-        ["--model"],
-        default="gpt-4o-transcribe",
-        help="The model to use for transcription.",
-    )
-    api_key: ArgumentSpec[str] = ArgumentSpec(
-        ["--api-key"],
-        default=None,
-        help="The API key for authentication.",
-    )
-    base_url: ArgumentSpec[str] = ArgumentSpec(
-        ["--base-url"],
-        default="https://api.openai.com/v1",
-        help="The base URL for the API.",
-    )
+    input: Path
+    """The audio file to transcribe."""
+    output: Optional[Path] = None
+    """Path to save the transcription output."""
+    model: str = "gpt-4o-transcribe"
+    """The model to use for transcription."""
+    api_key: Optional[str] = None
+    """The API key for authentication."""
+    base_url: str = "https://api.openai.com/v1"
+    """The base URL for the API."""
+    prompt: str = "Transcribe whole text from audio."
+    """The prompt to use for transcription."""
     def run(self) -> None:
-        audio_path = self.in_path.unwrap()
-        model = self.model.unwrap()
+        model = self.model
-        client = OpenAI(api_key=self.api_key.value, base_url=self.base_url.value)
+        client = OpenAI(api_key=self.api_key, base_url=self.base_url)
-        audio = load_audio_segment(audio_path)
+        audio = load_audio_segment(self.input)
         segments = split_audio(audio, MAX_CHUNK_DURATION)
         print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
@@ -54,10 +39,10 @@ class TranscriptionApiArguments(BaseArguments):
         transcripts: list[str] = []
         for idx, seg in enumerate(segments, start=1):
             print(f"[i] Transcribing segment {idx}/{len(segments)}...")
-            transcripts.append(transcribe_segment(seg, client, model))
+            transcripts.append(transcribe_segment(seg, client, model, self.prompt))
         full_transcript = "\n\n".join(transcripts)
-        output_path: Path = self.out_path.value or audio_path.with_suffix(".txt")
+        output_path: Path = self.output or self.input.with_suffix(".txt")
         output_path.write_text(full_transcript, encoding="utf-8")
         print(f"[✓] Transcription saved to: {output_path}")
@@ -94,7 +79,7 @@ def split_audio(audio: AudioSegment, max_duration_s: int) -> list[AudioSegment]:
     return segments
-def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str:
+def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt: str) -> str:
     """
     Transcribe a single AudioSegment chunk and return its text.
     """
@@ -104,7 +89,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str
     mp3_bytes = buffer.read()
     response = client.audio.transcriptions.create(
         model=model,
-        prompt="Transcribe whole text from audio.",
+        prompt=prompt,
         file=("audio.mp3", mp3_bytes),
         response_format="text",
         stream=True,

chatterer 0.1.19__tar.gz → 0.1.21__tar.gz

chatterer 0.1.19__tar.gz → 0.1.21__tar.gz