PyPI - chatterer - Versions diffs - 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl - Mend

chatterer 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

chatterer/__init__.py +93 -93
chatterer/common_types/__init__.py +21 -21
chatterer/common_types/io.py +19 -19
chatterer/examples/anything_to_markdown.py +91 -0
chatterer/examples/get_code_snippets.py +62 -0
chatterer/examples/login_with_playwright.py +167 -0
chatterer/examples/make_ppt.py +497 -0
chatterer/examples/pdf_to_markdown.py +107 -0
chatterer/examples/pdf_to_text.py +56 -0
chatterer/examples/transcription_api.py +123 -0
chatterer/examples/upstage_parser.py +100 -0
chatterer/examples/webpage_to_markdown.py +79 -0
chatterer/interactive.py +354 -692
chatterer/language_model.py +533 -533
chatterer/messages.py +21 -21
chatterer/strategies/__init__.py +13 -13
chatterer/strategies/atom_of_thoughts.py +975 -975
chatterer/strategies/base.py +14 -14
chatterer/tools/__init__.py +46 -46
chatterer/tools/caption_markdown_images.py +384 -384
chatterer/tools/citation_chunking/__init__.py +3 -3
chatterer/tools/citation_chunking/chunks.py +53 -53
chatterer/tools/citation_chunking/citation_chunker.py +118 -118
chatterer/tools/citation_chunking/citations.py +285 -285
chatterer/tools/citation_chunking/prompt.py +157 -157
chatterer/tools/citation_chunking/reference.py +26 -26
chatterer/tools/citation_chunking/utils.py +138 -138
chatterer/tools/convert_pdf_to_markdown.py +302 -302
chatterer/tools/convert_to_text.py +447 -447
chatterer/tools/upstage_document_parser.py +705 -705
chatterer/tools/webpage_to_markdown.py +739 -739
chatterer/tools/youtube.py +146 -146
chatterer/utils/__init__.py +15 -15
chatterer/utils/base64_image.py +285 -285
chatterer/utils/bytesio.py +59 -59
chatterer/utils/code_agent.py +237 -237
chatterer/utils/imghdr.py +148 -148
{chatterer-0.1.16.dist-info → chatterer-0.1.18.dist-info}/METADATA +392 -392
chatterer-0.1.18.dist-info/RECORD +42 -0
{chatterer-0.1.16.dist-info → chatterer-0.1.18.dist-info}/WHEEL +1 -1
chatterer-0.1.16.dist-info/RECORD +0 -33
{chatterer-0.1.16.dist-info → chatterer-0.1.18.dist-info}/top_level.txt +0 -0

chatterer/__init__.py CHANGED Viewed

@@ -1,93 +1,93 @@
-from .interactive import interactive_shell
-from .language_model import Chatterer
-from .messages import (
-    AIMessage,
-    BaseMessage,
-    BaseMessageChunk,
-    FunctionMessage,
-    HumanMessage,
-    LanguageModelInput,
-    SystemMessage,
-    UsageMetadata,
-)
-from .strategies import (
-    AoTPipeline,
-    AoTPrompter,
-    AoTStrategy,
-    BaseStrategy,
-)
-from .tools import (
-    CodeSnippets,
-    MarkdownLink,
-    PdfToMarkdown,
-    PlayWrightBot,
-    PlaywrightLaunchOptions,
-    PlaywrightOptions,
-    PlaywrightPersistencyOptions,
-    UpstageDocumentParseParser,
-    acaption_markdown_images,
-    anything_to_markdown,
-    caption_markdown_images,
-    citation_chunker,
-    extract_text_from_pdf,
-    get_default_html_to_markdown_options,
-    get_default_playwright_launch_options,
-    get_youtube_video_details,
-    get_youtube_video_subtitle,
-    html_to_markdown,
-    open_pdf,
-    pdf_to_text,
-    pyscripts_to_snippets,
-    render_pdf_as_image,
-)
-from .utils import (
-    Base64Image,
-    CodeExecutionResult,
-    FunctionSignature,
-    get_default_repl_tool,
-    insert_callables_into_global,
-)
-__all__ = [
-    "BaseStrategy",
-    "Chatterer",
-    "AoTStrategy",
-    "AoTPipeline",
-    "AoTPrompter",
-    "html_to_markdown",
-    "anything_to_markdown",
-    "pdf_to_text",
-    "get_default_html_to_markdown_options",
-    "pyscripts_to_snippets",
-    "citation_chunker",
-    "BaseMessage",
-    "HumanMessage",
-    "SystemMessage",
-    "AIMessage",
-    "FunctionMessage",
-    "Base64Image",
-    "FunctionSignature",
-    "CodeExecutionResult",
-    "get_default_repl_tool",
-    "insert_callables_into_global",
-    "get_youtube_video_subtitle",
-    "get_youtube_video_details",
-    "interactive_shell",
-    "UpstageDocumentParseParser",
-    "BaseMessageChunk",
-    "CodeSnippets",
-    "LanguageModelInput",
-    "UsageMetadata",
-    "PlayWrightBot",
-    "PlaywrightLaunchOptions",
-    "PlaywrightOptions",
-    "PlaywrightPersistencyOptions",
-    "get_default_playwright_launch_options",
-    "acaption_markdown_images",
-    "caption_markdown_images",
-    "MarkdownLink",
-    "PdfToMarkdown",
-    "extract_text_from_pdf",
-    "open_pdf",
-    "render_pdf_as_image",
-]
+from .interactive import interactive_shell
+from .language_model import Chatterer
+from .messages import (
+    AIMessage,
+    BaseMessage,
+    BaseMessageChunk,
+    FunctionMessage,
+    HumanMessage,
+    LanguageModelInput,
+    SystemMessage,
+    UsageMetadata,
+)
+from .strategies import (
+    AoTPipeline,
+    AoTPrompter,
+    AoTStrategy,
+    BaseStrategy,
+)
+from .tools import (
+    CodeSnippets,
+    MarkdownLink,
+    PdfToMarkdown,
+    PlayWrightBot,
+    PlaywrightLaunchOptions,
+    PlaywrightOptions,
+    PlaywrightPersistencyOptions,
+    UpstageDocumentParseParser,
+    acaption_markdown_images,
+    anything_to_markdown,
+    caption_markdown_images,
+    citation_chunker,
+    extract_text_from_pdf,
+    get_default_html_to_markdown_options,
+    get_default_playwright_launch_options,
+    get_youtube_video_details,
+    get_youtube_video_subtitle,
+    html_to_markdown,
+    open_pdf,
+    pdf_to_text,
+    pyscripts_to_snippets,
+    render_pdf_as_image,
+)
+from .utils import (
+    Base64Image,
+    CodeExecutionResult,
+    FunctionSignature,
+    get_default_repl_tool,
+    insert_callables_into_global,
+)
+__all__ = [
+    "BaseStrategy",
+    "Chatterer",
+    "AoTStrategy",
+    "AoTPipeline",
+    "AoTPrompter",
+    "html_to_markdown",
+    "anything_to_markdown",
+    "pdf_to_text",
+    "get_default_html_to_markdown_options",
+    "pyscripts_to_snippets",
+    "citation_chunker",
+    "BaseMessage",
+    "HumanMessage",
+    "SystemMessage",
+    "AIMessage",
+    "FunctionMessage",
+    "Base64Image",
+    "FunctionSignature",
+    "CodeExecutionResult",
+    "get_default_repl_tool",
+    "insert_callables_into_global",
+    "get_youtube_video_subtitle",
+    "get_youtube_video_details",
+    "interactive_shell",
+    "UpstageDocumentParseParser",
+    "BaseMessageChunk",
+    "CodeSnippets",
+    "LanguageModelInput",
+    "UsageMetadata",
+    "PlayWrightBot",
+    "PlaywrightLaunchOptions",
+    "PlaywrightOptions",
+    "PlaywrightPersistencyOptions",
+    "get_default_playwright_launch_options",
+    "acaption_markdown_images",
+    "caption_markdown_images",
+    "MarkdownLink",
+    "PdfToMarkdown",
+    "extract_text_from_pdf",
+    "open_pdf",
+    "render_pdf_as_image",
+]

chatterer/common_types/__init__.py CHANGED Viewed

@@ -1,21 +1,21 @@
-from .io import (
-    BytesReadable,
-    BytesWritable,
-    FileDescriptorOrPath,
-    PathOrReadable,
-    Readable,
-    StringReadable,
-    StringWritable,
-    Writable,
-)
-__all__ = [
-    "BytesReadable",
-    "BytesWritable",
-    "FileDescriptorOrPath",
-    "PathOrReadable",
-    "Readable",
-    "StringReadable",
-    "StringWritable",
-    "Writable",
-]
+from .io import (
+    BytesReadable,
+    BytesWritable,
+    FileDescriptorOrPath,
+    PathOrReadable,
+    Readable,
+    StringReadable,
+    StringWritable,
+    Writable,
+)
+__all__ = [
+    "BytesReadable",
+    "BytesWritable",
+    "FileDescriptorOrPath",
+    "PathOrReadable",
+    "Readable",
+    "StringReadable",
+    "StringWritable",
+    "Writable",
+]

chatterer/common_types/io.py CHANGED Viewed

@@ -1,19 +1,19 @@
-import os
-from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
-from typing import TypeAlias
-# Type aliases for callback functions and file descriptors
-FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
-# Type aliases for different types of IO objects
-BytesReadable: TypeAlias = BytesIO | BufferedReader
-BytesWritable: TypeAlias = BytesIO | BufferedWriter
-StringReadable: TypeAlias = StringIO | TextIOWrapper
-StringWritable: TypeAlias = StringIO | TextIOWrapper
-# Combined type aliases for readable and writable objects
-Readable: TypeAlias = BytesReadable | StringReadable
-Writable: TypeAlias = BytesWritable | StringWritable
-# Type alias for path or readable object
-PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable
+import os
+from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
+from typing import TypeAlias
+# Type aliases for callback functions and file descriptors
+FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
+# Type aliases for different types of IO objects
+BytesReadable: TypeAlias = BytesIO | BufferedReader
+BytesWritable: TypeAlias = BytesIO | BufferedWriter
+StringReadable: TypeAlias = StringIO | TextIOWrapper
+StringWritable: TypeAlias = StringIO | TextIOWrapper
+# Combined type aliases for readable and writable objects
+Readable: TypeAlias = BytesReadable | StringReadable
+Writable: TypeAlias = BytesWritable | StringWritable
+# Type alias for path or readable object
+PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable

chatterer/examples/anything_to_markdown.py ADDED Viewed

@@ -0,0 +1,91 @@
+def resolve_import_path_and_get_logger():
+    # ruff: noqa: E402
+    import logging
+    import sys
+    if __name__ == "__main__" and "." not in sys.path:
+        sys.path.append(".")
+    logger = logging.getLogger(__name__)
+    return logger
+logger = resolve_import_path_and_get_logger()
+from pathlib import Path
+from typing import Optional, TypedDict
+import openai
+from spargear import ArgumentSpec, BaseArguments
+from chatterer import anything_to_markdown
+class AnythingToMarkdownReturns(TypedDict):
+    in_path: str
+    out_path: Optional[str]
+    out_text: str
+class AnythingToMarkdownArguments(BaseArguments):
+    """Command line arguments for converting various file types to markdown."""
+    in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
+    out_path: Optional[str] = None
+    """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
+    model: Optional[str] = None
+    """OpenAI Model to use for conversion"""
+    api_key: Optional[str] = None
+    """API key for OpenAI API"""
+    base_url: Optional[str] = None
+    """Base URL for OpenAI API"""
+    style_map: Optional[str] = None
+    """Output style map"""
+    exiftool_path: Optional[str] = None
+    """"Path to exiftool for metadata extraction"""
+    docintel_endpoint: Optional[str] = None
+    "Document Intelligence API endpoint"
+    prevent_save_file: bool = False
+    """Prevent saving the converted file to disk."""
+    encoding: str = "utf-8"
+    """Encoding for the output file."""
+    def run(self) -> AnythingToMarkdownReturns:
+        in_path = self.in_path.unwrap()
+        if not self.prevent_save_file:
+            if not self.out_path:
+                out_path = Path(in_path).with_suffix(".md")
+            else:
+                out_path = Path(self.out_path)
+        else:
+            out_path = None
+        if self.model:
+            llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
+            llm_model = self.model
+        else:
+            llm_client = None
+            llm_model = None
+        text: str = anything_to_markdown(
+            in_path,
+            llm_client=llm_client,
+            llm_model=llm_model,
+            style_map=self.style_map,
+            exiftool_path=self.exiftool_path,
+            docintel_endpoint=self.docintel_endpoint,
+        )
+        if out_path:
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+            out_path.write_text(text, encoding=self.encoding)
+            logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
+        else:
+            logger.info(f"Converted `{in_path}` to markdown.")
+        return {
+            "in_path": in_path,
+            "out_path": str(out_path) if out_path is not None else None,
+            "out_text": text,
+        }
+if __name__ == "__main__":
+    AnythingToMarkdownArguments().run()

chatterer/examples/get_code_snippets.py ADDED Viewed

@@ -0,0 +1,62 @@
+def resolve_import_path_and_get_logger():
+    # ruff: noqa: E402
+    import logging
+    import sys
+    if __name__ == "__main__" and "." not in sys.path:
+        sys.path.append(".")
+    logger = logging.getLogger(__name__)
+    return logger
+logger = resolve_import_path_and_get_logger()
+from pathlib import Path
+from typing import Optional
+from spargear import ArgumentSpec, BaseArguments
+from chatterer import CodeSnippets
+class GetCodeSnippetsArgs(BaseArguments):
+    path_or_pkgname: ArgumentSpec[str] = ArgumentSpec(
+        ["path_or_pkgname"], help="Path to the package or file from which to extract code snippets."
+    )
+    out_path: Optional[str] = None
+    ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
+    """List of file patterns to ignore."""
+    glob_patterns: list[str] = ["*.py"]
+    """List of glob patterns to include."""
+    case_sensitive: bool = False
+    """Enable case-sensitive matching for glob patterns."""
+    prevent_save_file: bool = False
+    """Prevent saving the extracted code snippets to a file."""
+    def run(self) -> CodeSnippets:
+        path_or_pkgname = self.path_or_pkgname.unwrap()
+        if not self.prevent_save_file:
+            if not self.out_path:
+                out_path = Path(__file__).with_suffix(".txt")
+            else:
+                out_path = Path(self.out_path)
+        else:
+            out_path = None
+        cs = CodeSnippets.from_path_or_pkgname(
+            path_or_pkgname=path_or_pkgname,
+            ban_file_patterns=self.ban_file_patterns,
+            glob_patterns=self.glob_patterns,
+            case_sensitive=self.case_sensitive,
+        )
+        if out_path is not None:
+            out_path.parent.mkdir(parents=True, exist_ok=True)
+            out_path.write_text(cs.snippets_text, encoding="utf-8")
+            logger.info(f"Extracted code snippets from `{path_or_pkgname}` and saved to `{out_path}`.")
+        else:
+            logger.info(f"Extracted code snippets from `{path_or_pkgname}`.")
+        return cs
+if __name__ == "__main__":
+    GetCodeSnippetsArgs().run()

chatterer/examples/login_with_playwright.py ADDED Viewed

@@ -0,0 +1,167 @@
+def resolve_import_path_and_get_logger():
+    # ruff: noqa: E402
+    import logging
+    import sys
+    if __name__ == "__main__" and "." not in sys.path:
+        sys.path.append(".")
+    logger = logging.getLogger(__name__)
+    return logger
+logger = resolve_import_path_and_get_logger()
+import json
+import sys
+from pathlib import Path
+from spargear import BaseArguments, SubcommandSpec
+from chatterer import PlayWrightBot
+def read_session(url: str, jsonpath: Path) -> None:
+    """
+    Loads the session state from the specified JSON file, then navigates
+    to a protected_url that normally requires login. If the stored session
+    is valid, it should open without re-entering credentials.
+    Correction: Loads the JSON content into a dict first to satisfy type hints.
+    """
+    logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
+    if not jsonpath.exists():
+        logger.error(f"Session file not found at {jsonpath}")
+        sys.exit(1)
+    # Load the storage state from the JSON file into a dictionary
+    logger.info(f"Reading storage state content from {jsonpath} ...")
+    try:
+        with open(jsonpath, "r", encoding="utf-8") as f:
+            # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
+            storage_state_dict = json.load(f)
+    except json.JSONDecodeError:
+        logger.error(f"Failed to decode JSON from {jsonpath}")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Error reading file {jsonpath}: {e}")
+        sys.exit(1)
+    logger.info("Launching browser with loaded session state...")
+    with PlayWrightBot(
+        playwright_launch_options={"headless": False},
+        # Pass the loaded dictionary, which should match the expected 'StorageState' type
+        playwright_persistency_options={"storage_state": storage_state_dict},
+    ) as bot:
+        bot.get_page(url)
+        logger.info("Press Enter in the console when you're done checking the protected page.")
+        input("    >> Press Enter to exit: ")
+    logger.info("Done! Browser is now closed.")
+def write_session(url: str, jsonpath: Path) -> None:
+    """
+    Launches a non-headless browser and navigates to the login_url.
+    The user can manually log in, then press Enter in the console
+    to store the current session state into a JSON file.
+    """
+    logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
+    # Ensure jsonpath directory exists
+    jsonpath.parent.mkdir(parents=True, exist_ok=True)
+    with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
+        bot.get_page(url)
+        logger.info("After completing the login in the browser, press Enter here to save the session.")
+        input("    >> Press Enter when ready: ")
+        # get_sync_browser() returns the BrowserContext internally
+        context = bot.get_sync_browser()
+        # Save the current session (cookies, localStorage) to a JSON file
+        logger.info(f"Saving storage state to {jsonpath} ...")
+        context.storage_state(path=jsonpath)  # Pass Path object directly
+    logger.info("Done! Browser is now closed.")
+# --- Spargear Declarative CLI Definition ---
+# Define the default path location relative to this script file
+DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
+class ReadArgs(BaseArguments):
+    """Arguments for the 'read' subcommand."""
+    url: str
+    """URL (potentially protected) to navigate to using the saved session."""
+    jsonpath: Path = DEFAULT_JSON_PATH
+    """Path to the session state JSON file to load."""
+class WriteArgs(BaseArguments):
+    """Arguments for the 'write' subcommand."""
+    url: str
+    """URL to navigate to for manual login."""
+    jsonpath: Path = DEFAULT_JSON_PATH
+    """Path to save the session state JSON file."""
+class LoginWithPlaywrightArgs(BaseArguments):
+    """
+    A simple CLI tool for saving and using Playwright sessions via storage_state.
+    Uses spargear for declarative argument parsing.
+    """
+    read: SubcommandSpec[ReadArgs] = SubcommandSpec(
+        name="read",
+        argument_class=ReadArgs,
+        help="Use a saved session to view a protected page.",
+        description="Loads session state from the specified JSON file and navigates to the URL.",
+    )
+    write: SubcommandSpec[WriteArgs] = SubcommandSpec(
+        name="write",
+        argument_class=WriteArgs,
+        help="Save a new session by manually logging in.",
+        description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
+    )
+    def run(self) -> None:
+        """Parses arguments using spargear and executes the corresponding command."""
+        try:
+            if (read := self.read.argument_class).url:
+                # Access attributes directly from the returned instance
+                logger.info("Running READ command:")
+                logger.info(f"    URL: {read.url}")
+                logger.info(f"    JSON Path: {read.jsonpath}")
+                read_session(url=read.url, jsonpath=read.jsonpath)
+            elif (write := self.write.argument_class).url:
+                # Access attributes directly from the returned instance
+                logger.info("Running WRITE command:")
+                logger.info(f"    URL: {write.url}")
+                logger.info(f"    JSON Path: {write.jsonpath}")
+                write_session(url=write.url, jsonpath=write.jsonpath)
+            else:
+                logger.error("No valid subcommand provided. Use 'read' or 'write'.")
+                sys.exit(1)
+        except SystemExit as e:
+            # Handle cases like -h/--help or argparse errors that exit
+            sys.exit(e.code)
+        except Exception as e:
+            logger.error(f"\nAn error occurred: {e}")
+            # from traceback import print_exc # Uncomment for full traceback
+            # print_exc()                   # Uncomment for full traceback
+            sys.exit(1)
+# --- Main Execution Logic ---
+if __name__ == "__main__":
+    LoginWithPlaywrightArgs().run()

chatterer 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl

chatterer 0.1.16__py3-none-any.whl → 0.1.18__py3-none-any.whl