PyPI - chatterer - Versions diffs - 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl - Mend

chatterer 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

chatterer/__init__.py +87 -87
chatterer/common_types/__init__.py +21 -21
chatterer/common_types/io.py +19 -19
chatterer/constants.py +5 -0
chatterer/examples/__main__.py +75 -75
chatterer/examples/any2md.py +83 -85
chatterer/examples/pdf2md.py +231 -338
chatterer/examples/pdf2txt.py +52 -54
chatterer/examples/ppt.py +487 -486
chatterer/examples/pw.py +141 -143
chatterer/examples/snippet.py +54 -56
chatterer/examples/transcribe.py +192 -192
chatterer/examples/upstage.py +87 -89
chatterer/examples/web2md.py +80 -80
chatterer/interactive.py +422 -354
chatterer/language_model.py +530 -536
chatterer/messages.py +21 -21
chatterer/tools/__init__.py +46 -46
chatterer/tools/caption_markdown_images.py +388 -384
chatterer/tools/citation_chunking/__init__.py +3 -3
chatterer/tools/citation_chunking/chunks.py +51 -53
chatterer/tools/citation_chunking/citation_chunker.py +117 -118
chatterer/tools/citation_chunking/citations.py +284 -285
chatterer/tools/citation_chunking/prompt.py +157 -157
chatterer/tools/citation_chunking/reference.py +26 -26
chatterer/tools/citation_chunking/utils.py +138 -138
chatterer/tools/convert_pdf_to_markdown.py +634 -645
chatterer/tools/convert_to_text.py +446 -446
chatterer/tools/upstage_document_parser.py +704 -705
chatterer/tools/webpage_to_markdown.py +739 -739
chatterer/tools/youtube.py +146 -147
chatterer/utils/__init__.py +15 -15
chatterer/utils/base64_image.py +349 -350
chatterer/utils/bytesio.py +59 -59
chatterer/utils/code_agent.py +237 -237
chatterer/utils/imghdr.py +145 -145
{chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/METADATA +377 -390
chatterer-0.1.28.dist-info/RECORD +43 -0
chatterer-0.1.26.dist-info/RECORD +0 -42
{chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/WHEEL +0 -0
{chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/entry_points.txt +0 -0
{chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/top_level.txt +0 -0

chatterer/examples/pdf2txt.py CHANGED Viewed

@@ -1,54 +1,52 @@
-import logging
-import sys
-from pathlib import Path
-from typing import Optional
-from spargear import RunnableArguments
-from chatterer.tools.convert_to_text import pdf_to_text
-logger = logging.getLogger(__name__)
-class Arguments(RunnableArguments[None]):
-    PDF_PATH: Path
-    """Path to the PDF file to convert to text."""
-    output: Optional[Path]
-    """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
-    page: Optional[str] = None
-    """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
-    def run(self) -> None:
-        input = self.PDF_PATH.resolve()
-        out = self.output or input.with_suffix(".txt")
-        if not input.is_file():
-            sys.exit(1)
-        out.write_text(
-            pdf_to_text(path_or_file=input, page_indices=self.page),
-            encoding="utf-8",
-        )
-        logger.info(f"Extracted text from `{input}` to `{out}`")
-def parse_page_indices(pages_str: str) -> list[int]:
-    indices: set[int] = set()
-    for part in pages_str.split(","):
-        part = part.strip()
-        if "-" in part:
-            start_str, end_str = part.split("-", 1)
-            start = int(start_str)
-            end = int(end_str)
-            if start > end:
-                raise ValueError
-            indices.update(range(start, end + 1))
-        else:
-            indices.add(int(part))
-    return sorted(indices)
-def main() -> None:
-    Arguments().run()
-if __name__ == "__main__":
-    main()
+import sys
+from pathlib import Path
+from typing import Optional
+from loguru import logger
+from spargear import RunnableArguments
+from chatterer.tools.convert_to_text import pdf_to_text
+class Arguments(RunnableArguments[None]):
+    PDF_PATH: Path
+    """Path to the PDF file to convert to text."""
+    output: Optional[Path]
+    """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
+    page: Optional[str] = None
+    """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
+    def run(self) -> None:
+        input = self.PDF_PATH.resolve()
+        out = self.output or input.with_suffix(".txt")
+        if not input.is_file():
+            sys.exit(1)
+        out.write_text(
+            pdf_to_text(path_or_file=input, page_indices=self.page),
+            encoding="utf-8",
+        )
+        logger.info(f"Extracted text from `{input}` to `{out}`")
+def parse_page_indices(pages_str: str) -> list[int]:
+    indices: set[int] = set()
+    for part in pages_str.split(","):
+        part = part.strip()
+        if "-" in part:
+            start_str, end_str = part.split("-", 1)
+            start = int(start_str)
+            end = int(end_str)
+            if start > end:
+                raise ValueError
+            indices.update(range(start, end + 1))
+        else:
+            indices.add(int(part))
+    return sorted(indices)
+def main() -> None:
+    Arguments().run()
+if __name__ == "__main__":
+    main()

chatterer 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl

chatterer 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl