chatterer-0.1.23-py3-none-any.whl → chatterer-0.1.25-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. chatterer/__init__.py +97 -93
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/examples/__main__.py +75 -0
  5. chatterer/examples/{anything_to_markdown.py → any2md.py} +85 -85
  6. chatterer/examples/{pdf_to_markdown.py → pdf2md.py} +338 -338
  7. chatterer/examples/{pdf_to_text.py → pdf2txt.py} +54 -54
  8. chatterer/examples/{make_ppt.py → ppt.py} +486 -488
  9. chatterer/examples/pw.py +143 -0
  10. chatterer/examples/{get_code_snippets.py → snippet.py} +56 -55
  11. chatterer/examples/transcribe.py +192 -0
  12. chatterer/examples/{upstage_parser.py → upstage.py} +89 -89
  13. chatterer/examples/{webpage_to_markdown.py → web2md.py} +80 -70
  14. chatterer/interactive.py +354 -354
  15. chatterer/language_model.py +536 -536
  16. chatterer/messages.py +21 -21
  17. chatterer/strategies/__init__.py +13 -13
  18. chatterer/strategies/atom_of_thoughts.py +975 -975
  19. chatterer/strategies/base.py +14 -14
  20. chatterer/tools/__init__.py +46 -46
  21. chatterer/tools/caption_markdown_images.py +384 -384
  22. chatterer/tools/citation_chunking/__init__.py +3 -3
  23. chatterer/tools/citation_chunking/chunks.py +53 -53
  24. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  25. chatterer/tools/citation_chunking/citations.py +285 -285
  26. chatterer/tools/citation_chunking/prompt.py +157 -157
  27. chatterer/tools/citation_chunking/reference.py +26 -26
  28. chatterer/tools/citation_chunking/utils.py +138 -138
  29. chatterer/tools/convert_pdf_to_markdown.py +645 -625
  30. chatterer/tools/convert_to_text.py +446 -446
  31. chatterer/tools/upstage_document_parser.py +705 -705
  32. chatterer/tools/webpage_to_markdown.py +739 -739
  33. chatterer/tools/youtube.py +146 -146
  34. chatterer/utils/__init__.py +15 -15
  35. chatterer/utils/base64_image.py +293 -285
  36. chatterer/utils/bytesio.py +59 -59
  37. chatterer/utils/code_agent.py +237 -237
  38. chatterer/utils/imghdr.py +148 -148
  39. {chatterer-0.1.23.dist-info → chatterer-0.1.25.dist-info}/METADATA +390 -392
  40. chatterer-0.1.25.dist-info/RECORD +45 -0
  41. chatterer-0.1.25.dist-info/entry_points.txt +2 -0
  42. chatterer/examples/login_with_playwright.py +0 -156
  43. chatterer/examples/transcription_api.py +0 -112
  44. chatterer-0.1.23.dist-info/RECORD +0 -44
  45. chatterer-0.1.23.dist-info/entry_points.txt +0 -10
  46. {chatterer-0.1.23.dist-info → chatterer-0.1.25.dist-info}/WHEEL +0 -0
  47. {chatterer-0.1.23.dist-info → chatterer-0.1.25.dist-info}/top_level.txt +0 -0
chatterer/examples/{pdf_to_text.py → pdf2txt.py}
@@ -1,54 +1,54 @@
1
- import logging
2
- import sys
3
- from pathlib import Path
4
- from typing import Optional
5
-
6
- from spargear import BaseArguments
7
-
8
- from chatterer.tools.convert_to_text import pdf_to_text
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class PdfToTextArgs(BaseArguments):
14
- input: Path
15
- """Path to the PDF file to convert to text."""
16
- output: Optional[Path]
17
- """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
18
- page: Optional[str] = None
19
- """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
-
21
- def run(self) -> None:
22
- input = self.input.resolve()
23
- out = self.output or input.with_suffix(".txt")
24
- if not input.is_file():
25
- sys.exit(1)
26
- out.write_text(
27
- pdf_to_text(path_or_file=input, page_indices=self.page),
28
- encoding="utf-8",
29
- )
30
- logger.info(f"Extracted text from `{input}` to `{out}`")
31
-
32
-
33
- def parse_page_indices(pages_str: str) -> list[int]:
34
- indices: set[int] = set()
35
- for part in pages_str.split(","):
36
- part = part.strip()
37
- if "-" in part:
38
- start_str, end_str = part.split("-", 1)
39
- start = int(start_str)
40
- end = int(end_str)
41
- if start > end:
42
- raise ValueError
43
- indices.update(range(start, end + 1))
44
- else:
45
- indices.add(int(part))
46
- return sorted(indices)
47
-
48
-
49
- def main() -> None:
50
- PdfToTextArgs().run()
51
-
52
-
53
- if __name__ == "__main__":
54
- main()
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ from spargear import RunnableArguments
7
+
8
+ from chatterer.tools.convert_to_text import pdf_to_text
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class Arguments(RunnableArguments[None]):
14
+ PDF_PATH: Path
15
+ """Path to the PDF file to convert to text."""
16
+ output: Optional[Path]
17
+ """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
18
+ page: Optional[str] = None
19
+ """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
+
21
+ def run(self) -> None:
22
+ input = self.PDF_PATH.resolve()
23
+ out = self.output or input.with_suffix(".txt")
24
+ if not input.is_file():
25
+ sys.exit(1)
26
+ out.write_text(
27
+ pdf_to_text(path_or_file=input, page_indices=self.page),
28
+ encoding="utf-8",
29
+ )
30
+ logger.info(f"Extracted text from `{input}` to `{out}`")
31
+
32
+
33
+ def parse_page_indices(pages_str: str) -> list[int]:
34
+ indices: set[int] = set()
35
+ for part in pages_str.split(","):
36
+ part = part.strip()
37
+ if "-" in part:
38
+ start_str, end_str = part.split("-", 1)
39
+ start = int(start_str)
40
+ end = int(end_str)
41
+ if start > end:
42
+ raise ValueError
43
+ indices.update(range(start, end + 1))
44
+ else:
45
+ indices.add(int(part))
46
+ return sorted(indices)
47
+
48
+
49
+ def main() -> None:
50
+ Arguments().run()
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()