chatterer 0.1.20__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {chatterer-0.1.20 → chatterer-0.1.21}/PKG-INFO +1 -1
  2. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/pdf_to_markdown.py +4 -27
  3. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/pdf_to_text.py +3 -3
  4. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer.egg-info/PKG-INFO +1 -1
  5. {chatterer-0.1.20 → chatterer-0.1.21}/pyproject.toml +1 -1
  6. {chatterer-0.1.20 → chatterer-0.1.21}/README.md +0 -0
  7. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/__init__.py +0 -0
  8. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/common_types/__init__.py +0 -0
  9. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/common_types/io.py +0 -0
  10. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/__init__.py +0 -0
  11. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/anything_to_markdown.py +0 -0
  12. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/get_code_snippets.py +0 -0
  13. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/login_with_playwright.py +0 -0
  14. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/make_ppt.py +0 -0
  15. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/transcription_api.py +0 -0
  16. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/upstage_parser.py +0 -0
  17. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/examples/webpage_to_markdown.py +0 -0
  18. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/interactive.py +0 -0
  19. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/language_model.py +0 -0
  20. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/messages.py +0 -0
  21. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/py.typed +0 -0
  22. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/strategies/__init__.py +0 -0
  23. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/strategies/atom_of_thoughts.py +0 -0
  24. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/strategies/base.py +0 -0
  25. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/__init__.py +0 -0
  26. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/caption_markdown_images.py +0 -0
  27. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/__init__.py +0 -0
  28. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/chunks.py +0 -0
  29. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
  30. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/citations.py +0 -0
  31. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/prompt.py +0 -0
  32. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/reference.py +0 -0
  33. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/citation_chunking/utils.py +0 -0
  34. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/convert_pdf_to_markdown.py +0 -0
  35. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/convert_to_text.py +0 -0
  36. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/upstage_document_parser.py +0 -0
  37. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/webpage_to_markdown.py +0 -0
  38. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/tools/youtube.py +0 -0
  39. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/utils/__init__.py +0 -0
  40. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/utils/base64_image.py +0 -0
  41. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/utils/bytesio.py +0 -0
  42. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/utils/code_agent.py +0 -0
  43. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer/utils/imghdr.py +0 -0
  44. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer.egg-info/SOURCES.txt +0 -0
  45. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer.egg-info/dependency_links.txt +0 -0
  46. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer.egg-info/entry_points.txt +0 -0
  47. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer.egg-info/requires.txt +0 -0
  48. {chatterer-0.1.20 → chatterer-0.1.21}/chatterer.egg-info/top_level.txt +0 -0
  49. {chatterer-0.1.20 → chatterer-0.1.21}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.20
3
+ Version: 0.1.21
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -16,8 +16,8 @@ class PdfToMarkdownArgs(BaseArguments):
16
16
  output: Optional[str] = None
17
17
  """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
18
18
  """Chatterer instance for communication."""
19
- pages: Optional[str] = None
20
- """Page indices to convert (e.g., '1,3,5-9')."""
19
+ page: Optional[str] = None
20
+ """Zero-based page indices to convert (e.g., '0,2,4-8')."""
21
21
  recursive: bool = False
22
22
  """If input is a directory, search for PDFs recursively."""
23
23
  chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
@@ -29,7 +29,6 @@ class PdfToMarkdownArgs(BaseArguments):
29
29
 
30
30
  def run(self) -> list[dict[str, str]]:
31
31
  input = Path(self.input).resolve()
32
- page_indices = parse_page_indices(self.pages) if self.pages else None
33
32
  pdf_files: list[Path] = []
34
33
  is_dir = False
35
34
  if input.is_file():
@@ -61,8 +60,8 @@ class PdfToMarkdownArgs(BaseArguments):
61
60
  converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
62
61
  results: list[dict[str, str]] = []
63
62
  for pdf in pdf_files:
64
- output = (out_base / (pdf.stem + ".md")) if is_dir else out_base
65
- md = converter.convert(str(pdf), page_indices)
63
+ output: Path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
64
+ md: str = converter.convert(pdf_input=str(pdf), page_indices=self.page)
66
65
  output.parent.mkdir(parents=True, exist_ok=True)
67
66
  output.write_text(md, encoding="utf-8")
68
67
  results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
@@ -70,28 +69,6 @@ class PdfToMarkdownArgs(BaseArguments):
70
69
  return results
71
70
 
72
71
 
73
- def parse_page_indices(pages_str: str) -> list[int] | None:
74
- if not pages_str:
75
- return None
76
- indices: set[int] = set()
77
- for part in pages_str.split(","):
78
- part = part.strip()
79
- if not part:
80
- continue
81
- if "-" in part:
82
- start_str, end_str = part.split("-", 1)
83
- start = int(start_str.strip())
84
- end = int(end_str.strip())
85
- if start > end:
86
- raise ValueError
87
- indices.update(range(start, end + 1))
88
- else:
89
- indices.add(int(part))
90
- if not indices:
91
- raise ValueError
92
- return sorted(indices)
93
-
94
-
95
72
  def main() -> None:
96
73
  PdfToMarkdownArgs().run()
97
74
 
@@ -15,8 +15,8 @@ class PdfToTextArgs(BaseArguments):
15
15
  """Path to the PDF file to convert to text."""
16
16
  output: Optional[Path]
17
17
  """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
18
- pages: Optional[str] = None
19
- """Comma-separated list of page indices to extract from the PDF. Supports ranges, e.g., '1,3,5-9'."""
18
+ page: Optional[str] = None
19
+ """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
20
 
21
21
  def run(self) -> None:
22
22
  input = self.input.resolve()
@@ -24,7 +24,7 @@ class PdfToTextArgs(BaseArguments):
24
24
  if not input.is_file():
25
25
  sys.exit(1)
26
26
  out.write_text(
27
- pdf_to_text(path_or_file=input, page_indices=self.pages),
27
+ pdf_to_text(path_or_file=input, page_indices=self.page),
28
28
  encoding="utf-8",
29
29
  )
30
30
  logger.info(f"Extracted text from `{input}` to `{out}`")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.20
3
+ Version: 0.1.21
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -10,7 +10,7 @@ dependencies = [
10
10
  "spargear>=0.2.0",
11
11
  ]
12
12
  name = "chatterer"
13
- version = "0.1.20"
13
+ version = "0.1.21"
14
14
  description = "The highest-level interface for various LLM APIs."
15
15
  readme = "README.md"
16
16
  requires-python = ">=3.12"
File without changes
File without changes