chatterer 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {chatterer-0.1.19 → chatterer-0.1.21}/PKG-INFO +2 -2
  2. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/anything_to_markdown.py +21 -31
  3. chatterer-0.1.21/chatterer/examples/get_code_snippets.py +55 -0
  4. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/login_with_playwright.py +68 -83
  5. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/make_ppt.py +3 -14
  6. chatterer-0.1.21/chatterer/examples/pdf_to_markdown.py +77 -0
  7. chatterer-0.1.21/chatterer/examples/pdf_to_text.py +54 -0
  8. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/transcription_api.py +21 -36
  9. chatterer-0.1.21/chatterer/examples/upstage_parser.py +89 -0
  10. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/webpage_to_markdown.py +19 -28
  11. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/strategies/atom_of_thoughts.py +1 -1
  12. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/convert_pdf_to_markdown.py +105 -14
  13. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/convert_to_text.py +3 -4
  14. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/upstage_document_parser.py +2 -2
  15. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/utils/code_agent.py +1 -1
  16. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer.egg-info/PKG-INFO +2 -2
  17. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer.egg-info/requires.txt +1 -1
  18. {chatterer-0.1.19 → chatterer-0.1.21}/pyproject.toml +2 -2
  19. chatterer-0.1.19/chatterer/examples/get_code_snippets.py +0 -64
  20. chatterer-0.1.19/chatterer/examples/pdf_to_markdown.py +0 -107
  21. chatterer-0.1.19/chatterer/examples/pdf_to_text.py +0 -60
  22. chatterer-0.1.19/chatterer/examples/upstage_parser.py +0 -95
  23. {chatterer-0.1.19 → chatterer-0.1.21}/README.md +0 -0
  24. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/__init__.py +0 -0
  25. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/common_types/__init__.py +0 -0
  26. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/common_types/io.py +0 -0
  27. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/examples/__init__.py +0 -0
  28. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/interactive.py +0 -0
  29. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/language_model.py +0 -0
  30. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/messages.py +0 -0
  31. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/py.typed +0 -0
  32. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/strategies/__init__.py +0 -0
  33. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/strategies/base.py +0 -0
  34. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/__init__.py +0 -0
  35. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/caption_markdown_images.py +0 -0
  36. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/__init__.py +0 -0
  37. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/chunks.py +0 -0
  38. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
  39. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/citations.py +0 -0
  40. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/prompt.py +0 -0
  41. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/reference.py +0 -0
  42. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/citation_chunking/utils.py +0 -0
  43. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/webpage_to_markdown.py +0 -0
  44. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/tools/youtube.py +0 -0
  45. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/utils/__init__.py +0 -0
  46. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/utils/base64_image.py +0 -0
  47. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/utils/bytesio.py +0 -0
  48. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer/utils/imghdr.py +0 -0
  49. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer.egg-info/SOURCES.txt +0 -0
  50. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer.egg-info/dependency_links.txt +0 -0
  51. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer.egg-info/entry_points.txt +0 -0
  52. {chatterer-0.1.19 → chatterer-0.1.21}/chatterer.egg-info/top_level.txt +0 -0
  53. {chatterer-0.1.19 → chatterer-0.1.21}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.19
3
+ Version: 0.1.21
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -11,7 +11,7 @@ Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: regex>=2024.11.6
12
12
  Requires-Dist: rich>=13.9.4
13
13
  Requires-Dist: colorama>=0.4.6
14
- Requires-Dist: spargear>=0.1.6
14
+ Requires-Dist: spargear>=0.2.0
15
15
  Provides-Extra: dev
16
16
  Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
17
17
  Requires-Dist: ipykernel>=6.29.5; extra == "dev"
@@ -1,36 +1,27 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
1
+ import logging
14
2
  from pathlib import Path
15
3
  from typing import Optional, TypedDict
16
4
 
17
5
  import openai
18
- from spargear import ArgumentSpec, BaseArguments
6
+ from spargear import BaseArguments
19
7
 
20
8
  from chatterer import anything_to_markdown
21
9
 
10
+ logger = logging.getLogger(__name__)
11
+
22
12
 
23
13
  class AnythingToMarkdownReturns(TypedDict):
24
- in_path: str
25
- out_path: Optional[str]
14
+ input: str
15
+ output: Optional[str]
26
16
  out_text: str
27
17
 
28
18
 
29
19
  class AnythingToMarkdownArguments(BaseArguments):
30
20
  """Command line arguments for converting various file types to markdown."""
31
21
 
32
- in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
33
- out_path: Optional[str] = None
22
+ input: str
23
+ """Input file to convert to markdown. Can be a file path or a URL."""
24
+ output: Optional[str] = None
34
25
  """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
35
26
  model: Optional[str] = None
36
27
  """OpenAI Model to use for conversion"""
@@ -50,14 +41,13 @@ class AnythingToMarkdownArguments(BaseArguments):
50
41
  """Encoding for the output file."""
51
42
 
52
43
  def run(self) -> AnythingToMarkdownReturns:
53
- in_path = self.in_path.unwrap()
54
44
  if not self.prevent_save_file:
55
- if not self.out_path:
56
- out_path = Path(in_path).with_suffix(".md")
45
+ if not self.output:
46
+ output = Path(self.input).with_suffix(".md")
57
47
  else:
58
- out_path = Path(self.out_path)
48
+ output = Path(self.output)
59
49
  else:
60
- out_path = None
50
+ output = None
61
51
 
62
52
  if self.model:
63
53
  llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
@@ -67,22 +57,22 @@ class AnythingToMarkdownArguments(BaseArguments):
67
57
  llm_model = None
68
58
 
69
59
  text: str = anything_to_markdown(
70
- in_path,
60
+ self.input,
71
61
  llm_client=llm_client,
72
62
  llm_model=llm_model,
73
63
  style_map=self.style_map,
74
64
  exiftool_path=self.exiftool_path,
75
65
  docintel_endpoint=self.docintel_endpoint,
76
66
  )
77
- if out_path:
78
- out_path.parent.mkdir(parents=True, exist_ok=True)
79
- out_path.write_text(text, encoding=self.encoding)
80
- logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
67
+ if output:
68
+ output.parent.mkdir(parents=True, exist_ok=True)
69
+ output.write_text(text, encoding=self.encoding)
70
+ logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
81
71
  else:
82
- logger.info(f"Converted `{in_path}` to markdown.")
72
+ logger.info(f"Converted `{self.input}` to markdown.")
83
73
  return {
84
- "in_path": in_path,
85
- "out_path": str(out_path) if out_path is not None else None,
74
+ "input": self.input,
75
+ "output": str(output) if output is not None else None,
86
76
  "out_text": text,
87
77
  }
88
78
 
@@ -0,0 +1,55 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from spargear import BaseArguments
6
+
7
+ from chatterer import CodeSnippets
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class GetCodeSnippetsArgs(BaseArguments):
13
+ input: str
14
+ """Path to the package or file from which to extract code snippets."""
15
+ output: Optional[str] = None
16
+ """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
17
+ ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
18
+ """List of file patterns to ignore."""
19
+ glob_patterns: list[str] = ["*.py"]
20
+ """List of glob patterns to include."""
21
+ case_sensitive: bool = False
22
+ """Enable case-sensitive matching for glob patterns."""
23
+ prevent_save_file: bool = False
24
+ """Prevent saving the extracted code snippets to a file."""
25
+
26
+ def run(self) -> CodeSnippets:
27
+ if not self.prevent_save_file:
28
+ if not self.output:
29
+ output = Path(__file__).with_suffix(".txt")
30
+ else:
31
+ output = Path(self.output)
32
+ else:
33
+ output = None
34
+
35
+ cs = CodeSnippets.from_path_or_pkgname(
36
+ path_or_pkgname=self.input,
37
+ ban_file_patterns=self.ban_file_patterns,
38
+ glob_patterns=self.glob_patterns,
39
+ case_sensitive=self.case_sensitive,
40
+ )
41
+ if output is not None:
42
+ output.parent.mkdir(parents=True, exist_ok=True)
43
+ output.write_text(cs.snippets_text, encoding="utf-8")
44
+ logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
45
+ else:
46
+ logger.info(f"Extracted code snippets from `{self.input}`.")
47
+ return cs
48
+
49
+
50
+ def main() -> None:
51
+ GetCodeSnippetsArgs().run()
52
+
53
+
54
+ if __name__ == "__main__":
55
+ main()
@@ -1,17 +1,5 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
1
  import json
2
+ import logging
15
3
  import sys
16
4
  from pathlib import Path
17
5
 
@@ -19,76 +7,8 @@ from spargear import BaseArguments, SubcommandSpec
19
7
 
20
8
  from chatterer import PlayWrightBot
21
9
 
10
+ logger = logging.getLogger(__name__)
22
11
 
23
- def read_session(url: str, jsonpath: Path) -> None:
24
- """
25
- Loads the session state from the specified JSON file, then navigates
26
- to a protected_url that normally requires login. If the stored session
27
- is valid, it should open without re-entering credentials.
28
-
29
- Correction: Loads the JSON content into a dict first to satisfy type hints.
30
- """
31
- logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
32
-
33
- if not jsonpath.exists():
34
- logger.error(f"Session file not found at {jsonpath}")
35
- sys.exit(1)
36
-
37
- # Load the storage state from the JSON file into a dictionary
38
- logger.info(f"Reading storage state content from {jsonpath} ...")
39
- try:
40
- with open(jsonpath, "r", encoding="utf-8") as f:
41
- # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
42
- storage_state_dict = json.load(f)
43
- except json.JSONDecodeError:
44
- logger.error(f"Failed to decode JSON from {jsonpath}")
45
- sys.exit(1)
46
- except Exception as e:
47
- logger.error(f"Error reading file {jsonpath}: {e}")
48
- sys.exit(1)
49
-
50
- logger.info("Launching browser with loaded session state...")
51
- with PlayWrightBot(
52
- playwright_launch_options={"headless": False},
53
- # Pass the loaded dictionary, which should match the expected 'StorageState' type
54
- playwright_persistency_options={"storage_state": storage_state_dict},
55
- ) as bot:
56
- bot.get_page(url)
57
-
58
- logger.info("Press Enter in the console when you're done checking the protected page.")
59
- input(" >> Press Enter to exit: ")
60
-
61
- logger.info("Done! Browser is now closed.")
62
-
63
-
64
- def write_session(url: str, jsonpath: Path) -> None:
65
- """
66
- Launches a non-headless browser and navigates to the login_url.
67
- The user can manually log in, then press Enter in the console
68
- to store the current session state into a JSON file.
69
- """
70
- logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
71
-
72
- # Ensure jsonpath directory exists
73
- jsonpath.parent.mkdir(parents=True, exist_ok=True)
74
-
75
- with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
76
- bot.get_page(url)
77
-
78
- logger.info("After completing the login in the browser, press Enter here to save the session.")
79
- input(" >> Press Enter when ready: ")
80
-
81
- # get_sync_browser() returns the BrowserContext internally
82
- context = bot.get_sync_browser()
83
-
84
- # Save the current session (cookies, localStorage) to a JSON file
85
- logger.info(f"Saving storage state to {jsonpath} ...")
86
- context.storage_state(path=jsonpath) # Pass Path object directly
87
-
88
- logger.info("Done! Browser is now closed.")
89
-
90
-
91
- # --- Spargear Declarative CLI Definition ---
92
12
 
93
13
  # Define the default path location relative to this script file
94
14
  DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
@@ -160,7 +80,72 @@ class LoginWithPlaywrightArgs(BaseArguments):
160
80
  sys.exit(1)
161
81
 
162
82
 
163
- # --- Main Execution Logic ---
83
+ def read_session(url: str, jsonpath: Path) -> None:
84
+ """
85
+ Loads the session state from the specified JSON file, then navigates
86
+ to a protected_url that normally requires login. If the stored session
87
+ is valid, it should open without re-entering credentials.
88
+
89
+ Correction: Loads the JSON content into a dict first to satisfy type hints.
90
+ """
91
+ logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
92
+
93
+ if not jsonpath.exists():
94
+ logger.error(f"Session file not found at {jsonpath}")
95
+ sys.exit(1)
96
+
97
+ # Load the storage state from the JSON file into a dictionary
98
+ logger.info(f"Reading storage state content from {jsonpath} ...")
99
+ try:
100
+ with open(jsonpath, "r", encoding="utf-8") as f:
101
+ # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
102
+ storage_state_dict = json.load(f)
103
+ except json.JSONDecodeError:
104
+ logger.error(f"Failed to decode JSON from {jsonpath}")
105
+ sys.exit(1)
106
+ except Exception as e:
107
+ logger.error(f"Error reading file {jsonpath}: {e}")
108
+ sys.exit(1)
109
+
110
+ logger.info("Launching browser with loaded session state...")
111
+ with PlayWrightBot(
112
+ playwright_launch_options={"headless": False},
113
+ # Pass the loaded dictionary, which should match the expected 'StorageState' type
114
+ playwright_persistency_options={"storage_state": storage_state_dict},
115
+ ) as bot:
116
+ bot.get_page(url)
117
+
118
+ logger.info("Press Enter in the console when you're done checking the protected page.")
119
+ input(" >> Press Enter to exit: ")
120
+
121
+ logger.info("Done! Browser is now closed.")
122
+
123
+
124
+ def write_session(url: str, jsonpath: Path) -> None:
125
+ """
126
+ Launches a non-headless browser and navigates to the login_url.
127
+ The user can manually log in, then press Enter in the console
128
+ to store the current session state into a JSON file.
129
+ """
130
+ logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
131
+
132
+ # Ensure jsonpath directory exists
133
+ jsonpath.parent.mkdir(parents=True, exist_ok=True)
134
+
135
+ with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
136
+ bot.get_page(url)
137
+
138
+ logger.info("After completing the login in the browser, press Enter here to save the session.")
139
+ input(" >> Press Enter when ready: ")
140
+
141
+ # get_sync_browser() returns the BrowserContext internally
142
+ context = bot.get_sync_browser()
143
+
144
+ # Save the current session (cookies, localStorage) to a JSON file
145
+ logger.info(f"Saving storage state to {jsonpath} ...")
146
+ context.storage_state(path=jsonpath) # Pass Path object directly
147
+
148
+ logger.info("Done! Browser is now closed.")
164
149
 
165
150
 
166
151
  def main() -> None:
@@ -1,16 +1,3 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
1
  import re
15
2
  import sys
16
3
  from pathlib import Path
@@ -192,7 +179,9 @@ class MakePptArguments(BaseArguments):
192
179
  """Prompt for organizing slides into a presentation script"""
193
180
 
194
181
  # LLM Settings
195
- provider: str = "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
182
+ provider: str = (
183
+ "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
184
+ )
196
185
  """Name of the language model to use (provider:model_name)"""
197
186
 
198
187
  # Other settings
@@ -0,0 +1,77 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ from spargear import ArgumentSpec, BaseArguments
7
+
8
+ from chatterer import Chatterer, PdfToMarkdown
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class PdfToMarkdownArgs(BaseArguments):
14
+ input: str
15
+ """Input PDF file or directory containing PDF files to convert to markdown."""
16
+ output: Optional[str] = None
17
+ """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
18
+ """Chatterer instance for communication."""
19
+ page: Optional[str] = None
20
+ """Zero-based page indices to convert (e.g., '0,2,4-8')."""
21
+ recursive: bool = False
22
+ """If input is a directory, search for PDFs recursively."""
23
+ chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
24
+ ["--chatterer"],
25
+ default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
26
+ help="Chatterer instance for communication.",
27
+ type=Chatterer.from_provider,
28
+ )
29
+
30
+ def run(self) -> list[dict[str, str]]:
31
+ input = Path(self.input).resolve()
32
+ pdf_files: list[Path] = []
33
+ is_dir = False
34
+ if input.is_file():
35
+ if input.suffix.lower() != ".pdf":
36
+ sys.exit(1)
37
+ pdf_files.append(input)
38
+ elif input.is_dir():
39
+ is_dir = True
40
+ pattern = "*.pdf"
41
+ pdf_files = sorted([
42
+ f for f in (input.rglob(pattern) if self.recursive else input.glob(pattern)) if f.is_file()
43
+ ])
44
+ if not pdf_files:
45
+ sys.exit(0)
46
+ else:
47
+ sys.exit(1)
48
+ if self.output:
49
+ out_base = Path(self.output).resolve()
50
+ elif is_dir:
51
+ out_base = input
52
+ else:
53
+ out_base = input.with_suffix(".md")
54
+
55
+ if is_dir:
56
+ out_base.mkdir(parents=True, exist_ok=True)
57
+ else:
58
+ out_base.parent.mkdir(parents=True, exist_ok=True)
59
+
60
+ converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
61
+ results: list[dict[str, str]] = []
62
+ for pdf in pdf_files:
63
+ output: Path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
64
+ md: str = converter.convert(pdf_input=str(pdf), page_indices=self.page)
65
+ output.parent.mkdir(parents=True, exist_ok=True)
66
+ output.write_text(md, encoding="utf-8")
67
+ results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
68
+ logger.info(f"Converted {len(pdf_files)} PDF(s) to markdown and saved to `{out_base}`.")
69
+ return results
70
+
71
+
72
+ def main() -> None:
73
+ PdfToMarkdownArgs().run()
74
+
75
+
76
+ if __name__ == "__main__":
77
+ main()
@@ -0,0 +1,54 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ from spargear import BaseArguments
7
+
8
+ from chatterer.tools.convert_to_text import pdf_to_text
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class PdfToTextArgs(BaseArguments):
14
+ input: Path
15
+ """Path to the PDF file to convert to text."""
16
+ output: Optional[Path]
17
+ """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
18
+ page: Optional[str] = None
19
+ """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
+
21
+ def run(self) -> None:
22
+ input = self.input.resolve()
23
+ out = self.output or input.with_suffix(".txt")
24
+ if not input.is_file():
25
+ sys.exit(1)
26
+ out.write_text(
27
+ pdf_to_text(path_or_file=input, page_indices=self.page),
28
+ encoding="utf-8",
29
+ )
30
+ logger.info(f"Extracted text from `{input}` to `{out}`")
31
+
32
+
33
+ def parse_page_indices(pages_str: str) -> list[int]:
34
+ indices: set[int] = set()
35
+ for part in pages_str.split(","):
36
+ part = part.strip()
37
+ if "-" in part:
38
+ start_str, end_str = part.split("-", 1)
39
+ start = int(start_str)
40
+ end = int(end_str)
41
+ if start > end:
42
+ raise ValueError
43
+ indices.update(range(start, end + 1))
44
+ else:
45
+ indices.add(int(part))
46
+ return sorted(indices)
47
+
48
+
49
+ def main() -> None:
50
+ PdfToTextArgs().run()
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()
@@ -2,51 +2,36 @@
2
2
 
3
3
  from io import BytesIO
4
4
  from pathlib import Path
5
- from typing import cast
5
+ from typing import Optional, cast
6
6
 
7
7
  from openai import OpenAI
8
8
  from pydub import AudioSegment
9
- from spargear import ArgumentSpec, BaseArguments
9
+ from spargear import BaseArguments
10
10
 
11
11
  # Maximum chunk length in seconds
12
12
  MAX_CHUNK_DURATION = 600
13
13
 
14
14
 
15
15
  class TranscriptionApiArguments(BaseArguments):
16
- in_path = ArgumentSpec(
17
- ["in-path"],
18
- type=Path,
19
- help="The audio file to transcribe.",
20
- )
21
- out_path = ArgumentSpec(
22
- ["--out-path"],
23
- type=Path,
24
- default=None,
25
- help="Path to save the transcription output.",
26
- )
27
- model: ArgumentSpec[str] = ArgumentSpec(
28
- ["--model"],
29
- default="gpt-4o-transcribe",
30
- help="The model to use for transcription.",
31
- )
32
- api_key: ArgumentSpec[str] = ArgumentSpec(
33
- ["--api-key"],
34
- default=None,
35
- help="The API key for authentication.",
36
- )
37
- base_url: ArgumentSpec[str] = ArgumentSpec(
38
- ["--base-url"],
39
- default="https://api.openai.com/v1",
40
- help="The base URL for the API.",
41
- )
16
+ input: Path
17
+ """The audio file to transcribe."""
18
+ output: Optional[Path] = None
19
+ """Path to save the transcription output."""
20
+ model: str = "gpt-4o-transcribe"
21
+ """The model to use for transcription."""
22
+ api_key: Optional[str] = None
23
+ """The API key for authentication."""
24
+ base_url: str = "https://api.openai.com/v1"
25
+ """The base URL for the API."""
26
+ prompt: str = "Transcribe whole text from audio."
27
+ """The prompt to use for transcription."""
42
28
 
43
29
  def run(self) -> None:
44
- audio_path = self.in_path.unwrap()
45
- model = self.model.unwrap()
30
+ model = self.model
46
31
 
47
- client = OpenAI(api_key=self.api_key.value, base_url=self.base_url.value)
32
+ client = OpenAI(api_key=self.api_key, base_url=self.base_url)
48
33
 
49
- audio = load_audio_segment(audio_path)
34
+ audio = load_audio_segment(self.input)
50
35
 
51
36
  segments = split_audio(audio, MAX_CHUNK_DURATION)
52
37
  print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
@@ -54,10 +39,10 @@ class TranscriptionApiArguments(BaseArguments):
54
39
  transcripts: list[str] = []
55
40
  for idx, seg in enumerate(segments, start=1):
56
41
  print(f"[i] Transcribing segment {idx}/{len(segments)}...")
57
- transcripts.append(transcribe_segment(seg, client, model))
42
+ transcripts.append(transcribe_segment(seg, client, model, self.prompt))
58
43
 
59
44
  full_transcript = "\n\n".join(transcripts)
60
- output_path: Path = self.out_path.value or audio_path.with_suffix(".txt")
45
+ output_path: Path = self.output or self.input.with_suffix(".txt")
61
46
  output_path.write_text(full_transcript, encoding="utf-8")
62
47
  print(f"[✓] Transcription saved to: {output_path}")
63
48
 
@@ -94,7 +79,7 @@ def split_audio(audio: AudioSegment, max_duration_s: int) -> list[AudioSegment]:
94
79
  return segments
95
80
 
96
81
 
97
- def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str:
82
+ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt: str) -> str:
98
83
  """
99
84
  Transcribe a single AudioSegment chunk and return its text.
100
85
  """
@@ -104,7 +89,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str
104
89
  mp3_bytes = buffer.read()
105
90
  response = client.audio.transcriptions.create(
106
91
  model=model,
107
- prompt="Transcribe whole text from audio.",
92
+ prompt=prompt,
108
93
  file=("audio.mp3", mp3_bytes),
109
94
  response_format="text",
110
95
  stream=True,