chatterer 0.1.23__tar.gz → 0.1.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {chatterer-0.1.23 → chatterer-0.1.24}/PKG-INFO +6 -9
  2. chatterer-0.1.24/chatterer/examples/__main__.py +75 -0
  3. chatterer-0.1.23/chatterer/examples/anything_to_markdown.py → chatterer-0.1.24/chatterer/examples/any2md.py +9 -9
  4. chatterer-0.1.23/chatterer/examples/pdf_to_markdown.py → chatterer-0.1.24/chatterer/examples/pdf2md.py +5 -5
  5. chatterer-0.1.23/chatterer/examples/pdf_to_text.py → chatterer-0.1.24/chatterer/examples/pdf2txt.py +5 -5
  6. chatterer-0.1.23/chatterer/examples/make_ppt.py → chatterer-0.1.24/chatterer/examples/ppt.py +5 -7
  7. chatterer-0.1.24/chatterer/examples/pw.py +137 -0
  8. chatterer-0.1.23/chatterer/examples/get_code_snippets.py → chatterer-0.1.24/chatterer/examples/snippet.py +7 -7
  9. chatterer-0.1.23/chatterer/examples/transcription_api.py → chatterer-0.1.24/chatterer/examples/transcribe.py +6 -6
  10. chatterer-0.1.23/chatterer/examples/upstage_parser.py → chatterer-0.1.24/chatterer/examples/upstage.py +17 -17
  11. chatterer-0.1.23/chatterer/examples/webpage_to_markdown.py → chatterer-0.1.24/chatterer/examples/web2md.py +8 -12
  12. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/strategies/atom_of_thoughts.py +161 -161
  13. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer.egg-info/PKG-INFO +6 -9
  14. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer.egg-info/SOURCES.txt +10 -9
  15. chatterer-0.1.24/chatterer.egg-info/entry_points.txt +2 -0
  16. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer.egg-info/requires.txt +3 -7
  17. {chatterer-0.1.23 → chatterer-0.1.24}/pyproject.toml +58 -66
  18. chatterer-0.1.23/chatterer/examples/login_with_playwright.py +0 -156
  19. chatterer-0.1.23/chatterer.egg-info/entry_points.txt +0 -10
  20. {chatterer-0.1.23 → chatterer-0.1.24}/README.md +0 -0
  21. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/__init__.py +0 -0
  22. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/common_types/__init__.py +0 -0
  23. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/common_types/io.py +0 -0
  24. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/examples/__init__.py +0 -0
  25. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/interactive.py +0 -0
  26. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/language_model.py +0 -0
  27. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/messages.py +0 -0
  28. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/py.typed +0 -0
  29. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/strategies/__init__.py +0 -0
  30. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/strategies/base.py +0 -0
  31. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/__init__.py +0 -0
  32. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/caption_markdown_images.py +0 -0
  33. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/__init__.py +0 -0
  34. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/chunks.py +0 -0
  35. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
  36. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/citations.py +0 -0
  37. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/prompt.py +0 -0
  38. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/reference.py +0 -0
  39. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/citation_chunking/utils.py +0 -0
  40. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/convert_pdf_to_markdown.py +0 -0
  41. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/convert_to_text.py +0 -0
  42. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/upstage_document_parser.py +0 -0
  43. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/webpage_to_markdown.py +0 -0
  44. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/tools/youtube.py +0 -0
  45. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/utils/__init__.py +0 -0
  46. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/utils/base64_image.py +0 -0
  47. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/utils/bytesio.py +0 -0
  48. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/utils/code_agent.py +0 -0
  49. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer/utils/imghdr.py +0 -0
  50. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer.egg-info/dependency_links.txt +0 -0
  51. {chatterer-0.1.23 → chatterer-0.1.24}/chatterer.egg-info/top_level.txt +0 -0
  52. {chatterer-0.1.23 → chatterer-0.1.24}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.23
3
+ Version: 0.1.24
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -11,10 +11,9 @@ Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: regex>=2024.11.6
12
12
  Requires-Dist: rich>=13.9.4
13
13
  Requires-Dist: colorama>=0.4.6
14
- Requires-Dist: spargear>=0.2.0
14
+ Requires-Dist: spargear>=0.2.7
15
15
  Provides-Extra: dev
16
- Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
17
- Requires-Dist: ipykernel>=6.29.5; extra == "dev"
16
+ Requires-Dist: pyright>=1.1.401; extra == "dev"
18
17
  Provides-Extra: conversion
19
18
  Requires-Dist: youtube-transcript-api>=1.0.3; extra == "conversion"
20
19
  Requires-Dist: chatterer[browser]; extra == "conversion"
@@ -34,12 +33,10 @@ Requires-Dist: mistune>=3.1.3; extra == "markdown"
34
33
  Provides-Extra: video
35
34
  Requires-Dist: pydub>=0.25.1; extra == "video"
36
35
  Provides-Extra: langchain
37
- Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
36
+ Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain"
37
+ Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain"
38
+ Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain"
38
39
  Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
39
- Provides-Extra: langchain-providers
40
- Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain-providers"
41
- Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain-providers"
42
- Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain-providers"
43
40
  Provides-Extra: all
44
41
  Requires-Dist: chatterer[dev]; extra == "all"
45
42
  Requires-Dist: chatterer[langchain]; extra == "all"
@@ -0,0 +1,75 @@
1
+ from spargear import SubcommandArguments, SubcommandSpec
2
+
3
+
4
+ def any2md():
5
+ from .any2md import Arguments
6
+
7
+ return Arguments
8
+
9
+
10
+ def pdf2md():
11
+ from .pdf2md import Arguments
12
+
13
+ return Arguments
14
+
15
+
16
+ def pdf2txt():
17
+ from .pdf2txt import Arguments
18
+
19
+ return Arguments
20
+
21
+
22
+ def ppt():
23
+ from .ppt import Arguments
24
+
25
+ return Arguments
26
+
27
+
28
+ def pw():
29
+ from .pw import Arguments
30
+
31
+ return Arguments
32
+
33
+
34
+ def snippet():
35
+ from .snippet import Arguments
36
+
37
+ return Arguments
38
+
39
+
40
+ def transcribe():
41
+ from .transcribe import Arguments
42
+
43
+ return Arguments
44
+
45
+
46
+ def upstage():
47
+ from .upstage import Arguments
48
+
49
+ return Arguments
50
+
51
+
52
+ def web2md():
53
+ from .web2md import Arguments
54
+
55
+ return Arguments
56
+
57
+
58
+ class Arguments(SubcommandArguments):
59
+ any2md = SubcommandSpec(name="any2md", argument_class_factory=any2md)
60
+ pdf2md = SubcommandSpec(name="pdf2md", argument_class_factory=pdf2md)
61
+ pdf2txt = SubcommandSpec(name="pdf2txt", argument_class_factory=pdf2txt)
62
+ ppt = SubcommandSpec(name="ppt", argument_class_factory=ppt)
63
+ pw = SubcommandSpec(name="pw", argument_class_factory=pw)
64
+ snippet = SubcommandSpec(name="snippet", argument_class_factory=snippet)
65
+ transcribe = SubcommandSpec(name="transcribe", argument_class_factory=transcribe)
66
+ upstage = SubcommandSpec(name="upstage", argument_class_factory=upstage)
67
+ web2md = SubcommandSpec(name="web2md", argument_class_factory=web2md)
68
+
69
+
70
+ def main():
71
+ Arguments().execute()
72
+
73
+
74
+ if __name__ == "__main__":
75
+ main()
@@ -3,7 +3,7 @@ from pathlib import Path
3
3
  from typing import Optional, TypedDict
4
4
 
5
5
  import openai
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer import anything_to_markdown
9
9
 
@@ -16,10 +16,10 @@ class AnythingToMarkdownReturns(TypedDict):
16
16
  out_text: str
17
17
 
18
18
 
19
- class AnythingToMarkdownArguments(BaseArguments):
19
+ class Arguments(RunnableArguments[AnythingToMarkdownReturns]):
20
20
  """Command line arguments for converting various file types to markdown."""
21
21
 
22
- input: str
22
+ SOURCE: str
23
23
  """Input file to convert to markdown. Can be a file path or a URL."""
24
24
  output: Optional[str] = None
25
25
  """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
@@ -43,7 +43,7 @@ class AnythingToMarkdownArguments(BaseArguments):
43
43
  def run(self) -> AnythingToMarkdownReturns:
44
44
  if not self.prevent_save_file:
45
45
  if not self.output:
46
- output = Path(self.input).with_suffix(".md")
46
+ output = Path(self.SOURCE).with_suffix(".md")
47
47
  else:
48
48
  output = Path(self.output)
49
49
  else:
@@ -57,7 +57,7 @@ class AnythingToMarkdownArguments(BaseArguments):
57
57
  llm_model = None
58
58
 
59
59
  text: str = anything_to_markdown(
60
- self.input,
60
+ self.SOURCE,
61
61
  llm_client=llm_client,
62
62
  llm_model=llm_model,
63
63
  style_map=self.style_map,
@@ -67,18 +67,18 @@ class AnythingToMarkdownArguments(BaseArguments):
67
67
  if output:
68
68
  output.parent.mkdir(parents=True, exist_ok=True)
69
69
  output.write_text(text, encoding=self.encoding)
70
- logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
70
+ logger.info(f"Converted `{self.SOURCE}` to markdown and saved to `{output}`.")
71
71
  else:
72
- logger.info(f"Converted `{self.input}` to markdown.")
72
+ logger.info(f"Converted `{self.SOURCE}` to markdown.")
73
73
  return {
74
- "input": self.input,
74
+ "input": self.SOURCE,
75
75
  "output": str(output) if output is not None else None,
76
76
  "out_text": text,
77
77
  }
78
78
 
79
79
 
80
80
  def main() -> None:
81
- AnythingToMarkdownArguments().run()
81
+ Arguments().run()
82
82
 
83
83
 
84
84
  if __name__ == "__main__":
@@ -13,7 +13,7 @@ import time
13
13
  from pathlib import Path
14
14
  from typing import List, Literal, Optional, TypedDict
15
15
 
16
- from spargear import ArgumentSpec, BaseArguments
16
+ from spargear import ArgumentSpec, RunnableArguments
17
17
 
18
18
  from chatterer import Chatterer
19
19
  from chatterer.tools.convert_pdf_to_markdown import PdfToMarkdown
@@ -35,10 +35,10 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(level
35
35
  logger = logging.getLogger(__name__)
36
36
 
37
37
 
38
- class PdfToMarkdownArgs(BaseArguments):
38
+ class Arguments(RunnableArguments[List[ConversionResult]]):
39
39
  """Command-line arguments for PDF to Markdown conversion."""
40
40
 
41
- input: str
41
+ PDF_OR_DIRECTORY_PATH: str
42
42
  """Input PDF file or directory containing PDF files to convert to markdown."""
43
43
 
44
44
  output: Optional[str] = None
@@ -274,7 +274,7 @@ class PdfToMarkdownArgs(BaseArguments):
274
274
 
275
275
  def _prepare_files(self) -> tuple[List[Path], Path, bool]:
276
276
  """Prepare input and output file paths."""
277
- input_path = Path(self.input).resolve()
277
+ input_path = Path(self.PDF_OR_DIRECTORY_PATH).resolve()
278
278
  pdf_files: List[Path] = []
279
279
  is_dir = False
280
280
 
@@ -320,7 +320,7 @@ def main() -> None:
320
320
  """Main entry point for the CLI application."""
321
321
  args = None
322
322
  try:
323
- args = PdfToMarkdownArgs()
323
+ args = Arguments()
324
324
  args.run()
325
325
  except KeyboardInterrupt:
326
326
  logger.info("🛑 Conversion interrupted by user")
@@ -3,15 +3,15 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import Optional
5
5
 
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer.tools.convert_to_text import pdf_to_text
9
9
 
10
10
  logger = logging.getLogger(__name__)
11
11
 
12
12
 
13
- class PdfToTextArgs(BaseArguments):
14
- input: Path
13
+ class Arguments(RunnableArguments[None]):
14
+ PDF_PATH: Path
15
15
  """Path to the PDF file to convert to text."""
16
16
  output: Optional[Path]
17
17
  """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
@@ -19,7 +19,7 @@ class PdfToTextArgs(BaseArguments):
19
19
  """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
20
 
21
21
  def run(self) -> None:
22
- input = self.input.resolve()
22
+ input = self.PDF_PATH.resolve()
23
23
  out = self.output or input.with_suffix(".txt")
24
24
  if not input.is_file():
25
25
  sys.exit(1)
@@ -47,7 +47,7 @@ def parse_page_indices(pages_str: str) -> list[int]:
47
47
 
48
48
 
49
49
  def main() -> None:
50
- PdfToTextArgs().run()
50
+ Arguments().run()
51
51
 
52
52
 
53
53
  if __name__ == "__main__":
@@ -3,7 +3,7 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import NotRequired, TypedDict
5
5
 
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer import BaseMessage, Chatterer, HumanMessage, SystemMessage
9
9
 
@@ -155,7 +155,7 @@ Now, generate the final `presentation.html` file using impress.js and the provid
155
155
  # --- Argument Parsing ---
156
156
 
157
157
 
158
- class MakePptArguments(BaseArguments):
158
+ class Arguments(RunnableArguments[None]):
159
159
  """
160
160
  Arguments for the presentation generation process.
161
161
  """
@@ -179,9 +179,7 @@ class MakePptArguments(BaseArguments):
179
179
  """Prompt for organizing slides into a presentation script"""
180
180
 
181
181
  # LLM Settings
182
- provider: str = (
183
- "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
184
- )
182
+ provider: str = "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
185
183
  """Name of the language model to use (provider:model_name)"""
186
184
 
187
185
  # Other settings
@@ -293,7 +291,7 @@ class GeneratedSlide(TypedDict):
293
291
  script: NotRequired[str]
294
292
 
295
293
 
296
- def run_presentation_agent(args: MakePptArguments):
294
+ def run_presentation_agent(args: Arguments):
297
295
  """Executes the presentation generation agent loop."""
298
296
 
299
297
  if args.verbose:
@@ -481,7 +479,7 @@ Remember to follow all instructions in the role prompt, especially regarding HTM
481
479
 
482
480
 
483
481
  def main() -> None:
484
- MakePptArguments().run()
482
+ Arguments().run()
485
483
 
486
484
 
487
485
  if __name__ == "__main__":
@@ -0,0 +1,137 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from spargear import BaseArguments, RunnableArguments, SubcommandSpec
7
+
8
+ from chatterer import PlayWrightBot
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ # Define the default path location relative to this script file
14
+ DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
15
+
16
+
17
+ class ReadArgs(RunnableArguments[None]):
18
+ """Arguments for the 'read' subcommand."""
19
+
20
+ URL: str
21
+ """URL (potentially protected) to navigate to using the saved session."""
22
+ jsonpath: Path = DEFAULT_JSON_PATH
23
+ """Path to the session state JSON file to load."""
24
+
25
+ def run(self) -> None:
26
+ """
27
+ Loads the session state from the specified JSON file, then navigates
28
+ to a protected_url that normally requires login. If the stored session
29
+ is valid, it should open without re-entering credentials.
30
+
31
+ Correction: Loads the JSON content into a dict first to satisfy type hints.
32
+ """
33
+ url = self.URL
34
+ jsonpath = self.jsonpath
35
+ logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
36
+
37
+ if not jsonpath.exists():
38
+ logger.error(f"Session file not found at {jsonpath}")
39
+ sys.exit(1)
40
+
41
+ # Load the storage state from the JSON file into a dictionary
42
+ logger.info(f"Reading storage state content from {jsonpath} ...")
43
+ try:
44
+ with open(jsonpath, "r", encoding="utf-8") as f:
45
+ # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
46
+ storage_state_dict = json.load(f)
47
+ except json.JSONDecodeError:
48
+ logger.error(f"Failed to decode JSON from {jsonpath}")
49
+ sys.exit(1)
50
+ except Exception as e:
51
+ logger.error(f"Error reading file {jsonpath}: {e}")
52
+ sys.exit(1)
53
+
54
+ logger.info("Launching browser with loaded session state...")
55
+ with PlayWrightBot(
56
+ playwright_launch_options={"headless": False},
57
+ # Pass the loaded dictionary, which should match the expected 'StorageState' type
58
+ playwright_persistency_options={"storage_state": storage_state_dict},
59
+ ) as bot:
60
+ bot.get_page(url)
61
+
62
+ logger.info("Press Enter in the console when you're done checking the protected page.")
63
+ input(" >> Press Enter to exit: ")
64
+
65
+ logger.info("Done! Browser is now closed.")
66
+
67
+
68
+ class WriteArgs(RunnableArguments[None]):
69
+ """Arguments for the 'write' subcommand."""
70
+
71
+ URL: str
72
+ """URL to navigate to for manual login."""
73
+ jsonpath: Path = DEFAULT_JSON_PATH
74
+ """Path to save the session state JSON file."""
75
+
76
+ def run(self) -> None:
77
+ """
78
+ Launches a non-headless browser and navigates to the login_url.
79
+ The user can manually log in, then press Enter in the console
80
+ to store the current session state into a JSON file.
81
+ """
82
+ url = self.URL
83
+ jsonpath = self.jsonpath
84
+ logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
85
+
86
+ # Ensure jsonpath directory exists
87
+ jsonpath.parent.mkdir(parents=True, exist_ok=True)
88
+
89
+ with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
90
+ bot.get_page(url)
91
+
92
+ logger.info("After completing the login in the browser, press Enter here to save the session.")
93
+ input(" >> Press Enter when ready: ")
94
+
95
+ # get_sync_browser() returns the BrowserContext internally
96
+ context = bot.get_sync_browser()
97
+
98
+ # Save the current session (cookies, localStorage) to a JSON file
99
+ logger.info(f"Saving storage state to {jsonpath} ...")
100
+ context.storage_state(path=jsonpath) # Pass Path object directly
101
+
102
+ logger.info("Done! Browser is now closed.")
103
+
104
+
105
+ class Arguments(BaseArguments):
106
+ """
107
+ A simple CLI tool for saving and using Playwright sessions via storage_state.
108
+ Uses spargear for declarative argument parsing.
109
+ """
110
+
111
+ read: SubcommandSpec[ReadArgs] = SubcommandSpec(
112
+ name="read",
113
+ argument_class=ReadArgs,
114
+ help="Use a saved session to view a protected page.",
115
+ description="Loads session state from the specified JSON file and navigates to the URL.",
116
+ )
117
+ write: SubcommandSpec[WriteArgs] = SubcommandSpec(
118
+ name="write",
119
+ argument_class=WriteArgs,
120
+ help="Save a new session by manually logging in.",
121
+ description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
122
+ )
123
+
124
+ def run(self) -> None:
125
+ """Parses arguments using spargear and executes the corresponding command."""
126
+ if isinstance(last_subcommand := self.last_command, RunnableArguments):
127
+ last_subcommand.run()
128
+ else:
129
+ self.get_parser().print_help()
130
+
131
+
132
+ def main() -> None:
133
+ Arguments().run()
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()
@@ -2,15 +2,15 @@ import logging
2
2
  from pathlib import Path
3
3
  from typing import Optional
4
4
 
5
- from spargear import BaseArguments
5
+ from spargear import RunnableArguments
6
6
 
7
7
  from chatterer import CodeSnippets
8
8
 
9
9
  logger = logging.getLogger(__name__)
10
10
 
11
11
 
12
- class GetCodeSnippetsArgs(BaseArguments):
13
- input: str
12
+ class Arguments(RunnableArguments[CodeSnippets]):
13
+ PATH_OR_PACKAGE_NAME: str
14
14
  """Path to the package or file from which to extract code snippets."""
15
15
  output: Optional[str] = None
16
16
  """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
@@ -33,7 +33,7 @@ class GetCodeSnippetsArgs(BaseArguments):
33
33
  output = None
34
34
 
35
35
  cs = CodeSnippets.from_path_or_pkgname(
36
- path_or_pkgname=self.input,
36
+ path_or_pkgname=self.PATH_OR_PACKAGE_NAME,
37
37
  ban_file_patterns=self.ban_file_patterns,
38
38
  glob_patterns=self.glob_patterns,
39
39
  case_sensitive=self.case_sensitive,
@@ -41,14 +41,14 @@ class GetCodeSnippetsArgs(BaseArguments):
41
41
  if output is not None:
42
42
  output.parent.mkdir(parents=True, exist_ok=True)
43
43
  output.write_text(cs.snippets_text, encoding="utf-8")
44
- logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
44
+ logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}` and saved to `{output}`.")
45
45
  else:
46
- logger.info(f"Extracted code snippets from `{self.input}`.")
46
+ logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}`.")
47
47
  return cs
48
48
 
49
49
 
50
50
  def main() -> None:
51
- GetCodeSnippetsArgs().run()
51
+ Arguments().run()
52
52
 
53
53
 
54
54
  if __name__ == "__main__":
@@ -6,14 +6,14 @@ from typing import Optional, cast
6
6
 
7
7
  from openai import OpenAI
8
8
  from pydub import AudioSegment
9
- from spargear import BaseArguments
9
+ from spargear import RunnableArguments
10
10
 
11
11
  # Maximum chunk length in seconds
12
12
  MAX_CHUNK_DURATION = 600
13
13
 
14
14
 
15
- class TranscriptionApiArguments(BaseArguments):
16
- input: Path
15
+ class Arguments(RunnableArguments[None]):
16
+ AUDIO_PATH: Path
17
17
  """The audio file to transcribe."""
18
18
  output: Optional[Path] = None
19
19
  """Path to save the transcription output."""
@@ -31,7 +31,7 @@ class TranscriptionApiArguments(BaseArguments):
31
31
 
32
32
  client = OpenAI(api_key=self.api_key, base_url=self.base_url)
33
33
 
34
- audio = load_audio_segment(self.input)
34
+ audio = load_audio_segment(self.AUDIO_PATH)
35
35
 
36
36
  segments = split_audio(audio, MAX_CHUNK_DURATION)
37
37
  print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
@@ -42,7 +42,7 @@ class TranscriptionApiArguments(BaseArguments):
42
42
  transcripts.append(transcribe_segment(seg, client, model, self.prompt))
43
43
 
44
44
  full_transcript = "\n\n".join(transcripts)
45
- output_path: Path = self.output or self.input.with_suffix(".txt")
45
+ output_path: Path = self.output or self.AUDIO_PATH.with_suffix(".txt")
46
46
  output_path.write_text(full_transcript, encoding="utf-8")
47
47
  print(f"[✓] Transcription saved to: {output_path}")
48
48
 
@@ -105,7 +105,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt
105
105
 
106
106
 
107
107
  def main() -> None:
108
- TranscriptionApiArguments().run()
108
+ Arguments().run()
109
109
 
110
110
 
111
111
  if __name__ == "__main__":
@@ -19,8 +19,8 @@ from chatterer.tools.upstage_document_parser import (
19
19
  logger = logging.getLogger(__name__)
20
20
 
21
21
 
22
- class UpstageParserArguments(BaseArguments):
23
- input: Path
22
+ class Arguments(BaseArguments):
23
+ INPUT_PATH: Path
24
24
  """Input file to parse. Can be a PDF, image, or other supported formats."""
25
25
  output: Optional[Path] = None
26
26
  """Output file path for the parsed content. Defaults to input file with .md suffix if not provided."""
@@ -52,25 +52,25 @@ class UpstageParserArguments(BaseArguments):
52
52
  )
53
53
 
54
54
  def run(self) -> None:
55
- input = UpstageParserArguments.input.resolve()
56
- out = UpstageParserArguments.output or input.with_suffix(".md")
55
+ input = self.INPUT_PATH.resolve()
56
+ out = self.output or input.with_suffix(".md")
57
57
 
58
58
  parser = UpstageDocumentParseParser(
59
- api_key=UpstageParserArguments.api_key,
60
- base_url=UpstageParserArguments.base_url,
61
- model=UpstageParserArguments.model,
62
- split=UpstageParserArguments.split,
63
- ocr=UpstageParserArguments.ocr,
64
- output_format=UpstageParserArguments.output_format,
65
- coordinates=UpstageParserArguments.coordinates,
66
- base64_encoding=UpstageParserArguments.base64_encoding,
67
- image_description_instruction=UpstageParserArguments.image_description_instruction,
68
- image_dir=UpstageParserArguments.image_dir,
69
- chatterer=UpstageParserArguments.chatterer.value,
59
+ api_key=self.api_key,
60
+ base_url=self.base_url,
61
+ model=self.model,
62
+ split=self.split,
63
+ ocr=self.ocr,
64
+ output_format=self.output_format,
65
+ coordinates=self.coordinates,
66
+ base64_encoding=self.base64_encoding,
67
+ image_description_instruction=self.image_description_instruction,
68
+ image_dir=self.image_dir,
69
+ chatterer=self.chatterer.value,
70
70
  )
71
71
  docs = parser.parse(Blob.from_path(input)) # pyright: ignore[reportUnknownMemberType]
72
72
 
73
- if UpstageParserArguments.image_dir:
73
+ if self.image_dir:
74
74
  for path, image in parser.image_data.items():
75
75
  (path := Path(path)).parent.mkdir(parents=True, exist_ok=True)
76
76
  path.write_bytes(image)
@@ -82,7 +82,7 @@ class UpstageParserArguments(BaseArguments):
82
82
 
83
83
 
84
84
  def main() -> None:
85
- UpstageParserArguments().run()
85
+ Arguments().run()
86
86
 
87
87
 
88
88
  if __name__ == "__main__":
@@ -1,13 +1,13 @@
1
1
  from pathlib import Path
2
2
  from typing import Literal
3
3
 
4
- from spargear import ArgumentSpec, BaseArguments
4
+ from spargear import ArgumentSpec, RunnableArguments
5
5
 
6
6
  from chatterer import Chatterer, MarkdownLink, PlayWrightBot
7
7
 
8
8
 
9
- class WebpageToMarkdownArgs(BaseArguments):
10
- url: str
9
+ class Arguments(RunnableArguments[None]):
10
+ URL: str
11
11
  """The URL to crawl."""
12
12
  output: str = Path(__file__).with_suffix(".md").as_posix()
13
13
  """The output file path for the markdown file."""
@@ -21,7 +21,7 @@ class WebpageToMarkdownArgs(BaseArguments):
21
21
 
22
22
  def run(self) -> None:
23
23
  chatterer = self.chatterer.value
24
- url: str = self.url.strip()
24
+ url: str = self.URL.strip()
25
25
  output: Path = Path(self.output).resolve()
26
26
  with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
27
27
  md = bot.url_to_md(url)
@@ -32,15 +32,13 @@ class WebpageToMarkdownArgs(BaseArguments):
32
32
  links = MarkdownLink.from_markdown(md, referer_url=url)
33
33
  for link in links:
34
34
  if link.type == "link":
35
- print(
36
- f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
37
- )
35
+ print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
38
36
  elif link.type == "image":
39
37
  print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
40
38
 
41
39
  async def arun(self) -> None:
42
40
  chatterer = self.chatterer.value
43
- url: str = self.url.strip()
41
+ url: str = self.URL.strip()
44
42
  output: Path = Path(self.output).resolve()
45
43
  async with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
46
44
  md = await bot.aurl_to_md(url)
@@ -51,9 +49,7 @@ class WebpageToMarkdownArgs(BaseArguments):
51
49
  links = MarkdownLink.from_markdown(md, referer_url=url)
52
50
  for link in links:
53
51
  if link.type == "link":
54
- print(
55
- f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
56
- )
52
+ print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
57
53
  elif link.type == "image":
58
54
  print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
59
55
 
@@ -63,7 +59,7 @@ def truncate_string(s: str) -> str:
63
59
 
64
60
 
65
61
  def main() -> None:
66
- WebpageToMarkdownArgs().run()
62
+ Arguments().run()
67
63
 
68
64
 
69
65
  if __name__ == "__main__":