chatterer 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ from spargear import SubcommandArguments, SubcommandSpec
2
+
3
+
4
def any2md():
    """Return the ``any2md`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .any2md import Arguments as any2md_arguments

    return any2md_arguments
8
+
9
+
10
def pdf2md():
    """Return the ``pdf2md`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .pdf2md import Arguments as pdf2md_arguments

    return pdf2md_arguments
14
+
15
+
16
def pdf2txt():
    """Return the ``pdf2txt`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .pdf2txt import Arguments as pdf2txt_arguments

    return pdf2txt_arguments
20
+
21
+
22
def ppt():
    """Return the ``ppt`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .ppt import Arguments as ppt_arguments

    return ppt_arguments
26
+
27
+
28
def pw():
    """Return the ``pw`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .pw import Arguments as pw_arguments

    return pw_arguments
32
+
33
+
34
def snippet():
    """Return the ``snippet`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .snippet import Arguments as snippet_arguments

    return snippet_arguments
38
+
39
+
40
def transcribe():
    """Return the ``transcribe`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .transcribe import Arguments as transcribe_arguments

    return transcribe_arguments
44
+
45
+
46
def upstage():
    """Return the ``upstage`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .upstage import Arguments as upstage_arguments

    return upstage_arguments
50
+
51
+
52
def web2md():
    """Return the ``web2md`` subcommand's argument class, importing it on first use."""
    # Imported inside the function so the submodule is only loaded when requested.
    from .web2md import Arguments as web2md_arguments

    return web2md_arguments
56
+
57
+
58
class Arguments(SubcommandArguments):
    # One SubcommandSpec per example CLI. Each spec is given the module-level
    # factory of the same name; on the right-hand side that name still refers
    # to the factory function because the class attribute is not bound yet.
    any2md = SubcommandSpec(
        name="any2md",
        argument_class_factory=any2md,
    )
    pdf2md = SubcommandSpec(
        name="pdf2md",
        argument_class_factory=pdf2md,
    )
    pdf2txt = SubcommandSpec(
        name="pdf2txt",
        argument_class_factory=pdf2txt,
    )
    ppt = SubcommandSpec(
        name="ppt",
        argument_class_factory=ppt,
    )
    pw = SubcommandSpec(
        name="pw",
        argument_class_factory=pw,
    )
    snippet = SubcommandSpec(
        name="snippet",
        argument_class_factory=snippet,
    )
    transcribe = SubcommandSpec(
        name="transcribe",
        argument_class_factory=transcribe,
    )
    upstage = SubcommandSpec(
        name="upstage",
        argument_class_factory=upstage,
    )
    web2md = SubcommandSpec(
        name="web2md",
        argument_class_factory=web2md,
    )
68
+
69
+
70
def main():
    """Build the top-level subcommand arguments and call ``execute()`` on them."""
    cli = Arguments()
    cli.execute()


# Only dispatch when this module is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
@@ -3,7 +3,7 @@ from pathlib import Path
3
3
  from typing import Optional, TypedDict
4
4
 
5
5
  import openai
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer import anything_to_markdown
9
9
 
@@ -16,10 +16,10 @@ class AnythingToMarkdownReturns(TypedDict):
16
16
  out_text: str
17
17
 
18
18
 
19
- class AnythingToMarkdownArguments(BaseArguments):
19
+ class Arguments(RunnableArguments[AnythingToMarkdownReturns]):
20
20
  """Command line arguments for converting various file types to markdown."""
21
21
 
22
- input: str
22
+ SOURCE: str
23
23
  """Input file to convert to markdown. Can be a file path or a URL."""
24
24
  output: Optional[str] = None
25
25
  """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
@@ -43,7 +43,7 @@ class AnythingToMarkdownArguments(BaseArguments):
43
43
  def run(self) -> AnythingToMarkdownReturns:
44
44
  if not self.prevent_save_file:
45
45
  if not self.output:
46
- output = Path(self.input).with_suffix(".md")
46
+ output = Path(self.SOURCE).with_suffix(".md")
47
47
  else:
48
48
  output = Path(self.output)
49
49
  else:
@@ -57,7 +57,7 @@ class AnythingToMarkdownArguments(BaseArguments):
57
57
  llm_model = None
58
58
 
59
59
  text: str = anything_to_markdown(
60
- self.input,
60
+ self.SOURCE,
61
61
  llm_client=llm_client,
62
62
  llm_model=llm_model,
63
63
  style_map=self.style_map,
@@ -67,18 +67,18 @@ class AnythingToMarkdownArguments(BaseArguments):
67
67
  if output:
68
68
  output.parent.mkdir(parents=True, exist_ok=True)
69
69
  output.write_text(text, encoding=self.encoding)
70
- logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
70
+ logger.info(f"Converted `{self.SOURCE}` to markdown and saved to `{output}`.")
71
71
  else:
72
- logger.info(f"Converted `{self.input}` to markdown.")
72
+ logger.info(f"Converted `{self.SOURCE}` to markdown.")
73
73
  return {
74
- "input": self.input,
74
+ "input": self.SOURCE,
75
75
  "output": str(output) if output is not None else None,
76
76
  "out_text": text,
77
77
  }
78
78
 
79
79
 
80
80
  def main() -> None:
81
- AnythingToMarkdownArguments().run()
81
+ Arguments().run()
82
82
 
83
83
 
84
84
  if __name__ == "__main__":
@@ -0,0 +1,338 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PDF to Markdown Converter CLI
4
+
5
+ A command-line tool for converting PDF documents to Markdown using multimodal LLMs.
6
+ Supports both sequential and parallel processing modes with async capabilities.
7
+ """
8
+
9
+ import asyncio
10
+ import logging
11
+ import sys
12
+ import time
13
+ from pathlib import Path
14
+ from typing import List, Literal, Optional, TypedDict
15
+
16
+ from spargear import ArgumentSpec, RunnableArguments
17
+
18
+ from chatterer import Chatterer
19
+ from chatterer.tools.convert_pdf_to_markdown import PdfToMarkdown
20
+
21
+
22
class ConversionResult(TypedDict, total=False):
    """Type definition for conversion results."""

    # total=False: every key is optional, so failure records can omit the
    # timing/size keys and success records can omit "error".
    input: str  # POSIX path of the source PDF
    output: str  # POSIX path of the written markdown file ("" on failure)
    result: str  # generated markdown text ("" on failure)
    processing_time: float  # seconds spent converting this PDF
    characters: int  # length of the generated markdown
    error: str  # stringified exception, present only on failure
31
+
32
+
33
# Configure root logging once at import time: INFO level, time-only timestamps.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)
36
+
37
+
38
class Arguments(RunnableArguments[List[ConversionResult]]):
    """Command-line arguments for PDF to Markdown conversion."""

    PDF_OR_DIRECTORY_PATH: str
    """Input PDF file or directory containing PDF files to convert to markdown."""

    output: Optional[str] = None
    """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""

    page: Optional[str] = None
    """Zero-based page indices to convert (e.g., '0,2,4-8'). If None, converts all pages."""

    recursive: bool = False
    """If input is a directory, search for PDFs recursively."""

    mode: Literal["sequential", "parallel"] = "parallel"
    """Processing mode: 'sequential' for strict continuity, 'parallel' for faster processing."""

    sync: bool = False
    """Enable synchronous processing for sequential mode. If set to True, will run in sync mode."""

    max_concurrent: int = 10
    """Maximum number of concurrent LLM requests when using async mode."""

    image_zoom: float = 2.0
    """Zoom factor for rendering PDF pages as images (higher zoom = higher resolution)."""

    image_format: Literal["png", "jpg", "jpeg"] = "png"
    """Image format for PDF page rendering."""

    image_quality: int = 95
    """JPEG quality when using jpg/jpeg format (1-100)."""

    context_tail_lines: int = 10
    """Number of lines from previous page's markdown to use as context (sequential mode only)."""

    verbose: bool = False
    """Enable verbose logging output."""

    chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
        ["--chatterer"],
        default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
        help="Chatterer instance configuration (e.g., 'google:gemini-2.5-flash-preview-05-20').",
        type=Chatterer.from_provider,
    )

    def __post_init__(self) -> None:
        """Validate and adjust arguments after initialization."""
        if self.verbose:
            logging.getLogger().setLevel(logging.DEBUG)

        # The async path always converts in parallel, so a sequential request
        # without --sync is downgraded (with a warning) rather than rejected.
        if not self.sync and self.mode == "sequential":
            logger.warning("Async mode is only available with parallel mode. Switching to parallel mode.")
            self.mode = "parallel"

        if self.max_concurrent < 1:
            logger.warning("max_concurrent must be >= 1. Setting to 1.")
            self.max_concurrent = 1
        elif self.max_concurrent > 10:
            logger.warning("max_concurrent > 10 may cause rate limiting. Consider reducing.")

    def run(self) -> List[ConversionResult]:
        """Execute the PDF to Markdown conversion.

        Returns:
            One ``ConversionResult`` per input PDF, in input order. Failed
            conversions carry an ``"error"`` key instead of raising.
        """
        if not self.sync:
            return asyncio.run(self._run_async())
        else:
            return self._run_sync()

    def _build_converter(self) -> PdfToMarkdown:
        """Create the converter configured from the parsed arguments."""
        return PdfToMarkdown(
            chatterer=self.chatterer.unwrap(),
            image_zoom=self.image_zoom,
            image_format=self.image_format,
            image_jpg_quality=self.image_quality,
            context_tail_lines=self.context_tail_lines,
        )

    def _failure_result(self, pdf: Path, error: BaseException) -> ConversionResult:
        """Build the result record for a PDF whose conversion failed."""
        return {
            "input": pdf.as_posix(),
            "output": "",
            "result": "",
            "error": str(error),
        }

    def _log_summary(
        self,
        headline: str,
        results: List[ConversionResult],
        total_files: int,
        total_elapsed: float,
    ) -> None:
        """Log aggregate statistics for a finished run.

        Both ratios are guarded against a zero denominator: a coarse clock can
        report zero elapsed time when every conversion fails immediately.
        """
        total_chars = sum(len(r.get("result", "")) for r in results)
        successful_conversions = sum(1 for r in results if "error" not in r)
        success_pct = (successful_conversions / total_files * 100) if total_files else 0.0
        avg_speed = (total_chars / total_elapsed) if total_elapsed > 0 else 0.0

        logger.info(headline)
        logger.info(f" 📊 Total time: {total_elapsed:.1f}s")
        logger.info(f" 📈 Success rate: {successful_conversions}/{total_files} ({success_pct:.1f}%)")
        logger.info(f" 📝 Total output: {total_chars:,} characters")
        logger.info(f" ⚡ Average speed: {avg_speed:.0f} chars/s")

    def _run_sync(self) -> List[ConversionResult]:
        """Execute synchronous conversion, one PDF after another."""
        pdf_files, output_base, is_dir = self._prepare_files()
        converter = self._build_converter()

        results: List[ConversionResult] = []
        total_start_time = time.time()

        logger.info(f"🚀 Starting {self.mode} conversion of {len(pdf_files)} PDF(s)...")

        # Progress callback for individual PDF (does not depend on the loop
        # variables, so it is defined once).
        def progress_callback(current: int, total: int) -> None:
            progress = (current / total) * 100 if total else 0.0
            logger.info(f" └─ Progress: {current}/{total} pages ({progress:.1f}%)")

        for i, pdf in enumerate(pdf_files, 1):
            output_path = (output_base / f"{pdf.stem}.md") if is_dir else output_base

            logger.info(f"📄 Processing {i}/{len(pdf_files)}: {pdf.name}")
            start_time = time.time()

            try:
                markdown = converter.convert(
                    pdf_input=str(pdf),
                    page_indices=self.page,
                    mode=self.mode,
                    progress_callback=progress_callback,
                )

                # Save result
                output_path.parent.mkdir(parents=True, exist_ok=True)
                output_path.write_text(markdown, encoding="utf-8")

                elapsed = time.time() - start_time
                chars_per_sec = len(markdown) / elapsed if elapsed > 0 else 0

                logger.info(f" ✅ Completed in {elapsed:.1f}s ({chars_per_sec:.0f} chars/s)")
                logger.info(f" 📝 Generated {len(markdown):,} characters → {output_path}")

                results.append({
                    "input": pdf.as_posix(),
                    "output": output_path.as_posix(),
                    "result": markdown,
                    "processing_time": elapsed,
                    "characters": len(markdown),
                })

            except Exception as e:
                # Best effort: one bad PDF must not abort the whole batch.
                logger.error(f" ❌ Failed to process {pdf.name}: {e}")
                results.append(self._failure_result(pdf, e))

        total_elapsed = time.time() - total_start_time
        self._log_summary("🎉 Conversion complete!", results, len(pdf_files), total_elapsed)

        return results

    async def _run_async(self) -> List[ConversionResult]:
        """Execute asynchronous conversion with parallel processing."""
        pdf_files, output_base, is_dir = self._prepare_files()
        converter = self._build_converter()

        total_start_time = time.time()

        logger.info(f"🚀 Starting ASYNC parallel conversion of {len(pdf_files)} PDF(s)...")
        logger.info(f"⚡ Max concurrent: {self.max_concurrent} LLM requests")

        # Process PDFs concurrently; the semaphore caps how many PDFs are in flight.
        semaphore = asyncio.Semaphore(self.max_concurrent)

        async def process_pdf(pdf: Path, index: int) -> ConversionResult:
            async with semaphore:
                output_path = (output_base / f"{pdf.stem}.md") if is_dir else output_base

                logger.info(f"📄 Processing {index}/{len(pdf_files)}: {pdf.name}")
                start_time = time.time()

                # Progress callback for individual PDF
                def progress_callback(current: int, total: int) -> None:
                    progress = (current / total) * 100 if total else 0.0
                    logger.info(f" └─ {pdf.name}: {current}/{total} pages ({progress:.1f}%)")

                try:
                    # NOTE(review): max_concurrent is also passed per PDF; if
                    # aconvert applies it per call, total in-flight requests
                    # could reach max_concurrent ** 2 — confirm against aconvert.
                    markdown = await converter.aconvert(
                        pdf_input=str(pdf),
                        page_indices=self.page,
                        progress_callback=progress_callback,
                        max_concurrent=self.max_concurrent,  # Limit per-PDF concurrency
                    )

                    # Save result
                    output_path.parent.mkdir(parents=True, exist_ok=True)
                    output_path.write_text(markdown, encoding="utf-8")

                    elapsed = time.time() - start_time
                    chars_per_sec = len(markdown) / elapsed if elapsed > 0 else 0

                    logger.info(f" ✅ {pdf.name} completed in {elapsed:.1f}s ({chars_per_sec:.0f} chars/s)")
                    logger.info(f" 📝 Generated {len(markdown):,} characters → {output_path}")

                    return {
                        "input": pdf.as_posix(),
                        "output": output_path.as_posix(),
                        "result": markdown,
                        "processing_time": elapsed,
                        "characters": len(markdown),
                    }

                except Exception as e:
                    logger.error(f" ❌ Failed to process {pdf.name}: {e}")
                    return self._failure_result(pdf, e)

        # Execute all PDF processing tasks
        tasks = [process_pdf(pdf, i) for i, pdf in enumerate(pdf_files, 1)]
        raw_results = await asyncio.gather(*tasks, return_exceptions=True)

        # gather() keeps input order, so each outcome can be paired with its PDF.
        # With return_exceptions=True it may hand back any BaseException (for
        # example asyncio.CancelledError), not only Exception subclasses.
        final_results: List[ConversionResult] = []
        for pdf, result in zip(pdf_files, raw_results):
            if isinstance(result, BaseException):
                logger.error(f"Task failed with exception: {result}")
                final_results.append(self._failure_result(pdf, result))
            else:
                final_results.append(result)

        total_elapsed = time.time() - total_start_time
        # The former "Speedup ... faster than sequential" line was dropped: it
        # was computed from a hard-coded one-minute-per-PDF assumption, not
        # from any measured sequential run.
        self._log_summary("🎉 ASYNC conversion complete!", final_results, len(pdf_files), total_elapsed)

        return final_results

    def _prepare_files(self) -> tuple[List[Path], Path, bool]:
        """Prepare input and output file paths.

        Returns:
            ``(pdf_files, output_base, is_dir)`` where ``output_base`` is the
            output directory when ``is_dir`` is true and the output file
            otherwise.

        Exits the process (``sys.exit``) when the input is missing, is not a
        PDF, or is a directory that contains no PDFs.
        """
        input_path = Path(self.PDF_OR_DIRECTORY_PATH).resolve()
        pdf_files: List[Path] = []
        is_dir = False

        # Determine input files
        if input_path.is_file():
            if input_path.suffix.lower() != ".pdf":
                logger.error(f"❌ Input file must be a PDF: {input_path}")
                sys.exit(1)
            pdf_files.append(input_path)
        elif input_path.is_dir():
            is_dir = True
            pattern = "**/*.pdf" if self.recursive else "*.pdf"
            # NOTE(review): outputs are named from the PDF stem only, so with
            # --recursive two PDFs sharing a stem in different subdirectories
            # write to the same .md file.
            pdf_files = sorted(f for f in input_path.glob(pattern) if f.is_file())
            if not pdf_files:
                logger.warning(f"⚠️ No PDF files found in {input_path}")
                sys.exit(0)
        else:
            logger.error(f"❌ Input path does not exist: {input_path}")
            sys.exit(1)

        # Determine output path
        if self.output:
            output_base = Path(self.output).resolve()
        elif is_dir:
            output_base = input_path
        else:
            output_base = input_path.with_suffix(".md")

        # Create output directories
        if is_dir:
            output_base.mkdir(parents=True, exist_ok=True)
        else:
            output_base.parent.mkdir(parents=True, exist_ok=True)

        logger.info(f"📂 Input: {input_path}")
        logger.info(f"📁 Output: {output_base}")
        logger.info(f"📄 Found {len(pdf_files)} PDF file(s)")

        return pdf_files, output_base, is_dir
317
+
318
+
319
def main() -> None:
    """Main entry point for the CLI application.

    Exits with status 130 on Ctrl-C and 1 on any other exception; the
    traceback is printed only when the parsed arguments request verbose output.
    """
    args = None
    try:
        args = Arguments()
        args.run()
    except KeyboardInterrupt:
        logger.info("🛑 Conversion interrupted by user")
        sys.exit(130)
    except Exception as exc:
        logger.error(f"❌ Unexpected error: {exc}")
        # args is still None when argument parsing itself raised.
        show_traceback = bool(args) and getattr(args, "verbose", False)
        if show_traceback:
            import traceback

            traceback.print_exc()
        sys.exit(1)


# Run the CLI only when executed as a script.
if __name__ == "__main__":
    main()
@@ -3,15 +3,15 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import Optional
5
5
 
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer.tools.convert_to_text import pdf_to_text
9
9
 
10
10
  logger = logging.getLogger(__name__)
11
11
 
12
12
 
13
- class PdfToTextArgs(BaseArguments):
14
- input: Path
13
+ class Arguments(RunnableArguments[None]):
14
+ PDF_PATH: Path
15
15
  """Path to the PDF file to convert to text."""
16
16
  output: Optional[Path]
17
17
  """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
@@ -19,7 +19,7 @@ class PdfToTextArgs(BaseArguments):
19
19
  """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
20
 
21
21
  def run(self) -> None:
22
- input = self.input.resolve()
22
+ input = self.PDF_PATH.resolve()
23
23
  out = self.output or input.with_suffix(".txt")
24
24
  if not input.is_file():
25
25
  sys.exit(1)
@@ -47,7 +47,7 @@ def parse_page_indices(pages_str: str) -> list[int]:
47
47
 
48
48
 
49
49
  def main() -> None:
50
- PdfToTextArgs().run()
50
+ Arguments().run()
51
51
 
52
52
 
53
53
  if __name__ == "__main__":
@@ -3,7 +3,7 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import NotRequired, TypedDict
5
5
 
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer import BaseMessage, Chatterer, HumanMessage, SystemMessage
9
9
 
@@ -155,7 +155,7 @@ Now, generate the final `presentation.html` file using impress.js and the provid
155
155
  # --- Argument Parsing ---
156
156
 
157
157
 
158
- class MakePptArguments(BaseArguments):
158
+ class Arguments(RunnableArguments[None]):
159
159
  """
160
160
  Arguments for the presentation generation process.
161
161
  """
@@ -179,9 +179,7 @@ class MakePptArguments(BaseArguments):
179
179
  """Prompt for organizing slides into a presentation script"""
180
180
 
181
181
  # LLM Settings
182
- provider: str = (
183
- "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
184
- )
182
+ provider: str = "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
185
183
  """Name of the language model to use (provider:model_name)"""
186
184
 
187
185
  # Other settings
@@ -293,7 +291,7 @@ class GeneratedSlide(TypedDict):
293
291
  script: NotRequired[str]
294
292
 
295
293
 
296
- def run_presentation_agent(args: MakePptArguments):
294
+ def run_presentation_agent(args: Arguments):
297
295
  """Executes the presentation generation agent loop."""
298
296
 
299
297
  if args.verbose:
@@ -481,7 +479,7 @@ Remember to follow all instructions in the role prompt, especially regarding HTM
481
479
 
482
480
 
483
481
  def main() -> None:
484
- MakePptArguments().run()
482
+ Arguments().run()
485
483
 
486
484
 
487
485
  if __name__ == "__main__":
@@ -0,0 +1,137 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from spargear import BaseArguments, RunnableArguments, SubcommandSpec
7
+
8
+ from chatterer import PlayWrightBot
9
+
10
logger = logging.getLogger(__name__)


# Default session file: "session_state.json" next to this script file.
# NOTE(review): this places saved session data inside the package directory —
# confirm that location is writable and acceptable for installed packages.
DEFAULT_JSON_PATH = Path(__file__).resolve().with_name("session_state.json")
15
+
16
+
17
class ReadArgs(RunnableArguments[None]):
    """Arguments for the 'read' subcommand."""

    URL: str
    """URL (potentially protected) to navigate to using the saved session."""
    jsonpath: Path = DEFAULT_JSON_PATH
    """Path to the session state JSON file to load."""

    def run(self) -> None:
        """
        Open ``URL`` in a visible browser using the stored session state.

        The JSON file at ``jsonpath`` is parsed into a dictionary and handed
        to the browser as its storage state. The browser stays open until the
        user presses Enter. Exits with status 1 when the file is missing or
        cannot be read or decoded.
        """
        target_url = self.URL
        state_file = self.jsonpath
        logger.info(f"Loading session from {state_file} and navigating to {target_url} ...")

        if not state_file.exists():
            logger.error(f"Session file not found at {state_file}")
            sys.exit(1)

        # Parse the storage state JSON into a dictionary before launching.
        logger.info(f"Reading storage state content from {state_file} ...")
        try:
            # Expected to match the 'StorageState' shape used by Playwright/chatterer.
            storage_state = json.loads(state_file.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            logger.error(f"Failed to decode JSON from {state_file}")
            sys.exit(1)
        except Exception as exc:
            logger.error(f"Error reading file {state_file}: {exc}")
            sys.exit(1)

        logger.info("Launching browser with loaded session state...")
        launch_options = {"headless": False}
        persistency_options = {"storage_state": storage_state}
        with PlayWrightBot(
            playwright_launch_options=launch_options,
            playwright_persistency_options=persistency_options,
        ) as bot:
            bot.get_page(target_url)

            # Block inside the context so the browser stays open for inspection.
            logger.info("Press Enter in the console when you're done checking the protected page.")
            input(" >> Press Enter to exit: ")

        logger.info("Done! Browser is now closed.")
66
+
67
+
68
class WriteArgs(RunnableArguments[None]):
    """Arguments for the 'write' subcommand."""

    URL: str
    """URL to navigate to for manual login."""
    jsonpath: Path = DEFAULT_JSON_PATH
    """Path to save the session state JSON file."""

    def run(self) -> None:
        """
        Open ``URL`` in a visible browser and save the session after manual login.

        The user logs in by hand and presses Enter in the console; the current
        storage state is then written to ``jsonpath``.
        """
        login_url = self.URL
        state_file = self.jsonpath
        logger.info(f"Launching browser and navigating to {login_url} ... Please log in manually.")

        # Make sure the destination directory exists before the browser starts.
        state_file.parent.mkdir(parents=True, exist_ok=True)

        launch_options = {"headless": False}
        with PlayWrightBot(playwright_launch_options=launch_options) as bot:
            bot.get_page(login_url)

            logger.info("After completing the login in the browser, press Enter here to save the session.")
            input(" >> Press Enter when ready: ")

            # Persist the current session (cookies, localStorage) to the JSON
            # file while the browser is still open; the Path is passed as-is.
            logger.info(f"Saving storage state to {state_file} ...")
            bot.get_sync_browser().storage_state(path=state_file)

        logger.info("Done! Browser is now closed.")
103
+
104
+
105
class Arguments(BaseArguments):
    """
    A simple CLI tool for saving and using Playwright sessions via storage_state.
    Uses spargear for declarative argument parsing.
    """

    read: SubcommandSpec[ReadArgs] = SubcommandSpec(
        name="read",
        argument_class=ReadArgs,
        help="Use a saved session to view a protected page.",
        description="Loads session state from the specified JSON file and navigates to the URL.",
    )
    write: SubcommandSpec[WriteArgs] = SubcommandSpec(
        name="write",
        argument_class=WriteArgs,
        help="Save a new session by manually logging in.",
        description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
    )

    def run(self) -> None:
        """Run the selected subcommand, or print the parser help if there is none to run."""
        selected = self.last_command
        if not isinstance(selected, RunnableArguments):
            self.get_parser().print_help()
            return
        selected.run()
130
+
131
+
132
def main() -> None:
    """Parse the command line and run the selected session subcommand."""
    cli = Arguments()
    cli.run()


# Run the CLI only when executed as a script.
if __name__ == "__main__":
    main()