chatterer 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,156 +0,0 @@
1
- import json
2
- import logging
3
- import sys
4
- from pathlib import Path
5
-
6
- from spargear import BaseArguments, SubcommandSpec
7
-
8
- from chatterer import PlayWrightBot
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- # Define the default path location relative to this script file
14
- DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
15
-
16
-
17
- class ReadArgs(BaseArguments):
18
- """Arguments for the 'read' subcommand."""
19
-
20
- url: str
21
- """URL (potentially protected) to navigate to using the saved session."""
22
- jsonpath: Path = DEFAULT_JSON_PATH
23
- """Path to the session state JSON file to load."""
24
-
25
-
26
- class WriteArgs(BaseArguments):
27
- """Arguments for the 'write' subcommand."""
28
-
29
- url: str
30
- """URL to navigate to for manual login."""
31
- jsonpath: Path = DEFAULT_JSON_PATH
32
- """Path to save the session state JSON file."""
33
-
34
-
35
- class LoginWithPlaywrightArgs(BaseArguments):
36
- """
37
- A simple CLI tool for saving and using Playwright sessions via storage_state.
38
- Uses spargear for declarative argument parsing.
39
- """
40
-
41
- read: SubcommandSpec[ReadArgs] = SubcommandSpec(
42
- name="read",
43
- argument_class=ReadArgs,
44
- help="Use a saved session to view a protected page.",
45
- description="Loads session state from the specified JSON file and navigates to the URL.",
46
- )
47
- write: SubcommandSpec[WriteArgs] = SubcommandSpec(
48
- name="write",
49
- argument_class=WriteArgs,
50
- help="Save a new session by manually logging in.",
51
- description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
52
- )
53
-
54
- def run(self) -> None:
55
- """Parses arguments using spargear and executes the corresponding command."""
56
- try:
57
- if (read := self.read.argument_class).url:
58
- # Access attributes directly from the returned instance
59
- logger.info("Running READ command:")
60
- logger.info(f" URL: {read.url}")
61
- logger.info(f" JSON Path: {read.jsonpath}")
62
- read_session(url=read.url, jsonpath=read.jsonpath)
63
- elif (write := self.write.argument_class).url:
64
- # Access attributes directly from the returned instance
65
- logger.info("Running WRITE command:")
66
- logger.info(f" URL: {write.url}")
67
- logger.info(f" JSON Path: {write.jsonpath}")
68
- write_session(url=write.url, jsonpath=write.jsonpath)
69
- else:
70
- logger.error("No valid subcommand provided. Use 'read' or 'write'.")
71
- sys.exit(1)
72
-
73
- except SystemExit as e:
74
- # Handle cases like -h/--help or argparse errors that exit
75
- sys.exit(e.code)
76
- except Exception as e:
77
- logger.error(f"\nAn error occurred: {e}")
78
- # from traceback import print_exc # Uncomment for full traceback
79
- # print_exc() # Uncomment for full traceback
80
- sys.exit(1)
81
-
82
-
83
- def read_session(url: str, jsonpath: Path) -> None:
84
- """
85
- Loads the session state from the specified JSON file, then navigates
86
- to a protected_url that normally requires login. If the stored session
87
- is valid, it should open without re-entering credentials.
88
-
89
- Correction: Loads the JSON content into a dict first to satisfy type hints.
90
- """
91
- logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
92
-
93
- if not jsonpath.exists():
94
- logger.error(f"Session file not found at {jsonpath}")
95
- sys.exit(1)
96
-
97
- # Load the storage state from the JSON file into a dictionary
98
- logger.info(f"Reading storage state content from {jsonpath} ...")
99
- try:
100
- with open(jsonpath, "r", encoding="utf-8") as f:
101
- # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
102
- storage_state_dict = json.load(f)
103
- except json.JSONDecodeError:
104
- logger.error(f"Failed to decode JSON from {jsonpath}")
105
- sys.exit(1)
106
- except Exception as e:
107
- logger.error(f"Error reading file {jsonpath}: {e}")
108
- sys.exit(1)
109
-
110
- logger.info("Launching browser with loaded session state...")
111
- with PlayWrightBot(
112
- playwright_launch_options={"headless": False},
113
- # Pass the loaded dictionary, which should match the expected 'StorageState' type
114
- playwright_persistency_options={"storage_state": storage_state_dict},
115
- ) as bot:
116
- bot.get_page(url)
117
-
118
- logger.info("Press Enter in the console when you're done checking the protected page.")
119
- input(" >> Press Enter to exit: ")
120
-
121
- logger.info("Done! Browser is now closed.")
122
-
123
-
124
- def write_session(url: str, jsonpath: Path) -> None:
125
- """
126
- Launches a non-headless browser and navigates to the login_url.
127
- The user can manually log in, then press Enter in the console
128
- to store the current session state into a JSON file.
129
- """
130
- logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
131
-
132
- # Ensure jsonpath directory exists
133
- jsonpath.parent.mkdir(parents=True, exist_ok=True)
134
-
135
- with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
136
- bot.get_page(url)
137
-
138
- logger.info("After completing the login in the browser, press Enter here to save the session.")
139
- input(" >> Press Enter when ready: ")
140
-
141
- # get_sync_browser() returns the BrowserContext internally
142
- context = bot.get_sync_browser()
143
-
144
- # Save the current session (cookies, localStorage) to a JSON file
145
- logger.info(f"Saving storage state to {jsonpath} ...")
146
- context.storage_state(path=jsonpath) # Pass Path object directly
147
-
148
- logger.info("Done! Browser is now closed.")
149
-
150
-
151
- def main() -> None:
152
- LoginWithPlaywrightArgs().run()
153
-
154
-
155
- if __name__ == "__main__":
156
- main()
@@ -1,77 +0,0 @@
1
- import logging
2
- import sys
3
- from pathlib import Path
4
- from typing import Optional
5
-
6
- from spargear import ArgumentSpec, BaseArguments
7
-
8
- from chatterer import Chatterer, PdfToMarkdown
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class PdfToMarkdownArgs(BaseArguments):
14
- input: str
15
- """Input PDF file or directory containing PDF files to convert to markdown."""
16
- output: Optional[str] = None
17
- """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
18
- """Chatterer instance for communication."""
19
- page: Optional[str] = None
20
- """Zero-based page indices to convert (e.g., '0,2,4-8')."""
21
- recursive: bool = False
22
- """If input is a directory, search for PDFs recursively."""
23
- chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
24
- ["--chatterer"],
25
- default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
26
- help="Chatterer instance for communication.",
27
- type=Chatterer.from_provider,
28
- )
29
-
30
- def run(self) -> list[dict[str, str]]:
31
- input = Path(self.input).resolve()
32
- pdf_files: list[Path] = []
33
- is_dir = False
34
- if input.is_file():
35
- if input.suffix.lower() != ".pdf":
36
- sys.exit(1)
37
- pdf_files.append(input)
38
- elif input.is_dir():
39
- is_dir = True
40
- pattern = "*.pdf"
41
- pdf_files = sorted([
42
- f for f in (input.rglob(pattern) if self.recursive else input.glob(pattern)) if f.is_file()
43
- ])
44
- if not pdf_files:
45
- sys.exit(0)
46
- else:
47
- sys.exit(1)
48
- if self.output:
49
- out_base = Path(self.output).resolve()
50
- elif is_dir:
51
- out_base = input
52
- else:
53
- out_base = input.with_suffix(".md")
54
-
55
- if is_dir:
56
- out_base.mkdir(parents=True, exist_ok=True)
57
- else:
58
- out_base.parent.mkdir(parents=True, exist_ok=True)
59
-
60
- converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
61
- results: list[dict[str, str]] = []
62
- for pdf in pdf_files:
63
- output: Path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
64
- md: str = converter.convert(pdf_input=str(pdf), page_indices=self.page)
65
- output.parent.mkdir(parents=True, exist_ok=True)
66
- output.write_text(md, encoding="utf-8")
67
- results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
68
- logger.info(f"Converted {len(pdf_files)} PDF(s) to markdown and saved to `{out_base}`.")
69
- return results
70
-
71
-
72
- def main() -> None:
73
- PdfToMarkdownArgs().run()
74
-
75
-
76
- if __name__ == "__main__":
77
- main()
@@ -1,10 +0,0 @@
1
- [console_scripts]
2
- anything-to-markdown = chatterer.examples.anything_to_markdown:main
3
- get-code-snippets = chatterer.examples.get_code_snippets:main
4
- login-with-playwright = chatterer.examples.login_with_playwright:main
5
- make-ppt = chatterer.examples.make_ppt:main
6
- pdf-to-markdown = chatterer.examples.pdf_to_markdown:main
7
- pdf-to-text = chatterer.examples.pdf_to_text:main
8
- transcription-api = chatterer.examples.transcription_api:main
9
- upstage-parser = chatterer.examples.upstage_parser:main
10
- webpage-to-markdown = chatterer.examples.webpage_to_markdown:main