chatterer 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. chatterer/__init__.py +87 -87
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/constants.py +5 -0
  5. chatterer/examples/__main__.py +75 -75
  6. chatterer/examples/any2md.py +83 -85
  7. chatterer/examples/pdf2md.py +231 -338
  8. chatterer/examples/pdf2txt.py +52 -54
  9. chatterer/examples/ppt.py +487 -486
  10. chatterer/examples/pw.py +141 -143
  11. chatterer/examples/snippet.py +54 -56
  12. chatterer/examples/transcribe.py +192 -192
  13. chatterer/examples/upstage.py +87 -89
  14. chatterer/examples/web2md.py +80 -80
  15. chatterer/interactive.py +422 -354
  16. chatterer/language_model.py +530 -536
  17. chatterer/messages.py +21 -21
  18. chatterer/tools/__init__.py +46 -46
  19. chatterer/tools/caption_markdown_images.py +388 -384
  20. chatterer/tools/citation_chunking/__init__.py +3 -3
  21. chatterer/tools/citation_chunking/chunks.py +51 -53
  22. chatterer/tools/citation_chunking/citation_chunker.py +117 -118
  23. chatterer/tools/citation_chunking/citations.py +284 -285
  24. chatterer/tools/citation_chunking/prompt.py +157 -157
  25. chatterer/tools/citation_chunking/reference.py +26 -26
  26. chatterer/tools/citation_chunking/utils.py +138 -138
  27. chatterer/tools/convert_pdf_to_markdown.py +634 -645
  28. chatterer/tools/convert_to_text.py +446 -446
  29. chatterer/tools/upstage_document_parser.py +704 -705
  30. chatterer/tools/webpage_to_markdown.py +739 -739
  31. chatterer/tools/youtube.py +146 -147
  32. chatterer/utils/__init__.py +15 -15
  33. chatterer/utils/base64_image.py +349 -350
  34. chatterer/utils/bytesio.py +59 -59
  35. chatterer/utils/code_agent.py +237 -237
  36. chatterer/utils/imghdr.py +145 -145
  37. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/METADATA +377 -390
  38. chatterer-0.1.28.dist-info/RECORD +43 -0
  39. chatterer-0.1.26.dist-info/RECORD +0 -42
  40. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/WHEEL +0 -0
  41. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/entry_points.txt +0 -0
  42. {chatterer-0.1.26.dist-info → chatterer-0.1.28.dist-info}/top_level.txt +0 -0
chatterer/examples/pw.py CHANGED
@@ -1,143 +1,141 @@
1
- import json
2
- import logging
3
- import sys
4
- from pathlib import Path
5
-
6
- from spargear import ArgumentSpec, BaseArguments, RunnableArguments, SubcommandSpec
7
-
8
- from chatterer import PlayWrightBot
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- def generate_json_path() -> Path:
14
- return Path("session_state.json").resolve()
15
-
16
-
17
- class ReadArgs(RunnableArguments[None]):
18
- """Arguments for the 'read' subcommand."""
19
-
20
- URL: str
21
- """URL (potentially protected) to navigate to using the saved session."""
22
- json: ArgumentSpec[Path] = ArgumentSpec(
23
- ["--json", "-j"],
24
- default_factory=generate_json_path,
25
- help="Path to the session state JSON file to load.",
26
- )
27
-
28
- def run(self) -> None:
29
- """
30
- Loads the session state from the specified JSON file, then navigates
31
- to a protected_url that normally requires login. If the stored session
32
- is valid, it should open without re-entering credentials.
33
-
34
- Correction: Loads the JSON content into a dict first to satisfy type hints.
35
- """
36
- url = self.URL
37
- jsonpath = self.json.unwrap()
38
- logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
39
-
40
- if not jsonpath.exists():
41
- logger.error(f"Session file not found at {jsonpath}")
42
- sys.exit(1)
43
-
44
- # Load the storage state from the JSON file into a dictionary
45
- logger.info(f"Reading storage state content from {jsonpath} ...")
46
- try:
47
- with open(jsonpath, "r", encoding="utf-8") as f:
48
- # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
49
- storage_state_dict = json.load(f)
50
- except json.JSONDecodeError:
51
- logger.error(f"Failed to decode JSON from {jsonpath}")
52
- sys.exit(1)
53
- except Exception as e:
54
- logger.error(f"Error reading file {jsonpath}: {e}")
55
- sys.exit(1)
56
-
57
- logger.info("Launching browser with loaded session state...")
58
- with PlayWrightBot(
59
- playwright_launch_options={"headless": False},
60
- # Pass the loaded dictionary, which should match the expected 'StorageState' type
61
- playwright_persistency_options={"storage_state": storage_state_dict},
62
- ) as bot:
63
- bot.get_page(url)
64
-
65
- logger.info("Press Enter in the console when you're done checking the protected page.")
66
- input(" >> Press Enter to exit: ")
67
-
68
- logger.info("Done! Browser is now closed.")
69
-
70
-
71
- class WriteArgs(RunnableArguments[None]):
72
- """Arguments for the 'write' subcommand."""
73
-
74
- URL: str
75
- """URL to navigate to for manual login."""
76
- json: ArgumentSpec[Path] = ArgumentSpec(
77
- ["--json", "-j"],
78
- default_factory=generate_json_path,
79
- help="Path to save the session state JSON file.",
80
- )
81
-
82
- def run(self) -> None:
83
- """
84
- Launches a non-headless browser and navigates to the login_url.
85
- The user can manually log in, then press Enter in the console
86
- to store the current session state into a JSON file.
87
- """
88
- url = self.URL
89
- jsonpath = self.json.unwrap()
90
- logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
91
-
92
- # Ensure jsonpath directory exists
93
- jsonpath.parent.mkdir(parents=True, exist_ok=True)
94
-
95
- with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
96
- bot.get_page(url)
97
-
98
- logger.info("After completing the login in the browser, press Enter here to save the session.")
99
- input(" >> Press Enter when ready: ")
100
-
101
- # get_sync_browser() returns the BrowserContext internally
102
- context = bot.get_sync_browser()
103
-
104
- # Save the current session (cookies, localStorage) to a JSON file
105
- logger.info(f"Saving storage state to {jsonpath} ...")
106
- context.storage_state(path=jsonpath) # Pass Path object directly
107
-
108
- logger.info("Done! Browser is now closed.")
109
-
110
-
111
- class Arguments(BaseArguments):
112
- """
113
- A simple CLI tool for saving and using Playwright sessions via storage_state.
114
- Uses spargear for declarative argument parsing.
115
- """
116
-
117
- read: SubcommandSpec[ReadArgs] = SubcommandSpec(
118
- name="read",
119
- argument_class=ReadArgs,
120
- help="Use a saved session to view a protected page.",
121
- description="Loads session state from the specified JSON file and navigates to the URL.",
122
- )
123
- write: SubcommandSpec[WriteArgs] = SubcommandSpec(
124
- name="write",
125
- argument_class=WriteArgs,
126
- help="Save a new session by manually logging in.",
127
- description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
128
- )
129
-
130
- def run(self) -> None:
131
- """Parses arguments using spargear and executes the corresponding command."""
132
- if isinstance(last_subcommand := self.last_command, RunnableArguments):
133
- last_subcommand.run()
134
- else:
135
- self.get_parser().print_help()
136
-
137
-
138
- def main() -> None:
139
- Arguments().run()
140
-
141
-
142
- if __name__ == "__main__":
143
- main()
1
+ import json
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from loguru import logger
6
+ from spargear import ArgumentSpec, BaseArguments, RunnableArguments, SubcommandSpec
7
+
8
+ from chatterer import PlayWrightBot
9
+
10
+
11
+ def generate_json_path() -> Path:
12
+ return Path("session_state.json").resolve()
13
+
14
+
15
+ class ReadArgs(RunnableArguments[None]):
16
+ """Arguments for the 'read' subcommand."""
17
+
18
+ URL: str
19
+ """URL (potentially protected) to navigate to using the saved session."""
20
+ json: ArgumentSpec[Path] = ArgumentSpec(
21
+ ["--json", "-j"],
22
+ default_factory=generate_json_path,
23
+ help="Path to the session state JSON file to load.",
24
+ )
25
+
26
+ def run(self) -> None:
27
+ """
28
+ Loads the session state from the specified JSON file, then navigates
29
+ to a protected_url that normally requires login. If the stored session
30
+ is valid, it should open without re-entering credentials.
31
+
32
+ Correction: Loads the JSON content into a dict first to satisfy type hints.
33
+ """
34
+ url = self.URL
35
+ jsonpath = self.json.unwrap()
36
+ logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
37
+
38
+ if not jsonpath.exists():
39
+ logger.error(f"Session file not found at {jsonpath}")
40
+ sys.exit(1)
41
+
42
+ # Load the storage state from the JSON file into a dictionary
43
+ logger.info(f"Reading storage state content from {jsonpath} ...")
44
+ try:
45
+ with open(jsonpath, "r", encoding="utf-8") as f:
46
+ # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
47
+ storage_state_dict = json.load(f)
48
+ except json.JSONDecodeError:
49
+ logger.error(f"Failed to decode JSON from {jsonpath}")
50
+ sys.exit(1)
51
+ except Exception as e:
52
+ logger.error(f"Error reading file {jsonpath}: {e}")
53
+ sys.exit(1)
54
+
55
+ logger.info("Launching browser with loaded session state...")
56
+ with PlayWrightBot(
57
+ playwright_launch_options={"headless": False},
58
+ # Pass the loaded dictionary, which should match the expected 'StorageState' type
59
+ playwright_persistency_options={"storage_state": storage_state_dict},
60
+ ) as bot:
61
+ bot.get_page(url)
62
+
63
+ logger.info("Press Enter in the console when you're done checking the protected page.")
64
+ input(" >> Press Enter to exit: ")
65
+
66
+ logger.info("Done! Browser is now closed.")
67
+
68
+
69
+ class WriteArgs(RunnableArguments[None]):
70
+ """Arguments for the 'write' subcommand."""
71
+
72
+ URL: str
73
+ """URL to navigate to for manual login."""
74
+ json: ArgumentSpec[Path] = ArgumentSpec(
75
+ ["--json", "-j"],
76
+ default_factory=generate_json_path,
77
+ help="Path to save the session state JSON file.",
78
+ )
79
+
80
+ def run(self) -> None:
81
+ """
82
+ Launches a non-headless browser and navigates to the login_url.
83
+ The user can manually log in, then press Enter in the console
84
+ to store the current session state into a JSON file.
85
+ """
86
+ url = self.URL
87
+ jsonpath = self.json.unwrap()
88
+ logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
89
+
90
+ # Ensure jsonpath directory exists
91
+ jsonpath.parent.mkdir(parents=True, exist_ok=True)
92
+
93
+ with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
94
+ bot.get_page(url)
95
+
96
+ logger.info("After completing the login in the browser, press Enter here to save the session.")
97
+ input(" >> Press Enter when ready: ")
98
+
99
+ # get_sync_browser() returns the BrowserContext internally
100
+ context = bot.get_sync_browser()
101
+
102
+ # Save the current session (cookies, localStorage) to a JSON file
103
+ logger.info(f"Saving storage state to {jsonpath} ...")
104
+ context.storage_state(path=jsonpath) # Pass Path object directly
105
+
106
+ logger.info("Done! Browser is now closed.")
107
+
108
+
109
+ class Arguments(BaseArguments):
110
+ """
111
+ A simple CLI tool for saving and using Playwright sessions via storage_state.
112
+ Uses spargear for declarative argument parsing.
113
+ """
114
+
115
+ read: SubcommandSpec[ReadArgs] = SubcommandSpec(
116
+ name="read",
117
+ argument_class=ReadArgs,
118
+ help="Use a saved session to view a protected page.",
119
+ description="Loads session state from the specified JSON file and navigates to the URL.",
120
+ )
121
+ write: SubcommandSpec[WriteArgs] = SubcommandSpec(
122
+ name="write",
123
+ argument_class=WriteArgs,
124
+ help="Save a new session by manually logging in.",
125
+ description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
126
+ )
127
+
128
+ def run(self) -> None:
129
+ """Parses arguments using spargear and executes the corresponding command."""
130
+ if isinstance(last_subcommand := self.last_command, RunnableArguments):
131
+ last_subcommand.run()
132
+ else:
133
+ self.get_parser().print_help()
134
+
135
+
136
+ def main() -> None:
137
+ Arguments().run()
138
+
139
+
140
+ if __name__ == "__main__":
141
+ main()
chatterer/examples/snippet.py CHANGED
@@ -1,56 +1,54 @@
1
- import logging
2
- from datetime import datetime
3
- from pathlib import Path
4
- from typing import Optional
5
-
6
- from spargear import RunnableArguments
7
-
8
- from chatterer import CodeSnippets
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- class Arguments(RunnableArguments[CodeSnippets]):
14
- PATH_OR_PACKAGE_NAME: str
15
- """Path to the package or file from which to extract code snippets."""
16
- output: Optional[str] = None
17
- """Output path for the extracted code snippets. If not provided, defaults to a file with the current timestamp."""
18
- ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
19
- """List of file patterns to ignore."""
20
- glob_patterns: list[str] = ["*.py"]
21
- """List of glob patterns to include."""
22
- case_sensitive: bool = False
23
- """Enable case-sensitive matching for glob patterns."""
24
- prevent_save_file: bool = False
25
- """Prevent saving the extracted code snippets to a file."""
26
-
27
- def run(self) -> CodeSnippets:
28
- if not self.prevent_save_file:
29
- if not self.output:
30
- output = Path(datetime.now().strftime("%Y%m%d_%H%M%S") + "_snippets.txt")
31
- else:
32
- output = Path(self.output)
33
- else:
34
- output = None
35
-
36
- cs = CodeSnippets.from_path_or_pkgname(
37
- path_or_pkgname=self.PATH_OR_PACKAGE_NAME,
38
- ban_file_patterns=self.ban_file_patterns,
39
- glob_patterns=self.glob_patterns,
40
- case_sensitive=self.case_sensitive,
41
- )
42
- if output is not None:
43
- output.parent.mkdir(parents=True, exist_ok=True)
44
- output.write_text(cs.snippets_text, encoding="utf-8")
45
- logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}` and saved to `{output}`.")
46
- else:
47
- logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}`.")
48
- return cs
49
-
50
-
51
- def main() -> None:
52
- Arguments().run()
53
-
54
-
55
- if __name__ == "__main__":
56
- main()
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from loguru import logger
6
+ from spargear import RunnableArguments
7
+
8
+ from chatterer import CodeSnippets
9
+
10
+
11
+ class Arguments(RunnableArguments[CodeSnippets]):
12
+ PATH_OR_PACKAGE_NAME: str
13
+ """Path to the package or file from which to extract code snippets."""
14
+ output: Optional[str] = None
15
+ """Output path for the extracted code snippets. If not provided, defaults to a file with the current timestamp."""
16
+ ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
17
+ """List of file patterns to ignore."""
18
+ glob_patterns: list[str] = ["*.py"]
19
+ """List of glob patterns to include."""
20
+ case_sensitive: bool = False
21
+ """Enable case-sensitive matching for glob patterns."""
22
+ prevent_save_file: bool = False
23
+ """Prevent saving the extracted code snippets to a file."""
24
+
25
+ def run(self) -> CodeSnippets:
26
+ if not self.prevent_save_file:
27
+ if not self.output:
28
+ output = Path(datetime.now().strftime("%Y%m%d_%H%M%S") + "_snippets.txt")
29
+ else:
30
+ output = Path(self.output)
31
+ else:
32
+ output = None
33
+
34
+ cs = CodeSnippets.from_path_or_pkgname(
35
+ path_or_pkgname=self.PATH_OR_PACKAGE_NAME,
36
+ ban_file_patterns=self.ban_file_patterns,
37
+ glob_patterns=self.glob_patterns,
38
+ case_sensitive=self.case_sensitive,
39
+ )
40
+ if output is not None:
41
+ output.parent.mkdir(parents=True, exist_ok=True)
42
+ output.write_text(cs.snippets_text, encoding="utf-8")
43
+ logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}` and saved to `{output}`.")
44
+ else:
45
+ logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}`.")
46
+ return cs
47
+
48
+
49
+ def main() -> None:
50
+ Arguments().run()
51
+
52
+
53
+ if __name__ == "__main__":
54
+ main()