chatterer 0.1.17__tar.gz → 0.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {chatterer-0.1.17 → chatterer-0.1.18}/PKG-INFO +3 -3
  2. chatterer-0.1.18/chatterer/examples/anything_to_markdown.py +91 -0
  3. chatterer-0.1.18/chatterer/examples/get_code_snippets.py +62 -0
  4. chatterer-0.1.18/chatterer/examples/login_with_playwright.py +167 -0
  5. chatterer-0.1.18/chatterer/examples/make_ppt.py +497 -0
  6. chatterer-0.1.18/chatterer/examples/pdf_to_markdown.py +107 -0
  7. chatterer-0.1.18/chatterer/examples/pdf_to_text.py +56 -0
  8. chatterer-0.1.18/chatterer/examples/transcription_api.py +123 -0
  9. chatterer-0.1.18/chatterer/examples/upstage_parser.py +100 -0
  10. chatterer-0.1.18/chatterer/examples/webpage_to_markdown.py +79 -0
  11. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/interactive.py +4 -4
  12. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer.egg-info/PKG-INFO +3 -3
  13. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer.egg-info/SOURCES.txt +9 -0
  14. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer.egg-info/requires.txt +2 -2
  15. {chatterer-0.1.17 → chatterer-0.1.18}/pyproject.toml +9 -12
  16. {chatterer-0.1.17 → chatterer-0.1.18}/README.md +0 -0
  17. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/__init__.py +0 -0
  18. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/common_types/__init__.py +0 -0
  19. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/common_types/io.py +0 -0
  20. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/language_model.py +0 -0
  21. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/messages.py +0 -0
  22. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/py.typed +0 -0
  23. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/strategies/__init__.py +0 -0
  24. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/strategies/atom_of_thoughts.py +0 -0
  25. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/strategies/base.py +0 -0
  26. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/__init__.py +0 -0
  27. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/caption_markdown_images.py +0 -0
  28. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/__init__.py +0 -0
  29. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/chunks.py +0 -0
  30. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
  31. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/citations.py +0 -0
  32. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/prompt.py +0 -0
  33. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/reference.py +0 -0
  34. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/citation_chunking/utils.py +0 -0
  35. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/convert_pdf_to_markdown.py +0 -0
  36. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/convert_to_text.py +0 -0
  37. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/upstage_document_parser.py +0 -0
  38. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/webpage_to_markdown.py +0 -0
  39. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/tools/youtube.py +0 -0
  40. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/utils/__init__.py +0 -0
  41. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/utils/base64_image.py +0 -0
  42. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/utils/bytesio.py +0 -0
  43. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/utils/code_agent.py +0 -0
  44. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer/utils/imghdr.py +0 -0
  45. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer.egg-info/dependency_links.txt +0 -0
  46. {chatterer-0.1.17 → chatterer-0.1.18}/chatterer.egg-info/top_level.txt +0 -0
  47. {chatterer-0.1.17 → chatterer-0.1.18}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.17
3
+ Version: 0.1.18
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -10,11 +10,11 @@ Requires-Dist: langchain-openai>=0.3.11
10
10
  Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: regex>=2024.11.6
12
12
  Requires-Dist: rich>=13.9.4
13
+ Requires-Dist: colorama>=0.4.6
14
+ Requires-Dist: spargear>=0.1.6
13
15
  Provides-Extra: dev
14
16
  Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
15
- Requires-Dist: colorama>=0.4.6; extra == "dev"
16
17
  Requires-Dist: ipykernel>=6.29.5; extra == "dev"
17
- Requires-Dist: spargear>=0.1.4; extra == "dev"
18
18
  Provides-Extra: conversion
19
19
  Requires-Dist: youtube-transcript-api>=1.0.3; extra == "conversion"
20
20
  Requires-Dist: chatterer[browser]; extra == "conversion"
@@ -0,0 +1,91 @@
1
def resolve_import_path_and_get_logger():
    """Prepare ``sys.path`` for direct execution and return this module's logger.

    When the file is run as a script (``__name__ == "__main__"``) and ``"."``
    is not yet an entry of ``sys.path``, ``"."`` is appended so that the
    imports following this call can resolve packages located in the current
    working directory.

    Returns:
        The logger obtained from ``logging.getLogger(__name__)``.
    """
    # ruff: noqa: E402
    import logging
    import sys

    running_as_script = __name__ == "__main__"
    if running_as_script and "." not in sys.path:
        sys.path.append(".")

    return logging.getLogger(__name__)
11
+
12
+
13
+ logger = resolve_import_path_and_get_logger()
14
+ from pathlib import Path
15
+ from typing import Optional, TypedDict
16
+
17
+ import openai
18
+ from spargear import ArgumentSpec, BaseArguments
19
+
20
+ from chatterer import anything_to_markdown
21
+
22
+
23
class AnythingToMarkdownReturns(TypedDict):
    """Result mapping produced by ``AnythingToMarkdownArguments.run``."""

    # Input path exactly as it was given on the command line.
    in_path: str
    # Path the markdown was written to, or None when nothing was saved.
    out_path: Optional[str]
    # The converted markdown text.
    out_text: str
27
+
28
+
29
class AnythingToMarkdownArguments(BaseArguments):
    """Command line arguments for converting various file types to markdown."""

    in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
    out_path: Optional[str] = None
    """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
    model: Optional[str] = None
    """OpenAI Model to use for conversion"""
    api_key: Optional[str] = None
    """API key for OpenAI API"""
    base_url: Optional[str] = None
    """Base URL for OpenAI API"""
    style_map: Optional[str] = None
    """Output style map"""
    exiftool_path: Optional[str] = None
    """Path to exiftool for metadata extraction"""
    docintel_endpoint: Optional[str] = None
    """Document Intelligence API endpoint"""
    prevent_save_file: bool = False
    """Prevent saving the converted file to disk."""
    encoding: str = "utf-8"
    """Encoding for the output file."""

    def run(self) -> AnythingToMarkdownReturns:
        """Convert the input file to markdown and optionally write it to disk.

        An OpenAI client is created only when ``model`` is set; otherwise
        both the client and the model name are passed as ``None``.

        Returns:
            A mapping with the input path, the output path as a string
            (``None`` when saving is disabled) and the converted text.
        """
        in_path = self.in_path.unwrap()

        # Decide where (and whether) the result is written.
        out_path: Optional[Path]
        if self.prevent_save_file:
            out_path = None
        elif self.out_path:
            out_path = Path(self.out_path)
        else:
            # Default: same location as the input, with a ".md" suffix.
            out_path = Path(in_path).with_suffix(".md")

        if self.model:
            llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
            llm_model = self.model
        else:
            llm_client = None
            llm_model = None

        text: str = anything_to_markdown(
            in_path,
            llm_client=llm_client,
            llm_model=llm_model,
            style_map=self.style_map,
            exiftool_path=self.exiftool_path,
            docintel_endpoint=self.docintel_endpoint,
        )

        # Path objects are always truthy, so test explicitly against None.
        if out_path is not None:
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(text, encoding=self.encoding)
            logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
        else:
            logger.info(f"Converted `{in_path}` to markdown.")

        return {
            "in_path": in_path,
            "out_path": str(out_path) if out_path is not None else None,
            "out_text": text,
        }
88
+
89
+
90
+ if __name__ == "__main__":
91
+ AnythingToMarkdownArguments().run()
@@ -0,0 +1,62 @@
1
def resolve_import_path_and_get_logger():
    """Prepare ``sys.path`` for direct execution and return this module's logger.

    When the file is run as a script (``__name__ == "__main__"``) and ``"."``
    is not yet an entry of ``sys.path``, ``"."`` is appended so that the
    imports following this call can resolve packages located in the current
    working directory.

    Returns:
        The logger obtained from ``logging.getLogger(__name__)``.
    """
    # ruff: noqa: E402
    import logging
    import sys

    running_as_script = __name__ == "__main__"
    if running_as_script and "." not in sys.path:
        sys.path.append(".")

    return logging.getLogger(__name__)
11
+
12
+
13
+ logger = resolve_import_path_and_get_logger()
14
+ from pathlib import Path
15
+ from typing import Optional
16
+
17
+ from spargear import ArgumentSpec, BaseArguments
18
+
19
+ from chatterer import CodeSnippets
20
+
21
+
22
def _default_self_ban_pattern() -> str:
    """Return the ban pattern that keeps this script out of its own results.

    The path is expressed relative to the current working directory when
    possible. ``Path.relative_to`` raises ``ValueError`` when this file is
    not located under the working directory; in that case the script path
    itself is used so that merely importing the module cannot fail.
    """
    script_path = Path(__file__)
    try:
        return script_path.relative_to(Path.cwd()).as_posix()
    except ValueError:
        # NOTE(review): whether a non-relative pattern still matches depends
        # on how CodeSnippets applies ban patterns -- confirm.
        return script_path.as_posix()


class GetCodeSnippetsArgs(BaseArguments):
    path_or_pkgname: ArgumentSpec[str] = ArgumentSpec(
        ["path_or_pkgname"], help="Path to the package or file from which to extract code snippets."
    )
    out_path: Optional[str] = None
    ban_file_patterns: list[str] = [".venv/*", _default_self_ban_pattern()]
    """List of file patterns to ignore."""
    glob_patterns: list[str] = ["*.py"]
    """List of glob patterns to include."""
    case_sensitive: bool = False
    """Enable case-sensitive matching for glob patterns."""
    prevent_save_file: bool = False
    """Prevent saving the extracted code snippets to a file."""

    def run(self) -> CodeSnippets:
        """Extract code snippets and optionally save their text to a file.

        When saving is enabled and ``out_path`` is not given, the text is
        written next to this script, using the script's name with a
        ``.txt`` suffix.

        Returns:
            The ``CodeSnippets`` object built from ``path_or_pkgname``.
        """
        path_or_pkgname = self.path_or_pkgname.unwrap()

        # Decide where (and whether) the snippets text is written.
        out_path: Optional[Path]
        if self.prevent_save_file:
            out_path = None
        elif self.out_path:
            out_path = Path(self.out_path)
        else:
            out_path = Path(__file__).with_suffix(".txt")

        cs = CodeSnippets.from_path_or_pkgname(
            path_or_pkgname=path_or_pkgname,
            ban_file_patterns=self.ban_file_patterns,
            glob_patterns=self.glob_patterns,
            case_sensitive=self.case_sensitive,
        )

        if out_path is not None:
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(cs.snippets_text, encoding="utf-8")
            logger.info(f"Extracted code snippets from `{path_or_pkgname}` and saved to `{out_path}`.")
        else:
            logger.info(f"Extracted code snippets from `{path_or_pkgname}`.")
        return cs
59
+
60
+
61
+ if __name__ == "__main__":
62
+ GetCodeSnippetsArgs().run()
@@ -0,0 +1,167 @@
1
def resolve_import_path_and_get_logger():
    """Prepare ``sys.path`` for direct execution and return this module's logger.

    When the file is run as a script (``__name__ == "__main__"``) and ``"."``
    is not yet an entry of ``sys.path``, ``"."`` is appended so that the
    imports following this call can resolve packages located in the current
    working directory.

    Returns:
        The logger obtained from ``logging.getLogger(__name__)``.
    """
    # ruff: noqa: E402
    import logging
    import sys

    running_as_script = __name__ == "__main__"
    if running_as_script and "." not in sys.path:
        sys.path.append(".")

    return logging.getLogger(__name__)
11
+
12
+
13
+ logger = resolve_import_path_and_get_logger()
14
+ import json
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ from spargear import BaseArguments, SubcommandSpec
19
+
20
+ from chatterer import PlayWrightBot
21
+
22
+
23
def read_session(url: str, jsonpath: Path) -> None:
    """Open ``url`` in a visible browser using a previously saved session.

    The storage state is read from ``jsonpath`` and parsed from JSON, then
    handed to ``PlayWrightBot`` as its ``storage_state`` persistency option.
    The browser stays open until the user presses Enter in the console.

    The process exits with status 1 when the file does not exist, does not
    contain valid JSON, or cannot be read for any other reason.

    Args:
        url: Page to navigate to (typically one that requires a login).
        jsonpath: JSON file holding the saved storage state.
    """
    logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")

    session_file_missing = not jsonpath.exists()
    if session_file_missing:
        logger.error(f"Session file not found at {jsonpath}")
        sys.exit(1)

    # Parse the file up front so a plain Python object, not a path, is passed on.
    logger.info(f"Reading storage state content from {jsonpath} ...")
    try:
        raw_state = jsonpath.read_text(encoding="utf-8")
        # presumably matches the 'StorageState' shape expected by Playwright/chatterer -- verify
        storage_state_dict = json.loads(raw_state)
    except json.JSONDecodeError:
        logger.error(f"Failed to decode JSON from {jsonpath}")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error reading file {jsonpath}: {e}")
        sys.exit(1)

    logger.info("Launching browser with loaded session state...")
    with PlayWrightBot(
        playwright_launch_options={"headless": False},
        playwright_persistency_options={"storage_state": storage_state_dict},
    ) as bot:
        bot.get_page(url)

        # Keep the browser open until the user has finished inspecting the page.
        logger.info("Press Enter in the console when you're done checking the protected page.")
        input(" >> Press Enter to exit: ")

    logger.info("Done! Browser is now closed.")
62
+
63
+
64
def write_session(url: str, jsonpath: Path) -> None:
    """Let the user log in manually at ``url`` and save the session state.

    A non-headless browser is opened at ``url``. Once the user presses Enter
    in the console, the current storage state is written to ``jsonpath``.
    The parent directory of ``jsonpath`` is created if it does not exist.

    Args:
        url: Page to open for the manual login.
        jsonpath: Destination JSON file for the storage state.
    """
    logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")

    # Make sure the destination directory is there before the browser starts.
    jsonpath.parent.mkdir(parents=True, exist_ok=True)

    with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
        bot.get_page(url)

        logger.info("After completing the login in the browser, press Enter here to save the session.")
        input(" >> Press Enter when ready: ")

        # presumably a Playwright BrowserContext, per the original comment -- verify
        browser_context = bot.get_sync_browser()

        # Persist the current session to the JSON file.
        logger.info(f"Saving storage state to {jsonpath} ...")
        browser_context.storage_state(path=jsonpath)

    logger.info("Done! Browser is now closed.")
89
+
90
+
91
+ # --- Spargear Declarative CLI Definition ---
92
+
93
+ # Define the default path location relative to this script file
94
+ DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
95
+
96
+
97
class ReadArgs(BaseArguments):
    """Arguments for the 'read' subcommand."""

    # Declared without a default value.
    url: str
    """URL (potentially protected) to navigate to using the saved session."""
    # Falls back to the module-level DEFAULT_JSON_PATH.
    jsonpath: Path = DEFAULT_JSON_PATH
    """Path to the session state JSON file to load."""
104
+
105
+
106
class WriteArgs(BaseArguments):
    """Arguments for the 'write' subcommand."""

    # Declared without a default value.
    url: str
    """URL to navigate to for manual login."""
    # Falls back to the module-level DEFAULT_JSON_PATH.
    jsonpath: Path = DEFAULT_JSON_PATH
    """Path to save the session state JSON file."""
113
+
114
+
115
class LoginWithPlaywrightArgs(BaseArguments):
    """
    A simple CLI tool for saving and using Playwright sessions via storage_state.
    Uses spargear for declarative argument parsing.
    """

    read: SubcommandSpec[ReadArgs] = SubcommandSpec(
        name="read",
        argument_class=ReadArgs,
        help="Use a saved session to view a protected page.",
        description="Loads session state from the specified JSON file and navigates to the URL.",
    )
    write: SubcommandSpec[WriteArgs] = SubcommandSpec(
        name="write",
        argument_class=WriteArgs,
        help="Save a new session by manually logging in.",
        description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
    )

    def run(self) -> None:
        """Run ``read_session`` or ``write_session`` depending on the subcommand.

        The subcommand whose argument class carries a truthy ``url`` is
        executed; ``read`` is checked before ``write``. If neither has one,
        an error is logged and the process exits with status 1. Any
        ``Exception`` raised while running is logged and also turned into
        exit status 1. ``SystemExit`` is not an ``Exception`` and therefore
        propagates unchanged.
        """
        try:
            # NOTE(review): `argument_class` is the class handed to
            # SubcommandSpec, not an instance; presumably spargear stores the
            # parsed values on it -- confirm against the spargear docs.
            read = self.read.argument_class
            write = self.write.argument_class

            # `url` is declared without a default, so use getattr: a missing
            # attribute on one subcommand must not block the other one.
            if getattr(read, "url", None):
                logger.info("Running READ command:")
                logger.info(f" URL: {read.url}")
                logger.info(f" JSON Path: {read.jsonpath}")
                read_session(url=read.url, jsonpath=read.jsonpath)
            elif getattr(write, "url", None):
                logger.info("Running WRITE command:")
                logger.info(f" URL: {write.url}")
                logger.info(f" JSON Path: {write.jsonpath}")
                write_session(url=write.url, jsonpath=write.jsonpath)
            else:
                logger.error("No valid subcommand provided. Use 'read' or 'write'.")
                sys.exit(1)
        except Exception as e:
            logger.error(f"\nAn error occurred: {e}")
            sys.exit(1)
161
+
162
+
163
+ # --- Main Execution Logic ---
164
+
165
+
166
+ if __name__ == "__main__":
167
+ LoginWithPlaywrightArgs().run()