chatterer 0.1.23__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/examples/__main__.py +75 -0
- chatterer/examples/{anything_to_markdown.py → any2md.py} +9 -9
- chatterer/examples/{pdf_to_markdown.py → pdf2md.py} +5 -5
- chatterer/examples/{pdf_to_text.py → pdf2txt.py} +5 -5
- chatterer/examples/{make_ppt.py → ppt.py} +5 -7
- chatterer/examples/pw.py +137 -0
- chatterer/examples/{get_code_snippets.py → snippet.py} +7 -7
- chatterer/examples/{transcription_api.py → transcribe.py} +6 -6
- chatterer/examples/{upstage_parser.py → upstage.py} +17 -17
- chatterer/examples/{webpage_to_markdown.py → web2md.py} +8 -12
- chatterer/strategies/atom_of_thoughts.py +161 -161
- {chatterer-0.1.23.dist-info → chatterer-0.1.24.dist-info}/METADATA +6 -9
- {chatterer-0.1.23.dist-info → chatterer-0.1.24.dist-info}/RECORD +16 -15
- chatterer-0.1.24.dist-info/entry_points.txt +2 -0
- chatterer/examples/login_with_playwright.py +0 -156
- chatterer-0.1.23.dist-info/entry_points.txt +0 -10
- {chatterer-0.1.23.dist-info → chatterer-0.1.24.dist-info}/WHEEL +0 -0
- {chatterer-0.1.23.dist-info → chatterer-0.1.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,75 @@
|
|
1
|
+
from spargear import SubcommandArguments, SubcommandSpec
|
2
|
+
|
3
|
+
|
4
|
+
def any2md():
|
5
|
+
from .any2md import Arguments
|
6
|
+
|
7
|
+
return Arguments
|
8
|
+
|
9
|
+
|
10
|
+
def pdf2md():
|
11
|
+
from .pdf2md import Arguments
|
12
|
+
|
13
|
+
return Arguments
|
14
|
+
|
15
|
+
|
16
|
+
def pdf2txt():
|
17
|
+
from .pdf2txt import Arguments
|
18
|
+
|
19
|
+
return Arguments
|
20
|
+
|
21
|
+
|
22
|
+
def ppt():
|
23
|
+
from .ppt import Arguments
|
24
|
+
|
25
|
+
return Arguments
|
26
|
+
|
27
|
+
|
28
|
+
def pw():
|
29
|
+
from .pw import Arguments
|
30
|
+
|
31
|
+
return Arguments
|
32
|
+
|
33
|
+
|
34
|
+
def snippet():
|
35
|
+
from .snippet import Arguments
|
36
|
+
|
37
|
+
return Arguments
|
38
|
+
|
39
|
+
|
40
|
+
def transcribe():
|
41
|
+
from .transcribe import Arguments
|
42
|
+
|
43
|
+
return Arguments
|
44
|
+
|
45
|
+
|
46
|
+
def upstage():
|
47
|
+
from .upstage import Arguments
|
48
|
+
|
49
|
+
return Arguments
|
50
|
+
|
51
|
+
|
52
|
+
def web2md():
|
53
|
+
from .web2md import Arguments
|
54
|
+
|
55
|
+
return Arguments
|
56
|
+
|
57
|
+
|
58
|
+
class Arguments(SubcommandArguments):
|
59
|
+
any2md = SubcommandSpec(name="any2md", argument_class_factory=any2md)
|
60
|
+
pdf2md = SubcommandSpec(name="pdf2md", argument_class_factory=pdf2md)
|
61
|
+
pdf2txt = SubcommandSpec(name="pdf2txt", argument_class_factory=pdf2txt)
|
62
|
+
ppt = SubcommandSpec(name="ppt", argument_class_factory=ppt)
|
63
|
+
pw = SubcommandSpec(name="pw", argument_class_factory=pw)
|
64
|
+
snippet = SubcommandSpec(name="snippet", argument_class_factory=snippet)
|
65
|
+
transcribe = SubcommandSpec(name="transcribe", argument_class_factory=transcribe)
|
66
|
+
upstage = SubcommandSpec(name="upstage", argument_class_factory=upstage)
|
67
|
+
web2md = SubcommandSpec(name="web2md", argument_class_factory=web2md)
|
68
|
+
|
69
|
+
|
70
|
+
def main():
|
71
|
+
Arguments().execute()
|
72
|
+
|
73
|
+
|
74
|
+
if __name__ == "__main__":
|
75
|
+
main()
|
@@ -3,7 +3,7 @@ from pathlib import Path
|
|
3
3
|
from typing import Optional, TypedDict
|
4
4
|
|
5
5
|
import openai
|
6
|
-
from spargear import
|
6
|
+
from spargear import RunnableArguments
|
7
7
|
|
8
8
|
from chatterer import anything_to_markdown
|
9
9
|
|
@@ -16,10 +16,10 @@ class AnythingToMarkdownReturns(TypedDict):
|
|
16
16
|
out_text: str
|
17
17
|
|
18
18
|
|
19
|
-
class
|
19
|
+
class Arguments(RunnableArguments[AnythingToMarkdownReturns]):
|
20
20
|
"""Command line arguments for converting various file types to markdown."""
|
21
21
|
|
22
|
-
|
22
|
+
SOURCE: str
|
23
23
|
"""Input file to convert to markdown. Can be a file path or a URL."""
|
24
24
|
output: Optional[str] = None
|
25
25
|
"""Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
|
@@ -43,7 +43,7 @@ class AnythingToMarkdownArguments(BaseArguments):
|
|
43
43
|
def run(self) -> AnythingToMarkdownReturns:
|
44
44
|
if not self.prevent_save_file:
|
45
45
|
if not self.output:
|
46
|
-
output = Path(self.
|
46
|
+
output = Path(self.SOURCE).with_suffix(".md")
|
47
47
|
else:
|
48
48
|
output = Path(self.output)
|
49
49
|
else:
|
@@ -57,7 +57,7 @@ class AnythingToMarkdownArguments(BaseArguments):
|
|
57
57
|
llm_model = None
|
58
58
|
|
59
59
|
text: str = anything_to_markdown(
|
60
|
-
self.
|
60
|
+
self.SOURCE,
|
61
61
|
llm_client=llm_client,
|
62
62
|
llm_model=llm_model,
|
63
63
|
style_map=self.style_map,
|
@@ -67,18 +67,18 @@ class AnythingToMarkdownArguments(BaseArguments):
|
|
67
67
|
if output:
|
68
68
|
output.parent.mkdir(parents=True, exist_ok=True)
|
69
69
|
output.write_text(text, encoding=self.encoding)
|
70
|
-
logger.info(f"Converted `{self.
|
70
|
+
logger.info(f"Converted `{self.SOURCE}` to markdown and saved to `{output}`.")
|
71
71
|
else:
|
72
|
-
logger.info(f"Converted `{self.
|
72
|
+
logger.info(f"Converted `{self.SOURCE}` to markdown.")
|
73
73
|
return {
|
74
|
-
"input": self.
|
74
|
+
"input": self.SOURCE,
|
75
75
|
"output": str(output) if output is not None else None,
|
76
76
|
"out_text": text,
|
77
77
|
}
|
78
78
|
|
79
79
|
|
80
80
|
def main() -> None:
|
81
|
-
|
81
|
+
Arguments().run()
|
82
82
|
|
83
83
|
|
84
84
|
if __name__ == "__main__":
|
@@ -13,7 +13,7 @@ import time
|
|
13
13
|
from pathlib import Path
|
14
14
|
from typing import List, Literal, Optional, TypedDict
|
15
15
|
|
16
|
-
from spargear import ArgumentSpec,
|
16
|
+
from spargear import ArgumentSpec, RunnableArguments
|
17
17
|
|
18
18
|
from chatterer import Chatterer
|
19
19
|
from chatterer.tools.convert_pdf_to_markdown import PdfToMarkdown
|
@@ -35,10 +35,10 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(level
|
|
35
35
|
logger = logging.getLogger(__name__)
|
36
36
|
|
37
37
|
|
38
|
-
class
|
38
|
+
class Arguments(RunnableArguments[List[ConversionResult]]):
|
39
39
|
"""Command-line arguments for PDF to Markdown conversion."""
|
40
40
|
|
41
|
-
|
41
|
+
PDF_OR_DIRECTORY_PATH: str
|
42
42
|
"""Input PDF file or directory containing PDF files to convert to markdown."""
|
43
43
|
|
44
44
|
output: Optional[str] = None
|
@@ -274,7 +274,7 @@ class PdfToMarkdownArgs(BaseArguments):
|
|
274
274
|
|
275
275
|
def _prepare_files(self) -> tuple[List[Path], Path, bool]:
|
276
276
|
"""Prepare input and output file paths."""
|
277
|
-
input_path = Path(self.
|
277
|
+
input_path = Path(self.PDF_OR_DIRECTORY_PATH).resolve()
|
278
278
|
pdf_files: List[Path] = []
|
279
279
|
is_dir = False
|
280
280
|
|
@@ -320,7 +320,7 @@ def main() -> None:
|
|
320
320
|
"""Main entry point for the CLI application."""
|
321
321
|
args = None
|
322
322
|
try:
|
323
|
-
args =
|
323
|
+
args = Arguments()
|
324
324
|
args.run()
|
325
325
|
except KeyboardInterrupt:
|
326
326
|
logger.info("🛑 Conversion interrupted by user")
|
@@ -3,15 +3,15 @@ import sys
|
|
3
3
|
from pathlib import Path
|
4
4
|
from typing import Optional
|
5
5
|
|
6
|
-
from spargear import
|
6
|
+
from spargear import RunnableArguments
|
7
7
|
|
8
8
|
from chatterer.tools.convert_to_text import pdf_to_text
|
9
9
|
|
10
10
|
logger = logging.getLogger(__name__)
|
11
11
|
|
12
12
|
|
13
|
-
class
|
14
|
-
|
13
|
+
class Arguments(RunnableArguments[None]):
|
14
|
+
PDF_PATH: Path
|
15
15
|
"""Path to the PDF file to convert to text."""
|
16
16
|
output: Optional[Path]
|
17
17
|
"""Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
|
@@ -19,7 +19,7 @@ class PdfToTextArgs(BaseArguments):
|
|
19
19
|
"""Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
|
20
20
|
|
21
21
|
def run(self) -> None:
|
22
|
-
input = self.
|
22
|
+
input = self.PDF_PATH.resolve()
|
23
23
|
out = self.output or input.with_suffix(".txt")
|
24
24
|
if not input.is_file():
|
25
25
|
sys.exit(1)
|
@@ -47,7 +47,7 @@ def parse_page_indices(pages_str: str) -> list[int]:
|
|
47
47
|
|
48
48
|
|
49
49
|
def main() -> None:
|
50
|
-
|
50
|
+
Arguments().run()
|
51
51
|
|
52
52
|
|
53
53
|
if __name__ == "__main__":
|
@@ -3,7 +3,7 @@ import sys
|
|
3
3
|
from pathlib import Path
|
4
4
|
from typing import NotRequired, TypedDict
|
5
5
|
|
6
|
-
from spargear import
|
6
|
+
from spargear import RunnableArguments
|
7
7
|
|
8
8
|
from chatterer import BaseMessage, Chatterer, HumanMessage, SystemMessage
|
9
9
|
|
@@ -155,7 +155,7 @@ Now, generate the final `presentation.html` file using impress.js and the provid
|
|
155
155
|
# --- Argument Parsing ---
|
156
156
|
|
157
157
|
|
158
|
-
class
|
158
|
+
class Arguments(RunnableArguments[None]):
|
159
159
|
"""
|
160
160
|
Arguments for the presentation generation process.
|
161
161
|
"""
|
@@ -179,9 +179,7 @@ class MakePptArguments(BaseArguments):
|
|
179
179
|
"""Prompt for organizing slides into a presentation script"""
|
180
180
|
|
181
181
|
# LLM Settings
|
182
|
-
provider: str =
|
183
|
-
"openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
|
184
|
-
)
|
182
|
+
provider: str = "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
|
185
183
|
"""Name of the language model to use (provider:model_name)"""
|
186
184
|
|
187
185
|
# Other settings
|
@@ -293,7 +291,7 @@ class GeneratedSlide(TypedDict):
|
|
293
291
|
script: NotRequired[str]
|
294
292
|
|
295
293
|
|
296
|
-
def run_presentation_agent(args:
|
294
|
+
def run_presentation_agent(args: Arguments):
|
297
295
|
"""Executes the presentation generation agent loop."""
|
298
296
|
|
299
297
|
if args.verbose:
|
@@ -481,7 +479,7 @@ Remember to follow all instructions in the role prompt, especially regarding HTM
|
|
481
479
|
|
482
480
|
|
483
481
|
def main() -> None:
|
484
|
-
|
482
|
+
Arguments().run()
|
485
483
|
|
486
484
|
|
487
485
|
if __name__ == "__main__":
|
chatterer/examples/pw.py
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import sys
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
from spargear import BaseArguments, RunnableArguments, SubcommandSpec
|
7
|
+
|
8
|
+
from chatterer import PlayWrightBot
|
9
|
+
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
# Define the default path location relative to this script file
|
14
|
+
DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
|
15
|
+
|
16
|
+
|
17
|
+
class ReadArgs(RunnableArguments[None]):
|
18
|
+
"""Arguments for the 'read' subcommand."""
|
19
|
+
|
20
|
+
URL: str
|
21
|
+
"""URL (potentially protected) to navigate to using the saved session."""
|
22
|
+
jsonpath: Path = DEFAULT_JSON_PATH
|
23
|
+
"""Path to the session state JSON file to load."""
|
24
|
+
|
25
|
+
def run(self) -> None:
|
26
|
+
"""
|
27
|
+
Loads the session state from the specified JSON file, then navigates
|
28
|
+
to a protected_url that normally requires login. If the stored session
|
29
|
+
is valid, it should open without re-entering credentials.
|
30
|
+
|
31
|
+
Correction: Loads the JSON content into a dict first to satisfy type hints.
|
32
|
+
"""
|
33
|
+
url = self.URL
|
34
|
+
jsonpath = self.jsonpath
|
35
|
+
logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
|
36
|
+
|
37
|
+
if not jsonpath.exists():
|
38
|
+
logger.error(f"Session file not found at {jsonpath}")
|
39
|
+
sys.exit(1)
|
40
|
+
|
41
|
+
# Load the storage state from the JSON file into a dictionary
|
42
|
+
logger.info(f"Reading storage state content from {jsonpath} ...")
|
43
|
+
try:
|
44
|
+
with open(jsonpath, "r", encoding="utf-8") as f:
|
45
|
+
# This dictionary should match the 'StorageState' type expected by Playwright/chatterer
|
46
|
+
storage_state_dict = json.load(f)
|
47
|
+
except json.JSONDecodeError:
|
48
|
+
logger.error(f"Failed to decode JSON from {jsonpath}")
|
49
|
+
sys.exit(1)
|
50
|
+
except Exception as e:
|
51
|
+
logger.error(f"Error reading file {jsonpath}: {e}")
|
52
|
+
sys.exit(1)
|
53
|
+
|
54
|
+
logger.info("Launching browser with loaded session state...")
|
55
|
+
with PlayWrightBot(
|
56
|
+
playwright_launch_options={"headless": False},
|
57
|
+
# Pass the loaded dictionary, which should match the expected 'StorageState' type
|
58
|
+
playwright_persistency_options={"storage_state": storage_state_dict},
|
59
|
+
) as bot:
|
60
|
+
bot.get_page(url)
|
61
|
+
|
62
|
+
logger.info("Press Enter in the console when you're done checking the protected page.")
|
63
|
+
input(" >> Press Enter to exit: ")
|
64
|
+
|
65
|
+
logger.info("Done! Browser is now closed.")
|
66
|
+
|
67
|
+
|
68
|
+
class WriteArgs(RunnableArguments[None]):
|
69
|
+
"""Arguments for the 'write' subcommand."""
|
70
|
+
|
71
|
+
URL: str
|
72
|
+
"""URL to navigate to for manual login."""
|
73
|
+
jsonpath: Path = DEFAULT_JSON_PATH
|
74
|
+
"""Path to save the session state JSON file."""
|
75
|
+
|
76
|
+
def run(self) -> None:
|
77
|
+
"""
|
78
|
+
Launches a non-headless browser and navigates to the login_url.
|
79
|
+
The user can manually log in, then press Enter in the console
|
80
|
+
to store the current session state into a JSON file.
|
81
|
+
"""
|
82
|
+
url = self.URL
|
83
|
+
jsonpath = self.jsonpath
|
84
|
+
logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
|
85
|
+
|
86
|
+
# Ensure jsonpath directory exists
|
87
|
+
jsonpath.parent.mkdir(parents=True, exist_ok=True)
|
88
|
+
|
89
|
+
with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
|
90
|
+
bot.get_page(url)
|
91
|
+
|
92
|
+
logger.info("After completing the login in the browser, press Enter here to save the session.")
|
93
|
+
input(" >> Press Enter when ready: ")
|
94
|
+
|
95
|
+
# get_sync_browser() returns the BrowserContext internally
|
96
|
+
context = bot.get_sync_browser()
|
97
|
+
|
98
|
+
# Save the current session (cookies, localStorage) to a JSON file
|
99
|
+
logger.info(f"Saving storage state to {jsonpath} ...")
|
100
|
+
context.storage_state(path=jsonpath) # Pass Path object directly
|
101
|
+
|
102
|
+
logger.info("Done! Browser is now closed.")
|
103
|
+
|
104
|
+
|
105
|
+
class Arguments(BaseArguments):
|
106
|
+
"""
|
107
|
+
A simple CLI tool for saving and using Playwright sessions via storage_state.
|
108
|
+
Uses spargear for declarative argument parsing.
|
109
|
+
"""
|
110
|
+
|
111
|
+
read: SubcommandSpec[ReadArgs] = SubcommandSpec(
|
112
|
+
name="read",
|
113
|
+
argument_class=ReadArgs,
|
114
|
+
help="Use a saved session to view a protected page.",
|
115
|
+
description="Loads session state from the specified JSON file and navigates to the URL.",
|
116
|
+
)
|
117
|
+
write: SubcommandSpec[WriteArgs] = SubcommandSpec(
|
118
|
+
name="write",
|
119
|
+
argument_class=WriteArgs,
|
120
|
+
help="Save a new session by manually logging in.",
|
121
|
+
description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
|
122
|
+
)
|
123
|
+
|
124
|
+
def run(self) -> None:
|
125
|
+
"""Parses arguments using spargear and executes the corresponding command."""
|
126
|
+
if isinstance(last_subcommand := self.last_command, RunnableArguments):
|
127
|
+
last_subcommand.run()
|
128
|
+
else:
|
129
|
+
self.get_parser().print_help()
|
130
|
+
|
131
|
+
|
132
|
+
def main() -> None:
|
133
|
+
Arguments().run()
|
134
|
+
|
135
|
+
|
136
|
+
if __name__ == "__main__":
|
137
|
+
main()
|
@@ -2,15 +2,15 @@ import logging
|
|
2
2
|
from pathlib import Path
|
3
3
|
from typing import Optional
|
4
4
|
|
5
|
-
from spargear import
|
5
|
+
from spargear import RunnableArguments
|
6
6
|
|
7
7
|
from chatterer import CodeSnippets
|
8
8
|
|
9
9
|
logger = logging.getLogger(__name__)
|
10
10
|
|
11
11
|
|
12
|
-
class
|
13
|
-
|
12
|
+
class Arguments(RunnableArguments[CodeSnippets]):
|
13
|
+
PATH_OR_PACKAGE_NAME: str
|
14
14
|
"""Path to the package or file from which to extract code snippets."""
|
15
15
|
output: Optional[str] = None
|
16
16
|
"""Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
|
@@ -33,7 +33,7 @@ class GetCodeSnippetsArgs(BaseArguments):
|
|
33
33
|
output = None
|
34
34
|
|
35
35
|
cs = CodeSnippets.from_path_or_pkgname(
|
36
|
-
path_or_pkgname=self.
|
36
|
+
path_or_pkgname=self.PATH_OR_PACKAGE_NAME,
|
37
37
|
ban_file_patterns=self.ban_file_patterns,
|
38
38
|
glob_patterns=self.glob_patterns,
|
39
39
|
case_sensitive=self.case_sensitive,
|
@@ -41,14 +41,14 @@ class GetCodeSnippetsArgs(BaseArguments):
|
|
41
41
|
if output is not None:
|
42
42
|
output.parent.mkdir(parents=True, exist_ok=True)
|
43
43
|
output.write_text(cs.snippets_text, encoding="utf-8")
|
44
|
-
logger.info(f"Extracted code snippets from `{self.
|
44
|
+
logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}` and saved to `{output}`.")
|
45
45
|
else:
|
46
|
-
logger.info(f"Extracted code snippets from `{self.
|
46
|
+
logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}`.")
|
47
47
|
return cs
|
48
48
|
|
49
49
|
|
50
50
|
def main() -> None:
|
51
|
-
|
51
|
+
Arguments().run()
|
52
52
|
|
53
53
|
|
54
54
|
if __name__ == "__main__":
|
@@ -6,14 +6,14 @@ from typing import Optional, cast
|
|
6
6
|
|
7
7
|
from openai import OpenAI
|
8
8
|
from pydub import AudioSegment
|
9
|
-
from spargear import
|
9
|
+
from spargear import RunnableArguments
|
10
10
|
|
11
11
|
# Maximum chunk length in seconds
|
12
12
|
MAX_CHUNK_DURATION = 600
|
13
13
|
|
14
14
|
|
15
|
-
class
|
16
|
-
|
15
|
+
class Arguments(RunnableArguments[None]):
|
16
|
+
AUDIO_PATH: Path
|
17
17
|
"""The audio file to transcribe."""
|
18
18
|
output: Optional[Path] = None
|
19
19
|
"""Path to save the transcription output."""
|
@@ -31,7 +31,7 @@ class TranscriptionApiArguments(BaseArguments):
|
|
31
31
|
|
32
32
|
client = OpenAI(api_key=self.api_key, base_url=self.base_url)
|
33
33
|
|
34
|
-
audio = load_audio_segment(self.
|
34
|
+
audio = load_audio_segment(self.AUDIO_PATH)
|
35
35
|
|
36
36
|
segments = split_audio(audio, MAX_CHUNK_DURATION)
|
37
37
|
print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
|
@@ -42,7 +42,7 @@ class TranscriptionApiArguments(BaseArguments):
|
|
42
42
|
transcripts.append(transcribe_segment(seg, client, model, self.prompt))
|
43
43
|
|
44
44
|
full_transcript = "\n\n".join(transcripts)
|
45
|
-
output_path: Path = self.output or self.
|
45
|
+
output_path: Path = self.output or self.AUDIO_PATH.with_suffix(".txt")
|
46
46
|
output_path.write_text(full_transcript, encoding="utf-8")
|
47
47
|
print(f"[✓] Transcription saved to: {output_path}")
|
48
48
|
|
@@ -105,7 +105,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt
|
|
105
105
|
|
106
106
|
|
107
107
|
def main() -> None:
|
108
|
-
|
108
|
+
Arguments().run()
|
109
109
|
|
110
110
|
|
111
111
|
if __name__ == "__main__":
|
@@ -19,8 +19,8 @@ from chatterer.tools.upstage_document_parser import (
|
|
19
19
|
logger = logging.getLogger(__name__)
|
20
20
|
|
21
21
|
|
22
|
-
class
|
23
|
-
|
22
|
+
class Arguments(BaseArguments):
|
23
|
+
INPUT_PATH: Path
|
24
24
|
"""Input file to parse. Can be a PDF, image, or other supported formats."""
|
25
25
|
output: Optional[Path] = None
|
26
26
|
"""Output file path for the parsed content. Defaults to input file with .md suffix if not provided."""
|
@@ -52,25 +52,25 @@ class UpstageParserArguments(BaseArguments):
|
|
52
52
|
)
|
53
53
|
|
54
54
|
def run(self) -> None:
|
55
|
-
input =
|
56
|
-
out =
|
55
|
+
input = self.INPUT_PATH.resolve()
|
56
|
+
out = self.output or input.with_suffix(".md")
|
57
57
|
|
58
58
|
parser = UpstageDocumentParseParser(
|
59
|
-
api_key=
|
60
|
-
base_url=
|
61
|
-
model=
|
62
|
-
split=
|
63
|
-
ocr=
|
64
|
-
output_format=
|
65
|
-
coordinates=
|
66
|
-
base64_encoding=
|
67
|
-
image_description_instruction=
|
68
|
-
image_dir=
|
69
|
-
chatterer=
|
59
|
+
api_key=self.api_key,
|
60
|
+
base_url=self.base_url,
|
61
|
+
model=self.model,
|
62
|
+
split=self.split,
|
63
|
+
ocr=self.ocr,
|
64
|
+
output_format=self.output_format,
|
65
|
+
coordinates=self.coordinates,
|
66
|
+
base64_encoding=self.base64_encoding,
|
67
|
+
image_description_instruction=self.image_description_instruction,
|
68
|
+
image_dir=self.image_dir,
|
69
|
+
chatterer=self.chatterer.value,
|
70
70
|
)
|
71
71
|
docs = parser.parse(Blob.from_path(input)) # pyright: ignore[reportUnknownMemberType]
|
72
72
|
|
73
|
-
if
|
73
|
+
if self.image_dir:
|
74
74
|
for path, image in parser.image_data.items():
|
75
75
|
(path := Path(path)).parent.mkdir(parents=True, exist_ok=True)
|
76
76
|
path.write_bytes(image)
|
@@ -82,7 +82,7 @@ class UpstageParserArguments(BaseArguments):
|
|
82
82
|
|
83
83
|
|
84
84
|
def main() -> None:
|
85
|
-
|
85
|
+
Arguments().run()
|
86
86
|
|
87
87
|
|
88
88
|
if __name__ == "__main__":
|
@@ -1,13 +1,13 @@
|
|
1
1
|
from pathlib import Path
|
2
2
|
from typing import Literal
|
3
3
|
|
4
|
-
from spargear import ArgumentSpec,
|
4
|
+
from spargear import ArgumentSpec, RunnableArguments
|
5
5
|
|
6
6
|
from chatterer import Chatterer, MarkdownLink, PlayWrightBot
|
7
7
|
|
8
8
|
|
9
|
-
class
|
10
|
-
|
9
|
+
class Arguments(RunnableArguments[None]):
|
10
|
+
URL: str
|
11
11
|
"""The URL to crawl."""
|
12
12
|
output: str = Path(__file__).with_suffix(".md").as_posix()
|
13
13
|
"""The output file path for the markdown file."""
|
@@ -21,7 +21,7 @@ class WebpageToMarkdownArgs(BaseArguments):
|
|
21
21
|
|
22
22
|
def run(self) -> None:
|
23
23
|
chatterer = self.chatterer.value
|
24
|
-
url: str = self.
|
24
|
+
url: str = self.URL.strip()
|
25
25
|
output: Path = Path(self.output).resolve()
|
26
26
|
with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
|
27
27
|
md = bot.url_to_md(url)
|
@@ -32,15 +32,13 @@ class WebpageToMarkdownArgs(BaseArguments):
|
|
32
32
|
links = MarkdownLink.from_markdown(md, referer_url=url)
|
33
33
|
for link in links:
|
34
34
|
if link.type == "link":
|
35
|
-
print(
|
36
|
-
f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
|
37
|
-
)
|
35
|
+
print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
|
38
36
|
elif link.type == "image":
|
39
37
|
print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
|
40
38
|
|
41
39
|
async def arun(self) -> None:
|
42
40
|
chatterer = self.chatterer.value
|
43
|
-
url: str = self.
|
41
|
+
url: str = self.URL.strip()
|
44
42
|
output: Path = Path(self.output).resolve()
|
45
43
|
async with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
|
46
44
|
md = await bot.aurl_to_md(url)
|
@@ -51,9 +49,7 @@ class WebpageToMarkdownArgs(BaseArguments):
|
|
51
49
|
links = MarkdownLink.from_markdown(md, referer_url=url)
|
52
50
|
for link in links:
|
53
51
|
if link.type == "link":
|
54
|
-
print(
|
55
|
-
f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
|
56
|
-
)
|
52
|
+
print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
|
57
53
|
elif link.type == "image":
|
58
54
|
print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
|
59
55
|
|
@@ -63,7 +59,7 @@ def truncate_string(s: str) -> str:
|
|
63
59
|
|
64
60
|
|
65
61
|
def main() -> None:
|
66
|
-
|
62
|
+
Arguments().run()
|
67
63
|
|
68
64
|
|
69
65
|
if __name__ == "__main__":
|
@@ -781,143 +781,143 @@ class AoTPipeline:
|
|
781
781
|
# 4.6) Build or export a reasoning graph
|
782
782
|
# ---------------------------------------------------------------------------------
|
783
783
|
|
784
|
-
def get_reasoning_graph(self, global_id_prefix: str = "AoT"):
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
784
|
+
# def get_reasoning_graph(self, global_id_prefix: str = "AoT"):
|
785
|
+
# """
|
786
|
+
# Constructs a Graph object (from hypothetical `neo4j_extension`)
|
787
|
+
# capturing the pipeline steps, including devil's advocate steps.
|
788
|
+
# """
|
789
|
+
# from neo4j_extension import Graph, Node, Relationship
|
790
|
+
|
791
|
+
# g = Graph()
|
792
|
+
# step_nodes: dict[int, Node] = {}
|
793
|
+
# subq_nodes: dict[str, Node] = {}
|
794
|
+
|
795
|
+
# # Step A: Create nodes for each pipeline step
|
796
|
+
# for i, record in enumerate(self.steps_history):
|
797
|
+
# # We'll skip nested Decomposition steps only if we want to flatten them.
|
798
|
+
# # But let's keep them for clarity.
|
799
|
+
# step_node = Node(
|
800
|
+
# properties=record.as_properties(), labels={record.step_name}, globalId=f"{global_id_prefix}_step_{i}"
|
801
|
+
# )
|
802
|
+
# g.add_node(step_node)
|
803
|
+
# step_nodes[i] = step_node
|
804
|
+
|
805
|
+
# # Step B: Collect sub-questions from each DECOMPOSITION or DEVILS_ADVOCATE
|
806
|
+
# all_sub_questions: dict[str, tuple[int, int, SubQuestionNode]] = {}
|
807
|
+
# for i, record in enumerate(self.steps_history):
|
808
|
+
# if record.sub_questions:
|
809
|
+
# for sq_idx, sq in enumerate(record.sub_questions):
|
810
|
+
# sq_id = f"{global_id_prefix}_decomp_{i}_sub_{sq_idx}"
|
811
|
+
# all_sub_questions[sq_id] = (i, sq_idx, sq)
|
812
|
+
|
813
|
+
# for sq_id, (i, sq_idx, sq) in all_sub_questions.items():
|
814
|
+
# n_subq = Node(
|
815
|
+
# properties={
|
816
|
+
# "question": sq.question,
|
817
|
+
# "answer": sq.answer or "",
|
818
|
+
# },
|
819
|
+
# labels={"SubQuestion"},
|
820
|
+
# globalId=sq_id,
|
821
|
+
# )
|
822
|
+
# g.add_node(n_subq)
|
823
|
+
# subq_nodes[sq_id] = n_subq
|
824
|
+
|
825
|
+
# # Step C: Add relationships. We do a simple approach:
|
826
|
+
# # - If StepRecord is DECOMPOSITION or DEVILS_ADVOCATE with sub_questions, link them via SPLIT_INTO.
|
827
|
+
# for i, record in enumerate(self.steps_history):
|
828
|
+
# if record.sub_questions:
|
829
|
+
# start_node = step_nodes[i]
|
830
|
+
# for sq_idx, sq in enumerate(record.sub_questions):
|
831
|
+
# sq_id = f"{global_id_prefix}_decomp_{i}_sub_{sq_idx}"
|
832
|
+
# end_node = subq_nodes[sq_id]
|
833
|
+
# rel = Relationship(
|
834
|
+
# properties={},
|
835
|
+
# rel_type=StepRelation.SPLIT_INTO,
|
836
|
+
# start_node=start_node,
|
837
|
+
# end_node=end_node,
|
838
|
+
# globalId=f"{global_id_prefix}_split_{i}_{sq_idx}",
|
839
|
+
# )
|
840
|
+
# g.add_relationship(rel)
|
841
|
+
# # Also add sub-question dependencies
|
842
|
+
# for dep in sq.depend:
|
843
|
+
# # The same record i -> sub-question subq
|
844
|
+
# if 0 <= dep < len(record.sub_questions):
|
845
|
+
# dep_id = f"{global_id_prefix}_decomp_{i}_sub_{dep}"
|
846
|
+
# if dep_id in subq_nodes:
|
847
|
+
# dep_node = subq_nodes[dep_id]
|
848
|
+
# rel_dep = Relationship(
|
849
|
+
# properties={},
|
850
|
+
# rel_type=StepRelation.DEPEND_ON,
|
851
|
+
# start_node=end_node,
|
852
|
+
# end_node=dep_node,
|
853
|
+
# globalId=f"{global_id_prefix}_dep_{i}_q_{sq_idx}_on_{dep}",
|
854
|
+
# )
|
855
|
+
# g.add_relationship(rel_dep)
|
856
|
+
|
857
|
+
# # Step D: We add PRECEDES relationships in a linear chain for the pipeline steps
|
858
|
+
# for i in range(len(self.steps_history) - 1):
|
859
|
+
# start_node = step_nodes[i]
|
860
|
+
# end_node = step_nodes[i + 1]
|
861
|
+
# rel = Relationship(
|
862
|
+
# properties={},
|
863
|
+
# rel_type=StepRelation.PRECEDES,
|
864
|
+
# start_node=start_node,
|
865
|
+
# end_node=end_node,
|
866
|
+
# globalId=f"{global_id_prefix}_precede_{i}_to_{i + 1}",
|
867
|
+
# )
|
868
|
+
# g.add_relationship(rel)
|
869
|
+
|
870
|
+
# # Step E: CRITIQUES, SELECTS, RESULT_OF can be similarly added:
|
871
|
+
# # We'll do a simple pass:
|
872
|
+
# # If step_name ends with CRITIQUE => it critiques the step before it
|
873
|
+
# for i, record in enumerate(self.steps_history):
|
874
|
+
# if "CRITIQUE" in record.step_name:
|
875
|
+
# # Let it point to the preceding step
|
876
|
+
# if i > 0:
|
877
|
+
# start_node = step_nodes[i]
|
878
|
+
# end_node = step_nodes[i - 1]
|
879
|
+
# rel = Relationship(
|
880
|
+
# properties={},
|
881
|
+
# rel_type=StepRelation.CRITIQUES,
|
882
|
+
# start_node=start_node,
|
883
|
+
# end_node=end_node,
|
884
|
+
# globalId=f"{global_id_prefix}_crit_{i}",
|
885
|
+
# )
|
886
|
+
# g.add_relationship(rel)
|
887
|
+
|
888
|
+
# # If there's a BEST_APPROACH_DECISION step, link it to the step it uses
|
889
|
+
# best_decision_idx = None
|
890
|
+
# used_step_idx = None
|
891
|
+
# for i, record in enumerate(self.steps_history):
|
892
|
+
# if record.step_name == StepName.BEST_APPROACH_DECISION and record.used:
|
893
|
+
# best_decision_idx = i
|
894
|
+
# # find the step with that name
|
895
|
+
# used_step_idx = next((j for j in step_nodes if self.steps_history[j].step_name == record.used), None)
|
896
|
+
# if used_step_idx is not None:
|
897
|
+
# rel = Relationship(
|
898
|
+
# properties={},
|
899
|
+
# rel_type=StepRelation.SELECTS,
|
900
|
+
# start_node=step_nodes[i],
|
901
|
+
# end_node=step_nodes[used_step_idx],
|
902
|
+
# globalId=f"{global_id_prefix}_select_{i}_use_{used_step_idx}",
|
903
|
+
# )
|
904
|
+
# g.add_relationship(rel)
|
905
|
+
|
906
|
+
# # And link the final answer to the best approach
|
907
|
+
# final_answer_idx = next(
|
908
|
+
# (i for i, r in enumerate(self.steps_history) if r.step_name == StepName.FINAL_ANSWER), None
|
909
|
+
# )
|
910
|
+
# if final_answer_idx is not None and best_decision_idx is not None:
|
911
|
+
# rel = Relationship(
|
912
|
+
# properties={},
|
913
|
+
# rel_type=StepRelation.RESULT_OF,
|
914
|
+
# start_node=step_nodes[final_answer_idx],
|
915
|
+
# end_node=step_nodes[best_decision_idx],
|
916
|
+
# globalId=f"{global_id_prefix}_final_{final_answer_idx}_resultof_{best_decision_idx}",
|
917
|
+
# )
|
918
|
+
# g.add_relationship(rel)
|
919
|
+
|
920
|
+
# return g
|
921
921
|
|
922
922
|
|
923
923
|
# ---------------------------------------------------------------------------------
|
@@ -944,32 +944,32 @@ class AoTStrategy(BaseStrategy):
|
|
944
944
|
msgs = self.pipeline.chatterer.client._convert_input(messages).to_messages() # type: ignore
|
945
945
|
return self.pipeline.run_pipeline(msgs)
|
946
946
|
|
947
|
-
def get_reasoning_graph(self):
|
948
|
-
|
949
|
-
|
947
|
+
# def get_reasoning_graph(self):
|
948
|
+
# """Return the AoT reasoning graph from the pipeline’s steps history."""
|
949
|
+
# return self.pipeline.get_reasoning_graph(global_id_prefix="AoT")
|
950
950
|
|
951
951
|
|
952
952
|
# ---------------------------------------------------------------------------------
|
953
953
|
# Example usage (pseudo-code)
|
954
954
|
# ---------------------------------------------------------------------------------
|
955
|
-
if __name__ == "__main__":
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
955
|
+
# if __name__ == "__main__":
|
956
|
+
# from neo4j_extension import Neo4jConnection # or your actual DB connector
|
957
|
+
|
958
|
+
# # You would create a Chatterer with your chosen LLM backend (OpenAI, etc.)
|
959
|
+
# chatterer = Chatterer.openai() # pseudo-code
|
960
|
+
# pipeline = AoTPipeline(chatterer=chatterer, max_depth=3)
|
961
|
+
# strategy = AoTStrategy(pipeline=pipeline)
|
962
|
+
|
963
|
+
# question = "Solve 5.9 = 5.11 - x. Also compare 9.11 and 9.9."
|
964
|
+
# answer = strategy.invoke(question)
|
965
|
+
# print("Final Answer:", answer)
|
966
|
+
|
967
|
+
# # Build the reasoning graph
|
968
|
+
# graph = strategy.get_reasoning_graph()
|
969
|
+
# print(f"\nGraph has {len(graph.nodes)} nodes and {len(graph.relationships)} relationships.")
|
970
|
+
|
971
|
+
# # Optionally store in Neo4j
|
972
|
+
# with Neo4jConnection() as conn:
|
973
|
+
# conn.clear_all()
|
974
|
+
# conn.upsert_graph(graph)
|
975
|
+
# print("Graph stored in Neo4j.")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: chatterer
|
3
|
-
Version: 0.1.23
|
3
|
+
Version: 0.1.24
|
4
4
|
Summary: The highest-level interface for various LLM APIs.
|
5
5
|
Requires-Python: >=3.12
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -11,10 +11,9 @@ Requires-Dist: pillow>=11.1.0
|
|
11
11
|
Requires-Dist: regex>=2024.11.6
|
12
12
|
Requires-Dist: rich>=13.9.4
|
13
13
|
Requires-Dist: colorama>=0.4.6
|
14
|
-
Requires-Dist: spargear>=0.2.
|
14
|
+
Requires-Dist: spargear>=0.2.7
|
15
15
|
Provides-Extra: dev
|
16
|
-
Requires-Dist:
|
17
|
-
Requires-Dist: ipykernel>=6.29.5; extra == "dev"
|
16
|
+
Requires-Dist: pyright>=1.1.401; extra == "dev"
|
18
17
|
Provides-Extra: conversion
|
19
18
|
Requires-Dist: youtube-transcript-api>=1.0.3; extra == "conversion"
|
20
19
|
Requires-Dist: chatterer[browser]; extra == "conversion"
|
@@ -34,12 +33,10 @@ Requires-Dist: mistune>=3.1.3; extra == "markdown"
|
|
34
33
|
Provides-Extra: video
|
35
34
|
Requires-Dist: pydub>=0.25.1; extra == "video"
|
36
35
|
Provides-Extra: langchain
|
37
|
-
Requires-Dist:
|
36
|
+
Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain"
|
37
|
+
Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain"
|
38
|
+
Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain"
|
38
39
|
Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
|
39
|
-
Provides-Extra: langchain-providers
|
40
|
-
Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain-providers"
|
41
|
-
Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain-providers"
|
42
|
-
Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain-providers"
|
43
40
|
Provides-Extra: all
|
44
41
|
Requires-Dist: chatterer[dev]; extra == "all"
|
45
42
|
Requires-Dist: chatterer[langchain]; extra == "all"
|
@@ -6,17 +6,18 @@ chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
chatterer/common_types/__init__.py,sha256=jfS6m5UANSvGjzQ_nzYDpryn5uZqNb06-4xCsQ2C_lw,376
|
7
7
|
chatterer/common_types/io.py,sha256=fetiyi1suZ3NF2mj5k5KDLJLGKS1n4J-5UmH7JN36g8,817
|
8
8
|
chatterer/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
chatterer/examples/
|
10
|
-
chatterer/examples/
|
11
|
-
chatterer/examples/
|
12
|
-
chatterer/examples/
|
13
|
-
chatterer/examples/
|
14
|
-
chatterer/examples/
|
15
|
-
chatterer/examples/
|
16
|
-
chatterer/examples/
|
17
|
-
chatterer/examples/
|
9
|
+
chatterer/examples/__main__.py,sha256=W-Zo7z9RyA0PrY-tPDqf9BSkOqPpaIXROcHCXCwNXc4,1566
|
10
|
+
chatterer/examples/any2md.py,sha256=4AtdlwP1jxSsvh31yWmB5HP2Wmof4Fh0W_F3im2yJ_M,2739
|
11
|
+
chatterer/examples/pdf2md.py,sha256=viru-9vrUdiYMXRpQVpbYiZy6bjkkF-HTXSvy37ICUY,13625
|
12
|
+
chatterer/examples/pdf2txt.py,sha256=ULfA2cr-lrfLVqpMlSa08qo5AXVXiyL8N2-KiD0Orhc,1602
|
13
|
+
chatterer/examples/ppt.py,sha256=7AhS2hZtmMHOJQt1j5DQDDgrMwM-GX1HjPrmKDV2Bgs,23253
|
14
|
+
chatterer/examples/pw.py,sha256=FGmCQg5XFyVAczBF8mQcobJcvITKEOtbrXm4pyKvbAw,5138
|
15
|
+
chatterer/examples/snippet.py,sha256=JvR_xBV8skePCtIczz73EdjmiHzj_A-5HzS53j0bLI4,1973
|
16
|
+
chatterer/examples/transcribe.py,sha256=fBFuo442VEM7NbF9xN3ub3nAnYnQojrseN_kI049fsM,3894
|
17
|
+
chatterer/examples/upstage.py,sha256=lK2OOY6U4GGnDBbPHKaqwlh_0Vu-0RMb0M01M8dngRs,3219
|
18
|
+
chatterer/examples/web2md.py,sha256=zfemaE3KwfU8LHvWzJHX-knASpikBUUNzv6jTmfac1E,2740
|
18
19
|
chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
|
19
|
-
chatterer/strategies/atom_of_thoughts.py,sha256=
|
20
|
+
chatterer/strategies/atom_of_thoughts.py,sha256=30XvnVKjty8Geo2z_n2-RWL_eEvo_AnK8sg8uVPQHOQ,41178
|
20
21
|
chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
|
21
22
|
chatterer/tools/__init__.py,sha256=m3PRK9H5vOhk-2gG9W2eg8CYBlEn-K9-eaulOu91bgo,1474
|
22
23
|
chatterer/tools/caption_markdown_images.py,sha256=r4QajHYuL4mdyYQXP1vQcNmqKN8lxBf5y0VKELXILOI,15392
|
@@ -37,8 +38,8 @@ chatterer/utils/base64_image.py,sha256=m_qAT3ERBiq8D-H4H9Z7rLfL31_BiPmV_m4uQ5XRL
|
|
37
38
|
chatterer/utils/bytesio.py,sha256=3MC2atOOFKo5YxuReo_y_t8Wem9p2Y1ahC5M2lGclwI,2618
|
38
39
|
chatterer/utils/code_agent.py,sha256=7ka_WRI4TQmZ5H46mjY3hI6RO_pxw6pg3LAxjgW4AbM,10495
|
39
40
|
chatterer/utils/imghdr.py,sha256=6JhJMXD4MZ0dQolT2VM87YrRYm3hPf3RTEWnP4lYRVc,3842
|
40
|
-
chatterer-0.1.
|
41
|
-
chatterer-0.1.
|
42
|
-
chatterer-0.1.
|
43
|
-
chatterer-0.1.
|
44
|
-
chatterer-0.1.
|
41
|
+
chatterer-0.1.24.dist-info/METADATA,sha256=mpTNGDkwWEK-9XdP52DGaVKQphtJ_p6Wmibq-eiq07g,11633
|
42
|
+
chatterer-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
43
|
+
chatterer-0.1.24.dist-info/entry_points.txt,sha256=IzGKhTnZ7G5V23SRmulmSsyt9HcaFH4lU4r3wR1zMsc,63
|
44
|
+
chatterer-0.1.24.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
|
45
|
+
chatterer-0.1.24.dist-info/RECORD,,
|
@@ -1,156 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
import logging
|
3
|
-
import sys
|
4
|
-
from pathlib import Path
|
5
|
-
|
6
|
-
from spargear import BaseArguments, SubcommandSpec
|
7
|
-
|
8
|
-
from chatterer import PlayWrightBot
|
9
|
-
|
10
|
-
logger = logging.getLogger(__name__)
|
11
|
-
|
12
|
-
|
13
|
-
# Define the default path location relative to this script file
|
14
|
-
DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
|
15
|
-
|
16
|
-
|
17
|
-
class ReadArgs(BaseArguments):
|
18
|
-
"""Arguments for the 'read' subcommand."""
|
19
|
-
|
20
|
-
url: str
|
21
|
-
"""URL (potentially protected) to navigate to using the saved session."""
|
22
|
-
jsonpath: Path = DEFAULT_JSON_PATH
|
23
|
-
"""Path to the session state JSON file to load."""
|
24
|
-
|
25
|
-
|
26
|
-
class WriteArgs(BaseArguments):
|
27
|
-
"""Arguments for the 'write' subcommand."""
|
28
|
-
|
29
|
-
url: str
|
30
|
-
"""URL to navigate to for manual login."""
|
31
|
-
jsonpath: Path = DEFAULT_JSON_PATH
|
32
|
-
"""Path to save the session state JSON file."""
|
33
|
-
|
34
|
-
|
35
|
-
class LoginWithPlaywrightArgs(BaseArguments):
|
36
|
-
"""
|
37
|
-
A simple CLI tool for saving and using Playwright sessions via storage_state.
|
38
|
-
Uses spargear for declarative argument parsing.
|
39
|
-
"""
|
40
|
-
|
41
|
-
read: SubcommandSpec[ReadArgs] = SubcommandSpec(
|
42
|
-
name="read",
|
43
|
-
argument_class=ReadArgs,
|
44
|
-
help="Use a saved session to view a protected page.",
|
45
|
-
description="Loads session state from the specified JSON file and navigates to the URL.",
|
46
|
-
)
|
47
|
-
write: SubcommandSpec[WriteArgs] = SubcommandSpec(
|
48
|
-
name="write",
|
49
|
-
argument_class=WriteArgs,
|
50
|
-
help="Save a new session by manually logging in.",
|
51
|
-
description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
|
52
|
-
)
|
53
|
-
|
54
|
-
def run(self) -> None:
|
55
|
-
"""Parses arguments using spargear and executes the corresponding command."""
|
56
|
-
try:
|
57
|
-
if (read := self.read.argument_class).url:
|
58
|
-
# Access attributes directly from the returned instance
|
59
|
-
logger.info("Running READ command:")
|
60
|
-
logger.info(f" URL: {read.url}")
|
61
|
-
logger.info(f" JSON Path: {read.jsonpath}")
|
62
|
-
read_session(url=read.url, jsonpath=read.jsonpath)
|
63
|
-
elif (write := self.write.argument_class).url:
|
64
|
-
# Access attributes directly from the returned instance
|
65
|
-
logger.info("Running WRITE command:")
|
66
|
-
logger.info(f" URL: {write.url}")
|
67
|
-
logger.info(f" JSON Path: {write.jsonpath}")
|
68
|
-
write_session(url=write.url, jsonpath=write.jsonpath)
|
69
|
-
else:
|
70
|
-
logger.error("No valid subcommand provided. Use 'read' or 'write'.")
|
71
|
-
sys.exit(1)
|
72
|
-
|
73
|
-
except SystemExit as e:
|
74
|
-
# Handle cases like -h/--help or argparse errors that exit
|
75
|
-
sys.exit(e.code)
|
76
|
-
except Exception as e:
|
77
|
-
logger.error(f"\nAn error occurred: {e}")
|
78
|
-
# from traceback import print_exc # Uncomment for full traceback
|
79
|
-
# print_exc() # Uncomment for full traceback
|
80
|
-
sys.exit(1)
|
81
|
-
|
82
|
-
|
83
|
-
def read_session(url: str, jsonpath: Path) -> None:
|
84
|
-
"""
|
85
|
-
Loads the session state from the specified JSON file, then navigates
|
86
|
-
to a protected_url that normally requires login. If the stored session
|
87
|
-
is valid, it should open without re-entering credentials.
|
88
|
-
|
89
|
-
Correction: Loads the JSON content into a dict first to satisfy type hints.
|
90
|
-
"""
|
91
|
-
logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
|
92
|
-
|
93
|
-
if not jsonpath.exists():
|
94
|
-
logger.error(f"Session file not found at {jsonpath}")
|
95
|
-
sys.exit(1)
|
96
|
-
|
97
|
-
# Load the storage state from the JSON file into a dictionary
|
98
|
-
logger.info(f"Reading storage state content from {jsonpath} ...")
|
99
|
-
try:
|
100
|
-
with open(jsonpath, "r", encoding="utf-8") as f:
|
101
|
-
# This dictionary should match the 'StorageState' type expected by Playwright/chatterer
|
102
|
-
storage_state_dict = json.load(f)
|
103
|
-
except json.JSONDecodeError:
|
104
|
-
logger.error(f"Failed to decode JSON from {jsonpath}")
|
105
|
-
sys.exit(1)
|
106
|
-
except Exception as e:
|
107
|
-
logger.error(f"Error reading file {jsonpath}: {e}")
|
108
|
-
sys.exit(1)
|
109
|
-
|
110
|
-
logger.info("Launching browser with loaded session state...")
|
111
|
-
with PlayWrightBot(
|
112
|
-
playwright_launch_options={"headless": False},
|
113
|
-
# Pass the loaded dictionary, which should match the expected 'StorageState' type
|
114
|
-
playwright_persistency_options={"storage_state": storage_state_dict},
|
115
|
-
) as bot:
|
116
|
-
bot.get_page(url)
|
117
|
-
|
118
|
-
logger.info("Press Enter in the console when you're done checking the protected page.")
|
119
|
-
input(" >> Press Enter to exit: ")
|
120
|
-
|
121
|
-
logger.info("Done! Browser is now closed.")
|
122
|
-
|
123
|
-
|
124
|
-
def write_session(url: str, jsonpath: Path) -> None:
|
125
|
-
"""
|
126
|
-
Launches a non-headless browser and navigates to the login_url.
|
127
|
-
The user can manually log in, then press Enter in the console
|
128
|
-
to store the current session state into a JSON file.
|
129
|
-
"""
|
130
|
-
logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
|
131
|
-
|
132
|
-
# Ensure jsonpath directory exists
|
133
|
-
jsonpath.parent.mkdir(parents=True, exist_ok=True)
|
134
|
-
|
135
|
-
with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
|
136
|
-
bot.get_page(url)
|
137
|
-
|
138
|
-
logger.info("After completing the login in the browser, press Enter here to save the session.")
|
139
|
-
input(" >> Press Enter when ready: ")
|
140
|
-
|
141
|
-
# get_sync_browser() returns the BrowserContext internally
|
142
|
-
context = bot.get_sync_browser()
|
143
|
-
|
144
|
-
# Save the current session (cookies, localStorage) to a JSON file
|
145
|
-
logger.info(f"Saving storage state to {jsonpath} ...")
|
146
|
-
context.storage_state(path=jsonpath) # Pass Path object directly
|
147
|
-
|
148
|
-
logger.info("Done! Browser is now closed.")
|
149
|
-
|
150
|
-
|
151
|
-
def main() -> None:
|
152
|
-
LoginWithPlaywrightArgs().run()
|
153
|
-
|
154
|
-
|
155
|
-
if __name__ == "__main__":
|
156
|
-
main()
|
@@ -1,10 +0,0 @@
|
|
1
|
-
[console_scripts]
|
2
|
-
anything-to-markdown = chatterer.examples.anything_to_markdown:main
|
3
|
-
get-code-snippets = chatterer.examples.get_code_snippets:main
|
4
|
-
login-with-playwright = chatterer.examples.login_with_playwright:main
|
5
|
-
make-ppt = chatterer.examples.make_ppt:main
|
6
|
-
pdf-to-markdown = chatterer.examples.pdf_to_markdown:main
|
7
|
-
pdf-to-text = chatterer.examples.pdf_to_text:main
|
8
|
-
transcription-api = chatterer.examples.transcription_api:main
|
9
|
-
upstage-parser = chatterer.examples.upstage_parser:main
|
10
|
-
webpage-to-markdown = chatterer.examples.webpage_to_markdown:main
|
File without changes
|
File without changes
|