chatterer 0.1.23__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ from spargear import SubcommandArguments, SubcommandSpec
2
+
3
+
4
+ def any2md():
5
+ from .any2md import Arguments
6
+
7
+ return Arguments
8
+
9
+
10
+ def pdf2md():
11
+ from .pdf2md import Arguments
12
+
13
+ return Arguments
14
+
15
+
16
+ def pdf2txt():
17
+ from .pdf2txt import Arguments
18
+
19
+ return Arguments
20
+
21
+
22
+ def ppt():
23
+ from .ppt import Arguments
24
+
25
+ return Arguments
26
+
27
+
28
+ def pw():
29
+ from .pw import Arguments
30
+
31
+ return Arguments
32
+
33
+
34
+ def snippet():
35
+ from .snippet import Arguments
36
+
37
+ return Arguments
38
+
39
+
40
+ def transcribe():
41
+ from .transcribe import Arguments
42
+
43
+ return Arguments
44
+
45
+
46
+ def upstage():
47
+ from .upstage import Arguments
48
+
49
+ return Arguments
50
+
51
+
52
+ def web2md():
53
+ from .web2md import Arguments
54
+
55
+ return Arguments
56
+
57
+
58
+ class Arguments(SubcommandArguments):
59
+ any2md = SubcommandSpec(name="any2md", argument_class_factory=any2md)
60
+ pdf2md = SubcommandSpec(name="pdf2md", argument_class_factory=pdf2md)
61
+ pdf2txt = SubcommandSpec(name="pdf2txt", argument_class_factory=pdf2txt)
62
+ ppt = SubcommandSpec(name="ppt", argument_class_factory=ppt)
63
+ pw = SubcommandSpec(name="pw", argument_class_factory=pw)
64
+ snippet = SubcommandSpec(name="snippet", argument_class_factory=snippet)
65
+ transcribe = SubcommandSpec(name="transcribe", argument_class_factory=transcribe)
66
+ upstage = SubcommandSpec(name="upstage", argument_class_factory=upstage)
67
+ web2md = SubcommandSpec(name="web2md", argument_class_factory=web2md)
68
+
69
+
70
+ def main():
71
+ Arguments().execute()
72
+
73
+
74
+ if __name__ == "__main__":
75
+ main()
@@ -3,7 +3,7 @@ from pathlib import Path
3
3
  from typing import Optional, TypedDict
4
4
 
5
5
  import openai
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer import anything_to_markdown
9
9
 
@@ -16,10 +16,10 @@ class AnythingToMarkdownReturns(TypedDict):
16
16
  out_text: str
17
17
 
18
18
 
19
- class AnythingToMarkdownArguments(BaseArguments):
19
+ class Arguments(RunnableArguments[AnythingToMarkdownReturns]):
20
20
  """Command line arguments for converting various file types to markdown."""
21
21
 
22
- input: str
22
+ SOURCE: str
23
23
  """Input file to convert to markdown. Can be a file path or a URL."""
24
24
  output: Optional[str] = None
25
25
  """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
@@ -43,7 +43,7 @@ class AnythingToMarkdownArguments(BaseArguments):
43
43
  def run(self) -> AnythingToMarkdownReturns:
44
44
  if not self.prevent_save_file:
45
45
  if not self.output:
46
- output = Path(self.input).with_suffix(".md")
46
+ output = Path(self.SOURCE).with_suffix(".md")
47
47
  else:
48
48
  output = Path(self.output)
49
49
  else:
@@ -57,7 +57,7 @@ class AnythingToMarkdownArguments(BaseArguments):
57
57
  llm_model = None
58
58
 
59
59
  text: str = anything_to_markdown(
60
- self.input,
60
+ self.SOURCE,
61
61
  llm_client=llm_client,
62
62
  llm_model=llm_model,
63
63
  style_map=self.style_map,
@@ -67,18 +67,18 @@ class AnythingToMarkdownArguments(BaseArguments):
67
67
  if output:
68
68
  output.parent.mkdir(parents=True, exist_ok=True)
69
69
  output.write_text(text, encoding=self.encoding)
70
- logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
70
+ logger.info(f"Converted `{self.SOURCE}` to markdown and saved to `{output}`.")
71
71
  else:
72
- logger.info(f"Converted `{self.input}` to markdown.")
72
+ logger.info(f"Converted `{self.SOURCE}` to markdown.")
73
73
  return {
74
- "input": self.input,
74
+ "input": self.SOURCE,
75
75
  "output": str(output) if output is not None else None,
76
76
  "out_text": text,
77
77
  }
78
78
 
79
79
 
80
80
  def main() -> None:
81
- AnythingToMarkdownArguments().run()
81
+ Arguments().run()
82
82
 
83
83
 
84
84
  if __name__ == "__main__":
@@ -13,7 +13,7 @@ import time
13
13
  from pathlib import Path
14
14
  from typing import List, Literal, Optional, TypedDict
15
15
 
16
- from spargear import ArgumentSpec, BaseArguments
16
+ from spargear import ArgumentSpec, RunnableArguments
17
17
 
18
18
  from chatterer import Chatterer
19
19
  from chatterer.tools.convert_pdf_to_markdown import PdfToMarkdown
@@ -35,10 +35,10 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(level
35
35
  logger = logging.getLogger(__name__)
36
36
 
37
37
 
38
- class PdfToMarkdownArgs(BaseArguments):
38
+ class Arguments(RunnableArguments[List[ConversionResult]]):
39
39
  """Command-line arguments for PDF to Markdown conversion."""
40
40
 
41
- input: str
41
+ PDF_OR_DIRECTORY_PATH: str
42
42
  """Input PDF file or directory containing PDF files to convert to markdown."""
43
43
 
44
44
  output: Optional[str] = None
@@ -274,7 +274,7 @@ class PdfToMarkdownArgs(BaseArguments):
274
274
 
275
275
  def _prepare_files(self) -> tuple[List[Path], Path, bool]:
276
276
  """Prepare input and output file paths."""
277
- input_path = Path(self.input).resolve()
277
+ input_path = Path(self.PDF_OR_DIRECTORY_PATH).resolve()
278
278
  pdf_files: List[Path] = []
279
279
  is_dir = False
280
280
 
@@ -320,7 +320,7 @@ def main() -> None:
320
320
  """Main entry point for the CLI application."""
321
321
  args = None
322
322
  try:
323
- args = PdfToMarkdownArgs()
323
+ args = Arguments()
324
324
  args.run()
325
325
  except KeyboardInterrupt:
326
326
  logger.info("🛑 Conversion interrupted by user")
@@ -3,15 +3,15 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import Optional
5
5
 
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer.tools.convert_to_text import pdf_to_text
9
9
 
10
10
  logger = logging.getLogger(__name__)
11
11
 
12
12
 
13
- class PdfToTextArgs(BaseArguments):
14
- input: Path
13
+ class Arguments(RunnableArguments[None]):
14
+ PDF_PATH: Path
15
15
  """Path to the PDF file to convert to text."""
16
16
  output: Optional[Path]
17
17
  """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
@@ -19,7 +19,7 @@ class PdfToTextArgs(BaseArguments):
19
19
  """Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
20
20
 
21
21
  def run(self) -> None:
22
- input = self.input.resolve()
22
+ input = self.PDF_PATH.resolve()
23
23
  out = self.output or input.with_suffix(".txt")
24
24
  if not input.is_file():
25
25
  sys.exit(1)
@@ -47,7 +47,7 @@ def parse_page_indices(pages_str: str) -> list[int]:
47
47
 
48
48
 
49
49
  def main() -> None:
50
- PdfToTextArgs().run()
50
+ Arguments().run()
51
51
 
52
52
 
53
53
  if __name__ == "__main__":
@@ -3,7 +3,7 @@ import sys
3
3
  from pathlib import Path
4
4
  from typing import NotRequired, TypedDict
5
5
 
6
- from spargear import BaseArguments
6
+ from spargear import RunnableArguments
7
7
 
8
8
  from chatterer import BaseMessage, Chatterer, HumanMessage, SystemMessage
9
9
 
@@ -155,7 +155,7 @@ Now, generate the final `presentation.html` file using impress.js and the provid
155
155
  # --- Argument Parsing ---
156
156
 
157
157
 
158
- class MakePptArguments(BaseArguments):
158
+ class Arguments(RunnableArguments[None]):
159
159
  """
160
160
  Arguments for the presentation generation process.
161
161
  """
@@ -179,9 +179,7 @@ class MakePptArguments(BaseArguments):
179
179
  """Prompt for organizing slides into a presentation script"""
180
180
 
181
181
  # LLM Settings
182
- provider: str = (
183
- "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
184
- )
182
+ provider: str = "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
185
183
  """Name of the language model to use (provider:model_name)"""
186
184
 
187
185
  # Other settings
@@ -293,7 +291,7 @@ class GeneratedSlide(TypedDict):
293
291
  script: NotRequired[str]
294
292
 
295
293
 
296
- def run_presentation_agent(args: MakePptArguments):
294
+ def run_presentation_agent(args: Arguments):
297
295
  """Executes the presentation generation agent loop."""
298
296
 
299
297
  if args.verbose:
@@ -481,7 +479,7 @@ Remember to follow all instructions in the role prompt, especially regarding HTM
481
479
 
482
480
 
483
481
  def main() -> None:
484
- MakePptArguments().run()
482
+ Arguments().run()
485
483
 
486
484
 
487
485
  if __name__ == "__main__":
@@ -0,0 +1,137 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ from spargear import BaseArguments, RunnableArguments, SubcommandSpec
7
+
8
+ from chatterer import PlayWrightBot
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ # Define the default path location relative to this script file
14
+ DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
15
+
16
+
17
+ class ReadArgs(RunnableArguments[None]):
18
+ """Arguments for the 'read' subcommand."""
19
+
20
+ URL: str
21
+ """URL (potentially protected) to navigate to using the saved session."""
22
+ jsonpath: Path = DEFAULT_JSON_PATH
23
+ """Path to the session state JSON file to load."""
24
+
25
+ def run(self) -> None:
26
+ """
27
+ Loads the session state from the specified JSON file, then navigates
28
+ to a protected_url that normally requires login. If the stored session
29
+ is valid, it should open without re-entering credentials.
30
+
31
+ Correction: Loads the JSON content into a dict first to satisfy type hints.
32
+ """
33
+ url = self.URL
34
+ jsonpath = self.jsonpath
35
+ logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
36
+
37
+ if not jsonpath.exists():
38
+ logger.error(f"Session file not found at {jsonpath}")
39
+ sys.exit(1)
40
+
41
+ # Load the storage state from the JSON file into a dictionary
42
+ logger.info(f"Reading storage state content from {jsonpath} ...")
43
+ try:
44
+ with open(jsonpath, "r", encoding="utf-8") as f:
45
+ # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
46
+ storage_state_dict = json.load(f)
47
+ except json.JSONDecodeError:
48
+ logger.error(f"Failed to decode JSON from {jsonpath}")
49
+ sys.exit(1)
50
+ except Exception as e:
51
+ logger.error(f"Error reading file {jsonpath}: {e}")
52
+ sys.exit(1)
53
+
54
+ logger.info("Launching browser with loaded session state...")
55
+ with PlayWrightBot(
56
+ playwright_launch_options={"headless": False},
57
+ # Pass the loaded dictionary, which should match the expected 'StorageState' type
58
+ playwright_persistency_options={"storage_state": storage_state_dict},
59
+ ) as bot:
60
+ bot.get_page(url)
61
+
62
+ logger.info("Press Enter in the console when you're done checking the protected page.")
63
+ input(" >> Press Enter to exit: ")
64
+
65
+ logger.info("Done! Browser is now closed.")
66
+
67
+
68
+ class WriteArgs(RunnableArguments[None]):
69
+ """Arguments for the 'write' subcommand."""
70
+
71
+ URL: str
72
+ """URL to navigate to for manual login."""
73
+ jsonpath: Path = DEFAULT_JSON_PATH
74
+ """Path to save the session state JSON file."""
75
+
76
+ def run(self) -> None:
77
+ """
78
+ Launches a non-headless browser and navigates to the login_url.
79
+ The user can manually log in, then press Enter in the console
80
+ to store the current session state into a JSON file.
81
+ """
82
+ url = self.URL
83
+ jsonpath = self.jsonpath
84
+ logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
85
+
86
+ # Ensure jsonpath directory exists
87
+ jsonpath.parent.mkdir(parents=True, exist_ok=True)
88
+
89
+ with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
90
+ bot.get_page(url)
91
+
92
+ logger.info("After completing the login in the browser, press Enter here to save the session.")
93
+ input(" >> Press Enter when ready: ")
94
+
95
+ # get_sync_browser() returns the BrowserContext internally
96
+ context = bot.get_sync_browser()
97
+
98
+ # Save the current session (cookies, localStorage) to a JSON file
99
+ logger.info(f"Saving storage state to {jsonpath} ...")
100
+ context.storage_state(path=jsonpath) # Pass Path object directly
101
+
102
+ logger.info("Done! Browser is now closed.")
103
+
104
+
105
+ class Arguments(BaseArguments):
106
+ """
107
+ A simple CLI tool for saving and using Playwright sessions via storage_state.
108
+ Uses spargear for declarative argument parsing.
109
+ """
110
+
111
+ read: SubcommandSpec[ReadArgs] = SubcommandSpec(
112
+ name="read",
113
+ argument_class=ReadArgs,
114
+ help="Use a saved session to view a protected page.",
115
+ description="Loads session state from the specified JSON file and navigates to the URL.",
116
+ )
117
+ write: SubcommandSpec[WriteArgs] = SubcommandSpec(
118
+ name="write",
119
+ argument_class=WriteArgs,
120
+ help="Save a new session by manually logging in.",
121
+ description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
122
+ )
123
+
124
+ def run(self) -> None:
125
+ """Parses arguments using spargear and executes the corresponding command."""
126
+ if isinstance(last_subcommand := self.last_command, RunnableArguments):
127
+ last_subcommand.run()
128
+ else:
129
+ self.get_parser().print_help()
130
+
131
+
132
+ def main() -> None:
133
+ Arguments().run()
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()
@@ -2,15 +2,15 @@ import logging
2
2
  from pathlib import Path
3
3
  from typing import Optional
4
4
 
5
- from spargear import BaseArguments
5
+ from spargear import RunnableArguments
6
6
 
7
7
  from chatterer import CodeSnippets
8
8
 
9
9
  logger = logging.getLogger(__name__)
10
10
 
11
11
 
12
- class GetCodeSnippetsArgs(BaseArguments):
13
- input: str
12
+ class Arguments(RunnableArguments[CodeSnippets]):
13
+ PATH_OR_PACKAGE_NAME: str
14
14
  """Path to the package or file from which to extract code snippets."""
15
15
  output: Optional[str] = None
16
16
  """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
@@ -33,7 +33,7 @@ class GetCodeSnippetsArgs(BaseArguments):
33
33
  output = None
34
34
 
35
35
  cs = CodeSnippets.from_path_or_pkgname(
36
- path_or_pkgname=self.input,
36
+ path_or_pkgname=self.PATH_OR_PACKAGE_NAME,
37
37
  ban_file_patterns=self.ban_file_patterns,
38
38
  glob_patterns=self.glob_patterns,
39
39
  case_sensitive=self.case_sensitive,
@@ -41,14 +41,14 @@ class GetCodeSnippetsArgs(BaseArguments):
41
41
  if output is not None:
42
42
  output.parent.mkdir(parents=True, exist_ok=True)
43
43
  output.write_text(cs.snippets_text, encoding="utf-8")
44
- logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
44
+ logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}` and saved to `{output}`.")
45
45
  else:
46
- logger.info(f"Extracted code snippets from `{self.input}`.")
46
+ logger.info(f"Extracted code snippets from `{self.PATH_OR_PACKAGE_NAME}`.")
47
47
  return cs
48
48
 
49
49
 
50
50
  def main() -> None:
51
- GetCodeSnippetsArgs().run()
51
+ Arguments().run()
52
52
 
53
53
 
54
54
  if __name__ == "__main__":
@@ -6,14 +6,14 @@ from typing import Optional, cast
6
6
 
7
7
  from openai import OpenAI
8
8
  from pydub import AudioSegment
9
- from spargear import BaseArguments
9
+ from spargear import RunnableArguments
10
10
 
11
11
  # Maximum chunk length in seconds
12
12
  MAX_CHUNK_DURATION = 600
13
13
 
14
14
 
15
- class TranscriptionApiArguments(BaseArguments):
16
- input: Path
15
+ class Arguments(RunnableArguments[None]):
16
+ AUDIO_PATH: Path
17
17
  """The audio file to transcribe."""
18
18
  output: Optional[Path] = None
19
19
  """Path to save the transcription output."""
@@ -31,7 +31,7 @@ class TranscriptionApiArguments(BaseArguments):
31
31
 
32
32
  client = OpenAI(api_key=self.api_key, base_url=self.base_url)
33
33
 
34
- audio = load_audio_segment(self.input)
34
+ audio = load_audio_segment(self.AUDIO_PATH)
35
35
 
36
36
  segments = split_audio(audio, MAX_CHUNK_DURATION)
37
37
  print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
@@ -42,7 +42,7 @@ class TranscriptionApiArguments(BaseArguments):
42
42
  transcripts.append(transcribe_segment(seg, client, model, self.prompt))
43
43
 
44
44
  full_transcript = "\n\n".join(transcripts)
45
- output_path: Path = self.output or self.input.with_suffix(".txt")
45
+ output_path: Path = self.output or self.AUDIO_PATH.with_suffix(".txt")
46
46
  output_path.write_text(full_transcript, encoding="utf-8")
47
47
  print(f"[✓] Transcription saved to: {output_path}")
48
48
 
@@ -105,7 +105,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt
105
105
 
106
106
 
107
107
  def main() -> None:
108
- TranscriptionApiArguments().run()
108
+ Arguments().run()
109
109
 
110
110
 
111
111
  if __name__ == "__main__":
@@ -19,8 +19,8 @@ from chatterer.tools.upstage_document_parser import (
19
19
  logger = logging.getLogger(__name__)
20
20
 
21
21
 
22
- class UpstageParserArguments(BaseArguments):
23
- input: Path
22
+ class Arguments(BaseArguments):
23
+ INPUT_PATH: Path
24
24
  """Input file to parse. Can be a PDF, image, or other supported formats."""
25
25
  output: Optional[Path] = None
26
26
  """Output file path for the parsed content. Defaults to input file with .md suffix if not provided."""
@@ -52,25 +52,25 @@ class UpstageParserArguments(BaseArguments):
52
52
  )
53
53
 
54
54
  def run(self) -> None:
55
- input = UpstageParserArguments.input.resolve()
56
- out = UpstageParserArguments.output or input.with_suffix(".md")
55
+ input = self.INPUT_PATH.resolve()
56
+ out = self.output or input.with_suffix(".md")
57
57
 
58
58
  parser = UpstageDocumentParseParser(
59
- api_key=UpstageParserArguments.api_key,
60
- base_url=UpstageParserArguments.base_url,
61
- model=UpstageParserArguments.model,
62
- split=UpstageParserArguments.split,
63
- ocr=UpstageParserArguments.ocr,
64
- output_format=UpstageParserArguments.output_format,
65
- coordinates=UpstageParserArguments.coordinates,
66
- base64_encoding=UpstageParserArguments.base64_encoding,
67
- image_description_instruction=UpstageParserArguments.image_description_instruction,
68
- image_dir=UpstageParserArguments.image_dir,
69
- chatterer=UpstageParserArguments.chatterer.value,
59
+ api_key=self.api_key,
60
+ base_url=self.base_url,
61
+ model=self.model,
62
+ split=self.split,
63
+ ocr=self.ocr,
64
+ output_format=self.output_format,
65
+ coordinates=self.coordinates,
66
+ base64_encoding=self.base64_encoding,
67
+ image_description_instruction=self.image_description_instruction,
68
+ image_dir=self.image_dir,
69
+ chatterer=self.chatterer.value,
70
70
  )
71
71
  docs = parser.parse(Blob.from_path(input)) # pyright: ignore[reportUnknownMemberType]
72
72
 
73
- if UpstageParserArguments.image_dir:
73
+ if self.image_dir:
74
74
  for path, image in parser.image_data.items():
75
75
  (path := Path(path)).parent.mkdir(parents=True, exist_ok=True)
76
76
  path.write_bytes(image)
@@ -82,7 +82,7 @@ class UpstageParserArguments(BaseArguments):
82
82
 
83
83
 
84
84
  def main() -> None:
85
- UpstageParserArguments().run()
85
+ Arguments().run()
86
86
 
87
87
 
88
88
  if __name__ == "__main__":
@@ -1,13 +1,13 @@
1
1
  from pathlib import Path
2
2
  from typing import Literal
3
3
 
4
- from spargear import ArgumentSpec, BaseArguments
4
+ from spargear import ArgumentSpec, RunnableArguments
5
5
 
6
6
  from chatterer import Chatterer, MarkdownLink, PlayWrightBot
7
7
 
8
8
 
9
- class WebpageToMarkdownArgs(BaseArguments):
10
- url: str
9
+ class Arguments(RunnableArguments[None]):
10
+ URL: str
11
11
  """The URL to crawl."""
12
12
  output: str = Path(__file__).with_suffix(".md").as_posix()
13
13
  """The output file path for the markdown file."""
@@ -21,7 +21,7 @@ class WebpageToMarkdownArgs(BaseArguments):
21
21
 
22
22
  def run(self) -> None:
23
23
  chatterer = self.chatterer.value
24
- url: str = self.url.strip()
24
+ url: str = self.URL.strip()
25
25
  output: Path = Path(self.output).resolve()
26
26
  with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
27
27
  md = bot.url_to_md(url)
@@ -32,15 +32,13 @@ class WebpageToMarkdownArgs(BaseArguments):
32
32
  links = MarkdownLink.from_markdown(md, referer_url=url)
33
33
  for link in links:
34
34
  if link.type == "link":
35
- print(
36
- f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
37
- )
35
+ print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
38
36
  elif link.type == "image":
39
37
  print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
40
38
 
41
39
  async def arun(self) -> None:
42
40
  chatterer = self.chatterer.value
43
- url: str = self.url.strip()
41
+ url: str = self.URL.strip()
44
42
  output: Path = Path(self.output).resolve()
45
43
  async with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
46
44
  md = await bot.aurl_to_md(url)
@@ -51,9 +49,7 @@ class WebpageToMarkdownArgs(BaseArguments):
51
49
  links = MarkdownLink.from_markdown(md, referer_url=url)
52
50
  for link in links:
53
51
  if link.type == "link":
54
- print(
55
- f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
56
- )
52
+ print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
57
53
  elif link.type == "image":
58
54
  print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
59
55
 
@@ -63,7 +59,7 @@ def truncate_string(s: str) -> str:
63
59
 
64
60
 
65
61
  def main() -> None:
66
- WebpageToMarkdownArgs().run()
62
+ Arguments().run()
67
63
 
68
64
 
69
65
  if __name__ == "__main__":
@@ -781,143 +781,143 @@ class AoTPipeline:
781
781
  # 4.6) Build or export a reasoning graph
782
782
  # ---------------------------------------------------------------------------------
783
783
 
784
- def get_reasoning_graph(self, global_id_prefix: str = "AoT"):
785
- """
786
- Constructs a Graph object (from hypothetical `neo4j_extension`)
787
- capturing the pipeline steps, including devil's advocate steps.
788
- """
789
- from neo4j_extension import Graph, Node, Relationship
790
-
791
- g = Graph()
792
- step_nodes: dict[int, Node] = {}
793
- subq_nodes: dict[str, Node] = {}
794
-
795
- # Step A: Create nodes for each pipeline step
796
- for i, record in enumerate(self.steps_history):
797
- # We'll skip nested Decomposition steps only if we want to flatten them.
798
- # But let's keep them for clarity.
799
- step_node = Node(
800
- properties=record.as_properties(), labels={record.step_name}, globalId=f"{global_id_prefix}_step_{i}"
801
- )
802
- g.add_node(step_node)
803
- step_nodes[i] = step_node
804
-
805
- # Step B: Collect sub-questions from each DECOMPOSITION or DEVILS_ADVOCATE
806
- all_sub_questions: dict[str, tuple[int, int, SubQuestionNode]] = {}
807
- for i, record in enumerate(self.steps_history):
808
- if record.sub_questions:
809
- for sq_idx, sq in enumerate(record.sub_questions):
810
- sq_id = f"{global_id_prefix}_decomp_{i}_sub_{sq_idx}"
811
- all_sub_questions[sq_id] = (i, sq_idx, sq)
812
-
813
- for sq_id, (i, sq_idx, sq) in all_sub_questions.items():
814
- n_subq = Node(
815
- properties={
816
- "question": sq.question,
817
- "answer": sq.answer or "",
818
- },
819
- labels={"SubQuestion"},
820
- globalId=sq_id,
821
- )
822
- g.add_node(n_subq)
823
- subq_nodes[sq_id] = n_subq
824
-
825
- # Step C: Add relationships. We do a simple approach:
826
- # - If StepRecord is DECOMPOSITION or DEVILS_ADVOCATE with sub_questions, link them via SPLIT_INTO.
827
- for i, record in enumerate(self.steps_history):
828
- if record.sub_questions:
829
- start_node = step_nodes[i]
830
- for sq_idx, sq in enumerate(record.sub_questions):
831
- sq_id = f"{global_id_prefix}_decomp_{i}_sub_{sq_idx}"
832
- end_node = subq_nodes[sq_id]
833
- rel = Relationship(
834
- properties={},
835
- rel_type=StepRelation.SPLIT_INTO,
836
- start_node=start_node,
837
- end_node=end_node,
838
- globalId=f"{global_id_prefix}_split_{i}_{sq_idx}",
839
- )
840
- g.add_relationship(rel)
841
- # Also add sub-question dependencies
842
- for dep in sq.depend:
843
- # The same record i -> sub-question subq
844
- if 0 <= dep < len(record.sub_questions):
845
- dep_id = f"{global_id_prefix}_decomp_{i}_sub_{dep}"
846
- if dep_id in subq_nodes:
847
- dep_node = subq_nodes[dep_id]
848
- rel_dep = Relationship(
849
- properties={},
850
- rel_type=StepRelation.DEPEND_ON,
851
- start_node=end_node,
852
- end_node=dep_node,
853
- globalId=f"{global_id_prefix}_dep_{i}_q_{sq_idx}_on_{dep}",
854
- )
855
- g.add_relationship(rel_dep)
856
-
857
- # Step D: We add PRECEDES relationships in a linear chain for the pipeline steps
858
- for i in range(len(self.steps_history) - 1):
859
- start_node = step_nodes[i]
860
- end_node = step_nodes[i + 1]
861
- rel = Relationship(
862
- properties={},
863
- rel_type=StepRelation.PRECEDES,
864
- start_node=start_node,
865
- end_node=end_node,
866
- globalId=f"{global_id_prefix}_precede_{i}_to_{i + 1}",
867
- )
868
- g.add_relationship(rel)
869
-
870
- # Step E: CRITIQUES, SELECTS, RESULT_OF can be similarly added:
871
- # We'll do a simple pass:
872
- # If step_name ends with CRITIQUE => it critiques the step before it
873
- for i, record in enumerate(self.steps_history):
874
- if "CRITIQUE" in record.step_name:
875
- # Let it point to the preceding step
876
- if i > 0:
877
- start_node = step_nodes[i]
878
- end_node = step_nodes[i - 1]
879
- rel = Relationship(
880
- properties={},
881
- rel_type=StepRelation.CRITIQUES,
882
- start_node=start_node,
883
- end_node=end_node,
884
- globalId=f"{global_id_prefix}_crit_{i}",
885
- )
886
- g.add_relationship(rel)
887
-
888
- # If there's a BEST_APPROACH_DECISION step, link it to the step it uses
889
- best_decision_idx = None
890
- used_step_idx = None
891
- for i, record in enumerate(self.steps_history):
892
- if record.step_name == StepName.BEST_APPROACH_DECISION and record.used:
893
- best_decision_idx = i
894
- # find the step with that name
895
- used_step_idx = next((j for j in step_nodes if self.steps_history[j].step_name == record.used), None)
896
- if used_step_idx is not None:
897
- rel = Relationship(
898
- properties={},
899
- rel_type=StepRelation.SELECTS,
900
- start_node=step_nodes[i],
901
- end_node=step_nodes[used_step_idx],
902
- globalId=f"{global_id_prefix}_select_{i}_use_{used_step_idx}",
903
- )
904
- g.add_relationship(rel)
905
-
906
- # And link the final answer to the best approach
907
- final_answer_idx = next(
908
- (i for i, r in enumerate(self.steps_history) if r.step_name == StepName.FINAL_ANSWER), None
909
- )
910
- if final_answer_idx is not None and best_decision_idx is not None:
911
- rel = Relationship(
912
- properties={},
913
- rel_type=StepRelation.RESULT_OF,
914
- start_node=step_nodes[final_answer_idx],
915
- end_node=step_nodes[best_decision_idx],
916
- globalId=f"{global_id_prefix}_final_{final_answer_idx}_resultof_{best_decision_idx}",
917
- )
918
- g.add_relationship(rel)
919
-
920
- return g
784
+ # def get_reasoning_graph(self, global_id_prefix: str = "AoT"):
785
+ # """
786
+ # Constructs a Graph object (from hypothetical `neo4j_extension`)
787
+ # capturing the pipeline steps, including devil's advocate steps.
788
+ # """
789
+ # from neo4j_extension import Graph, Node, Relationship
790
+
791
+ # g = Graph()
792
+ # step_nodes: dict[int, Node] = {}
793
+ # subq_nodes: dict[str, Node] = {}
794
+
795
+ # # Step A: Create nodes for each pipeline step
796
+ # for i, record in enumerate(self.steps_history):
797
+ # # We'll skip nested Decomposition steps only if we want to flatten them.
798
+ # # But let's keep them for clarity.
799
+ # step_node = Node(
800
+ # properties=record.as_properties(), labels={record.step_name}, globalId=f"{global_id_prefix}_step_{i}"
801
+ # )
802
+ # g.add_node(step_node)
803
+ # step_nodes[i] = step_node
804
+
805
+ # # Step B: Collect sub-questions from each DECOMPOSITION or DEVILS_ADVOCATE
806
+ # all_sub_questions: dict[str, tuple[int, int, SubQuestionNode]] = {}
807
+ # for i, record in enumerate(self.steps_history):
808
+ # if record.sub_questions:
809
+ # for sq_idx, sq in enumerate(record.sub_questions):
810
+ # sq_id = f"{global_id_prefix}_decomp_{i}_sub_{sq_idx}"
811
+ # all_sub_questions[sq_id] = (i, sq_idx, sq)
812
+
813
+ # for sq_id, (i, sq_idx, sq) in all_sub_questions.items():
814
+ # n_subq = Node(
815
+ # properties={
816
+ # "question": sq.question,
817
+ # "answer": sq.answer or "",
818
+ # },
819
+ # labels={"SubQuestion"},
820
+ # globalId=sq_id,
821
+ # )
822
+ # g.add_node(n_subq)
823
+ # subq_nodes[sq_id] = n_subq
824
+
825
+ # # Step C: Add relationships. We do a simple approach:
826
+ # # - If StepRecord is DECOMPOSITION or DEVILS_ADVOCATE with sub_questions, link them via SPLIT_INTO.
827
+ # for i, record in enumerate(self.steps_history):
828
+ # if record.sub_questions:
829
+ # start_node = step_nodes[i]
830
+ # for sq_idx, sq in enumerate(record.sub_questions):
831
+ # sq_id = f"{global_id_prefix}_decomp_{i}_sub_{sq_idx}"
832
+ # end_node = subq_nodes[sq_id]
833
+ # rel = Relationship(
834
+ # properties={},
835
+ # rel_type=StepRelation.SPLIT_INTO,
836
+ # start_node=start_node,
837
+ # end_node=end_node,
838
+ # globalId=f"{global_id_prefix}_split_{i}_{sq_idx}",
839
+ # )
840
+ # g.add_relationship(rel)
841
+ # # Also add sub-question dependencies
842
+ # for dep in sq.depend:
843
+ # # The same record i -> sub-question subq
844
+ # if 0 <= dep < len(record.sub_questions):
845
+ # dep_id = f"{global_id_prefix}_decomp_{i}_sub_{dep}"
846
+ # if dep_id in subq_nodes:
847
+ # dep_node = subq_nodes[dep_id]
848
+ # rel_dep = Relationship(
849
+ # properties={},
850
+ # rel_type=StepRelation.DEPEND_ON,
851
+ # start_node=end_node,
852
+ # end_node=dep_node,
853
+ # globalId=f"{global_id_prefix}_dep_{i}_q_{sq_idx}_on_{dep}",
854
+ # )
855
+ # g.add_relationship(rel_dep)
856
+
857
+ # # Step D: We add PRECEDES relationships in a linear chain for the pipeline steps
858
+ # for i in range(len(self.steps_history) - 1):
859
+ # start_node = step_nodes[i]
860
+ # end_node = step_nodes[i + 1]
861
+ # rel = Relationship(
862
+ # properties={},
863
+ # rel_type=StepRelation.PRECEDES,
864
+ # start_node=start_node,
865
+ # end_node=end_node,
866
+ # globalId=f"{global_id_prefix}_precede_{i}_to_{i + 1}",
867
+ # )
868
+ # g.add_relationship(rel)
869
+
870
+ # # Step E: CRITIQUES, SELECTS, RESULT_OF can be similarly added:
871
+ # # We'll do a simple pass:
872
+ # # If step_name ends with CRITIQUE => it critiques the step before it
873
+ # for i, record in enumerate(self.steps_history):
874
+ # if "CRITIQUE" in record.step_name:
875
+ # # Let it point to the preceding step
876
+ # if i > 0:
877
+ # start_node = step_nodes[i]
878
+ # end_node = step_nodes[i - 1]
879
+ # rel = Relationship(
880
+ # properties={},
881
+ # rel_type=StepRelation.CRITIQUES,
882
+ # start_node=start_node,
883
+ # end_node=end_node,
884
+ # globalId=f"{global_id_prefix}_crit_{i}",
885
+ # )
886
+ # g.add_relationship(rel)
887
+
888
+ # # If there's a BEST_APPROACH_DECISION step, link it to the step it uses
889
+ # best_decision_idx = None
890
+ # used_step_idx = None
891
+ # for i, record in enumerate(self.steps_history):
892
+ # if record.step_name == StepName.BEST_APPROACH_DECISION and record.used:
893
+ # best_decision_idx = i
894
+ # # find the step with that name
895
+ # used_step_idx = next((j for j in step_nodes if self.steps_history[j].step_name == record.used), None)
896
+ # if used_step_idx is not None:
897
+ # rel = Relationship(
898
+ # properties={},
899
+ # rel_type=StepRelation.SELECTS,
900
+ # start_node=step_nodes[i],
901
+ # end_node=step_nodes[used_step_idx],
902
+ # globalId=f"{global_id_prefix}_select_{i}_use_{used_step_idx}",
903
+ # )
904
+ # g.add_relationship(rel)
905
+
906
+ # # And link the final answer to the best approach
907
+ # final_answer_idx = next(
908
+ # (i for i, r in enumerate(self.steps_history) if r.step_name == StepName.FINAL_ANSWER), None
909
+ # )
910
+ # if final_answer_idx is not None and best_decision_idx is not None:
911
+ # rel = Relationship(
912
+ # properties={},
913
+ # rel_type=StepRelation.RESULT_OF,
914
+ # start_node=step_nodes[final_answer_idx],
915
+ # end_node=step_nodes[best_decision_idx],
916
+ # globalId=f"{global_id_prefix}_final_{final_answer_idx}_resultof_{best_decision_idx}",
917
+ # )
918
+ # g.add_relationship(rel)
919
+
920
+ # return g
921
921
 
922
922
 
923
923
  # ---------------------------------------------------------------------------------
@@ -944,32 +944,32 @@ class AoTStrategy(BaseStrategy):
944
944
  msgs = self.pipeline.chatterer.client._convert_input(messages).to_messages() # type: ignore
945
945
  return self.pipeline.run_pipeline(msgs)
946
946
 
947
- def get_reasoning_graph(self):
948
- """Return the AoT reasoning graph from the pipeline’s steps history."""
949
- return self.pipeline.get_reasoning_graph(global_id_prefix="AoT")
947
+ # def get_reasoning_graph(self):
948
+ # """Return the AoT reasoning graph from the pipeline’s steps history."""
949
+ # return self.pipeline.get_reasoning_graph(global_id_prefix="AoT")
950
950
 
951
951
 
952
952
  # ---------------------------------------------------------------------------------
953
953
  # Example usage (pseudo-code)
954
954
  # ---------------------------------------------------------------------------------
955
- if __name__ == "__main__":
956
- from neo4j_extension import Neo4jConnection # or your actual DB connector
957
-
958
- # You would create a Chatterer with your chosen LLM backend (OpenAI, etc.)
959
- chatterer = Chatterer.openai() # pseudo-code
960
- pipeline = AoTPipeline(chatterer=chatterer, max_depth=3)
961
- strategy = AoTStrategy(pipeline=pipeline)
962
-
963
- question = "Solve 5.9 = 5.11 - x. Also compare 9.11 and 9.9."
964
- answer = strategy.invoke(question)
965
- print("Final Answer:", answer)
966
-
967
- # Build the reasoning graph
968
- graph = strategy.get_reasoning_graph()
969
- print(f"\nGraph has {len(graph.nodes)} nodes and {len(graph.relationships)} relationships.")
970
-
971
- # Optionally store in Neo4j
972
- with Neo4jConnection() as conn:
973
- conn.clear_all()
974
- conn.upsert_graph(graph)
975
- print("Graph stored in Neo4j.")
955
+ # if __name__ == "__main__":
956
+ # from neo4j_extension import Neo4jConnection # or your actual DB connector
957
+
958
+ # # You would create a Chatterer with your chosen LLM backend (OpenAI, etc.)
959
+ # chatterer = Chatterer.openai() # pseudo-code
960
+ # pipeline = AoTPipeline(chatterer=chatterer, max_depth=3)
961
+ # strategy = AoTStrategy(pipeline=pipeline)
962
+
963
+ # question = "Solve 5.9 = 5.11 - x. Also compare 9.11 and 9.9."
964
+ # answer = strategy.invoke(question)
965
+ # print("Final Answer:", answer)
966
+
967
+ # # Build the reasoning graph
968
+ # graph = strategy.get_reasoning_graph()
969
+ # print(f"\nGraph has {len(graph.nodes)} nodes and {len(graph.relationships)} relationships.")
970
+
971
+ # # Optionally store in Neo4j
972
+ # with Neo4jConnection() as conn:
973
+ # conn.clear_all()
974
+ # conn.upsert_graph(graph)
975
+ # print("Graph stored in Neo4j.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.23
3
+ Version: 0.1.24
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -11,10 +11,9 @@ Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: regex>=2024.11.6
12
12
  Requires-Dist: rich>=13.9.4
13
13
  Requires-Dist: colorama>=0.4.6
14
- Requires-Dist: spargear>=0.2.0
14
+ Requires-Dist: spargear>=0.2.7
15
15
  Provides-Extra: dev
16
- Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
17
- Requires-Dist: ipykernel>=6.29.5; extra == "dev"
16
+ Requires-Dist: pyright>=1.1.401; extra == "dev"
18
17
  Provides-Extra: conversion
19
18
  Requires-Dist: youtube-transcript-api>=1.0.3; extra == "conversion"
20
19
  Requires-Dist: chatterer[browser]; extra == "conversion"
@@ -34,12 +33,10 @@ Requires-Dist: mistune>=3.1.3; extra == "markdown"
34
33
  Provides-Extra: video
35
34
  Requires-Dist: pydub>=0.25.1; extra == "video"
36
35
  Provides-Extra: langchain
37
- Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
36
+ Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain"
37
+ Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain"
38
+ Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain"
38
39
  Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
39
- Provides-Extra: langchain-providers
40
- Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain-providers"
41
- Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain-providers"
42
- Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain-providers"
43
40
  Provides-Extra: all
44
41
  Requires-Dist: chatterer[dev]; extra == "all"
45
42
  Requires-Dist: chatterer[langchain]; extra == "all"
@@ -6,17 +6,18 @@ chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  chatterer/common_types/__init__.py,sha256=jfS6m5UANSvGjzQ_nzYDpryn5uZqNb06-4xCsQ2C_lw,376
7
7
  chatterer/common_types/io.py,sha256=fetiyi1suZ3NF2mj5k5KDLJLGKS1n4J-5UmH7JN36g8,817
8
8
  chatterer/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- chatterer/examples/anything_to_markdown.py,sha256=4O9ze7AIHcwEzvVmm5JMMKo_rVSFwhPL8MVHtfMLJ5Y,2734
10
- chatterer/examples/get_code_snippets.py,sha256=pz05JjhKaWAknVKlk1ftEEzpSG4-sqD9oa_gyIQoCAs,1911
11
- chatterer/examples/login_with_playwright.py,sha256=EhvJLaH5TD7bmDi12uP8YLd0fRhdjR-oyIkBHLi1Jjs,5988
12
- chatterer/examples/make_ppt.py,sha256=vsT_iL_jS2ami5VYrReLMQcD576FfZUH7913F7_As0A,23278
13
- chatterer/examples/pdf_to_markdown.py,sha256=mur63PxI0uwl90Mh49VXPuO0YSwyEfs0-MwxJWKWXec,13577
14
- chatterer/examples/pdf_to_text.py,sha256=DznTyhu1REv8Wp4RimQWVgEU5j0_BmlwjfJYJvx3dbI,1590
15
- chatterer/examples/transcription_api.py,sha256=WUs12qHH4616eLMQDHOiyVGxaXstTpgeE47djYyli6c,3897
16
- chatterer/examples/upstage_parser.py,sha256=TrfeSIiF0xklhFCknop22TIOVibI4CJ_UKj5-lD8c8E,3487
17
- chatterer/examples/webpage_to_markdown.py,sha256=DnZfQ-trXBiOiszA2tMlgadgKH-ObTi6l4gGloT-cQw,2846
9
+ chatterer/examples/__main__.py,sha256=W-Zo7z9RyA0PrY-tPDqf9BSkOqPpaIXROcHCXCwNXc4,1566
10
+ chatterer/examples/any2md.py,sha256=4AtdlwP1jxSsvh31yWmB5HP2Wmof4Fh0W_F3im2yJ_M,2739
11
+ chatterer/examples/pdf2md.py,sha256=viru-9vrUdiYMXRpQVpbYiZy6bjkkF-HTXSvy37ICUY,13625
12
+ chatterer/examples/pdf2txt.py,sha256=ULfA2cr-lrfLVqpMlSa08qo5AXVXiyL8N2-KiD0Orhc,1602
13
+ chatterer/examples/ppt.py,sha256=7AhS2hZtmMHOJQt1j5DQDDgrMwM-GX1HjPrmKDV2Bgs,23253
14
+ chatterer/examples/pw.py,sha256=FGmCQg5XFyVAczBF8mQcobJcvITKEOtbrXm4pyKvbAw,5138
15
+ chatterer/examples/snippet.py,sha256=JvR_xBV8skePCtIczz73EdjmiHzj_A-5HzS53j0bLI4,1973
16
+ chatterer/examples/transcribe.py,sha256=fBFuo442VEM7NbF9xN3ub3nAnYnQojrseN_kI049fsM,3894
17
+ chatterer/examples/upstage.py,sha256=lK2OOY6U4GGnDBbPHKaqwlh_0Vu-0RMb0M01M8dngRs,3219
18
+ chatterer/examples/web2md.py,sha256=zfemaE3KwfU8LHvWzJHX-knASpikBUUNzv6jTmfac1E,2740
18
19
  chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
19
- chatterer/strategies/atom_of_thoughts.py,sha256=pUhqt47YlzBIVNRh0UebeBwuJ0J94Ge6yZgXxrsiDPE,40884
20
+ chatterer/strategies/atom_of_thoughts.py,sha256=30XvnVKjty8Geo2z_n2-RWL_eEvo_AnK8sg8uVPQHOQ,41178
20
21
  chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
21
22
  chatterer/tools/__init__.py,sha256=m3PRK9H5vOhk-2gG9W2eg8CYBlEn-K9-eaulOu91bgo,1474
22
23
  chatterer/tools/caption_markdown_images.py,sha256=r4QajHYuL4mdyYQXP1vQcNmqKN8lxBf5y0VKELXILOI,15392
@@ -37,8 +38,8 @@ chatterer/utils/base64_image.py,sha256=m_qAT3ERBiq8D-H4H9Z7rLfL31_BiPmV_m4uQ5XRL
37
38
  chatterer/utils/bytesio.py,sha256=3MC2atOOFKo5YxuReo_y_t8Wem9p2Y1ahC5M2lGclwI,2618
38
39
  chatterer/utils/code_agent.py,sha256=7ka_WRI4TQmZ5H46mjY3hI6RO_pxw6pg3LAxjgW4AbM,10495
39
40
  chatterer/utils/imghdr.py,sha256=6JhJMXD4MZ0dQolT2VM87YrRYm3hPf3RTEWnP4lYRVc,3842
40
- chatterer-0.1.23.dist-info/METADATA,sha256=zCTgA4OAI2tSpNRiLwjCDPweTrW4oxzJnIXT7PA69Ck,11826
41
- chatterer-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
42
- chatterer-0.1.23.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
43
- chatterer-0.1.23.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
44
- chatterer-0.1.23.dist-info/RECORD,,
41
+ chatterer-0.1.24.dist-info/METADATA,sha256=mpTNGDkwWEK-9XdP52DGaVKQphtJ_p6Wmibq-eiq07g,11633
42
+ chatterer-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ chatterer-0.1.24.dist-info/entry_points.txt,sha256=IzGKhTnZ7G5V23SRmulmSsyt9HcaFH4lU4r3wR1zMsc,63
44
+ chatterer-0.1.24.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
45
+ chatterer-0.1.24.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ chatterer = chatterer.examples.__main__:main
@@ -1,156 +0,0 @@
1
- import json
2
- import logging
3
- import sys
4
- from pathlib import Path
5
-
6
- from spargear import BaseArguments, SubcommandSpec
7
-
8
- from chatterer import PlayWrightBot
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- # Define the default path location relative to this script file
14
- DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
15
-
16
-
17
- class ReadArgs(BaseArguments):
18
- """Arguments for the 'read' subcommand."""
19
-
20
- url: str
21
- """URL (potentially protected) to navigate to using the saved session."""
22
- jsonpath: Path = DEFAULT_JSON_PATH
23
- """Path to the session state JSON file to load."""
24
-
25
-
26
- class WriteArgs(BaseArguments):
27
- """Arguments for the 'write' subcommand."""
28
-
29
- url: str
30
- """URL to navigate to for manual login."""
31
- jsonpath: Path = DEFAULT_JSON_PATH
32
- """Path to save the session state JSON file."""
33
-
34
-
35
- class LoginWithPlaywrightArgs(BaseArguments):
36
- """
37
- A simple CLI tool for saving and using Playwright sessions via storage_state.
38
- Uses spargear for declarative argument parsing.
39
- """
40
-
41
- read: SubcommandSpec[ReadArgs] = SubcommandSpec(
42
- name="read",
43
- argument_class=ReadArgs,
44
- help="Use a saved session to view a protected page.",
45
- description="Loads session state from the specified JSON file and navigates to the URL.",
46
- )
47
- write: SubcommandSpec[WriteArgs] = SubcommandSpec(
48
- name="write",
49
- argument_class=WriteArgs,
50
- help="Save a new session by manually logging in.",
51
- description="Launches a browser to the specified URL. Log in manually, then press Enter to save session state.",
52
- )
53
-
54
- def run(self) -> None:
55
- """Parses arguments using spargear and executes the corresponding command."""
56
- try:
57
- if (read := self.read.argument_class).url:
58
- # Access attributes directly from the returned instance
59
- logger.info("Running READ command:")
60
- logger.info(f" URL: {read.url}")
61
- logger.info(f" JSON Path: {read.jsonpath}")
62
- read_session(url=read.url, jsonpath=read.jsonpath)
63
- elif (write := self.write.argument_class).url:
64
- # Access attributes directly from the returned instance
65
- logger.info("Running WRITE command:")
66
- logger.info(f" URL: {write.url}")
67
- logger.info(f" JSON Path: {write.jsonpath}")
68
- write_session(url=write.url, jsonpath=write.jsonpath)
69
- else:
70
- logger.error("No valid subcommand provided. Use 'read' or 'write'.")
71
- sys.exit(1)
72
-
73
- except SystemExit as e:
74
- # Handle cases like -h/--help or argparse errors that exit
75
- sys.exit(e.code)
76
- except Exception as e:
77
- logger.error(f"\nAn error occurred: {e}")
78
- # from traceback import print_exc # Uncomment for full traceback
79
- # print_exc() # Uncomment for full traceback
80
- sys.exit(1)
81
-
82
-
83
- def read_session(url: str, jsonpath: Path) -> None:
84
- """
85
- Loads the session state from the specified JSON file, then navigates
86
- to a protected_url that normally requires login. If the stored session
87
- is valid, it should open without re-entering credentials.
88
-
89
- Correction: Loads the JSON content into a dict first to satisfy type hints.
90
- """
91
- logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
92
-
93
- if not jsonpath.exists():
94
- logger.error(f"Session file not found at {jsonpath}")
95
- sys.exit(1)
96
-
97
- # Load the storage state from the JSON file into a dictionary
98
- logger.info(f"Reading storage state content from {jsonpath} ...")
99
- try:
100
- with open(jsonpath, "r", encoding="utf-8") as f:
101
- # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
102
- storage_state_dict = json.load(f)
103
- except json.JSONDecodeError:
104
- logger.error(f"Failed to decode JSON from {jsonpath}")
105
- sys.exit(1)
106
- except Exception as e:
107
- logger.error(f"Error reading file {jsonpath}: {e}")
108
- sys.exit(1)
109
-
110
- logger.info("Launching browser with loaded session state...")
111
- with PlayWrightBot(
112
- playwright_launch_options={"headless": False},
113
- # Pass the loaded dictionary, which should match the expected 'StorageState' type
114
- playwright_persistency_options={"storage_state": storage_state_dict},
115
- ) as bot:
116
- bot.get_page(url)
117
-
118
- logger.info("Press Enter in the console when you're done checking the protected page.")
119
- input(" >> Press Enter to exit: ")
120
-
121
- logger.info("Done! Browser is now closed.")
122
-
123
-
124
- def write_session(url: str, jsonpath: Path) -> None:
125
- """
126
- Launches a non-headless browser and navigates to the login_url.
127
- The user can manually log in, then press Enter in the console
128
- to store the current session state into a JSON file.
129
- """
130
- logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
131
-
132
- # Ensure jsonpath directory exists
133
- jsonpath.parent.mkdir(parents=True, exist_ok=True)
134
-
135
- with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
136
- bot.get_page(url)
137
-
138
- logger.info("After completing the login in the browser, press Enter here to save the session.")
139
- input(" >> Press Enter when ready: ")
140
-
141
- # get_sync_browser() returns the BrowserContext internally
142
- context = bot.get_sync_browser()
143
-
144
- # Save the current session (cookies, localStorage) to a JSON file
145
- logger.info(f"Saving storage state to {jsonpath} ...")
146
- context.storage_state(path=jsonpath) # Pass Path object directly
147
-
148
- logger.info("Done! Browser is now closed.")
149
-
150
-
151
- def main() -> None:
152
- LoginWithPlaywrightArgs().run()
153
-
154
-
155
- if __name__ == "__main__":
156
- main()
@@ -1,10 +0,0 @@
1
- [console_scripts]
2
- anything-to-markdown = chatterer.examples.anything_to_markdown:main
3
- get-code-snippets = chatterer.examples.get_code_snippets:main
4
- login-with-playwright = chatterer.examples.login_with_playwright:main
5
- make-ppt = chatterer.examples.make_ppt:main
6
- pdf-to-markdown = chatterer.examples.pdf_to_markdown:main
7
- pdf-to-text = chatterer.examples.pdf_to_text:main
8
- transcription-api = chatterer.examples.transcription_api:main
9
- upstage-parser = chatterer.examples.upstage_parser:main
10
- webpage-to-markdown = chatterer.examples.webpage_to_markdown:main