chatterer 0.1.19__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,36 +1,27 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
1
+ import logging
14
2
  from pathlib import Path
15
3
  from typing import Optional, TypedDict
16
4
 
17
5
  import openai
18
- from spargear import ArgumentSpec, BaseArguments
6
+ from spargear import BaseArguments
19
7
 
20
8
  from chatterer import anything_to_markdown
21
9
 
10
+ logger = logging.getLogger(__name__)
11
+
22
12
 
23
13
  class AnythingToMarkdownReturns(TypedDict):
24
- in_path: str
25
- out_path: Optional[str]
14
+ input: str
15
+ output: Optional[str]
26
16
  out_text: str
27
17
 
28
18
 
29
19
  class AnythingToMarkdownArguments(BaseArguments):
30
20
  """Command line arguments for converting various file types to markdown."""
31
21
 
32
- in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
33
- out_path: Optional[str] = None
22
+ input: str
23
+ """Input file to convert to markdown. Can be a file path or a URL."""
24
+ output: Optional[str] = None
34
25
  """Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
35
26
  model: Optional[str] = None
36
27
  """OpenAI Model to use for conversion"""
@@ -50,14 +41,13 @@ class AnythingToMarkdownArguments(BaseArguments):
50
41
  """Encoding for the output file."""
51
42
 
52
43
  def run(self) -> AnythingToMarkdownReturns:
53
- in_path = self.in_path.unwrap()
54
44
  if not self.prevent_save_file:
55
- if not self.out_path:
56
- out_path = Path(in_path).with_suffix(".md")
45
+ if not self.output:
46
+ output = Path(self.input).with_suffix(".md")
57
47
  else:
58
- out_path = Path(self.out_path)
48
+ output = Path(self.output)
59
49
  else:
60
- out_path = None
50
+ output = None
61
51
 
62
52
  if self.model:
63
53
  llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
@@ -67,22 +57,22 @@ class AnythingToMarkdownArguments(BaseArguments):
67
57
  llm_model = None
68
58
 
69
59
  text: str = anything_to_markdown(
70
- in_path,
60
+ self.input,
71
61
  llm_client=llm_client,
72
62
  llm_model=llm_model,
73
63
  style_map=self.style_map,
74
64
  exiftool_path=self.exiftool_path,
75
65
  docintel_endpoint=self.docintel_endpoint,
76
66
  )
77
- if out_path:
78
- out_path.parent.mkdir(parents=True, exist_ok=True)
79
- out_path.write_text(text, encoding=self.encoding)
80
- logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
67
+ if output:
68
+ output.parent.mkdir(parents=True, exist_ok=True)
69
+ output.write_text(text, encoding=self.encoding)
70
+ logger.info(f"Converted `{self.input}` to markdown and saved to `{output}`.")
81
71
  else:
82
- logger.info(f"Converted `{in_path}` to markdown.")
72
+ logger.info(f"Converted `{self.input}` to markdown.")
83
73
  return {
84
- "in_path": in_path,
85
- "out_path": str(out_path) if out_path is not None else None,
74
+ "input": self.input,
75
+ "output": str(output) if output is not None else None,
86
76
  "out_text": text,
87
77
  }
88
78
 
@@ -1,27 +1,19 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
1
+ import logging
14
2
  from pathlib import Path
15
3
  from typing import Optional
16
4
 
17
- from spargear import ArgumentSpec, BaseArguments
5
+ from spargear import BaseArguments
18
6
 
19
7
  from chatterer import CodeSnippets
20
8
 
9
+ logger = logging.getLogger(__name__)
10
+
21
11
 
22
12
  class GetCodeSnippetsArgs(BaseArguments):
23
- path_or_pkgname: ArgumentSpec[str] = ArgumentSpec(["path_or_pkgname"], help="Path to the package or file from which to extract code snippets.")
24
- out_path: Optional[str] = None
13
+ input: str
14
+ """Path to the package or file from which to extract code snippets."""
15
+ output: Optional[str] = None
16
+ """Output path for the extracted code snippets. If not provided, defaults to a file with the same name as the input."""
25
17
  ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
26
18
  """List of file patterns to ignore."""
27
19
  glob_patterns: list[str] = ["*.py"]
@@ -32,27 +24,26 @@ class GetCodeSnippetsArgs(BaseArguments):
32
24
  """Prevent saving the extracted code snippets to a file."""
33
25
 
34
26
  def run(self) -> CodeSnippets:
35
- path_or_pkgname = self.path_or_pkgname.unwrap()
36
27
  if not self.prevent_save_file:
37
- if not self.out_path:
38
- out_path = Path(__file__).with_suffix(".txt")
28
+ if not self.output:
29
+ output = Path(__file__).with_suffix(".txt")
39
30
  else:
40
- out_path = Path(self.out_path)
31
+ output = Path(self.output)
41
32
  else:
42
- out_path = None
33
+ output = None
43
34
 
44
35
  cs = CodeSnippets.from_path_or_pkgname(
45
- path_or_pkgname=path_or_pkgname,
36
+ path_or_pkgname=self.input,
46
37
  ban_file_patterns=self.ban_file_patterns,
47
38
  glob_patterns=self.glob_patterns,
48
39
  case_sensitive=self.case_sensitive,
49
40
  )
50
- if out_path is not None:
51
- out_path.parent.mkdir(parents=True, exist_ok=True)
52
- out_path.write_text(cs.snippets_text, encoding="utf-8")
53
- logger.info(f"Extracted code snippets from `{path_or_pkgname}` and saved to `{out_path}`.")
41
+ if output is not None:
42
+ output.parent.mkdir(parents=True, exist_ok=True)
43
+ output.write_text(cs.snippets_text, encoding="utf-8")
44
+ logger.info(f"Extracted code snippets from `{self.input}` and saved to `{output}`.")
54
45
  else:
55
- logger.info(f"Extracted code snippets from `{path_or_pkgname}`.")
46
+ logger.info(f"Extracted code snippets from `{self.input}`.")
56
47
  return cs
57
48
 
58
49
 
@@ -1,17 +1,5 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
1
  import json
2
+ import logging
15
3
  import sys
16
4
  from pathlib import Path
17
5
 
@@ -19,76 +7,8 @@ from spargear import BaseArguments, SubcommandSpec
19
7
 
20
8
  from chatterer import PlayWrightBot
21
9
 
10
+ logger = logging.getLogger(__name__)
22
11
 
23
- def read_session(url: str, jsonpath: Path) -> None:
24
- """
25
- Loads the session state from the specified JSON file, then navigates
26
- to a protected_url that normally requires login. If the stored session
27
- is valid, it should open without re-entering credentials.
28
-
29
- Correction: Loads the JSON content into a dict first to satisfy type hints.
30
- """
31
- logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
32
-
33
- if not jsonpath.exists():
34
- logger.error(f"Session file not found at {jsonpath}")
35
- sys.exit(1)
36
-
37
- # Load the storage state from the JSON file into a dictionary
38
- logger.info(f"Reading storage state content from {jsonpath} ...")
39
- try:
40
- with open(jsonpath, "r", encoding="utf-8") as f:
41
- # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
42
- storage_state_dict = json.load(f)
43
- except json.JSONDecodeError:
44
- logger.error(f"Failed to decode JSON from {jsonpath}")
45
- sys.exit(1)
46
- except Exception as e:
47
- logger.error(f"Error reading file {jsonpath}: {e}")
48
- sys.exit(1)
49
-
50
- logger.info("Launching browser with loaded session state...")
51
- with PlayWrightBot(
52
- playwright_launch_options={"headless": False},
53
- # Pass the loaded dictionary, which should match the expected 'StorageState' type
54
- playwright_persistency_options={"storage_state": storage_state_dict},
55
- ) as bot:
56
- bot.get_page(url)
57
-
58
- logger.info("Press Enter in the console when you're done checking the protected page.")
59
- input(" >> Press Enter to exit: ")
60
-
61
- logger.info("Done! Browser is now closed.")
62
-
63
-
64
- def write_session(url: str, jsonpath: Path) -> None:
65
- """
66
- Launches a non-headless browser and navigates to the login_url.
67
- The user can manually log in, then press Enter in the console
68
- to store the current session state into a JSON file.
69
- """
70
- logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
71
-
72
- # Ensure jsonpath directory exists
73
- jsonpath.parent.mkdir(parents=True, exist_ok=True)
74
-
75
- with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
76
- bot.get_page(url)
77
-
78
- logger.info("After completing the login in the browser, press Enter here to save the session.")
79
- input(" >> Press Enter when ready: ")
80
-
81
- # get_sync_browser() returns the BrowserContext internally
82
- context = bot.get_sync_browser()
83
-
84
- # Save the current session (cookies, localStorage) to a JSON file
85
- logger.info(f"Saving storage state to {jsonpath} ...")
86
- context.storage_state(path=jsonpath) # Pass Path object directly
87
-
88
- logger.info("Done! Browser is now closed.")
89
-
90
-
91
- # --- Spargear Declarative CLI Definition ---
92
12
 
93
13
  # Define the default path location relative to this script file
94
14
  DEFAULT_JSON_PATH = Path(__file__).resolve().parent / "session_state.json"
@@ -160,7 +80,72 @@ class LoginWithPlaywrightArgs(BaseArguments):
160
80
  sys.exit(1)
161
81
 
162
82
 
163
- # --- Main Execution Logic ---
83
+ def read_session(url: str, jsonpath: Path) -> None:
84
+ """
85
+ Loads the session state from the specified JSON file, then navigates
86
+ to a protected_url that normally requires login. If the stored session
87
+ is valid, it should open without re-entering credentials.
88
+
89
+ Correction: Loads the JSON content into a dict first to satisfy type hints.
90
+ """
91
+ logger.info(f"Loading session from {jsonpath} and navigating to {url} ...")
92
+
93
+ if not jsonpath.exists():
94
+ logger.error(f"Session file not found at {jsonpath}")
95
+ sys.exit(1)
96
+
97
+ # Load the storage state from the JSON file into a dictionary
98
+ logger.info(f"Reading storage state content from {jsonpath} ...")
99
+ try:
100
+ with open(jsonpath, "r", encoding="utf-8") as f:
101
+ # This dictionary should match the 'StorageState' type expected by Playwright/chatterer
102
+ storage_state_dict = json.load(f)
103
+ except json.JSONDecodeError:
104
+ logger.error(f"Failed to decode JSON from {jsonpath}")
105
+ sys.exit(1)
106
+ except Exception as e:
107
+ logger.error(f"Error reading file {jsonpath}: {e}")
108
+ sys.exit(1)
109
+
110
+ logger.info("Launching browser with loaded session state...")
111
+ with PlayWrightBot(
112
+ playwright_launch_options={"headless": False},
113
+ # Pass the loaded dictionary, which should match the expected 'StorageState' type
114
+ playwright_persistency_options={"storage_state": storage_state_dict},
115
+ ) as bot:
116
+ bot.get_page(url)
117
+
118
+ logger.info("Press Enter in the console when you're done checking the protected page.")
119
+ input(" >> Press Enter to exit: ")
120
+
121
+ logger.info("Done! Browser is now closed.")
122
+
123
+
124
+ def write_session(url: str, jsonpath: Path) -> None:
125
+ """
126
+ Launches a non-headless browser and navigates to the login_url.
127
+ The user can manually log in, then press Enter in the console
128
+ to store the current session state into a JSON file.
129
+ """
130
+ logger.info(f"Launching browser and navigating to {url} ... Please log in manually.")
131
+
132
+ # Ensure jsonpath directory exists
133
+ jsonpath.parent.mkdir(parents=True, exist_ok=True)
134
+
135
+ with PlayWrightBot(playwright_launch_options={"headless": False}) as bot:
136
+ bot.get_page(url)
137
+
138
+ logger.info("After completing the login in the browser, press Enter here to save the session.")
139
+ input(" >> Press Enter when ready: ")
140
+
141
+ # get_sync_browser() returns the BrowserContext internally
142
+ context = bot.get_sync_browser()
143
+
144
+ # Save the current session (cookies, localStorage) to a JSON file
145
+ logger.info(f"Saving storage state to {jsonpath} ...")
146
+ context.storage_state(path=jsonpath) # Pass Path object directly
147
+
148
+ logger.info("Done! Browser is now closed.")
164
149
 
165
150
 
166
151
  def main() -> None:
@@ -1,16 +1,3 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
1
  import re
15
2
  import sys
16
3
  from pathlib import Path
@@ -192,7 +179,9 @@ class MakePptArguments(BaseArguments):
192
179
  """Prompt for organizing slides into a presentation script"""
193
180
 
194
181
  # LLM Settings
195
- provider: str = "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
182
+ provider: str = (
183
+ "openai:gpt-4.1" # Example: "openai:gpt-4o", "anthropic:claude-3-haiku-20240307", "google:gemini-1.5-flash"
184
+ )
196
185
  """Name of the language model to use (provider:model_name)"""
197
186
 
198
187
  # Other settings
@@ -1,16 +1,4 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
1
+ import logging
14
2
  import sys
15
3
  from pathlib import Path
16
4
  from typing import Optional
@@ -19,46 +7,51 @@ from spargear import ArgumentSpec, BaseArguments
19
7
 
20
8
  from chatterer import Chatterer, PdfToMarkdown
21
9
 
10
+ logger = logging.getLogger(__name__)
11
+
22
12
 
23
13
  class PdfToMarkdownArgs(BaseArguments):
24
- in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Path to the input PDF file or a directory containing PDF files.")
25
- out_path: Optional[str] = None
14
+ input: str
15
+ """Input PDF file or directory containing PDF files to convert to markdown."""
16
+ output: Optional[str] = None
26
17
  """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
18
+ """Chatterer instance for communication."""
19
+ pages: Optional[str] = None
20
+ """Page indices to convert (e.g., '1,3,5-9')."""
21
+ recursive: bool = False
22
+ """If input is a directory, search for PDFs recursively."""
27
23
  chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
28
24
  ["--chatterer"],
29
- default=None,
25
+ default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
30
26
  help="Chatterer instance for communication.",
31
27
  type=Chatterer.from_provider,
32
- required=True,
33
28
  )
34
- pages: Optional[str] = None
35
- """Page indices to convert (e.g., '1,3,5-9')."""
36
- recursive: bool = False
37
- """If input is a directory, search for PDFs recursively."""
38
29
 
39
30
  def run(self) -> list[dict[str, str]]:
40
- in_path = Path(self.in_path.unwrap()).resolve()
31
+ input = Path(self.input).resolve()
41
32
  page_indices = parse_page_indices(self.pages) if self.pages else None
42
33
  pdf_files: list[Path] = []
43
34
  is_dir = False
44
- if in_path.is_file():
45
- if in_path.suffix.lower() != ".pdf":
35
+ if input.is_file():
36
+ if input.suffix.lower() != ".pdf":
46
37
  sys.exit(1)
47
- pdf_files.append(in_path)
48
- elif in_path.is_dir():
38
+ pdf_files.append(input)
39
+ elif input.is_dir():
49
40
  is_dir = True
50
41
  pattern = "*.pdf"
51
- pdf_files = sorted([f for f in (in_path.rglob(pattern) if self.recursive else in_path.glob(pattern)) if f.is_file()])
42
+ pdf_files = sorted([
43
+ f for f in (input.rglob(pattern) if self.recursive else input.glob(pattern)) if f.is_file()
44
+ ])
52
45
  if not pdf_files:
53
46
  sys.exit(0)
54
47
  else:
55
48
  sys.exit(1)
56
- if self.out_path:
57
- out_base = Path(self.out_path).resolve()
49
+ if self.output:
50
+ out_base = Path(self.output).resolve()
58
51
  elif is_dir:
59
- out_base = in_path
52
+ out_base = input
60
53
  else:
61
- out_base = in_path.with_suffix(".md")
54
+ out_base = input.with_suffix(".md")
62
55
 
63
56
  if is_dir:
64
57
  out_base.mkdir(parents=True, exist_ok=True)
@@ -68,11 +61,11 @@ class PdfToMarkdownArgs(BaseArguments):
68
61
  converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
69
62
  results: list[dict[str, str]] = []
70
63
  for pdf in pdf_files:
71
- out_path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
64
+ output = (out_base / (pdf.stem + ".md")) if is_dir else out_base
72
65
  md = converter.convert(str(pdf), page_indices)
73
- out_path.parent.mkdir(parents=True, exist_ok=True)
74
- out_path.write_text(md, encoding="utf-8")
75
- results.append({"input": pdf.as_posix(), "output": out_path.as_posix(), "result": md})
66
+ output.parent.mkdir(parents=True, exist_ok=True)
67
+ output.write_text(md, encoding="utf-8")
68
+ results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
76
69
  logger.info(f"Converted {len(pdf_files)} PDF(s) to markdown and saved to `{out_base}`.")
77
70
  return results
78
71
 
@@ -1,36 +1,30 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
1
+ import logging
14
2
  import sys
15
3
  from pathlib import Path
4
+ from typing import Optional
16
5
 
17
- from spargear import ArgumentSpec, BaseArguments
6
+ from spargear import BaseArguments
18
7
 
19
8
  from chatterer.tools.convert_to_text import pdf_to_text
20
9
 
10
+ logger = logging.getLogger(__name__)
11
+
21
12
 
22
13
  class PdfToTextArgs(BaseArguments):
23
- in_path: ArgumentSpec[Path] = ArgumentSpec(["in-path"], help="Path to the PDF file.")
24
- out_path: ArgumentSpec[Path] = ArgumentSpec(["--out-path"], default=None, help="Output file path.")
25
- pages: ArgumentSpec[str] = ArgumentSpec(["--pages"], default=None, help="Page indices to extract, e.g. '1,3,5-9'.")
14
+ input: Path
15
+ """Path to the PDF file to convert to text."""
16
+ output: Optional[Path]
17
+ """Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
18
+ pages: Optional[str] = None
19
+ """Comma-separated list of page indices to extract from the PDF. Supports ranges, e.g., '1,3,5-9'."""
26
20
 
27
21
  def run(self) -> None:
28
- input = self.in_path.unwrap().resolve()
29
- out = self.out_path.value or input.with_suffix(".txt")
22
+ input = self.input.resolve()
23
+ out = self.output or input.with_suffix(".txt")
30
24
  if not input.is_file():
31
25
  sys.exit(1)
32
26
  out.write_text(
33
- pdf_to_text(input, parse_page_indices(pages_arg) if (pages_arg := self.pages.value) else None),
27
+ pdf_to_text(path_or_file=input, page_indices=self.pages),
34
28
  encoding="utf-8",
35
29
  )
36
30
  logger.info(f"Extracted text from `{input}` to `{out}`")
@@ -2,51 +2,36 @@
2
2
 
3
3
  from io import BytesIO
4
4
  from pathlib import Path
5
- from typing import cast
5
+ from typing import Optional, cast
6
6
 
7
7
  from openai import OpenAI
8
8
  from pydub import AudioSegment
9
- from spargear import ArgumentSpec, BaseArguments
9
+ from spargear import BaseArguments
10
10
 
11
11
  # Maximum chunk length in seconds
12
12
  MAX_CHUNK_DURATION = 600
13
13
 
14
14
 
15
15
  class TranscriptionApiArguments(BaseArguments):
16
- in_path = ArgumentSpec(
17
- ["in-path"],
18
- type=Path,
19
- help="The audio file to transcribe.",
20
- )
21
- out_path = ArgumentSpec(
22
- ["--out-path"],
23
- type=Path,
24
- default=None,
25
- help="Path to save the transcription output.",
26
- )
27
- model: ArgumentSpec[str] = ArgumentSpec(
28
- ["--model"],
29
- default="gpt-4o-transcribe",
30
- help="The model to use for transcription.",
31
- )
32
- api_key: ArgumentSpec[str] = ArgumentSpec(
33
- ["--api-key"],
34
- default=None,
35
- help="The API key for authentication.",
36
- )
37
- base_url: ArgumentSpec[str] = ArgumentSpec(
38
- ["--base-url"],
39
- default="https://api.openai.com/v1",
40
- help="The base URL for the API.",
41
- )
16
+ input: Path
17
+ """The audio file to transcribe."""
18
+ output: Optional[Path] = None
19
+ """Path to save the transcription output."""
20
+ model: str = "gpt-4o-transcribe"
21
+ """The model to use for transcription."""
22
+ api_key: Optional[str] = None
23
+ """The API key for authentication."""
24
+ base_url: str = "https://api.openai.com/v1"
25
+ """The base URL for the API."""
26
+ prompt: str = "Transcribe whole text from audio."
27
+ """The prompt to use for transcription."""
42
28
 
43
29
  def run(self) -> None:
44
- audio_path = self.in_path.unwrap()
45
- model = self.model.unwrap()
30
+ model = self.model
46
31
 
47
- client = OpenAI(api_key=self.api_key.value, base_url=self.base_url.value)
32
+ client = OpenAI(api_key=self.api_key, base_url=self.base_url)
48
33
 
49
- audio = load_audio_segment(audio_path)
34
+ audio = load_audio_segment(self.input)
50
35
 
51
36
  segments = split_audio(audio, MAX_CHUNK_DURATION)
52
37
  print(f"[i] Audio duration: {len(audio) / 1000:.1f}s; splitting into {len(segments)} segment(s)")
@@ -54,10 +39,10 @@ class TranscriptionApiArguments(BaseArguments):
54
39
  transcripts: list[str] = []
55
40
  for idx, seg in enumerate(segments, start=1):
56
41
  print(f"[i] Transcribing segment {idx}/{len(segments)}...")
57
- transcripts.append(transcribe_segment(seg, client, model))
42
+ transcripts.append(transcribe_segment(seg, client, model, self.prompt))
58
43
 
59
44
  full_transcript = "\n\n".join(transcripts)
60
- output_path: Path = self.out_path.value or audio_path.with_suffix(".txt")
45
+ output_path: Path = self.output or self.input.with_suffix(".txt")
61
46
  output_path.write_text(full_transcript, encoding="utf-8")
62
47
  print(f"[✓] Transcription saved to: {output_path}")
63
48
 
@@ -94,7 +79,7 @@ def split_audio(audio: AudioSegment, max_duration_s: int) -> list[AudioSegment]:
94
79
  return segments
95
80
 
96
81
 
97
- def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str:
82
+ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str, prompt: str) -> str:
98
83
  """
99
84
  Transcribe a single AudioSegment chunk and return its text.
100
85
  """
@@ -104,7 +89,7 @@ def transcribe_segment(segment: AudioSegment, client: OpenAI, model: str) -> str
104
89
  mp3_bytes = buffer.read()
105
90
  response = client.audio.transcriptions.create(
106
91
  model=model,
107
- prompt="Transcribe whole text from audio.",
92
+ prompt=prompt,
108
93
  file=("audio.mp3", mp3_bytes),
109
94
  response_format="text",
110
95
  stream=True,
@@ -1,17 +1,6 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
1
+ import logging
14
2
  from pathlib import Path
3
+ from typing import Optional
15
4
 
16
5
  from langchain_core.documents.base import Blob
17
6
  from spargear import ArgumentSpec, BaseArguments
@@ -27,28 +16,34 @@ from chatterer.tools.upstage_document_parser import (
27
16
  SplitType,
28
17
  )
29
18
 
19
+ logger = logging.getLogger(__name__)
20
+
30
21
 
31
22
  class UpstageParserArguments(BaseArguments):
32
- in_path: ArgumentSpec[Path] = ArgumentSpec(["in-path"], help="Path to the input file.")
33
- out_path: ArgumentSpec[Path] = ArgumentSpec(["--out-path"], default=None, help="Output file path.")
34
- api_key: ArgumentSpec[str] = ArgumentSpec(["--api-key"], default=None, help="API key for the Upstage API.")
35
- base_url: ArgumentSpec[str] = ArgumentSpec(["--base-url"], default=DOCUMENT_PARSE_BASE_URL, help="Base URL for the Upstage API.")
36
- model: ArgumentSpec[str] = ArgumentSpec(["--model"], default=DOCUMENT_PARSE_DEFAULT_MODEL, help="Model to use for parsing.")
37
- split: ArgumentSpec[SplitType] = ArgumentSpec(["--split"], default="none", help="Split type for parsing.")
38
- ocr: ArgumentSpec[OCR] = ArgumentSpec(["--ocr"], default="auto", help="OCR type for parsing.")
39
- output_format: ArgumentSpec[OutputFormat] = ArgumentSpec(["--output-format"], default="markdown", help="Output format.")
40
- coordinates: ArgumentSpec[bool] = ArgumentSpec(["--coordinates"], action="store_true", help="Include coordinates.")
41
- base64_encoding: ArgumentSpec[list[Category]] = ArgumentSpec(["--base64-encoding"], default=["figure"], help="Base64 encoding for specific categories.")
42
- image_description_instruction: ArgumentSpec[str] = ArgumentSpec(
43
- ["--image-description-instruction"],
44
- default="Describe the image in detail.",
45
- help="Instruction for image description.",
46
- )
47
- image_dir: ArgumentSpec[str] = ArgumentSpec(
48
- ["--image-dir"],
49
- default=DEFAULT_IMAGE_DIR,
50
- help="Directory for image paths.",
51
- )
23
+ input: Path
24
+ """Input file to parse. Can be a PDF, image, or other supported formats."""
25
+ output: Optional[Path] = None
26
+ """Output file path for the parsed content. Defaults to input file with .md suffix if not provided."""
27
+ api_key: Optional[str] = None
28
+ """API key for the Upstage API."""
29
+ base_url: str = DOCUMENT_PARSE_BASE_URL
30
+ """Base URL for the Upstage API."""
31
+ model: str = DOCUMENT_PARSE_DEFAULT_MODEL
32
+ """Model to use for parsing."""
33
+ split: SplitType = "none"
34
+ """Split type for the parsed content."""
35
+ ocr: OCR = "auto"
36
+ """OCR type for parsing."""
37
+ output_format: OutputFormat = "markdown"
38
+ """Output format for the parsed content."""
39
+ coordinates: bool = False
40
+ """Whether to include coordinates in the output."""
41
+ base64_encoding: list[Category] = ["figure"]
42
+ """Base64 encoding for specific categories in the parsed content."""
43
+ image_description_instruction: str = "Describe the image in detail."
44
+ """Instruction for generating image descriptions."""
45
+ image_dir: str = DEFAULT_IMAGE_DIR
46
+ """Directory to save images extracted from the document."""
52
47
  chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
53
48
  ["--chatterer"],
54
49
  default=None,
@@ -57,26 +52,25 @@ class UpstageParserArguments(BaseArguments):
57
52
  )
58
53
 
59
54
  def run(self) -> None:
60
- input = UpstageParserArguments.in_path.unwrap().resolve()
61
- out = UpstageParserArguments.out_path.value or input.with_suffix(".md")
55
+ input = UpstageParserArguments.input.resolve()
56
+ out = UpstageParserArguments.output or input.with_suffix(".md")
62
57
 
63
58
  parser = UpstageDocumentParseParser(
64
- api_key=UpstageParserArguments.api_key.value,
65
- base_url=UpstageParserArguments.base_url.unwrap(),
66
- model=UpstageParserArguments.model.unwrap(),
67
- split=UpstageParserArguments.split.unwrap(),
68
- ocr=UpstageParserArguments.ocr.unwrap(),
69
- output_format=UpstageParserArguments.output_format.unwrap(),
70
- coordinates=UpstageParserArguments.coordinates.unwrap(),
71
- base64_encoding=UpstageParserArguments.base64_encoding.unwrap(),
72
- image_description_instruction=UpstageParserArguments.image_description_instruction.unwrap(),
73
- image_dir=UpstageParserArguments.image_dir.value,
59
+ api_key=UpstageParserArguments.api_key,
60
+ base_url=UpstageParserArguments.base_url,
61
+ model=UpstageParserArguments.model,
62
+ split=UpstageParserArguments.split,
63
+ ocr=UpstageParserArguments.ocr,
64
+ output_format=UpstageParserArguments.output_format,
65
+ coordinates=UpstageParserArguments.coordinates,
66
+ base64_encoding=UpstageParserArguments.base64_encoding,
67
+ image_description_instruction=UpstageParserArguments.image_description_instruction,
68
+ image_dir=UpstageParserArguments.image_dir,
74
69
  chatterer=UpstageParserArguments.chatterer.value,
75
70
  )
76
-
77
71
  docs = parser.parse(Blob.from_path(input)) # pyright: ignore[reportUnknownMemberType]
78
72
 
79
- if UpstageParserArguments.image_dir.value:
73
+ if UpstageParserArguments.image_dir:
80
74
  for path, image in parser.image_data.items():
81
75
  (path := Path(path)).parent.mkdir(parents=True, exist_ok=True)
82
76
  path.write_bytes(image)
@@ -1,16 +1,3 @@
1
- def resolve_import_path_and_get_logger():
2
- # ruff: noqa: E402
3
- import logging
4
- import sys
5
-
6
- if __name__ == "__main__" and "." not in sys.path:
7
- sys.path.append(".")
8
-
9
- logger = logging.getLogger(__name__)
10
- return logger
11
-
12
-
13
- logger = resolve_import_path_and_get_logger()
14
1
  from pathlib import Path
15
2
  from typing import Literal
16
3
 
@@ -20,49 +7,53 @@ from chatterer import Chatterer, MarkdownLink, PlayWrightBot
20
7
 
21
8
 
22
9
  class WebpageToMarkdownArgs(BaseArguments):
23
- url: ArgumentSpec[str] = ArgumentSpec(["url"], help="The URL to crawl.")
24
- out_path: str = Path(__file__).with_suffix(".md").as_posix()
10
+ url: str
11
+ """The URL to crawl."""
12
+ output: str = Path(__file__).with_suffix(".md").as_posix()
25
13
  """The output file path for the markdown file."""
26
14
  chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
27
- ["--llm"],
28
- default=None,
29
- type=Chatterer.from_provider,
15
+ ["--chatterer"],
30
16
  help="The Chatterer backend and model to use for filtering the markdown.",
17
+ type=Chatterer.from_provider,
31
18
  )
32
19
  engine: Literal["firefox", "chromium", "webkit"] = "firefox"
33
20
  """The browser engine to use."""
34
21
 
35
22
  def run(self) -> None:
36
23
  chatterer = self.chatterer.value
37
- url: str = self.url.unwrap().strip()
38
- out_path: Path = Path(self.out_path).resolve()
24
+ url: str = self.url.strip()
25
+ output: Path = Path(self.output).resolve()
39
26
  with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
40
27
  md = bot.url_to_md(url)
41
- out_path.write_text(md, encoding="utf-8")
28
+ output.write_text(md, encoding="utf-8")
42
29
  if chatterer is not None:
43
30
  md_llm = bot.url_to_md_with_llm(url.strip())
44
- out_path.write_text(md_llm, encoding="utf-8")
31
+ output.write_text(md_llm, encoding="utf-8")
45
32
  links = MarkdownLink.from_markdown(md, referer_url=url)
46
33
  for link in links:
47
34
  if link.type == "link":
48
- print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
35
+ print(
36
+ f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
37
+ )
49
38
  elif link.type == "image":
50
39
  print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
51
40
 
52
41
  async def arun(self) -> None:
53
42
  chatterer = self.chatterer.value
54
- url: str = self.url.unwrap().strip()
55
- out_path: Path = Path(self.out_path).resolve()
43
+ url: str = self.url.strip()
44
+ output: Path = Path(self.output).resolve()
56
45
  async with PlayWrightBot(chatterer=chatterer, engine=self.engine) as bot:
57
46
  md = await bot.aurl_to_md(url)
58
- out_path.write_text(md, encoding="utf-8")
47
+ output.write_text(md, encoding="utf-8")
59
48
  if chatterer is not None:
60
49
  md_llm = await bot.aurl_to_md_with_llm(url.strip())
61
- out_path.write_text(md_llm, encoding="utf-8")
50
+ output.write_text(md_llm, encoding="utf-8")
62
51
  links = MarkdownLink.from_markdown(md, referer_url=url)
63
52
  for link in links:
64
53
  if link.type == "link":
65
- print(f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})")
54
+ print(
55
+ f"- [{truncate_string(link.url)}] {truncate_string(link.inline_text)} ({truncate_string(link.inline_title)})"
56
+ )
66
57
  elif link.type == "image":
67
58
  print(f"- ![{truncate_string(link.url)}] ({truncate_string(link.inline_text)})")
68
59
 
@@ -379,7 +379,7 @@ class AoTPipeline:
379
379
  chatterer: Chatterer
380
380
  max_depth: int = 2
381
381
  max_retries: int = 2
382
- steps_history: list[StepRecord] = field(default_factory=list)
382
+ steps_history: list[StepRecord] = field(default_factory=list[StepRecord])
383
383
  prompter: AoTPrompter = field(default_factory=AoTPrompter)
384
384
 
385
385
  # 4.1) Utility for calling the LLM with Pydantic parsing
@@ -4,7 +4,8 @@ import logging
4
4
  import re
5
5
  from contextlib import contextmanager
6
6
  from dataclasses import dataclass
7
- from typing import TYPE_CHECKING, Callable, Iterable, List, Literal, Optional, Union
7
+ from types import EllipsisType
8
+ from typing import TYPE_CHECKING, Callable, Iterable, List, Literal, Optional
8
9
 
9
10
  from ..language_model import Chatterer, HumanMessage
10
11
  from ..utils.base64_image import Base64Image
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
17
18
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
18
19
  logger = logging.getLogger(__name__)
19
20
  MARKDOWN_PATTERN: re.Pattern[str] = re.compile(r"```(?:markdown\s*\n)?(.*?)```", re.DOTALL)
21
+ PageIndexType = Iterable[int | tuple[int | EllipsisType, int | EllipsisType]] | int | str
20
22
 
21
23
 
22
24
  @dataclass
@@ -107,8 +109,8 @@ class PdfToMarkdown:
107
109
 
108
110
  def convert(
109
111
  self,
110
- pdf_input: Union[str, "Document"],
111
- page_indices: Optional[Union[Iterable[int], int]] = None,
112
+ pdf_input: "Document | PathOrReadable",
113
+ page_indices: Optional[PageIndexType] = None,
112
114
  progress_callback: Optional[Callable[[int, int], None]] = None,
113
115
  ) -> str:
114
116
  """
@@ -123,7 +125,9 @@ class PdfToMarkdown:
123
125
  A single string containing the concatenated Markdown output for the processed pages.
124
126
  """
125
127
  with open_pdf(pdf_input) as doc:
126
- target_page_indices = list(_get_page_indices(page_indices, len(doc)))
128
+ target_page_indices = list(
129
+ _get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True)
130
+ )
127
131
  total_pages_to_process = len(target_page_indices)
128
132
  if total_pages_to_process == 0:
129
133
  logger.warning("No pages selected for processing.")
@@ -232,7 +236,7 @@ def render_pdf_as_image(
232
236
 
233
237
  images_bytes: dict[int, bytes] = {}
234
238
  matrix = Matrix(zoom, zoom) # Control output resolution
235
- for page_idx in _get_page_indices(page_indices, len(doc)):
239
+ for page_idx in _get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True):
236
240
  img_bytes = bytes(
237
241
  get_pixmap(
238
242
  page=doc[page_idx],
@@ -243,10 +247,7 @@ def render_pdf_as_image(
243
247
  return images_bytes
244
248
 
245
249
 
246
- def extract_text_from_pdf(
247
- doc: "Document",
248
- page_indices: Iterable[int] | int | None = None,
249
- ) -> dict[int, str]:
250
+ def extract_text_from_pdf(doc: "Document", page_indices: Optional[PageIndexType] = None) -> dict[int, str]:
250
251
  """Convert a PDF file to plain text.
251
252
 
252
253
  Extracts text from each page of a PDF file and formats it with page markers.
@@ -261,7 +262,11 @@ def extract_text_from_pdf(
261
262
  """
262
263
  return {
263
264
  page_idx: doc[page_idx].get_textpage().extractText().strip() # pyright: ignore[reportUnknownMemberType]
264
- for page_idx in _get_page_indices(page_indices, len(doc))
265
+ for page_idx in _get_page_indices(
266
+ page_indices=page_indices,
267
+ max_doc_pages=len(doc),
268
+ is_input_zero_based=True,
269
+ )
265
270
  }
266
271
 
267
272
 
@@ -292,11 +297,97 @@ def open_pdf(pdf_input: PathOrReadable | Document):
292
297
  doc.close()
293
298
 
294
299
 
295
- def _get_page_indices(page_indices: Iterable[int] | int | None, max_doc_pages: int) -> Iterable[int]:
300
+ def _get_page_indices(
301
+ page_indices: Optional[PageIndexType], max_doc_pages: int, is_input_zero_based: bool
302
+ ) -> list[int]:
296
303
  """Helper function to handle page indices for PDF conversion."""
304
+
305
+ def _to_zero_based_int(idx: int) -> int:
306
+ """Convert a 1-based index to a 0-based index if necessary."""
307
+ if is_input_zero_based:
308
+ return idx
309
+ else:
310
+ if idx < 1 or idx > max_doc_pages:
311
+ raise ValueError(f"Index {idx} is out of bounds for document with {max_doc_pages} pages (1-based).")
312
+ return idx - 1
313
+
297
314
  if page_indices is None:
298
- return range(max_doc_pages)
315
+ return list(range(max_doc_pages)) # Convert all pages
299
316
  elif isinstance(page_indices, int):
300
- return [page_indices]
317
+ # Handle single integer input for page index
318
+ return [_to_zero_based_int(page_indices)]
319
+ elif isinstance(page_indices, str):
320
+ # Handle string input for page indices
321
+ return _interpret_index_string(
322
+ index_str=page_indices, max_doc_pages=max_doc_pages, is_input_zero_based=is_input_zero_based
323
+ )
301
324
  else:
302
- return [i for i in page_indices if 0 <= i < max_doc_pages]
325
+ # Handle iterable input for page indices
326
+ indices: set[int] = set()
327
+ for idx in page_indices:
328
+ if isinstance(idx, int):
329
+ indices.add(_to_zero_based_int(idx))
330
+ else:
331
+ start, end = idx
332
+ if isinstance(start, EllipsisType):
333
+ start = 0
334
+ else:
335
+ start = _to_zero_based_int(start)
336
+
337
+ if isinstance(end, EllipsisType):
338
+ end = max_doc_pages - 1
339
+ else:
340
+ end = _to_zero_based_int(end)
341
+
342
+ if start > end:
343
+ raise ValueError(
344
+ f"Invalid range: {start} - {end}. Start index must be less than or equal to end index."
345
+ )
346
+ indices.update(range(start, end + 1))
347
+
348
+ return sorted(indices) # Return sorted list of indices
349
+
350
+
351
+ def _interpret_index_string(index_str: str, max_doc_pages: int, is_input_zero_based: bool) -> list[int]:
352
+ """Interpret a string of comma-separated indices and ranges."""
353
+
354
+ def _to_zero_based_int(idx_str: str) -> int:
355
+ i = int(idx_str)
356
+ if is_input_zero_based:
357
+ if i < 0 or i >= max_doc_pages:
358
+ raise ValueError(f"Index {i} is out of bounds for document with {max_doc_pages} pages.")
359
+ return i
360
+ else:
361
+ if i < 1 or i > max_doc_pages:
362
+ raise ValueError(f"Index {i} is out of bounds for document with {max_doc_pages} pages (1-based).")
363
+ return i - 1 # Convert to zero-based index
364
+
365
+ indices: set[int] = set()
366
+ for part in index_str.split(","):
367
+ part: str = part.strip()
368
+ count_dash: int = part.count("-")
369
+ if count_dash == 0:
370
+ indices.add(_to_zero_based_int(part))
371
+ elif count_dash == 1:
372
+ idx_dash: int = part.index("-")
373
+ start = part[:idx_dash].strip()
374
+ end = part[idx_dash + 1 :].strip()
375
+ if not start:
376
+ start = _to_zero_based_int("0") # Default to 0 if no start index is provided
377
+ else:
378
+ start = _to_zero_based_int(start)
379
+
380
+ if not end:
381
+ end = _to_zero_based_int(str(max_doc_pages - 1)) # Default to last page if no end index is provided
382
+ else:
383
+ end = _to_zero_based_int(end)
384
+
385
+ if start > end:
386
+ raise ValueError(
387
+ f"Invalid range: {start} - {end}. Start index must be less than or equal to end index."
388
+ )
389
+ indices.update(range(start, end + 1))
390
+ else:
391
+ raise ValueError(f"Invalid page index format: '{part}'. Expected format is '1,2,3' or '1-3'.")
392
+
393
+ return sorted(indices) # Return sorted list of indices, ensuring no duplicates
@@ -8,7 +8,6 @@ from pathlib import Path
8
8
  from typing import (
9
9
  TYPE_CHECKING,
10
10
  Callable,
11
- Iterable,
12
11
  NamedTuple,
13
12
  NotRequired,
14
13
  Optional,
@@ -20,7 +19,7 @@ from typing import (
20
19
 
21
20
  from ..common_types.io import PathOrReadable
22
21
  from ..utils.bytesio import read_bytes_stream
23
- from .convert_pdf_to_markdown import extract_text_from_pdf
22
+ from .convert_pdf_to_markdown import PageIndexType, extract_text_from_pdf
24
23
 
25
24
  if TYPE_CHECKING:
26
25
  from bs4 import Tag
@@ -222,7 +221,7 @@ def html_to_markdown(html: str, options: Optional[HtmlToMarkdownOptions]) -> str
222
221
  return str(markdownify(html, **(options or {}))) # pyright: ignore[reportUnknownArgumentType]
223
222
 
224
223
 
225
- def pdf_to_text(path_or_file: PathOrReadable, page_indices: Iterable[int] | int | None = None) -> str:
224
+ def pdf_to_text(path_or_file: PathOrReadable, page_indices: Optional[PageIndexType] = None) -> str:
226
225
  """
227
226
  Convert a PDF file to plain text.
228
227
 
@@ -248,7 +247,7 @@ def pdf_to_text(path_or_file: PathOrReadable, page_indices: Iterable[int] | int
248
247
  with Document(stream=stream.read()) as doc:
249
248
  return "\n".join(
250
249
  f"<!-- Page {page_no} -->\n{text}\n"
251
- for page_no, text in extract_text_from_pdf(doc, page_indices).items()
250
+ for page_no, text in extract_text_from_pdf(doc=doc, page_indices=page_indices).items()
252
251
  )
253
252
 
254
253
 
@@ -67,7 +67,7 @@ class Coordinate(BaseModel):
67
67
  class Element(BaseModel):
68
68
  category: Category
69
69
  content: Content
70
- coordinates: list[Coordinate] = Field(default_factory=list)
70
+ coordinates: list[Coordinate] = Field(default_factory=list[Coordinate])
71
71
  base64_encoding: str = ""
72
72
  id: int
73
73
  page: int
@@ -701,5 +701,5 @@ def _get_metadata_from_document(doc: Document) -> dict[object, object]:
701
701
  Helper function to extract metadata from a Document object.
702
702
  This is a placeholder and should be adjusted based on actual metadata structure.
703
703
  """
704
- metadata: dict[object, object] = doc.metadata # pyright: ignore[reportUnknownMemberType]
704
+ metadata: dict[object, object] = doc.metadata # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
705
705
  return metadata
@@ -185,7 +185,7 @@ def insert_callables_into_global(
185
185
  repl_tool.globals = {} # Or handle appropriately
186
186
 
187
187
  # Safely update globals
188
- current_globals: dict[object, object] = repl_tool.globals # pyright: ignore[reportUnknownMemberType]
188
+ current_globals: dict[object, object] = repl_tool.globals # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
189
189
  for fsig in function_signatures:
190
190
  current_globals[fsig.name] = fsig.callable
191
191
  # No need to reassign if globals is mutable (dict)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.19
3
+ Version: 0.1.20
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -11,7 +11,7 @@ Requires-Dist: pillow>=11.1.0
11
11
  Requires-Dist: regex>=2024.11.6
12
12
  Requires-Dist: rich>=13.9.4
13
13
  Requires-Dist: colorama>=0.4.6
14
- Requires-Dist: spargear>=0.1.6
14
+ Requires-Dist: spargear>=0.2.0
15
15
  Provides-Extra: dev
16
16
  Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
17
17
  Requires-Dist: ipykernel>=6.29.5; extra == "dev"
@@ -6,23 +6,23 @@ chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  chatterer/common_types/__init__.py,sha256=jfS6m5UANSvGjzQ_nzYDpryn5uZqNb06-4xCsQ2C_lw,376
7
7
  chatterer/common_types/io.py,sha256=fetiyi1suZ3NF2mj5k5KDLJLGKS1n4J-5UmH7JN36g8,817
8
8
  chatterer/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- chatterer/examples/anything_to_markdown.py,sha256=fyqUfzQWlUMyPEAmRyvvoSnNGVOjhPS6AGWru0wIpuo,3066
10
- chatterer/examples/get_code_snippets.py,sha256=L6hilONtNItWlX4lHhWJNEJpA9yviDSeJlWwvGCfMmk,2198
11
- chatterer/examples/login_with_playwright.py,sha256=d_EJQYGhNdAEYSmRiv_RlyqpV-sCc_6_VGd5br7prTg,6322
12
- chatterer/examples/make_ppt.py,sha256=62I0DTyOuxirgHnO5WojFK7KQjoYKYYICgGTxgszL98,23565
13
- chatterer/examples/pdf_to_markdown.py,sha256=cCO2mXLXLfXLPtiVbQl1g4VI5Qklti8Udz8eNtL_srE,3670
14
- chatterer/examples/pdf_to_text.py,sha256=K8CmZ-E7SbUtdjutBMk80qWZXo21CcFrqvTf6pUTN5c,1847
15
- chatterer/examples/transcription_api.py,sha256=mj8kQKYL23ayYWzaO_THQtPtMLsU0pqGEVOiZb1nckQ,4255
16
- chatterer/examples/upstage_parser.py,sha256=UtGKt9UNqXqIoJlKpg6q1h7DI7LDujFqyKXnH0FsZVw,4236
17
- chatterer/examples/webpage_to_markdown.py,sha256=ZBmZ0AjPnETBpXn_RScUSLF6PGYJpdcaZzXbp8Zfack,3143
9
+ chatterer/examples/anything_to_markdown.py,sha256=4O9ze7AIHcwEzvVmm5JMMKo_rVSFwhPL8MVHtfMLJ5Y,2734
10
+ chatterer/examples/get_code_snippets.py,sha256=pz05JjhKaWAknVKlk1ftEEzpSG4-sqD9oa_gyIQoCAs,1911
11
+ chatterer/examples/login_with_playwright.py,sha256=EhvJLaH5TD7bmDi12uP8YLd0fRhdjR-oyIkBHLi1Jjs,5988
12
+ chatterer/examples/make_ppt.py,sha256=vsT_iL_jS2ami5VYrReLMQcD576FfZUH7913F7_As0A,23278
13
+ chatterer/examples/pdf_to_markdown.py,sha256=Y83GfpmMApSvwbgNCw8CgFLeunYvZP0RCrgkvcXatGU,3479
14
+ chatterer/examples/pdf_to_text.py,sha256=JhM_meogmvDmQZ31NqAgYd5Wx_WWKnt20V79g-uVEdo,1581
15
+ chatterer/examples/transcription_api.py,sha256=WUs12qHH4616eLMQDHOiyVGxaXstTpgeE47djYyli6c,3897
16
+ chatterer/examples/upstage_parser.py,sha256=TrfeSIiF0xklhFCknop22TIOVibI4CJ_UKj5-lD8c8E,3487
17
+ chatterer/examples/webpage_to_markdown.py,sha256=DnZfQ-trXBiOiszA2tMlgadgKH-ObTi6l4gGloT-cQw,2846
18
18
  chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
19
- chatterer/strategies/atom_of_thoughts.py,sha256=CygOCLu5vLk-fzY9O-iE3qLShfjD7iY40ks9jH4ULBM,40872
19
+ chatterer/strategies/atom_of_thoughts.py,sha256=pUhqt47YlzBIVNRh0UebeBwuJ0J94Ge6yZgXxrsiDPE,40884
20
20
  chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
21
21
  chatterer/tools/__init__.py,sha256=m3PRK9H5vOhk-2gG9W2eg8CYBlEn-K9-eaulOu91bgo,1474
22
22
  chatterer/tools/caption_markdown_images.py,sha256=r4QajHYuL4mdyYQXP1vQcNmqKN8lxBf5y0VKELXILOI,15392
23
- chatterer/tools/convert_pdf_to_markdown.py,sha256=8VEnZiaZSDq5k5BGaJEaSaJpJrAxSeL-gc7s38Vf9Fg,14990
24
- chatterer/tools/convert_to_text.py,sha256=IWtSHBh8-bvEZekk9Vli8yK4Ufa_VezN9mpfKCUWvys,15443
25
- chatterer/tools/upstage_document_parser.py,sha256=Bn6dGwgCsCOvhA5GfTWcfJ9unKqyuXNMMwoqZLwYLYU,33103
23
+ chatterer/tools/convert_pdf_to_markdown.py,sha256=Q5ln-_av2eor0A2LkQG7-IgyQKJ79wwrSOvv5Jncfso,18901
24
+ chatterer/tools/convert_to_text.py,sha256=WHQ0Xj4Ri_jYbFjzTx3mjmvJ9U8bAv4wGaKEVC88Nlk,15457
25
+ chatterer/tools/upstage_document_parser.py,sha256=CXslVYAHDK8EV8jtUAUWzf8rxU4qilSnW8_dhAxHOE8,33142
26
26
  chatterer/tools/webpage_to_markdown.py,sha256=ADH4sqM6iquJR7HU6umMQ5qO7EvcbNutuchXDpAcxAo,31961
27
27
  chatterer/tools/youtube.py,sha256=Hl2MMXJwwZ-i6_YAq0zh0rN4LHpYOb1Rt88P1gMjlLE,6081
28
28
  chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
@@ -35,10 +35,10 @@ chatterer/tools/citation_chunking/utils.py,sha256=M4pH2-UIE1VLzQLXDqjEe4L3Xcy0e0
35
35
  chatterer/utils/__init__.py,sha256=2v-lB2dqHgBlGcyaKKHc_hcyeH_AVoOddpr0STF7YAw,341
36
36
  chatterer/utils/base64_image.py,sha256=m_qAT3ERBiq8D-H4H9Z7rLfL31_BiPmV_m4uQ5XRLs0,11124
37
37
  chatterer/utils/bytesio.py,sha256=3MC2atOOFKo5YxuReo_y_t8Wem9p2Y1ahC5M2lGclwI,2618
38
- chatterer/utils/code_agent.py,sha256=A0IIgUAvW3lWMdJa8OZNKqmwdd5uuSvco-TxAN5-cek,10468
38
+ chatterer/utils/code_agent.py,sha256=7ka_WRI4TQmZ5H46mjY3hI6RO_pxw6pg3LAxjgW4AbM,10495
39
39
  chatterer/utils/imghdr.py,sha256=6JhJMXD4MZ0dQolT2VM87YrRYm3hPf3RTEWnP4lYRVc,3842
40
- chatterer-0.1.19.dist-info/METADATA,sha256=F_g5KhtlpDxZqeMlDi2Wewt_fBPSJgC_gKPMlIJeC50,11826
41
- chatterer-0.1.19.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
42
- chatterer-0.1.19.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
43
- chatterer-0.1.19.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
44
- chatterer-0.1.19.dist-info/RECORD,,
40
+ chatterer-0.1.20.dist-info/METADATA,sha256=9EIDlz3bfnlo0YfskNaooTj0UgCTyq3UHwBjcul1ijs,11826
41
+ chatterer-0.1.20.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
42
+ chatterer-0.1.20.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
43
+ chatterer-0.1.20.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
44
+ chatterer-0.1.20.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5