chatterer 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,76 +1,337 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PDF to Markdown Converter CLI
4
+
5
+ A command-line tool for converting PDF documents to Markdown using multimodal LLMs.
6
+ Supports both sequential and parallel processing modes with async capabilities.
7
+ """
8
+
9
+ import asyncio
1
10
  import logging
2
11
  import sys
12
+ import time
3
13
  from pathlib import Path
4
- from typing import Optional
14
+ from typing import List, Literal, Optional, TypedDict
5
15
 
6
16
  from spargear import ArgumentSpec, BaseArguments
7
17
 
8
- from chatterer import Chatterer, PdfToMarkdown
18
+ from chatterer import Chatterer
19
+ from chatterer.tools.convert_pdf_to_markdown import PdfToMarkdown
20
+
9
21
 
22
class ConversionResult(TypedDict, total=False):
    """Type definition for conversion results.

    Declared with ``total=False``, so every key is optional: successful
    conversions carry the timing and size keys, failed ones carry ``error``.
    """

    # POSIX-style path of the source PDF.
    input: str
    # POSIX-style path of the written Markdown file ("" when conversion failed).
    output: str
    # Generated Markdown text ("" when conversion failed).
    result: str
    # Seconds spent converting this PDF (set on success only).
    processing_time: float
    # Length of the generated Markdown in characters (set on success only).
    characters: int
    # Stringified exception (set on failure only).
    error: str
31
+
32
+
33
# Configure the root logger at import time: INFO level, with a
# time/name/level prefix on each record.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%H:%M:%S")
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)
11
36
 
12
37
 
13
38
class PdfToMarkdownArgs(BaseArguments):
    """Command-line arguments for PDF to Markdown conversion."""

    # NOTE(review): the bare string literals following each field are kept
    # byte-for-byte; presumably spargear surfaces them as help text - confirm.

    input: str
    """Input PDF file or directory containing PDF files to convert to markdown."""

    output: Optional[str] = None
    """Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""

    page: Optional[str] = None
    """Zero-based page indices to convert (e.g., '0,2,4-8'). If None, converts all pages."""

    recursive: bool = False
    """If input is a directory, search for PDFs recursively."""

    mode: Literal["sequential", "parallel"] = "parallel"
    """Processing mode: 'sequential' for strict continuity, 'parallel' for faster processing."""

    sync: bool = False
    """Enable synchronous processing for sequential mode. If set to True, will run in sync mode."""

    max_concurrent: int = 10
    """Maximum number of concurrent LLM requests when using async mode."""

    image_zoom: float = 2.0
    """Zoom factor for rendering PDF pages as images (higher zoom = higher resolution)."""

    image_format: Literal["png", "jpg", "jpeg"] = "png"
    """Image format for PDF page rendering."""

    image_quality: int = 95
    """JPEG quality when using jpg/jpeg format (1-100)."""

    context_tail_lines: int = 10
    """Number of lines from previous page's markdown to use as context (sequential mode only)."""

    verbose: bool = False
    """Enable verbose logging output."""

    chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
        ["--chatterer"],
        default_factory=lambda: Chatterer.from_provider("google:gemini-2.5-flash-preview-05-20"),
        help="Chatterer instance configuration (e.g., 'google:gemini-2.5-flash-preview-05-20').",
        type=Chatterer.from_provider,
    )

    def __post_init__(self) -> None:
        """Validate and adjust arguments after initialization.

        Raises the root logger to DEBUG when ``verbose`` is set, switches
        ``mode`` to "parallel" when async processing is combined with
        "sequential", and clamps ``max_concurrent`` to at least 1.
        """
        if self.verbose:
            logging.getLogger().setLevel(logging.DEBUG)

        if not self.sync and self.mode == "sequential":
            logger.warning("Async mode is only available with parallel mode. Switching to parallel mode.")
            self.mode = "parallel"

        if self.max_concurrent < 1:
            logger.warning("max_concurrent must be >= 1. Setting to 1.")
            self.max_concurrent = 1
        elif self.max_concurrent > 10:
            logger.warning("max_concurrent > 10 may cause rate limiting. Consider reducing.")

    def run(self) -> List[ConversionResult]:
        """Execute the PDF to Markdown conversion.

        Returns:
            One ``ConversionResult`` per input PDF. Runs the asyncio path
            unless ``sync`` is set.
        """
        if self.sync:
            return self._run_sync()
        return asyncio.run(self._run_async())

    def _build_converter(self) -> PdfToMarkdown:
        """Create the converter from the rendering/context options on this instance."""
        return PdfToMarkdown(
            chatterer=self.chatterer.unwrap(),
            image_zoom=self.image_zoom,
            image_format=self.image_format,
            image_jpg_quality=self.image_quality,
            context_tail_lines=self.context_tail_lines,
        )

    def _log_summary(
        self,
        headline: str,
        results: List[ConversionResult],
        total_files: int,
        total_elapsed: float,
    ) -> None:
        """Log aggregate statistics for a finished run.

        Both ratios are guarded against a zero denominator, matching the
        per-PDF ``elapsed > 0`` guard used while converting.
        """
        total_chars = sum(len(r.get("result", "")) for r in results)
        successful_conversions = sum(1 for r in results if "error" not in r)
        success_pct = (successful_conversions / total_files * 100) if total_files else 0.0
        avg_speed = (total_chars / total_elapsed) if total_elapsed > 0 else 0.0

        logger.info(headline)
        logger.info(f" 📊 Total time: {total_elapsed:.1f}s")
        logger.info(f" 📈 Success rate: {successful_conversions}/{total_files} ({success_pct:.1f}%)")
        logger.info(f" 📝 Total output: {total_chars:,} characters")
        logger.info(f" ⚡ Average speed: {avg_speed:.0f} chars/s")

    def _run_sync(self) -> List[ConversionResult]:
        """Execute synchronous conversion, one PDF after another.

        A failure on one PDF is logged and recorded in its result entry; the
        remaining PDFs are still processed.
        """
        pdf_files, output_base, is_dir = self._prepare_files()
        converter = self._build_converter()

        results: List[ConversionResult] = []
        # perf_counter is monotonic, so elapsed values cannot go negative.
        total_start_time = time.perf_counter()

        logger.info(f"🚀 Starting {self.mode} conversion of {len(pdf_files)} PDF(s)...")

        # Captures nothing from the loop, so it is defined once.
        def progress_callback(current: int, total: int) -> None:
            progress = (current / total) * 100
            logger.info(f" └─ Progress: {current}/{total} pages ({progress:.1f}%)")

        for i, pdf in enumerate(pdf_files, 1):
            output_path = (output_base / f"{pdf.stem}.md") if is_dir else output_base

            logger.info(f"📄 Processing {i}/{len(pdf_files)}: {pdf.name}")
            start_time = time.perf_counter()

            try:
                markdown = converter.convert(
                    pdf_input=str(pdf),
                    page_indices=self.page,
                    mode=self.mode,
                    progress_callback=progress_callback,
                )

                # Save result
                output_path.parent.mkdir(parents=True, exist_ok=True)
                output_path.write_text(markdown, encoding="utf-8")

                elapsed = time.perf_counter() - start_time
                chars_per_sec = len(markdown) / elapsed if elapsed > 0 else 0

                logger.info(f" ✅ Completed in {elapsed:.1f}s ({chars_per_sec:.0f} chars/s)")
                logger.info(f" 📝 Generated {len(markdown):,} characters → {output_path}")

                results.append({
                    "input": pdf.as_posix(),
                    "output": output_path.as_posix(),
                    "result": markdown,
                    "processing_time": elapsed,
                    "characters": len(markdown),
                })
            except Exception as e:  # per-file boundary: record and continue
                logger.error(f" ❌ Failed to process {pdf.name}: {e}")
                results.append({
                    "input": pdf.as_posix(),
                    "output": "",
                    "result": "",
                    "error": str(e),
                })

        self._log_summary(
            "🎉 Conversion complete!",
            results,
            len(pdf_files),
            time.perf_counter() - total_start_time,
        )
        return results

    async def _run_async(self) -> List[ConversionResult]:
        """Execute asynchronous conversion with parallel processing.

        Up to ``max_concurrent`` PDFs are converted at once.
        """
        pdf_files, output_base, is_dir = self._prepare_files()
        converter = self._build_converter()

        total_start_time = time.perf_counter()

        logger.info(f"🚀 Starting ASYNC parallel conversion of {len(pdf_files)} PDF(s)...")
        logger.info(f"⚡ Max concurrent: {self.max_concurrent} LLM requests")

        # NOTE(review): this semaphore bounds concurrent PDFs, and aconvert()
        # is additionally given max_concurrent for its pages, so total
        # in-flight requests can exceed max_concurrent - confirm intent.
        semaphore = asyncio.Semaphore(self.max_concurrent)

        async def process_pdf(pdf: Path, index: int) -> ConversionResult:
            async with semaphore:
                output_path = (output_base / f"{pdf.stem}.md") if is_dir else output_base

                logger.info(f"📄 Processing {index}/{len(pdf_files)}: {pdf.name}")
                start_time = time.perf_counter()

                def progress_callback(current: int, total: int) -> None:
                    progress = (current / total) * 100
                    logger.info(f" └─ {pdf.name}: {current}/{total} pages ({progress:.1f}%)")

                try:
                    markdown = await converter.aconvert(
                        pdf_input=str(pdf),
                        page_indices=self.page,
                        progress_callback=progress_callback,
                        max_concurrent=self.max_concurrent,  # Limit per-PDF concurrency
                    )

                    # Save result
                    output_path.parent.mkdir(parents=True, exist_ok=True)
                    output_path.write_text(markdown, encoding="utf-8")

                    elapsed = time.perf_counter() - start_time
                    chars_per_sec = len(markdown) / elapsed if elapsed > 0 else 0

                    logger.info(f" ✅ {pdf.name} completed in {elapsed:.1f}s ({chars_per_sec:.0f} chars/s)")
                    logger.info(f" 📝 Generated {len(markdown):,} characters → {output_path}")

                    return {
                        "input": pdf.as_posix(),
                        "output": output_path.as_posix(),
                        "result": markdown,
                        "processing_time": elapsed,
                        "characters": len(markdown),
                    }
                except Exception as e:  # per-file boundary: record and continue
                    logger.error(f" ❌ Failed to process {pdf.name}: {e}")
                    return {
                        "input": pdf.as_posix(),
                        "output": "",
                        "result": "",
                        "error": str(e),
                    }

        tasks = [process_pdf(pdf, i) for i, pdf in enumerate(pdf_files, 1)]
        raw_results = await asyncio.gather(*tasks, return_exceptions=True)

        # gather() preserves submission order, so results pair up with inputs.
        # process_pdf already handles Exception; anything returned here is a
        # BaseException such as CancelledError, which must not be mistaken for
        # a successful result.
        final_results: List[ConversionResult] = []
        for pdf, outcome in zip(pdf_files, raw_results):
            if isinstance(outcome, BaseException):
                logger.error(f"Task failed with exception: {outcome}")
                final_results.append(
                    ConversionResult(input=pdf.as_posix(), output="", result="", error=str(outcome))
                )
            else:
                final_results.append(outcome)

        # No "speedup" figure is logged: no sequential baseline is measured.
        self._log_summary(
            "🎉 ASYNC conversion complete!",
            final_results,
            len(pdf_files),
            time.perf_counter() - total_start_time,
        )
        return final_results

    def _prepare_files(self) -> tuple[List[Path], Path, bool]:
        """Prepare input and output file paths.

        Returns:
            ``(pdf_files, output_base, is_dir)`` where ``is_dir`` tells whether
            the input was a directory (and ``output_base`` is therefore an
            output directory rather than a single output file).

        Exits the process with status 1 when the input is not a PDF or does
        not exist, and with status 0 when a directory contains no PDFs.
        """
        input_path = Path(self.input).resolve()
        pdf_files: List[Path] = []
        is_dir = False

        # Determine input files
        if input_path.is_file():
            if input_path.suffix.lower() != ".pdf":
                logger.error(f"❌ Input file must be a PDF: {input_path}")
                sys.exit(1)
            pdf_files.append(input_path)
        elif input_path.is_dir():
            is_dir = True
            candidates = input_path.rglob("*") if self.recursive else input_path.glob("*")
            # Match the extension case-insensitively, as the single-file
            # branch does, so "REPORT.PDF" is found too.
            pdf_files = sorted(f for f in candidates if f.is_file() and f.suffix.lower() == ".pdf")
            if not pdf_files:
                logger.warning(f"⚠️ No PDF files found in {input_path}")
                sys.exit(0)
        else:
            logger.error(f"❌ Input path does not exist: {input_path}")
            sys.exit(1)

        # Determine output path
        if self.output:
            output_base = Path(self.output).resolve()
        elif is_dir:
            output_base = input_path
        else:
            output_base = input_path.with_suffix(".md")

        # Create output directories
        if is_dir:
            output_base.mkdir(parents=True, exist_ok=True)
        else:
            output_base.parent.mkdir(parents=True, exist_ok=True)

        logger.info(f"📂 Input: {input_path}")
        logger.info(f"📁 Output: {output_base}")
        logger.info(f"📄 Found {len(pdf_files)} PDF file(s)")

        return pdf_files, output_base, is_dir
70
317
 
71
318
 
72
319
def main() -> None:
    """Main entry point for the CLI application.

    Exits with status 130 on Ctrl-C and status 1 on any other exception; the
    traceback is printed only when the parsed arguments request verbose mode.
    """
    import traceback

    parsed = None
    try:
        parsed = PdfToMarkdownArgs()
        parsed.run()
    except KeyboardInterrupt:
        logger.info("🛑 Conversion interrupted by user")
        sys.exit(130)
    except Exception as exc:
        logger.error(f"❌ Unexpected error: {exc}")
        # `parsed` is still None when argument construction itself failed.
        if parsed and getattr(parsed, "verbose", False):
            traceback.print_exc()
        sys.exit(1)
74
335
 
75
336
 
76
337
  if __name__ == "__main__":
@@ -66,12 +66,15 @@ class Chatterer(BaseModel):
66
66
 
67
67
  @classmethod
68
68
  def from_provider(
69
- cls, provider_and_model: str, structured_output_kwargs: Optional[dict[str, object]] = {"strict": True}
69
+ cls,
70
+ provider_and_model: str,
71
+ structured_output_kwargs: Optional[dict[str, object]] = {"strict": True},
72
+ **kwargs: object,
70
73
  ) -> Self:
71
74
  backend, model = provider_and_model.split(":", 1)
72
75
  backends = cls.get_backends()
73
76
  if func := backends.get(backend):
74
- return func(model, structured_output_kwargs)
77
+ return func(model, structured_output_kwargs, **kwargs)
75
78
  else:
76
79
  raise ValueError(f"Unsupported provider: {backend}. Supported providers are: {', '.join(backends.keys())}.")
77
80
 
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  import logging
4
5
  import re
5
6
  from contextlib import contextmanager
@@ -25,10 +26,11 @@ PageIndexType = Iterable[int | tuple[int | EllipsisType, int | EllipsisType]] |
25
26
  class PdfToMarkdown:
26
27
  """
27
28
  Converts PDF documents to Markdown using a multimodal LLM (Chatterer).
28
- Processes PDFs page by page, providing the LLM with both the extracted raw
29
- text and a rendered image of the page to handle complex layouts. It maintains
30
- context between pages by feeding the *tail end* of the previously generated
31
- Markdown back into the prompt for the next page to ensure smooth transitions.
29
+
30
+ This class supports both sequential and parallel processing:
31
+ - Sequential processing preserves strict page continuity using previous page context
32
+ - Parallel processing enables faster conversion for large documents by using
33
+ previous page image and text for context instead of generated markdown
32
34
  """
33
35
 
34
36
  chatterer: Chatterer
@@ -40,8 +42,7 @@ class PdfToMarkdown:
40
42
  image_jpg_quality: int = 95
41
43
  """Quality for JPEG images (if used)."""
42
44
  context_tail_lines: int = 10
43
- """Number of lines from the end of the previous page's Markdown to use as context."""
44
- # max_context_tokens: Optional[int] = None # This can be added later if needed
45
+ """Number of lines from the end of the previous page's Markdown to use as context (sequential mode only)."""
45
46
 
46
47
  def _get_context_tail(self, markdown_text: Optional[str]) -> Optional[str]:
47
48
  """Extracts the last N lines from the given markdown text."""
@@ -50,94 +51,279 @@ class PdfToMarkdown:
50
51
  lines = markdown_text.strip().splitlines()
51
52
  if not lines:
52
53
  return None
53
- # Get the last N lines, or fewer if the text is shorter
54
54
  tail_lines = lines[-self.context_tail_lines :]
55
55
  return "\n".join(tail_lines)
56
56
 
57
- def _format_prompt_content(
57
+ def _format_prompt_content_sequential(
58
58
  self,
59
59
  page_text: str,
60
60
  page_image_b64: Base64Image,
61
- previous_markdown_context_tail: Optional[str] = None, # Renamed for clarity
62
- page_number: int = 0, # For context, 0-indexed
61
+ previous_markdown_context_tail: Optional[str] = None,
62
+ page_number: int = 0,
63
63
  total_pages: int = 1,
64
64
  ) -> HumanMessage:
65
65
  """
66
- Formats the content list for the HumanMessage input to the LLM.
67
- Uses only the tail end of the previous page's markdown for context.
66
+ Formats the content for sequential processing using previous page's markdown context.
68
67
  """
69
- # Construct the main instruction prompt
70
- instruction = f"""You are an expert PDF to Markdown converter. Your task is to convert the content of the provided PDF page (Page {page_number + 1} of {total_pages}) into accurate and well-formatted Markdown. You are given:
71
- 1. The raw text extracted from the page ([Raw Text]).
72
- 2. A rendered image of the page ([Rendered Image]) showing its visual layout.
73
- 3. (Optional) The *ending portion* of the Markdown generated from the previous page ([End of Previous Page Markdown]) for context continuity.
74
-
75
- **Conversion Requirements:**
76
- * **Text:** Reconstruct paragraphs, headings, lists, etc., naturally based on the visual layout. Correct OCR/formatting issues from [Raw Text] using the image. Minimize unnecessary whitespace.
77
- * **Tables:** Convert tables accurately into Markdown table format (`| ... |`). Use image for text if [Raw Text] is garbled.
78
- * **Images/Diagrams:** Describe significant visual elements (charts, graphs) within `<details>` tags. Example: `<details><summary>Figure 1: Description</summary>Detailed textual description from the image.</details>`. Ignore simple decorative images. Do **not** use `![alt](...)`.
79
- * **Layout:** Respect columns, code blocks (``` ```), footnotes, etc., using standard Markdown.
80
- * **Continuity (Crucial):**
81
- * Examine the [End of Previous Page Markdown] if provided.
82
- * If the current page's content *continues* a sentence, paragraph, list, or code block from the previous page, ensure your generated Markdown for *this page* starts seamlessly from that continuation point.
83
- * For example, if the previous page ended mid-sentence, the Markdown for *this page* should begin with the rest of that sentence.
84
- * **Do NOT repeat the content already present in [End of Previous Page Markdown] in your output.**
85
- * If the current page starts a new section (e.g., with a heading), begin the Markdown output fresh, ignoring the previous context tail unless necessary for list numbering, etc.
86
-
87
- **Input Data:**
88
- [Raw Text]
68
+ instruction = f"""You are an expert PDF to Markdown converter. Convert Page {page_number + 1} of {total_pages} into accurate, well-formatted Markdown.
69
+
70
+ **Input provided:**
71
+ 1. **Raw Text**: Extracted text from the PDF page (may contain OCR errors)
72
+ 2. **Page Image**: Visual rendering of the page showing actual layout
73
+ 3. **Previous Context**: End portion of the previous page's generated Markdown (if available)
74
+
75
+ **Conversion Rules:**
76
+ **Text Structure**: Use the image to understand the actual layout and fix any OCR errors in the raw text
77
+ **Headings**: Use appropriate heading levels (# ## ### etc.) based on visual hierarchy
78
+ **Lists**: Convert to proper Markdown lists (- or 1. 2. 3.) maintaining structure
79
+ **Tables**: Convert to Markdown table format using | pipes |
80
+ **Images/Diagrams**: Describe significant visual elements as: `<details><summary>Figure: Brief title</summary>Detailed description based on what you see in the image</details>`
81
+ **Code/Formulas**: Use ``` code blocks ``` or LaTeX $$ math $$ as appropriate
82
+ **Continuity**: If previous context shows incomplete content (mid-sentence, list, table), seamlessly continue from that point
83
+ **NO REPETITION**: Never repeat content from the previous context - only generate new content for this page
84
+
85
+ **Raw Text:**
89
86
  ```
90
87
  {page_text if page_text else "No text extracted from this page."}
91
88
  ```
92
- [Rendered Image]
93
- (See attached image)
89
+
90
+ **Page Image:** (attached)
94
91
  """
92
+
95
93
  if previous_markdown_context_tail:
96
- instruction += f"""[End of Previous Page Markdown]
94
+ instruction += f"""
95
+ **Previous Page Context (DO NOT REPEAT):**
97
96
  ```markdown
98
- ... (content from previous page ends with) ...
97
+ ... (previous page ended with) ...
99
98
  {previous_markdown_context_tail}
100
99
  ```
101
- **Task:** Generate the Markdown for the *current* page (Page {page_number + 1}), ensuring it correctly continues from or follows the [End of Previous Page Markdown]. Start the output *only* with the content belonging to the current page."""
100
+
101
+ Continue seamlessly from the above context if the current page content flows from it.
102
+ """
102
103
  else:
103
- instruction += "**Task:** Generate the Markdown for the *current* page (Page {page_number + 1}). This is the first page being processed in this batch."
104
+ instruction += "\n**Note:** This is the first page or start of a new section."
104
105
 
105
- instruction += "\n\n**Output only the Markdown content for the current page.** Ensure your output starts correctly based on the continuity rules."
106
+ instruction += "\n\n**Output only the Markdown content for the current page. Ensure proper formatting and NO repetition of previous content.**"
106
107
 
107
- # Structure for multimodal input
108
108
  return HumanMessage(content=[instruction, page_image_b64.data_uri_content])
109
109
 
110
+ def _format_prompt_content_parallel(
111
+ self,
112
+ page_text: str,
113
+ page_image_b64: Base64Image,
114
+ previous_page_text: Optional[str] = None,
115
+ previous_page_image_b64: Optional[Base64Image] = None,
116
+ page_number: int = 0,
117
+ total_pages: int = 1,
118
+ ) -> HumanMessage:
119
+ """
120
+ Formats the content for parallel processing using previous page's raw data.
121
+ """
122
+ instruction = f"""You are an expert PDF to Markdown converter. Convert Page {page_number + 1} of {total_pages} into accurate, well-formatted Markdown.
123
+
124
+ **Task**: Convert the current page to Markdown while maintaining proper continuity with the previous page.
125
+
126
+ **Current Page Data:**
127
+ - **Raw Text**: Extracted text (may have OCR errors - use image to verify)
128
+ - **Page Image**: Visual rendering showing actual layout
129
+
130
+ **Previous Page Data** (for context only):
131
+ - **Previous Raw Text**: Text from the previous page
132
+ - **Previous Page Image**: Visual of the previous page
133
+
134
+ **Conversion Instructions:**
135
+ 1. **Primary Focus**: Convert the CURRENT page content accurately
136
+ 2. **Continuity Check**:
137
+ - Examine if the current page continues content from the previous page (sentences, paragraphs, lists, tables)
138
+ - If yes, start your Markdown naturally continuing that content
139
+ - If no, start fresh with proper heading/structure
140
+ 3. **Format Rules**:
141
+ - Use image to fix OCR errors and understand layout
142
+ - Convert headings to # ## ### based on visual hierarchy
143
+ - Convert lists to proper Markdown (- or 1. 2. 3.)
144
+ - Convert tables to | pipe | format
145
+ - Describe significant images/charts as: `<details><summary>Figure: Title</summary>Description</details>`
146
+ - Use ``` for code blocks and $$ for math formulas
147
+
148
+ **Current Page Raw Text:**
149
+ ```
150
+ {page_text if page_text else "No text extracted from this page."}
151
+ ```
152
+
153
+ **Current Page Image:** (see first attached image)
154
+ """
155
+
156
+ content = [instruction, page_image_b64.data_uri_content]
157
+
158
+ if previous_page_text is not None and previous_page_image_b64 is not None:
159
+ instruction += f"""
160
+
161
+ **Previous Page Raw Text (for context):**
162
+ ```
163
+ {previous_page_text if previous_page_text else "No text from previous page."}
164
+ ```
165
+
166
+ **Previous Page Image:** (see second attached image)
167
+ """
168
+ content.append(previous_page_image_b64.data_uri_content)
169
+ else:
170
+ instruction += "\n**Note:** This is the first page - no previous context available."
171
+
172
+ instruction += "\n\n**Generate ONLY the Markdown for the current page. Ensure proper continuity and formatting.**"
173
+ content[0] = instruction
174
+
175
+ return HumanMessage(content=content)
176
+
110
177
  def convert(
111
178
  self,
112
179
  pdf_input: "Document | PathOrReadable",
113
180
  page_indices: Optional[PageIndexType] = None,
114
181
  progress_callback: Optional[Callable[[int, int], None]] = None,
182
+ mode: Literal["sequential", "parallel"] = "sequential",
183
+ ) -> str:
184
+ """
185
+ Converts a PDF document to Markdown synchronously.
186
+
187
+ Args:
188
+ pdf_input: Path to PDF file or pymupdf.Document object
189
+ page_indices: Specific page indices to convert (0-based). If None, converts all pages
190
+ progress_callback: Optional callback function called with (current_page, total_pages)
191
+ mode: "sequential" for strict continuity or "parallel" for independent page processing
192
+
193
+ Returns:
194
+ Concatenated Markdown string for all processed pages
195
+ """
196
+ if mode == "sequential":
197
+ return self._convert_sequential(pdf_input, page_indices, progress_callback)
198
+ else:
199
+ return self._convert_parallel_sync(pdf_input, page_indices, progress_callback)
200
+
201
+ async def aconvert(
202
+ self,
203
+ pdf_input: "Document | PathOrReadable",
204
+ page_indices: Optional[PageIndexType] = None,
205
+ progress_callback: Optional[Callable[[int, int], None]] = None,
206
+ max_concurrent: int = 5,
115
207
  ) -> str:
116
208
  """
117
- Converts a PDF document (or specific pages) to Markdown synchronously.
209
+ Converts a PDF document to Markdown asynchronously with parallel processing.
210
+
118
211
  Args:
119
- pdf_input: Path to the PDF file or a pymupdf.Document object.
120
- page_indices: Specific 0-based page indices to convert. If None, converts all pages.
121
- Can be a single int or an iterable of ints.
122
- progress_callback: An optional function to call with (current_page_index, total_pages_to_process)
123
- after each page is processed.
212
+ pdf_input: Path to PDF file or pymupdf.Document object
213
+ page_indices: Specific page indices to convert (0-based). If None, converts all pages
214
+ progress_callback: Optional callback function called with (current_page, total_pages)
215
+ max_concurrent: Maximum number of concurrent LLM requests
216
+
124
217
  Returns:
125
- A single string containing the concatenated Markdown output for the processed pages.
218
+ Concatenated Markdown string for all processed pages
126
219
  """
127
220
  with open_pdf(pdf_input) as doc:
128
- target_page_indices = list(
129
- _get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True)
221
+ target_page_indices = list(_get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True))
222
+ total_pages_to_process = len(target_page_indices)
223
+
224
+ if total_pages_to_process == 0:
225
+ logger.warning("No pages selected for processing.")
226
+ return ""
227
+
228
+ logger.info(f"Starting parallel Markdown conversion for {total_pages_to_process} pages...")
229
+
230
+ # Pre-process all pages
231
+ page_text_dict = extract_text_from_pdf(doc, target_page_indices)
232
+ page_image_dict = render_pdf_as_image(
233
+ doc,
234
+ page_indices=target_page_indices,
235
+ zoom=self.image_zoom,
236
+ output=self.image_format,
237
+ jpg_quality=self.image_jpg_quality,
130
238
  )
239
+
240
+ # Process pages in parallel with semaphore for concurrency control
241
+ semaphore = asyncio.Semaphore(max_concurrent)
242
+
243
+ async def process_page(i: int, page_idx: int) -> tuple[int, str]:
244
+ async with semaphore:
245
+ logger.info(f"Processing page {i + 1}/{total_pages_to_process} (Index: {page_idx})...")
246
+
247
+ try:
248
+ # Get previous page data for context
249
+ prev_page_idx = target_page_indices[i - 1] if i > 0 else None
250
+ previous_page_text = page_text_dict.get(prev_page_idx) if prev_page_idx is not None else None
251
+ previous_page_image_b64 = None
252
+ if prev_page_idx is not None:
253
+ previous_page_image_b64 = Base64Image.from_bytes(page_image_dict[prev_page_idx], ext=self.image_format)
254
+
255
+ message = self._format_prompt_content_parallel(
256
+ page_text=page_text_dict.get(page_idx, ""),
257
+ page_image_b64=Base64Image.from_bytes(page_image_dict[page_idx], ext=self.image_format),
258
+ previous_page_text=previous_page_text,
259
+ previous_page_image_b64=previous_page_image_b64,
260
+ page_number=page_idx,
261
+ total_pages=len(doc),
262
+ )
263
+
264
+ response = await self.chatterer.agenerate([message])
265
+
266
+ # Extract markdown
267
+ markdowns = [match.group(1).strip() for match in MARKDOWN_PATTERN.finditer(response)]
268
+ if markdowns:
269
+ current_page_markdown = "\n".join(markdowns)
270
+ else:
271
+ current_page_markdown = response.strip()
272
+ if current_page_markdown.startswith("```") and current_page_markdown.endswith("```"):
273
+ current_page_markdown = current_page_markdown[3:-3].strip()
274
+
275
+ logger.debug(f"Completed processing page {i + 1}/{total_pages_to_process}")
276
+
277
+ # Call progress callback if provided
278
+ if progress_callback:
279
+ try:
280
+ progress_callback(i + 1, total_pages_to_process)
281
+ except Exception as cb_err:
282
+ logger.warning(f"Progress callback failed: {cb_err}")
283
+
284
+ return (i, current_page_markdown)
285
+
286
+ except Exception as e:
287
+ logger.error(f"Failed to process page index {page_idx}: {e}", exc_info=True)
288
+ return (i, f"<!-- Error processing page {page_idx + 1}: {str(e)} -->")
289
+
290
+ # Execute all page processing tasks
291
+
292
+ tasks = [process_page(i, page_idx) for i, page_idx in enumerate(target_page_indices)]
293
+ results = await asyncio.gather(*tasks, return_exceptions=True)
294
+
295
+ # Sort results by original page order and extract markdown
296
+ markdown_results = [""] * total_pages_to_process
297
+ for result in results:
298
+ if isinstance(result, Exception):
299
+ logger.error(f"Task failed with exception: {result}")
300
+ continue
301
+ if isinstance(result, tuple) and len(result) == 2:
302
+ page_order, markdown = result
303
+ markdown_results[page_order] = markdown
304
+ else:
305
+ logger.error(f"Unexpected result format: {result}")
306
+
307
+ return "\n\n".join(markdown_results).strip()
308
+
309
+ def _convert_sequential(
310
+ self,
311
+ pdf_input: "Document | PathOrReadable",
312
+ page_indices: Optional[PageIndexType] = None,
313
+ progress_callback: Optional[Callable[[int, int], None]] = None,
314
+ ) -> str:
315
+ """Sequential conversion maintaining strict page continuity."""
316
+ with open_pdf(pdf_input) as doc:
317
+ target_page_indices = list(_get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True))
131
318
  total_pages_to_process = len(target_page_indices)
132
319
  if total_pages_to_process == 0:
133
320
  logger.warning("No pages selected for processing.")
134
321
  return ""
135
322
 
136
323
  full_markdown_output: List[str] = []
137
- # --- Context Tracking ---
138
- previous_page_markdown: Optional[str] = None # Store the full markdown of the previous page
324
+ previous_page_markdown: Optional[str] = None
139
325
 
140
- # Pre-process all pages (optional optimization)
326
+ # Pre-process all pages
141
327
  logger.info("Extracting text and rendering images for selected pages...")
142
328
  page_text_dict = extract_text_from_pdf(doc, target_page_indices)
143
329
  page_image_dict = render_pdf_as_image(
@@ -147,46 +333,33 @@ class PdfToMarkdown:
147
333
  output=self.image_format,
148
334
  jpg_quality=self.image_jpg_quality,
149
335
  )
150
- logger.info(f"Starting Markdown conversion for {total_pages_to_process} pages...")
336
+ logger.info(f"Starting sequential Markdown conversion for {total_pages_to_process} pages...")
151
337
 
152
- page_idx: int = target_page_indices.pop(0) # Get the first page index
153
- i: int = 1
154
- while True:
155
- logger.info(f"Processing page {i}/{total_pages_to_process} (Index: {page_idx})...")
338
+ for i, page_idx in enumerate(target_page_indices):
339
+ logger.info(f"Processing page {i + 1}/{total_pages_to_process} (Index: {page_idx})...")
156
340
  try:
157
- # --- Get Context Tail ---
158
341
  context_tail = self._get_context_tail(previous_page_markdown)
159
342
 
160
- message = self._format_prompt_content(
161
- page_text=page_text_dict.get(page_idx, ""), # Use .get for safety
343
+ message = self._format_prompt_content_sequential(
344
+ page_text=page_text_dict.get(page_idx, ""),
162
345
  page_image_b64=Base64Image.from_bytes(page_image_dict[page_idx], ext=self.image_format),
163
- previous_markdown_context_tail=context_tail, # Pass only the tail
346
+ previous_markdown_context_tail=context_tail,
164
347
  page_number=page_idx,
165
348
  total_pages=len(doc),
166
349
  )
167
- logger.debug(f"Sending request to LLM for page index {page_idx}...")
168
350
 
169
- response = self.chatterer([message])
170
- # Extract markdown, handling potential lack of backticks
171
- markdowns: list[str] = [match.group(1).strip() for match in MARKDOWN_PATTERN.finditer(response)]
351
+ response = self.chatterer.generate([message])
352
+
353
+ # Extract markdown
354
+ markdowns = [match.group(1).strip() for match in MARKDOWN_PATTERN.finditer(response)]
172
355
  if markdowns:
173
356
  current_page_markdown = "\n".join(markdowns)
174
357
  else:
175
- # Fallback: assume the whole response is markdown if no ```markdown blocks found
176
358
  current_page_markdown = response.strip()
177
359
  if current_page_markdown.startswith("```") and current_page_markdown.endswith("```"):
178
- # Basic cleanup if it just missed the 'markdown' language tag
179
360
  current_page_markdown = current_page_markdown[3:-3].strip()
180
- elif "```" in current_page_markdown:
181
- logger.warning(
182
- f"Page {page_idx + 1}: Response contains '```' but not in expected format. Using raw response."
183
- )
184
361
 
185
- logger.debug(f"Received response from LLM for page index {page_idx}.")
186
-
187
- # --- Store result and update context ---
188
362
  full_markdown_output.append(current_page_markdown)
189
- # Update the *full* previous markdown for the *next* iteration's tail calculation
190
363
  previous_page_markdown = current_page_markdown
191
364
 
192
365
  except Exception as e:
@@ -196,18 +369,85 @@ class PdfToMarkdown:
196
369
  # Progress callback
197
370
  if progress_callback:
198
371
  try:
199
- progress_callback(i, total_pages_to_process)
372
+ progress_callback(i + 1, total_pages_to_process)
200
373
  except Exception as cb_err:
201
374
  logger.warning(f"Progress callback failed: {cb_err}")
202
375
 
203
- if not target_page_indices:
204
- break
376
+ return "\n\n".join(full_markdown_output).strip()
377
+
378
+ def _convert_parallel_sync(
379
+ self,
380
+ pdf_input: "Document | PathOrReadable",
381
+ page_indices: Optional[PageIndexType] = None,
382
+ progress_callback: Optional[Callable[[int, int], None]] = None,
383
+ ) -> str:
384
+ """Synchronous parallel-style conversion (processes independently but sequentially)."""
385
+ with open_pdf(pdf_input) as doc:
386
+ target_page_indices = list(_get_page_indices(page_indices=page_indices, max_doc_pages=len(doc), is_input_zero_based=True))
387
+ total_pages_to_process = len(target_page_indices)
388
+ if total_pages_to_process == 0:
389
+ logger.warning("No pages selected for processing.")
390
+ return ""
391
+
392
+ logger.info(f"Starting parallel-style Markdown conversion for {total_pages_to_process} pages...")
393
+
394
+ # Pre-process all pages
395
+ page_text_dict = extract_text_from_pdf(doc, target_page_indices)
396
+ page_image_dict = render_pdf_as_image(
397
+ doc,
398
+ page_indices=target_page_indices,
399
+ zoom=self.image_zoom,
400
+ output=self.image_format,
401
+ jpg_quality=self.image_jpg_quality,
402
+ )
403
+
404
+ full_markdown_output: List[str] = []
405
+
406
+ for i, page_idx in enumerate(target_page_indices):
407
+ logger.info(f"Processing page {i + 1}/{total_pages_to_process} (Index: {page_idx})...")
205
408
 
206
- page_idx = target_page_indices.pop(0) # Get the next page index
207
- i += 1 # Increment the page counter
409
+ try:
410
+ # Get previous page data for context
411
+ prev_page_idx = target_page_indices[i - 1] if i > 0 else None
412
+ previous_page_text = page_text_dict.get(prev_page_idx) if prev_page_idx is not None else None
413
+ previous_page_image_b64 = None
414
+ if prev_page_idx is not None:
415
+ previous_page_image_b64 = Base64Image.from_bytes(page_image_dict[prev_page_idx], ext=self.image_format)
416
+
417
+ message = self._format_prompt_content_parallel(
418
+ page_text=page_text_dict.get(page_idx, ""),
419
+ page_image_b64=Base64Image.from_bytes(page_image_dict[page_idx], ext=self.image_format),
420
+ previous_page_text=previous_page_text,
421
+ previous_page_image_b64=previous_page_image_b64,
422
+ page_number=page_idx,
423
+ total_pages=len(doc),
424
+ )
208
425
 
209
- # Join with double newline, potentially adjust based on how well continuations work
210
- return "\n\n".join(full_markdown_output).strip() # Add strip() to remove leading/trailing whitespace
426
+ response = self.chatterer.generate([message])
427
+
428
+ # Extract markdown
429
+ markdowns = [match.group(1).strip() for match in MARKDOWN_PATTERN.finditer(response)]
430
+ if markdowns:
431
+ current_page_markdown = "\n".join(markdowns)
432
+ else:
433
+ current_page_markdown = response.strip()
434
+ if current_page_markdown.startswith("```") and current_page_markdown.endswith("```"):
435
+ current_page_markdown = current_page_markdown[3:-3].strip()
436
+
437
+ full_markdown_output.append(current_page_markdown)
438
+
439
+ except Exception as e:
440
+ logger.error(f"Failed to process page index {page_idx}: {e}", exc_info=True)
441
+ continue
442
+
443
+ # Progress callback
444
+ if progress_callback:
445
+ try:
446
+ progress_callback(i + 1, total_pages_to_process)
447
+ except Exception as cb_err:
448
+ logger.warning(f"Progress callback failed: {cb_err}")
449
+
450
+ return "\n\n".join(full_markdown_output).strip()
211
451
 
212
452
 
213
453
  def render_pdf_as_image(
@@ -297,9 +537,7 @@ def open_pdf(pdf_input: PathOrReadable | Document):
297
537
  doc.close()
298
538
 
299
539
 
300
- def _get_page_indices(
301
- page_indices: Optional[PageIndexType], max_doc_pages: int, is_input_zero_based: bool
302
- ) -> list[int]:
540
+ def _get_page_indices(page_indices: Optional[PageIndexType], max_doc_pages: int, is_input_zero_based: bool) -> list[int]:
303
541
  """Helper function to handle page indices for PDF conversion."""
304
542
 
305
543
  def _to_zero_based_int(idx: int) -> int:
@@ -318,9 +556,7 @@ def _get_page_indices(
318
556
  return [_to_zero_based_int(page_indices)]
319
557
  elif isinstance(page_indices, str):
320
558
  # Handle string input for page indices
321
- return _interpret_index_string(
322
- index_str=page_indices, max_doc_pages=max_doc_pages, is_input_zero_based=is_input_zero_based
323
- )
559
+ return _interpret_index_string(index_str=page_indices, max_doc_pages=max_doc_pages, is_input_zero_based=is_input_zero_based)
324
560
  else:
325
561
  # Handle iterable input for page indices
326
562
  indices: set[int] = set()
@@ -340,9 +576,7 @@ def _get_page_indices(
340
576
  end = _to_zero_based_int(end)
341
577
 
342
578
  if start > end:
343
- raise ValueError(
344
- f"Invalid range: {start} - {end}. Start index must be less than or equal to end index."
345
- )
579
+ raise ValueError(f"Invalid range: {start} - {end}. Start index must be less than or equal to end index.")
346
580
  indices.update(range(start, end + 1))
347
581
 
348
582
  return sorted(indices) # Return sorted list of indices
@@ -383,9 +617,7 @@ def _interpret_index_string(index_str: str, max_doc_pages: int, is_input_zero_ba
383
617
  end = _to_zero_based_int(end)
384
618
 
385
619
  if start > end:
386
- raise ValueError(
387
- f"Invalid range: {start} - {end}. Start index must be less than or equal to end index."
388
- )
620
+ raise ValueError(f"Invalid range: {start} - {end}. Start index must be less than or equal to end index.")
389
621
  indices.update(range(start, end + 1))
390
622
  else:
391
623
  raise ValueError(f"Invalid page index format: '{part}'. Expected format is '1,2,3' or '1-3'.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.21
3
+ Version: 0.1.23
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  chatterer/__init__.py,sha256=1z3ocUMqgbqQ3eD4wq5Jq-JPt-VuWwdWT_U8r38Hodo,2267
2
2
  chatterer/interactive.py,sha256=B8KvlXAGpNEF-czJJpS_f9eJj1TenkE6896w9ixNjOk,17056
3
- chatterer/language_model.py,sha256=4aJrBHpDbFrGfcGOmglSy1IYFOhyiNGen20-BysqQTM,20659
3
+ chatterer/language_model.py,sha256=QkJLmmTYcWbqosm3D70zfhDSFETD7PIafRaY5upT7Gc,20715
4
4
  chatterer/messages.py,sha256=j_bjOVE2FbBaYYpykmJrQL-IH_BWyiZ1VAUCj_wSA2U,479
5
5
  chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  chatterer/common_types/__init__.py,sha256=jfS6m5UANSvGjzQ_nzYDpryn5uZqNb06-4xCsQ2C_lw,376
@@ -10,7 +10,7 @@ chatterer/examples/anything_to_markdown.py,sha256=4O9ze7AIHcwEzvVmm5JMMKo_rVSFwh
10
10
  chatterer/examples/get_code_snippets.py,sha256=pz05JjhKaWAknVKlk1ftEEzpSG4-sqD9oa_gyIQoCAs,1911
11
11
  chatterer/examples/login_with_playwright.py,sha256=EhvJLaH5TD7bmDi12uP8YLd0fRhdjR-oyIkBHLi1Jjs,5988
12
12
  chatterer/examples/make_ppt.py,sha256=vsT_iL_jS2ami5VYrReLMQcD576FfZUH7913F7_As0A,23278
13
- chatterer/examples/pdf_to_markdown.py,sha256=ZeGRO5CZxGQxJpScK0iB1lTzUkfSiXtuqoeKEQL1ICA,2787
13
+ chatterer/examples/pdf_to_markdown.py,sha256=mur63PxI0uwl90Mh49VXPuO0YSwyEfs0-MwxJWKWXec,13577
14
14
  chatterer/examples/pdf_to_text.py,sha256=DznTyhu1REv8Wp4RimQWVgEU5j0_BmlwjfJYJvx3dbI,1590
15
15
  chatterer/examples/transcription_api.py,sha256=WUs12qHH4616eLMQDHOiyVGxaXstTpgeE47djYyli6c,3897
16
16
  chatterer/examples/upstage_parser.py,sha256=TrfeSIiF0xklhFCknop22TIOVibI4CJ_UKj5-lD8c8E,3487
@@ -20,7 +20,7 @@ chatterer/strategies/atom_of_thoughts.py,sha256=pUhqt47YlzBIVNRh0UebeBwuJ0J94Ge6
20
20
  chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
21
21
  chatterer/tools/__init__.py,sha256=m3PRK9H5vOhk-2gG9W2eg8CYBlEn-K9-eaulOu91bgo,1474
22
22
  chatterer/tools/caption_markdown_images.py,sha256=r4QajHYuL4mdyYQXP1vQcNmqKN8lxBf5y0VKELXILOI,15392
23
- chatterer/tools/convert_pdf_to_markdown.py,sha256=Q5ln-_av2eor0A2LkQG7-IgyQKJ79wwrSOvv5Jncfso,18901
23
+ chatterer/tools/convert_pdf_to_markdown.py,sha256=_a-nVNs_9j4QsDPKI5p6AZeasgOW3x_2rb49-yfBSPs,28501
24
24
  chatterer/tools/convert_to_text.py,sha256=WHQ0Xj4Ri_jYbFjzTx3mjmvJ9U8bAv4wGaKEVC88Nlk,15457
25
25
  chatterer/tools/upstage_document_parser.py,sha256=CXslVYAHDK8EV8jtUAUWzf8rxU4qilSnW8_dhAxHOE8,33142
26
26
  chatterer/tools/webpage_to_markdown.py,sha256=ADH4sqM6iquJR7HU6umMQ5qO7EvcbNutuchXDpAcxAo,31961
@@ -37,8 +37,8 @@ chatterer/utils/base64_image.py,sha256=m_qAT3ERBiq8D-H4H9Z7rLfL31_BiPmV_m4uQ5XRL
37
37
  chatterer/utils/bytesio.py,sha256=3MC2atOOFKo5YxuReo_y_t8Wem9p2Y1ahC5M2lGclwI,2618
38
38
  chatterer/utils/code_agent.py,sha256=7ka_WRI4TQmZ5H46mjY3hI6RO_pxw6pg3LAxjgW4AbM,10495
39
39
  chatterer/utils/imghdr.py,sha256=6JhJMXD4MZ0dQolT2VM87YrRYm3hPf3RTEWnP4lYRVc,3842
40
- chatterer-0.1.21.dist-info/METADATA,sha256=j3QGPYik-jm75MHIfAvbvUbv-EaxvlVKdEIc7_dMUjk,11826
41
- chatterer-0.1.21.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
42
- chatterer-0.1.21.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
43
- chatterer-0.1.21.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
44
- chatterer-0.1.21.dist-info/RECORD,,
40
+ chatterer-0.1.23.dist-info/METADATA,sha256=zCTgA4OAI2tSpNRiLwjCDPweTrW4oxzJnIXT7PA69Ck,11826
41
+ chatterer-0.1.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
42
+ chatterer-0.1.23.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
43
+ chatterer-0.1.23.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
44
+ chatterer-0.1.23.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5