chatterer 0.1.20__py3-none-any.whl → 0.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/examples/pdf_to_markdown.py +4 -27
- chatterer/examples/pdf_to_text.py +3 -3
- chatterer/language_model.py +5 -2
- {chatterer-0.1.20.dist-info → chatterer-0.1.22.dist-info}/METADATA +1 -1
- {chatterer-0.1.20.dist-info → chatterer-0.1.22.dist-info}/RECORD +8 -8
- {chatterer-0.1.20.dist-info → chatterer-0.1.22.dist-info}/WHEEL +1 -1
- {chatterer-0.1.20.dist-info → chatterer-0.1.22.dist-info}/entry_points.txt +0 -0
- {chatterer-0.1.20.dist-info → chatterer-0.1.22.dist-info}/top_level.txt +0 -0
@@ -16,8 +16,8 @@ class PdfToMarkdownArgs(BaseArguments):
|
|
16
16
|
output: Optional[str] = None
|
17
17
|
"""Output path. For a file, path to the output markdown file. For a directory, output directory for .md files."""
|
18
18
|
"""Chatterer instance for communication."""
|
19
|
-
|
20
|
-
"""
|
19
|
+
page: Optional[str] = None
|
20
|
+
"""Zero-based page indices to convert (e.g., '0,2,4-8')."""
|
21
21
|
recursive: bool = False
|
22
22
|
"""If input is a directory, search for PDFs recursively."""
|
23
23
|
chatterer: ArgumentSpec[Chatterer] = ArgumentSpec(
|
@@ -29,7 +29,6 @@ class PdfToMarkdownArgs(BaseArguments):
|
|
29
29
|
|
30
30
|
def run(self) -> list[dict[str, str]]:
|
31
31
|
input = Path(self.input).resolve()
|
32
|
-
page_indices = parse_page_indices(self.pages) if self.pages else None
|
33
32
|
pdf_files: list[Path] = []
|
34
33
|
is_dir = False
|
35
34
|
if input.is_file():
|
@@ -61,8 +60,8 @@ class PdfToMarkdownArgs(BaseArguments):
|
|
61
60
|
converter = PdfToMarkdown(chatterer=self.chatterer.unwrap())
|
62
61
|
results: list[dict[str, str]] = []
|
63
62
|
for pdf in pdf_files:
|
64
|
-
output = (out_base / (pdf.stem + ".md")) if is_dir else out_base
|
65
|
-
md = converter.convert(str(pdf), page_indices)
|
63
|
+
output: Path = (out_base / (pdf.stem + ".md")) if is_dir else out_base
|
64
|
+
md: str = converter.convert(pdf_input=str(pdf), page_indices=self.page)
|
66
65
|
output.parent.mkdir(parents=True, exist_ok=True)
|
67
66
|
output.write_text(md, encoding="utf-8")
|
68
67
|
results.append({"input": pdf.as_posix(), "output": output.as_posix(), "result": md})
|
@@ -70,28 +69,6 @@ class PdfToMarkdownArgs(BaseArguments):
|
|
70
69
|
return results
|
71
70
|
|
72
71
|
|
73
|
-
def parse_page_indices(pages_str: str) -> list[int] | None:
|
74
|
-
if not pages_str:
|
75
|
-
return None
|
76
|
-
indices: set[int] = set()
|
77
|
-
for part in pages_str.split(","):
|
78
|
-
part = part.strip()
|
79
|
-
if not part:
|
80
|
-
continue
|
81
|
-
if "-" in part:
|
82
|
-
start_str, end_str = part.split("-", 1)
|
83
|
-
start = int(start_str.strip())
|
84
|
-
end = int(end_str.strip())
|
85
|
-
if start > end:
|
86
|
-
raise ValueError
|
87
|
-
indices.update(range(start, end + 1))
|
88
|
-
else:
|
89
|
-
indices.add(int(part))
|
90
|
-
if not indices:
|
91
|
-
raise ValueError
|
92
|
-
return sorted(indices)
|
93
|
-
|
94
|
-
|
95
72
|
def main() -> None:
|
96
73
|
PdfToMarkdownArgs().run()
|
97
74
|
|
@@ -15,8 +15,8 @@ class PdfToTextArgs(BaseArguments):
|
|
15
15
|
"""Path to the PDF file to convert to text."""
|
16
16
|
output: Optional[Path]
|
17
17
|
"""Path to the output text file. If not provided, defaults to the input file with a .txt suffix."""
|
18
|
-
|
19
|
-
"""Comma-separated list of page indices to extract from the PDF. Supports ranges, e.g., '
|
18
|
+
page: Optional[str] = None
|
19
|
+
"""Comma-separated list of zero-based page indices to extract from the PDF. Supports ranges, e.g., '0,2,4-8'."""
|
20
20
|
|
21
21
|
def run(self) -> None:
|
22
22
|
input = self.input.resolve()
|
@@ -24,7 +24,7 @@ class PdfToTextArgs(BaseArguments):
|
|
24
24
|
if not input.is_file():
|
25
25
|
sys.exit(1)
|
26
26
|
out.write_text(
|
27
|
-
pdf_to_text(path_or_file=input, page_indices=self.
|
27
|
+
pdf_to_text(path_or_file=input, page_indices=self.page),
|
28
28
|
encoding="utf-8",
|
29
29
|
)
|
30
30
|
logger.info(f"Extracted text from `{input}` to `{out}`")
|
chatterer/language_model.py
CHANGED
@@ -66,12 +66,15 @@ class Chatterer(BaseModel):
|
|
66
66
|
|
67
67
|
@classmethod
|
68
68
|
def from_provider(
|
69
|
-
cls,
|
69
|
+
cls,
|
70
|
+
provider_and_model: str,
|
71
|
+
structured_output_kwargs: Optional[dict[str, object]] = {"strict": True},
|
72
|
+
**kwargs: object,
|
70
73
|
) -> Self:
|
71
74
|
backend, model = provider_and_model.split(":", 1)
|
72
75
|
backends = cls.get_backends()
|
73
76
|
if func := backends.get(backend):
|
74
|
-
return func(model, structured_output_kwargs)
|
77
|
+
return func(model, structured_output_kwargs, **kwargs)
|
75
78
|
else:
|
76
79
|
raise ValueError(f"Unsupported provider: {backend}. Supported providers are: {', '.join(backends.keys())}.")
|
77
80
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
chatterer/__init__.py,sha256=1z3ocUMqgbqQ3eD4wq5Jq-JPt-VuWwdWT_U8r38Hodo,2267
|
2
2
|
chatterer/interactive.py,sha256=B8KvlXAGpNEF-czJJpS_f9eJj1TenkE6896w9ixNjOk,17056
|
3
|
-
chatterer/language_model.py,sha256=
|
3
|
+
chatterer/language_model.py,sha256=QkJLmmTYcWbqosm3D70zfhDSFETD7PIafRaY5upT7Gc,20715
|
4
4
|
chatterer/messages.py,sha256=j_bjOVE2FbBaYYpykmJrQL-IH_BWyiZ1VAUCj_wSA2U,479
|
5
5
|
chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
chatterer/common_types/__init__.py,sha256=jfS6m5UANSvGjzQ_nzYDpryn5uZqNb06-4xCsQ2C_lw,376
|
@@ -10,8 +10,8 @@ chatterer/examples/anything_to_markdown.py,sha256=4O9ze7AIHcwEzvVmm5JMMKo_rVSFwh
|
|
10
10
|
chatterer/examples/get_code_snippets.py,sha256=pz05JjhKaWAknVKlk1ftEEzpSG4-sqD9oa_gyIQoCAs,1911
|
11
11
|
chatterer/examples/login_with_playwright.py,sha256=EhvJLaH5TD7bmDi12uP8YLd0fRhdjR-oyIkBHLi1Jjs,5988
|
12
12
|
chatterer/examples/make_ppt.py,sha256=vsT_iL_jS2ami5VYrReLMQcD576FfZUH7913F7_As0A,23278
|
13
|
-
chatterer/examples/pdf_to_markdown.py,sha256=
|
14
|
-
chatterer/examples/pdf_to_text.py,sha256=
|
13
|
+
chatterer/examples/pdf_to_markdown.py,sha256=ZeGRO5CZxGQxJpScK0iB1lTzUkfSiXtuqoeKEQL1ICA,2787
|
14
|
+
chatterer/examples/pdf_to_text.py,sha256=DznTyhu1REv8Wp4RimQWVgEU5j0_BmlwjfJYJvx3dbI,1590
|
15
15
|
chatterer/examples/transcription_api.py,sha256=WUs12qHH4616eLMQDHOiyVGxaXstTpgeE47djYyli6c,3897
|
16
16
|
chatterer/examples/upstage_parser.py,sha256=TrfeSIiF0xklhFCknop22TIOVibI4CJ_UKj5-lD8c8E,3487
|
17
17
|
chatterer/examples/webpage_to_markdown.py,sha256=DnZfQ-trXBiOiszA2tMlgadgKH-ObTi6l4gGloT-cQw,2846
|
@@ -37,8 +37,8 @@ chatterer/utils/base64_image.py,sha256=m_qAT3ERBiq8D-H4H9Z7rLfL31_BiPmV_m4uQ5XRL
|
|
37
37
|
chatterer/utils/bytesio.py,sha256=3MC2atOOFKo5YxuReo_y_t8Wem9p2Y1ahC5M2lGclwI,2618
|
38
38
|
chatterer/utils/code_agent.py,sha256=7ka_WRI4TQmZ5H46mjY3hI6RO_pxw6pg3LAxjgW4AbM,10495
|
39
39
|
chatterer/utils/imghdr.py,sha256=6JhJMXD4MZ0dQolT2VM87YrRYm3hPf3RTEWnP4lYRVc,3842
|
40
|
-
chatterer-0.1.
|
41
|
-
chatterer-0.1.
|
42
|
-
chatterer-0.1.
|
43
|
-
chatterer-0.1.
|
44
|
-
chatterer-0.1.
|
40
|
+
chatterer-0.1.22.dist-info/METADATA,sha256=_N_S_-lren8KuZAH8DJnc7DBXKZTGhp2hoFuMyvFUt4,11826
|
41
|
+
chatterer-0.1.22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
42
|
+
chatterer-0.1.22.dist-info/entry_points.txt,sha256=KhxL2dctnZalnDSmPoB5dZBBa9hZpJETW3C5xkoRaW4,554
|
43
|
+
chatterer-0.1.22.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
|
44
|
+
chatterer-0.1.22.dist-info/RECORD,,
|
File without changes
|
File without changes
|