pdf-invoke 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Luciano Bermudez
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf-invoke
3
+ Version: 0.1.0
4
+ Summary: An LLM utility for working with PDFs
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Author: Luciano Bermudez
8
+ Author-email: lberm007@ucr.edu
9
+ Requires-Python: >=3.10,<4.0.0
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Requires-Dist: dotenv (>=0.9.9,<0.10.0)
17
+ Requires-Dist: langchain (>=1.2.10,<2.0.0)
18
+ Requires-Dist: langchain-core (>=1.2.11,<2.0.0)
19
+ Requires-Dist: pillow (>=12.1.1,<13.0.0)
20
+ Requires-Dist: pydantic (>=2.12.5,<3.0.0)
21
+ Requires-Dist: pymupdf (>=1.27.1,<2.0.0)
22
+ Description-Content-Type: text/markdown
23
+
24
+ # A package
@@ -0,0 +1 @@
1
+ # A package
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "pdf-invoke"
3
+ version = "0.1.0"
4
+ description = "An LLM utility for working with PDFs"
5
+ authors = [
6
+ {name = "Luciano Bermudez",email = "lberm007@ucr.edu"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.10,<4.0.0"
10
+ dependencies = [
11
+ "langchain-core (>=1.2.11,<2.0.0)",
12
+ "pydantic (>=2.12.5,<3.0.0)",
13
+ "pymupdf (>=1.27.1,<2.0.0)",
14
+ "pillow (>=12.1.1,<13.0.0)",
15
+ "langchain (>=1.2.10,<2.0.0)",
16
+ "dotenv (>=0.9.9,<0.10.0)"
17
+ ]
18
+ license = "MIT"
19
+ license-files = ["LICEN[CS]E*"]
20
+
21
+ [build-system]
22
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
23
+ build-backend = "poetry.core.masonry.api"
24
+
25
+ [dependency-groups]
26
+ dev = [
27
+ "pytest (>=9.0.2,<10.0.0)",
28
+ "langchain-openai (>=1.1.9,<2.0.0)"
29
+ ]
@@ -0,0 +1,2 @@
1
+ from .multimodal_llm import MultiModalLLM, BaseOutput
2
+ from .converter import PDFImageConverter
@@ -0,0 +1,121 @@
1
+ from pathlib import Path
2
+ from typing import Iterable, List
3
+
4
+ import pymupdf
5
+
6
+ from pdf_invoke.types import ImageExt, PDFInput
7
+ from pdf_invoke.utils import get_image_type, is_pdf_bytes, validate_image_bytes
8
+
9
+
10
class PDFImageConverter:
    """Convert between PDF documents and per-page raster images via PyMuPDF."""

    def pdf_to_images(
        self, pdf: PDFInput, zoom: float = 0.2, ext: ImageExt = "png"
    ) -> List[bytes]:
        """Render every page of ``pdf`` to encoded image bytes.

        Args:
            pdf: Raw PDF data (``bytes``, ``bytearray`` or ``memoryview``) or a
                filesystem path (``str`` or ``Path``).
            zoom: Factor passed to ``pymupdf.Matrix`` for both axes.
            ext: Image format handed to ``Pixmap.tobytes``.

        Returns:
            One encoded image per page, in page order.

        Raises:
            ValueError: If the input has an unsupported type, does not carry
                the PDF signature, or cannot be opened.
        """
        try:
            if isinstance(pdf, (bytes, bytearray, memoryview)):
                # Normalize once and open from the validated copy.
                pdf_bytes = bytes(pdf)
                self._validate_pdf_bytes(pdf_bytes)
                doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
            elif isinstance(pdf, (Path, str)):
                doc = pymupdf.open(Path(pdf).as_posix())
            else:
                raise TypeError("PDF is not of expected type")
        except Exception as e:
            # Callers only see ValueError; keep the root cause attached.
            raise ValueError(f"Failed to open pdf {e}") from e

        # Close the document even when rendering a page fails.
        try:
            matrix = pymupdf.Matrix(zoom, zoom)
            return [page.get_pixmap(matrix=matrix).tobytes(ext) for page in doc]
        finally:
            doc.close()

    def images_to_pdf(
        self, images: Iterable[bytes], allowed_formats=("png", "jpeg")
    ) -> bytes:
        """Build a PDF with one page per image, each page sized to its image.

        Args:
            images: Encoded image bytes; any iterable, including a generator.
            allowed_formats: Lower-case format names the images may have.

        Returns:
            The serialized PDF document.

        Raises:
            ValueError: If no image is supplied, an image is invalid or of a
                disallowed format, or the result is not a PDF.
        """
        # Materialize first: the images are walked twice (validation, then
        # insertion) and a one-shot iterator would be empty the second time.
        images = list(images)
        if not images:
            raise ValueError("At least one image is required to build a PDF")
        validate_image_bytes(images, allowed_formats)

        doc = pymupdf.open()
        try:
            for img_bytes in images:
                img_doc = pymupdf.open(
                    stream=img_bytes, filetype=get_image_type(img_bytes)
                )
                try:
                    rect = img_doc[0].rect
                    page = doc.new_page(width=rect.width, height=rect.height)
                    page.insert_image(rect, stream=img_bytes)
                finally:
                    img_doc.close()
            pdf_bytes = doc.tobytes()
        finally:
            doc.close()

        # Should be a PDF either way, but verify before handing it back.
        self._validate_pdf_bytes(pdf_bytes)
        return pdf_bytes

    def save_pdf_to_images(
        self,
        pdf: PDFInput,
        output_path: str | Path,
        pdf_name: str | None = None,
        ext: ImageExt = "png",
        start: int = 0,
    ) -> str:
        """Render ``pdf`` and write one image file per page into ``output_path``.

        Files are named ``{pdf_name}_page_{i}.{ext}`` with ``i`` counting up
        from ``start``.

        Args:
            pdf: PDF data or path, as accepted by ``pdf_to_images``.
            output_path: Existing directory to write into.
            pdf_name: File-name stem; derived from the path when ``pdf`` is a
                path and no name is given.
            ext: Image format used for both encoding and the file extension.
            start: Number assigned to the first page.

        Returns:
            The output directory as a POSIX-style string.

        Raises:
            ValueError: If the name cannot be determined, the directory does
                not exist, or the PDF cannot be opened.
        """
        pdf_name = self._validate_pdf_name(pdf, pdf_name)
        output_path = self._validate_path(output_path)
        # Encode in the requested format so the bytes match the extension.
        data = self.pdf_to_images(pdf, ext=ext)
        for i, b in enumerate(data, start=start):
            out = output_path / f"{pdf_name}_page_{i}.{ext}"
            out.write_bytes(b)
        return output_path.as_posix()

    def save_images_to_pdf(
        self,
        images: Iterable[bytes],
        output_path: str | Path,
        pdf_name: str,
    ) -> str:
        """Combine ``images`` into a PDF and write it under ``output_path``.

        Args:
            images: Encoded image bytes, one per page.
            output_path: Existing directory to write into.
            pdf_name: Target file name; a suffix other than ``.pdf`` is
                replaced by ``.pdf``.

        Returns:
            The written file's path as a POSIX-style string.

        Raises:
            ValueError: If the directory does not exist or the images are
                invalid.
        """
        pdf_name = self._validate_pdf_name(name=pdf_name)
        output_path = self._validate_path(output_path)
        pdf_path = output_path / pdf_name

        if pdf_path.suffix.lower() != ".pdf":
            pdf_path = pdf_path.with_suffix(".pdf")

        pdf_bytes = self.images_to_pdf(images)

        pdf_path.write_bytes(pdf_bytes)
        return pdf_path.as_posix()

    def _validate_pdf_name(
        self,
        pdf: PDFInput | None = None,
        name: str | None = None,
    ) -> str:
        """Return ``name`` if given, otherwise the stem of a path-like ``pdf``.

        Raises:
            ValueError: If ``name`` is ``None`` and ``pdf`` is not a path.
        """
        if name is not None:
            return name

        if isinstance(pdf, (str, Path)):
            return Path(pdf).stem

        raise ValueError(
            "Unable to determine PDF name. "
            "Provide either a file path (str or Path) or explicitly pass `name` "
            "when supplying raw PDF bytes."
        )

    def _validate_pdf_bytes(self, data):
        """Raise ``ValueError`` unless ``data`` passes ``is_pdf_bytes``."""
        if not is_pdf_bytes(data):
            raise ValueError("Document is not pdf")

    def _validate_path(self, path: str | Path) -> Path:
        """Return ``path`` as a ``Path``; raise ``ValueError`` if it does not exist."""
        path = Path(path)
        if not path.exists():
            raise ValueError(f"Failed to validate pdf {path} cannot be resolved")
        return path
114
+
115
+
116
if __name__ == "__main__":
    # Ad-hoc smoke check: read one sample page image and print its first
    # bytes and detected format. The path is relative to the working
    # directory and is built from parts so it also resolves on POSIX, where a
    # backslash-separated literal would be taken as a single file name.
    print("Test")
    image_path = Path("pdf_invoke") / "data" / "images" / "Lecture_02_03_page_1.png"
    image = image_path.read_bytes()
    print(image[:10])
    print(get_image_type(image))
@@ -0,0 +1,120 @@
1
+ from base64 import b64encode
2
+ from typing import Iterable, Optional, Sequence, Type
3
+ from pathlib import Path
4
+ from langchain_core.language_models.chat_models import BaseChatModel
5
+ from pydantic import BaseModel
6
+
7
+ from pdf_invoke.converter import PDFImageConverter
8
+ from pdf_invoke.types import PDFInput, ALLOWED_MIME, ImageInput
9
+ from pdf_invoke.utils import validate_image_bytes
10
+
11
+
12
class BaseOutput(BaseModel):
    """Default structured-output schema for ``MultiModalLLM.invoke``/``ainvoke``."""

    # The only field: the model's answer as a string.
    data: str
14
+
15
+
16
class MultiModalLLM:
    """Send a fixed text prompt together with images to a LangChain chat model.

    The images come either from rendering a PDF page by page or from
    caller-supplied image data or paths.
    """

    def __init__(
        self,
        *,
        prompt: str,
        model: BaseChatModel,
    ):
        """Store the text prompt and the chat model used for every call."""
        # Base configuration
        self.prompt = prompt
        self.llm = model

    def _validate_input(
        self,
        pdf: PDFInput | None = None,
        images: Sequence[ImageInput] | None = None,
        ext: str = "png",
    ) -> Sequence[bytes]:
        """Resolve exactly one of ``pdf`` / ``images`` into encoded image bytes.

        Args:
            pdf: PDF data or path to render page by page.
            images: Image data or paths to load.
            ext: Image format used when rendering ``pdf``; ignored for
                ``images``.

        Raises:
            ValueError: If neither or both inputs are given, or ``images`` is
                empty.
        """
        if pdf is None and images is None:
            raise ValueError("Either pdfinput or image_bytes must be provided")
        if pdf is not None and images is not None:
            raise ValueError("Provide only one of pdfinput or image_bytes")

        # Compare against None rather than truthiness, so an empty value is
        # reported by the code that understands it, not as an unexpected state.
        if pdf is not None:
            return PDFImageConverter().pdf_to_images(pdf, ext=ext)
        if not images:
            raise ValueError("images must contain at least one image")
        return [self._image_to_bytes(i) for i in images]

    def invoke(
        self,
        pdf: PDFInput | None = None,
        images: Sequence[ImageInput] | None = None,
        output_model: Optional[Type[BaseModel]] = BaseOutput,
        mime: ALLOWED_MIME = "image/png",
    ):
        """Call the model synchronously with the prompt and the images.

        Args:
            pdf: PDF to render into page images (exclusive with ``images``).
            images: Ready-made images (exclusive with ``pdf``).
            output_model: Schema for structured output; a falsy value calls
                the model directly.
            mime: MIME type of the images; PDF pages are rendered to match it.

        Returns:
            Whatever the model (or its structured-output wrapper) returns.

        Raises:
            ValueError: If the inputs are invalid.
            RuntimeError: If building the payload or calling the model fails.
        """
        # Render PDF pages in the format named by ``mime``; otherwise the
        # format check in ``prepare_image_payload`` rejects them.
        image_bytes = self._validate_input(pdf, images, ext=mime.split("/")[-1])
        try:
            message = self.prepare_payload(image_bytes, mime)
            if output_model:
                chain = self.llm.with_structured_output(schema=output_model)
                return chain.invoke([message])
            return self.llm.invoke([message])
        except Exception as e:
            raise RuntimeError(f"Failed to invoke model {e}") from e

    async def ainvoke(
        self,
        pdf: PDFInput | None = None,
        images: Sequence[ImageInput] | None = None,
        output_model: Optional[Type[BaseModel]] = BaseOutput,
        mime: ALLOWED_MIME = "image/png",
    ):
        """Async counterpart of ``invoke``; takes the same arguments.

        Model errors propagate unwrapped here, whereas ``invoke`` wraps them in
        ``RuntimeError``.
        """
        image_bytes = self._validate_input(pdf, images, ext=mime.split("/")[-1])
        message = self.prepare_payload(image_bytes, mime)
        # Await the call so the caller receives the result, not a coroutine.
        if output_model:
            chain = self.llm.with_structured_output(
                schema=output_model,
            )
            return await chain.ainvoke([message])
        return await self.llm.ainvoke([message])

    def prepare_payload(self, data: Sequence[bytes], mime: ALLOWED_MIME = "image/png"):
        """Build one user message: the text prompt followed by the images.

        Raises:
            RuntimeError: If the image parts cannot be prepared.
        """
        try:
            image_payload = self.prepare_image_payload(data, mime=mime)
            message = {
                "role": "user",
                "content": [{"type": "text", "text": self.prompt}, *image_payload],
            }
            return message
        except Exception as e:
            raise RuntimeError(f"Failed to prepare payload for LLM. Error: {e}") from e

    def prepare_image_payload(
        self,
        payload: Iterable[bytes],
        mime: ALLOWED_MIME = "image/png",
    ):
        """Encode images as ``image_url`` content parts carrying base64 data URLs.

        Args:
            payload: Encoded image bytes; any iterable, including a generator.
            mime: MIME type every image must match; its subtype is the only
                allowed image format.

        Raises:
            ValueError: If an image is invalid or of a different format.
        """
        # Materialize first: the images are validated and then encoded, and a
        # one-shot iterator would be exhausted by the validation pass.
        images = list(payload)
        allowed_format = mime.split("/")[-1]
        validate_image_bytes(images, allowed_formats={allowed_format})

        parts = []
        for img in images:
            # Encode outside the f-string: reusing the quote character inside
            # a replacement field is a SyntaxError before Python 3.12.
            encoded = b64encode(img).decode("utf-8")
            parts.append(
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime};base64,{encoded}"},
                }
            )
        return parts

    def _image_to_bytes(self, image: ImageInput) -> bytes:
        """Return the image as ``bytes``, reading it from disk when given a path.

        Raises:
            ValueError: If the type is unsupported or the file cannot be read.
        """
        try:
            if isinstance(image, (bytes, bytearray, memoryview)):
                # Copy into real bytes so the return annotation holds.
                return bytes(image)
            if isinstance(image, (str, Path)):
                return Path(image).read_bytes()
            raise TypeError(
                f"Failed to conver image to bytes received incorrect type image is of type {type(image)}"
            )
        except Exception as e:
            raise ValueError(f"Failed to convert image to bytes {e}") from e
@@ -0,0 +1,9 @@
1
+ from pathlib import Path
2
+ from typing import Literal, Iterable
3
+
4
+
5
+ PDFInput = str | Path | bytes
6
+ ImageInput = str | Path | bytes
7
+ ImageBytes = bytes
8
+ ImageExt = Literal["png", "jpeg"]
9
+ ALLOWED_MIME = Literal["image/jpeg", "image/png"]
@@ -0,0 +1,33 @@
1
+ from PIL import Image
2
+ import io
3
+ from typing import Iterable, List
4
+
5
+
6
def is_pdf_bytes(data: bytes) -> bool:
    """Return True when ``data`` starts with the ``%PDF-`` signature.

    Accepts ``bytes``, ``bytearray`` and ``memoryview``. Only the leading
    bytes are inspected; the rest of the document is not checked.
    """
    magic = b"%PDF-"
    # Slice and compare instead of ``startswith``, which memoryview lacks.
    return bytes(data[: len(magic)]) == magic
8
+
9
+
10
def get_image_type(image_bytes: bytes) -> str:
    """Return the image format Pillow detects for ``image_bytes``.

    Returns:
        Pillow's format name, e.g. ``'PNG'`` or ``'JPEG'``.

    Raises:
        ValueError: If the bytes cannot be opened as an image or no format is
            reported.
    """
    try:
        with Image.open(io.BytesIO(image_bytes)) as img:
            fmt = img.format
    except Exception as exc:
        # Report one error type to callers but keep the root cause attached.
        raise ValueError("Provided bytes are not a valid image.") from exc
    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if not fmt:
        raise ValueError("Provided bytes are not a valid image.")
    return fmt
17
+
18
+
19
+ def validate_image_bytes(
20
+ images: Iterable[bytes],
21
+ allowed_formats: set[str] | None = None,
22
+ ) -> List[str]:
23
+ formats = []
24
+ for idx, img_bytes in enumerate(images):
25
+ try:
26
+ fmt = get_image_type(img_bytes)
27
+
28
+ if allowed_formats and fmt.lower() not in allowed_formats:
29
+ raise ValueError(f"Image at index {idx} has unsupported format: {fmt}")
30
+ formats.append(fmt)
31
+ except Exception as e:
32
+ raise ValueError(f"Invalid image at index {idx}: {e}") from e
33
+ return formats