optical-context-mcp 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Christopher Boebel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,176 @@
1
+ Metadata-Version: 2.4
2
+ Name: optical-context-mcp
3
+ Version: 0.1.1
4
+ Summary: FastMCP server for compressing large OCR-heavy PDFs into dense packed images for agent workflows.
5
+ Author: Christopher Boebel
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/ChrBoebel/optical-context-mcp
8
+ Project-URL: Issues, https://github.com/ChrBoebel/optical-context-mcp/issues
9
+ Keywords: mcp,fastmcp,ocr,pdf,vision,document-processing
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastmcp>=3.1.0
22
+ Requires-Dist: mistralai>=1.12.0
23
+ Requires-Dist: Pillow>=12.0.0
24
+ Requires-Dist: python-dotenv>=1.2.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ <!-- mcp-name: io.github.chrboebel/optical-context-mcp -->
30
+
31
+ <p align="center">
32
+ <img src="./assets/optical-context-logo.png" alt="Optical Context MCP logo" width="680">
33
+ </p>
34
+
35
+ <h1 align="center">Optical Context MCP</h1>
36
+
37
+ <p align="center">
38
+ FastMCP server for compressing large, OCR-heavy PDFs into dense packed images for agent workflows.
39
+ </p>
40
+
41
+ <p align="center">
42
+ <a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"></a>
43
+ <a href="https://gofastmcp.com/"><img src="https://img.shields.io/badge/MCP-FastMCP-111111.svg" alt="FastMCP"></a>
44
+ <a href="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml"><img src="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
45
+ <a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="MIT License"></a>
46
+ </p>
47
+
48
+ Optical Context MCP is built for one specific problem: giving agents a practical way to work with **large, visually structured PDFs** without sending every page individually to a vision model.
49
+
50
+ It reads a local PDF, runs OCR with Mistral, recomposes the extracted text and figures into a much smaller set of packed images, and exposes those artifacts over MCP for batch retrieval.
51
+
52
+ ## What It Does
53
+
54
+ - reads a local PDF from the MCP host machine
55
+ - extracts page markdown and embedded images with Mistral OCR
56
+ - packs that content into dense PNGs that preserve visual grouping
57
+ - stores a manifest and job artifacts for follow-up retrieval
58
+ - lets an agent pull only the packed images it needs
59
+
60
+ ## Where It Fits
61
+
62
+ Use it for:
63
+
64
+ - operating manuals
65
+ - scanned handbooks
66
+ - product catalogs
67
+ - PDF slide decks
68
+ - visually structured OCR-heavy documents
69
+
70
+ Skip it for:
71
+
72
+ - tiny PDFs
73
+ - clean text-native PDFs where normal extraction is enough
74
+ - workflows that require exact page-faithful rendering
75
+ - cases where OCR cost is not justified
76
+
77
+ ## Example Result
78
+
79
+ The image below shows a real local validation run on a public research paper with dense text, figures, charts, and page-level visual structure. The packed image on the right consolidates the seven source pages shown on the left.
80
+
81
+ <p align="center">
82
+ <img src="./assets/original-vs-packed-comparison-straight-arrow.png" alt="Side-by-side comparison of original pages and the generated packed output" width="980">
83
+ </p>
84
+
85
+ Example local run facts from the generated manifest:
86
+
87
+ - source paper pages: 22
88
+ - previewed source page range: 15 to 21
89
+ - extracted images: 30
90
+ - packed output images: 6
91
+ - example packed image size: `986x1084`
92
+ - example packed image file size: `536,697 bytes`
93
+
94
+ This example shows the intended workflow: take a long, visually structured PDF and compress it into a smaller set of retrievable packed images that still preserve the visual structure of the source.
95
+
96
+ ## Install
97
+
98
+ ```bash
99
+ python -m pip install "git+https://github.com/ChrBoebel/optical-context-mcp.git@v0.1.1"
100
+ ```
101
+
102
+ Run directly from GitHub with `uvx`:
103
+
104
+ ```bash
105
+ uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
106
+ ```
107
+
108
+ - `MISTRAL_API_KEY` is required for `compress_pdf`
109
+
110
+ ## Run
111
+
112
+ Default transport is `stdio`:
113
+
114
+ ```bash
115
+ optical-context-mcp
116
+ ```
117
+
118
+ ## Claude Code
119
+
120
+ Register the server in a project:
121
+
122
+ ```bash
123
+ claude mcp add -s project optical-context -- uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
124
+ ```
125
+
126
+ Typical use:
127
+
128
+ 1. call `compress_pdf`
129
+ 2. inspect the returned manifest
130
+ 3. fetch packed images with `get_packed_images`
131
+
132
+ ## MCP Tools
133
+
134
+ - `compress_pdf`: run OCR plus recomposition and create a stored job
135
+ - `get_job_manifest`: load metadata for an existing job
136
+ - `get_packed_images`: fetch one or more packed PNGs from an existing job
137
+
138
+ ## How It Works
139
+
140
+ ```mermaid
141
+ flowchart LR
142
+ A["Local PDF"] --> B["Mistral OCR"]
143
+ B --> C["Page markdown + embedded images"]
144
+ C --> D["Recomposition engine"]
145
+ D --> E["Dense packed PNG images"]
146
+ E --> F["Stored job artifacts"]
147
+ F --> G["Agent fetches manifest or image batches over MCP"]
148
+ ```
149
+
150
+ ## Why Packed Images Instead Of Just OCR Text
151
+
152
+ - section grouping
153
+ - table-like layout
154
+ - captions near figures
155
+ - visual adjacency between text and embedded graphics
156
+
157
+ Packed images preserve these layout cues. For many vision-capable agents, that is a better intermediate format than a plain OCR dump.
158
+
159
+ ## Current Scope
160
+
161
+ - depends on Mistral OCR
162
+ - currently handles local file paths, not remote uploads
163
+ - optimized for compression and retrieval, not final polished markdown generation
164
+ - quality depends on OCR quality and the visual density of the source document
165
+
166
+ ## Roadmap
167
+
168
+ - make the OCR layer provider-agnostic so different OCR backends can be swapped behind the same MCP workflow
169
+
170
+ ## Development
171
+
172
+ ```bash
173
+ uv venv --python 3.11 .venv
174
+ uv pip install --python .venv/bin/python -e ".[dev]"
175
+ .venv/bin/python -m pytest
176
+ ```
@@ -0,0 +1,148 @@
1
+ <!-- mcp-name: io.github.chrboebel/optical-context-mcp -->
2
+
3
+ <p align="center">
4
+ <img src="./assets/optical-context-logo.png" alt="Optical Context MCP logo" width="680">
5
+ </p>
6
+
7
+ <h1 align="center">Optical Context MCP</h1>
8
+
9
+ <p align="center">
10
+ FastMCP server for compressing large, OCR-heavy PDFs into dense packed images for agent workflows.
11
+ </p>
12
+
13
+ <p align="center">
14
+ <a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"></a>
15
+ <a href="https://gofastmcp.com/"><img src="https://img.shields.io/badge/MCP-FastMCP-111111.svg" alt="FastMCP"></a>
16
+ <a href="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml"><img src="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
17
+ <a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="MIT License"></a>
18
+ </p>
19
+
20
+ Optical Context MCP is built for one specific problem: giving agents a practical way to work with **large, visually structured PDFs** without sending every page individually to a vision model.
21
+
22
+ It reads a local PDF, runs OCR with Mistral, recomposes the extracted text and figures into a much smaller set of packed images, and exposes those artifacts over MCP for batch retrieval.
23
+
24
+ ## What It Does
25
+
26
+ - reads a local PDF from the MCP host machine
27
+ - extracts page markdown and embedded images with Mistral OCR
28
+ - packs that content into dense PNGs that preserve visual grouping
29
+ - stores a manifest and job artifacts for follow-up retrieval
30
+ - lets an agent pull only the packed images it needs
31
+
32
+ ## Where It Fits
33
+
34
+ Use it for:
35
+
36
+ - operating manuals
37
+ - scanned handbooks
38
+ - product catalogs
39
+ - PDF slide decks
40
+ - visually structured OCR-heavy documents
41
+
42
+ Skip it for:
43
+
44
+ - tiny PDFs
45
+ - clean text-native PDFs where normal extraction is enough
46
+ - workflows that require exact page-faithful rendering
47
+ - cases where OCR cost is not justified
48
+
49
+ ## Example Result
50
+
51
+ The image below shows a real local validation run on a public research paper with dense text, figures, charts, and page-level visual structure. The packed image on the right consolidates the seven source pages shown on the left.
52
+
53
+ <p align="center">
54
+ <img src="./assets/original-vs-packed-comparison-straight-arrow.png" alt="Side-by-side comparison of original pages and the generated packed output" width="980">
55
+ </p>
56
+
57
+ Example local run facts from the generated manifest:
58
+
59
+ - source paper pages: 22
60
+ - previewed source page range: 15 to 21
61
+ - extracted images: 30
62
+ - packed output images: 6
63
+ - example packed image size: `986x1084`
64
+ - example packed image file size: `536,697 bytes`
65
+
66
+ This example shows the intended workflow: take a long, visually structured PDF and compress it into a smaller set of retrievable packed images that still preserve the visual structure of the source.
67
+
68
+ ## Install
69
+
70
+ ```bash
71
+ python -m pip install "git+https://github.com/ChrBoebel/optical-context-mcp.git@v0.1.1"
72
+ ```
73
+
74
+ Run directly from GitHub with `uvx`:
75
+
76
+ ```bash
77
+ uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
78
+ ```
79
+
80
+ - `MISTRAL_API_KEY` is required for `compress_pdf`
81
+
82
+ ## Run
83
+
84
+ Default transport is `stdio`:
85
+
86
+ ```bash
87
+ optical-context-mcp
88
+ ```
89
+
90
+ ## Claude Code
91
+
92
+ Register the server in a project:
93
+
94
+ ```bash
95
+ claude mcp add -s project optical-context -- uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
96
+ ```
97
+
98
+ Typical use:
99
+
100
+ 1. call `compress_pdf`
101
+ 2. inspect the returned manifest
102
+ 3. fetch packed images with `get_packed_images`
103
+
104
+ ## MCP Tools
105
+
106
+ - `compress_pdf`: run OCR plus recomposition and create a stored job
107
+ - `get_job_manifest`: load metadata for an existing job
108
+ - `get_packed_images`: fetch one or more packed PNGs from an existing job
109
+
110
+ ## How It Works
111
+
112
+ ```mermaid
113
+ flowchart LR
114
+ A["Local PDF"] --> B["Mistral OCR"]
115
+ B --> C["Page markdown + embedded images"]
116
+ C --> D["Recomposition engine"]
117
+ D --> E["Dense packed PNG images"]
118
+ E --> F["Stored job artifacts"]
119
+ F --> G["Agent fetches manifest or image batches over MCP"]
120
+ ```
121
+
122
+ ## Why Packed Images Instead Of Just OCR Text
123
+
124
+ - section grouping
125
+ - table-like layout
126
+ - captions near figures
127
+ - visual adjacency between text and embedded graphics
128
+
129
+ Packed images preserve these layout cues. For many vision-capable agents, that is a better intermediate format than a plain OCR dump.
130
+
131
+ ## Current Scope
132
+
133
+ - depends on Mistral OCR
134
+ - currently handles local file paths, not remote uploads
135
+ - optimized for compression and retrieval, not final polished markdown generation
136
+ - quality depends on OCR quality and the visual density of the source document
137
+
138
+ ## Roadmap
139
+
140
+ - make the OCR layer provider-agnostic so different OCR backends can be swapped behind the same MCP workflow
141
+
142
+ ## Development
143
+
144
+ ```bash
145
+ uv venv --python 3.11 .venv
146
+ uv pip install --python .venv/bin/python -e ".[dev]"
147
+ .venv/bin/python -m pytest
148
+ ```
@@ -0,0 +1,176 @@
1
+ Metadata-Version: 2.4
2
+ Name: optical-context-mcp
3
+ Version: 0.1.1
4
+ Summary: FastMCP server for compressing large OCR-heavy PDFs into dense packed images for agent workflows.
5
+ Author: Christopher Boebel
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/ChrBoebel/optical-context-mcp
8
+ Project-URL: Issues, https://github.com/ChrBoebel/optical-context-mcp/issues
9
+ Keywords: mcp,fastmcp,ocr,pdf,vision,document-processing
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: fastmcp>=3.1.0
22
+ Requires-Dist: mistralai>=1.12.0
23
+ Requires-Dist: Pillow>=12.0.0
24
+ Requires-Dist: python-dotenv>=1.2.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
27
+ Dynamic: license-file
28
+
29
+ <!-- mcp-name: io.github.chrboebel/optical-context-mcp -->
30
+
31
+ <p align="center">
32
+ <img src="./assets/optical-context-logo.png" alt="Optical Context MCP logo" width="680">
33
+ </p>
34
+
35
+ <h1 align="center">Optical Context MCP</h1>
36
+
37
+ <p align="center">
38
+ FastMCP server for compressing large, OCR-heavy PDFs into dense packed images for agent workflows.
39
+ </p>
40
+
41
+ <p align="center">
42
+ <a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"></a>
43
+ <a href="https://gofastmcp.com/"><img src="https://img.shields.io/badge/MCP-FastMCP-111111.svg" alt="FastMCP"></a>
44
+ <a href="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml"><img src="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
45
+ <a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="MIT License"></a>
46
+ </p>
47
+
48
+ Optical Context MCP is built for one specific problem: giving agents a practical way to work with **large, visually structured PDFs** without sending every page individually to a vision model.
49
+
50
+ It reads a local PDF, runs OCR with Mistral, recomposes the extracted text and figures into a much smaller set of packed images, and exposes those artifacts over MCP for batch retrieval.
51
+
52
+ ## What It Does
53
+
54
+ - reads a local PDF from the MCP host machine
55
+ - extracts page markdown and embedded images with Mistral OCR
56
+ - packs that content into dense PNGs that preserve visual grouping
57
+ - stores a manifest and job artifacts for follow-up retrieval
58
+ - lets an agent pull only the packed images it needs
59
+
60
+ ## Where It Fits
61
+
62
+ Use it for:
63
+
64
+ - operating manuals
65
+ - scanned handbooks
66
+ - product catalogs
67
+ - PDF slide decks
68
+ - visually structured OCR-heavy documents
69
+
70
+ Skip it for:
71
+
72
+ - tiny PDFs
73
+ - clean text-native PDFs where normal extraction is enough
74
+ - workflows that require exact page-faithful rendering
75
+ - cases where OCR cost is not justified
76
+
77
+ ## Example Result
78
+
79
+ The image below shows a real local validation run on a public research paper with dense text, figures, charts, and page-level visual structure. The packed image on the right consolidates the seven source pages shown on the left.
80
+
81
+ <p align="center">
82
+ <img src="./assets/original-vs-packed-comparison-straight-arrow.png" alt="Side-by-side comparison of original pages and the generated packed output" width="980">
83
+ </p>
84
+
85
+ Example local run facts from the generated manifest:
86
+
87
+ - source paper pages: 22
88
+ - previewed source page range: 15 to 21
89
+ - extracted images: 30
90
+ - packed output images: 6
91
+ - example packed image size: `986x1084`
92
+ - example packed image file size: `536,697 bytes`
93
+
94
+ This example shows the intended workflow: take a long, visually structured PDF and compress it into a smaller set of retrievable packed images that still preserve the visual structure of the source.
95
+
96
+ ## Install
97
+
98
+ ```bash
99
+ python -m pip install "git+https://github.com/ChrBoebel/optical-context-mcp.git@v0.1.1"
100
+ ```
101
+
102
+ Run directly from GitHub with `uvx`:
103
+
104
+ ```bash
105
+ uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
106
+ ```
107
+
108
+ - `MISTRAL_API_KEY` is required for `compress_pdf`
109
+
110
+ ## Run
111
+
112
+ Default transport is `stdio`:
113
+
114
+ ```bash
115
+ optical-context-mcp
116
+ ```
117
+
118
+ ## Claude Code
119
+
120
+ Register the server in a project:
121
+
122
+ ```bash
123
+ claude mcp add -s project optical-context -- uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
124
+ ```
125
+
126
+ Typical use:
127
+
128
+ 1. call `compress_pdf`
129
+ 2. inspect the returned manifest
130
+ 3. fetch packed images with `get_packed_images`
131
+
132
+ ## MCP Tools
133
+
134
+ - `compress_pdf`: run OCR plus recomposition and create a stored job
135
+ - `get_job_manifest`: load metadata for an existing job
136
+ - `get_packed_images`: fetch one or more packed PNGs from an existing job
137
+
138
+ ## How It Works
139
+
140
+ ```mermaid
141
+ flowchart LR
142
+ A["Local PDF"] --> B["Mistral OCR"]
143
+ B --> C["Page markdown + embedded images"]
144
+ C --> D["Recomposition engine"]
145
+ D --> E["Dense packed PNG images"]
146
+ E --> F["Stored job artifacts"]
147
+ F --> G["Agent fetches manifest or image batches over MCP"]
148
+ ```
149
+
150
+ ## Why Packed Images Instead Of Just OCR Text
151
+
152
+ - section grouping
153
+ - table-like layout
154
+ - captions near figures
155
+ - visual adjacency between text and embedded graphics
156
+
157
+ Packed images preserve these layout cues. For many vision-capable agents, that is a better intermediate format than a plain OCR dump.
158
+
159
+ ## Current Scope
160
+
161
+ - depends on Mistral OCR
162
+ - currently handles local file paths, not remote uploads
163
+ - optimized for compression and retrieval, not final polished markdown generation
164
+ - quality depends on OCR quality and the visual density of the source document
165
+
166
+ ## Roadmap
167
+
168
+ - make the OCR layer provider-agnostic so different OCR backends can be swapped behind the same MCP workflow
169
+
170
+ ## Development
171
+
172
+ ```bash
173
+ uv venv --python 3.11 .venv
174
+ uv pip install --python .venv/bin/python -e ".[dev]"
175
+ .venv/bin/python -m pytest
176
+ ```
@@ -0,0 +1,19 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ optical_context_mcp.egg-info/PKG-INFO
5
+ optical_context_mcp.egg-info/SOURCES.txt
6
+ optical_context_mcp.egg-info/dependency_links.txt
7
+ optical_context_mcp.egg-info/entry_points.txt
8
+ optical_context_mcp.egg-info/requires.txt
9
+ optical_context_mcp.egg-info/top_level.txt
10
+ optical_mcp/__init__.py
11
+ optical_mcp/__main__.py
12
+ optical_mcp/mistral_client.py
13
+ optical_mcp/models.py
14
+ optical_mcp/recomposition.py
15
+ optical_mcp/server.py
16
+ optical_mcp/service.py
17
+ optical_mcp/storage.py
18
+ tests/test_server_tools.py
19
+ tests/test_storage.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ optical-context-mcp = optical_mcp.server:main
@@ -0,0 +1,7 @@
1
+ fastmcp>=3.1.0
2
+ mistralai>=1.12.0
3
+ Pillow>=12.0.0
4
+ python-dotenv>=1.2.0
5
+
6
+ [dev]
7
+ pytest>=8.0.0
@@ -0,0 +1 @@
1
+ """Optical Context MCP package."""
@@ -0,0 +1,5 @@
1
+ from .server import main
2
+
3
+
4
# When this module is executed as a script, hand control to the
# ``main`` callable imported from ``.server``.
if __name__ == "__main__":
    main()
@@ -0,0 +1,60 @@
1
+ """
2
+ Mistral OCR client for document extraction.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import base64
8
+ from pathlib import Path
9
+
10
+ from mistralai import Mistral
11
+
12
+
13
+ class MistralOCRClient:
14
+ """Client for the Mistral OCR API."""
15
+
16
+ def __init__(self, api_key: str):
17
+ if not api_key:
18
+ raise ValueError("Mistral API key not provided")
19
+ self.client = Mistral(api_key=api_key)
20
+ self.model = "mistral-ocr-latest"
21
+
22
+ def extract_pdf(self, pdf_path: Path | str):
23
+ """Extract text and images from a PDF."""
24
+ path = Path(pdf_path)
25
+ with open(path, "rb") as handle:
26
+ pdf_data = base64.b64encode(handle.read()).decode("utf-8")
27
+
28
+ return self.client.ocr.process(
29
+ model=self.model,
30
+ document={
31
+ "type": "document_url",
32
+ "document_url": f"data:application/pdf;base64,{pdf_data}",
33
+ },
34
+ include_image_base64=True,
35
+ )
36
+
37
+ def extract_image(self, image_path: Path | str):
38
+ """Extract text and elements from a single image."""
39
+ path = Path(image_path)
40
+ suffix = path.suffix.lower()
41
+ mime_map = {
42
+ ".png": "image/png",
43
+ ".jpg": "image/jpeg",
44
+ ".jpeg": "image/jpeg",
45
+ ".gif": "image/gif",
46
+ ".webp": "image/webp",
47
+ }
48
+ mime_type = mime_map.get(suffix, "image/png")
49
+
50
+ with open(path, "rb") as handle:
51
+ image_data = base64.b64encode(handle.read()).decode("utf-8")
52
+
53
+ return self.client.ocr.process(
54
+ model=self.model,
55
+ document={
56
+ "type": "image_url",
57
+ "image_url": f"data:{mime_type};base64,{image_data}",
58
+ },
59
+ include_image_base64=True,
60
+ )
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict, dataclass
4
+
5
+
6
+ @dataclass(slots=True)
7
+ class PackedImageArtifact:
8
+ index: int
9
+ path: str
10
+ width: int
11
+ height: int
12
+ size_bytes: int
13
+
14
+ def to_dict(self) -> dict[str, object]:
15
+ return asdict(self)
16
+
17
+
18
+ @dataclass(slots=True)
19
+ class CompressionJobManifest:
20
+ job_id: str
21
+ source_pdf: str
22
+ output_dir: str
23
+ created_at: str
24
+ chars_per_image: int
25
+ page_count: int
26
+ extracted_image_count: int
27
+ packed_image_count: int
28
+ ocr_markdown_path: str
29
+ packed_images: list[PackedImageArtifact]
30
+
31
+ def to_dict(self) -> dict[str, object]:
32
+ return {
33
+ "job_id": self.job_id,
34
+ "source_pdf": self.source_pdf,
35
+ "output_dir": self.output_dir,
36
+ "created_at": self.created_at,
37
+ "chars_per_image": self.chars_per_image,
38
+ "page_count": self.page_count,
39
+ "extracted_image_count": self.extracted_image_count,
40
+ "packed_image_count": self.packed_image_count,
41
+ "ocr_markdown_path": self.ocr_markdown_path,
42
+ "packed_images": [image.to_dict() for image in self.packed_images],
43
+ }