optical-context-mcp 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optical_context_mcp-0.1.1/LICENSE +21 -0
- optical_context_mcp-0.1.1/PKG-INFO +176 -0
- optical_context_mcp-0.1.1/README.md +148 -0
- optical_context_mcp-0.1.1/optical_context_mcp.egg-info/PKG-INFO +176 -0
- optical_context_mcp-0.1.1/optical_context_mcp.egg-info/SOURCES.txt +19 -0
- optical_context_mcp-0.1.1/optical_context_mcp.egg-info/dependency_links.txt +1 -0
- optical_context_mcp-0.1.1/optical_context_mcp.egg-info/entry_points.txt +2 -0
- optical_context_mcp-0.1.1/optical_context_mcp.egg-info/requires.txt +7 -0
- optical_context_mcp-0.1.1/optical_context_mcp.egg-info/top_level.txt +1 -0
- optical_context_mcp-0.1.1/optical_mcp/__init__.py +1 -0
- optical_context_mcp-0.1.1/optical_mcp/__main__.py +5 -0
- optical_context_mcp-0.1.1/optical_mcp/mistral_client.py +60 -0
- optical_context_mcp-0.1.1/optical_mcp/models.py +43 -0
- optical_context_mcp-0.1.1/optical_mcp/recomposition.py +483 -0
- optical_context_mcp-0.1.1/optical_mcp/server.py +210 -0
- optical_context_mcp-0.1.1/optical_mcp/service.py +109 -0
- optical_context_mcp-0.1.1/optical_mcp/storage.py +91 -0
- optical_context_mcp-0.1.1/pyproject.toml +48 -0
- optical_context_mcp-0.1.1/setup.cfg +4 -0
- optical_context_mcp-0.1.1/tests/test_server_tools.py +23 -0
- optical_context_mcp-0.1.1/tests/test_storage.py +37 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Christopher Boebel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: optical-context-mcp
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: FastMCP server for compressing large OCR-heavy PDFs into dense packed images for agent workflows.
|
|
5
|
+
Author: Christopher Boebel
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/ChrBoebel/optical-context-mcp
|
|
8
|
+
Project-URL: Issues, https://github.com/ChrBoebel/optical-context-mcp/issues
|
|
9
|
+
Keywords: mcp,fastmcp,ocr,pdf,vision,document-processing
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastmcp>=3.1.0
|
|
22
|
+
Requires-Dist: mistralai>=1.12.0
|
|
23
|
+
Requires-Dist: Pillow>=12.0.0
|
|
24
|
+
Requires-Dist: python-dotenv>=1.2.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
<!-- mcp-name: io.github.chrboebel/optical-context-mcp -->
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<img src="./assets/optical-context-logo.png" alt="Optical Context MCP logo" width="680">
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<h1 align="center">Optical Context MCP</h1>
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
FastMCP server for compressing large, OCR-heavy PDFs into dense packed images for agent workflows.
|
|
39
|
+
</p>
|
|
40
|
+
|
|
41
|
+
<p align="center">
|
|
42
|
+
<a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"></a>
|
|
43
|
+
<a href="https://gofastmcp.com/"><img src="https://img.shields.io/badge/MCP-FastMCP-111111.svg" alt="FastMCP"></a>
|
|
44
|
+
<a href="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml"><img src="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
45
|
+
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="MIT License"></a>
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
Optical Context MCP is built for one specific problem: giving agents a practical way to work with **large, visually structured PDFs** without sending every page individually to a vision model.
|
|
49
|
+
|
|
50
|
+
It reads a local PDF, runs OCR with Mistral, recomposes the extracted text and figures into a much smaller set of packed images, and exposes those artifacts over MCP for batch retrieval.
|
|
51
|
+
|
|
52
|
+
## What It Does
|
|
53
|
+
|
|
54
|
+
- reads a local PDF from the MCP host machine
|
|
55
|
+
- extracts page markdown and embedded images with Mistral OCR
|
|
56
|
+
- packs that content into dense PNGs that preserve visual grouping
|
|
57
|
+
- stores a manifest and job artifacts for follow-up retrieval
|
|
58
|
+
- lets an agent pull only the packed images it needs
|
|
59
|
+
|
|
60
|
+
## Where It Fits
|
|
61
|
+
|
|
62
|
+
Use it for:
|
|
63
|
+
|
|
64
|
+
- operating manuals
|
|
65
|
+
- scanned handbooks
|
|
66
|
+
- product catalogs
|
|
67
|
+
- PDF slide decks
|
|
68
|
+
- visually structured OCR-heavy documents
|
|
69
|
+
|
|
70
|
+
Skip it for:
|
|
71
|
+
|
|
72
|
+
- tiny PDFs
|
|
73
|
+
- clean text-native PDFs where normal extraction is enough
|
|
74
|
+
- workflows that require exact page-faithful rendering
|
|
75
|
+
- cases where OCR cost is not justified
|
|
76
|
+
|
|
77
|
+
## Example Result
|
|
78
|
+
|
|
79
|
+
The image below shows a real local validation run on a public research paper with dense text, figures, charts, and page-level visual structure. The packed image on the right consolidates the seven source pages shown on the left.
|
|
80
|
+
|
|
81
|
+
<p align="center">
|
|
82
|
+
<img src="./assets/original-vs-packed-comparison-straight-arrow.png" alt="Side-by-side comparison of original pages and the generated packed output" width="980">
|
|
83
|
+
</p>
|
|
84
|
+
|
|
85
|
+
Example local run facts from the generated manifest:
|
|
86
|
+
|
|
87
|
+
- source paper pages: 22
|
|
88
|
+
- previewed source page range: 15 to 21
|
|
89
|
+
- extracted images: 30
|
|
90
|
+
- packed output images: 6
|
|
91
|
+
- example packed image size: `986x1084`
|
|
92
|
+
- example packed image file size: `536,697 bytes`
|
|
93
|
+
|
|
94
|
+
This example shows the intended workflow: take a long, visually structured PDF and compress it into a smaller set of retrievable packed images that still preserve the visual structure of the source.
|
|
95
|
+
|
|
96
|
+
## Install
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
python -m pip install "git+https://github.com/ChrBoebel/optical-context-mcp.git@v0.1.1"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Run directly from GitHub with `uvx`:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
- `MISTRAL_API_KEY` is required for `compress_pdf`
|
|
109
|
+
|
|
110
|
+
## Run
|
|
111
|
+
|
|
112
|
+
Default transport is `stdio`:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
optical-context-mcp
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Claude Code
|
|
119
|
+
|
|
120
|
+
Register the server in a project:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
claude mcp add -s project optical-context -- uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Typical use:
|
|
127
|
+
|
|
128
|
+
1. call `compress_pdf`
|
|
129
|
+
2. inspect the returned manifest
|
|
130
|
+
3. fetch packed images with `get_packed_images`
|
|
131
|
+
|
|
132
|
+
## MCP Tools
|
|
133
|
+
|
|
134
|
+
- `compress_pdf`: run OCR plus recomposition and create a stored job
|
|
135
|
+
- `get_job_manifest`: load metadata for an existing job
|
|
136
|
+
- `get_packed_images`: fetch one or more packed PNGs from an existing job
|
|
137
|
+
|
|
138
|
+
## How It Works
|
|
139
|
+
|
|
140
|
+
```mermaid
|
|
141
|
+
flowchart LR
|
|
142
|
+
A["Local PDF"] --> B["Mistral OCR"]
|
|
143
|
+
B --> C["Page markdown + embedded images"]
|
|
144
|
+
C --> D["Recomposition engine"]
|
|
145
|
+
D --> E["Dense packed PNG images"]
|
|
146
|
+
E --> F["Stored job artifacts"]
|
|
147
|
+
F --> G["Agent fetches manifest or image batches over MCP"]
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Why Packed Images Instead Of Just OCR Text
|
|
151
|
+
|
|
152
|
+
- section grouping
|
|
153
|
+
- table-like layout
|
|
154
|
+
- captions near figures
|
|
155
|
+
- visual adjacency between text and embedded graphics
|
|
156
|
+
|
|
157
|
+
Packed images keep layout cues like these, which a flat OCR text dump discards. For many vision-capable agents, that is a better intermediate format than a plain OCR dump.
|
|
158
|
+
|
|
159
|
+
## Current Scope
|
|
160
|
+
|
|
161
|
+
- depends on Mistral OCR
|
|
162
|
+
- currently handles local file paths, not remote uploads
|
|
163
|
+
- optimized for compression and retrieval, not final polished markdown generation
|
|
164
|
+
- quality depends on OCR quality and the visual density of the source document
|
|
165
|
+
|
|
166
|
+
## Roadmap
|
|
167
|
+
|
|
168
|
+
- make the OCR layer provider-agnostic so different OCR backends can be swapped behind the same MCP workflow
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
uv venv --python 3.11 .venv
|
|
174
|
+
uv pip install --python .venv/bin/python -e ".[dev]"
|
|
175
|
+
.venv/bin/python -m pytest
|
|
176
|
+
```
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
<!-- mcp-name: io.github.chrboebel/optical-context-mcp -->
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="./assets/optical-context-logo.png" alt="Optical Context MCP logo" width="680">
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<h1 align="center">Optical Context MCP</h1>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
FastMCP server for compressing large, OCR-heavy PDFs into dense packed images for agent workflows.
|
|
11
|
+
</p>
|
|
12
|
+
|
|
13
|
+
<p align="center">
|
|
14
|
+
<a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"></a>
|
|
15
|
+
<a href="https://gofastmcp.com/"><img src="https://img.shields.io/badge/MCP-FastMCP-111111.svg" alt="FastMCP"></a>
|
|
16
|
+
<a href="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml"><img src="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
17
|
+
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="MIT License"></a>
|
|
18
|
+
</p>
|
|
19
|
+
|
|
20
|
+
Optical Context MCP is built for one specific problem: giving agents a practical way to work with **large, visually structured PDFs** without sending every page individually to a vision model.
|
|
21
|
+
|
|
22
|
+
It reads a local PDF, runs OCR with Mistral, recomposes the extracted text and figures into a much smaller set of packed images, and exposes those artifacts over MCP for batch retrieval.
|
|
23
|
+
|
|
24
|
+
## What It Does
|
|
25
|
+
|
|
26
|
+
- reads a local PDF from the MCP host machine
|
|
27
|
+
- extracts page markdown and embedded images with Mistral OCR
|
|
28
|
+
- packs that content into dense PNGs that preserve visual grouping
|
|
29
|
+
- stores a manifest and job artifacts for follow-up retrieval
|
|
30
|
+
- lets an agent pull only the packed images it needs
|
|
31
|
+
|
|
32
|
+
## Where It Fits
|
|
33
|
+
|
|
34
|
+
Use it for:
|
|
35
|
+
|
|
36
|
+
- operating manuals
|
|
37
|
+
- scanned handbooks
|
|
38
|
+
- product catalogs
|
|
39
|
+
- PDF slide decks
|
|
40
|
+
- visually structured OCR-heavy documents
|
|
41
|
+
|
|
42
|
+
Skip it for:
|
|
43
|
+
|
|
44
|
+
- tiny PDFs
|
|
45
|
+
- clean text-native PDFs where normal extraction is enough
|
|
46
|
+
- workflows that require exact page-faithful rendering
|
|
47
|
+
- cases where OCR cost is not justified
|
|
48
|
+
|
|
49
|
+
## Example Result
|
|
50
|
+
|
|
51
|
+
The image below shows a real local validation run on a public research paper with dense text, figures, charts, and page-level visual structure. The packed image on the right consolidates the seven source pages shown on the left.
|
|
52
|
+
|
|
53
|
+
<p align="center">
|
|
54
|
+
<img src="./assets/original-vs-packed-comparison-straight-arrow.png" alt="Side-by-side comparison of original pages and the generated packed output" width="980">
|
|
55
|
+
</p>
|
|
56
|
+
|
|
57
|
+
Example local run facts from the generated manifest:
|
|
58
|
+
|
|
59
|
+
- source paper pages: 22
|
|
60
|
+
- previewed source page range: 15 to 21
|
|
61
|
+
- extracted images: 30
|
|
62
|
+
- packed output images: 6
|
|
63
|
+
- example packed image size: `986x1084`
|
|
64
|
+
- example packed image file size: `536,697 bytes`
|
|
65
|
+
|
|
66
|
+
This example shows the intended workflow: take a long, visually structured PDF and compress it into a smaller set of retrievable packed images that still preserve the visual structure of the source.
|
|
67
|
+
|
|
68
|
+
## Install
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
python -m pip install "git+https://github.com/ChrBoebel/optical-context-mcp.git@v0.1.1"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Run directly from GitHub with `uvx`:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
- `MISTRAL_API_KEY` is required for `compress_pdf`
|
|
81
|
+
|
|
82
|
+
## Run
|
|
83
|
+
|
|
84
|
+
Default transport is `stdio`:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
optical-context-mcp
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Claude Code
|
|
91
|
+
|
|
92
|
+
Register the server in a project:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
claude mcp add -s project optical-context -- uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Typical use:
|
|
99
|
+
|
|
100
|
+
1. call `compress_pdf`
|
|
101
|
+
2. inspect the returned manifest
|
|
102
|
+
3. fetch packed images with `get_packed_images`
|
|
103
|
+
|
|
104
|
+
## MCP Tools
|
|
105
|
+
|
|
106
|
+
- `compress_pdf`: run OCR plus recomposition and create a stored job
|
|
107
|
+
- `get_job_manifest`: load metadata for an existing job
|
|
108
|
+
- `get_packed_images`: fetch one or more packed PNGs from an existing job
|
|
109
|
+
|
|
110
|
+
## How It Works
|
|
111
|
+
|
|
112
|
+
```mermaid
|
|
113
|
+
flowchart LR
|
|
114
|
+
A["Local PDF"] --> B["Mistral OCR"]
|
|
115
|
+
B --> C["Page markdown + embedded images"]
|
|
116
|
+
C --> D["Recomposition engine"]
|
|
117
|
+
D --> E["Dense packed PNG images"]
|
|
118
|
+
E --> F["Stored job artifacts"]
|
|
119
|
+
F --> G["Agent fetches manifest or image batches over MCP"]
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Why Packed Images Instead Of Just OCR Text
|
|
123
|
+
|
|
124
|
+
- section grouping
|
|
125
|
+
- table-like layout
|
|
126
|
+
- captions near figures
|
|
127
|
+
- visual adjacency between text and embedded graphics
|
|
128
|
+
|
|
129
|
+
Packed images keep layout cues like these, which a flat OCR text dump discards. For many vision-capable agents, that is a better intermediate format than a plain OCR dump.
|
|
130
|
+
|
|
131
|
+
## Current Scope
|
|
132
|
+
|
|
133
|
+
- depends on Mistral OCR
|
|
134
|
+
- currently handles local file paths, not remote uploads
|
|
135
|
+
- optimized for compression and retrieval, not final polished markdown generation
|
|
136
|
+
- quality depends on OCR quality and the visual density of the source document
|
|
137
|
+
|
|
138
|
+
## Roadmap
|
|
139
|
+
|
|
140
|
+
- make the OCR layer provider-agnostic so different OCR backends can be swapped behind the same MCP workflow
|
|
141
|
+
|
|
142
|
+
## Development
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
uv venv --python 3.11 .venv
|
|
146
|
+
uv pip install --python .venv/bin/python -e ".[dev]"
|
|
147
|
+
.venv/bin/python -m pytest
|
|
148
|
+
```
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: optical-context-mcp
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: FastMCP server for compressing large OCR-heavy PDFs into dense packed images for agent workflows.
|
|
5
|
+
Author: Christopher Boebel
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/ChrBoebel/optical-context-mcp
|
|
8
|
+
Project-URL: Issues, https://github.com/ChrBoebel/optical-context-mcp/issues
|
|
9
|
+
Keywords: mcp,fastmcp,ocr,pdf,vision,document-processing
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: fastmcp>=3.1.0
|
|
22
|
+
Requires-Dist: mistralai>=1.12.0
|
|
23
|
+
Requires-Dist: Pillow>=12.0.0
|
|
24
|
+
Requires-Dist: python-dotenv>=1.2.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
<!-- mcp-name: io.github.chrboebel/optical-context-mcp -->
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<img src="./assets/optical-context-logo.png" alt="Optical Context MCP logo" width="680">
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<h1 align="center">Optical Context MCP</h1>
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
FastMCP server for compressing large, OCR-heavy PDFs into dense packed images for agent workflows.
|
|
39
|
+
</p>
|
|
40
|
+
|
|
41
|
+
<p align="center">
|
|
42
|
+
<a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11%2B-blue.svg" alt="Python 3.11+"></a>
|
|
43
|
+
<a href="https://gofastmcp.com/"><img src="https://img.shields.io/badge/MCP-FastMCP-111111.svg" alt="FastMCP"></a>
|
|
44
|
+
<a href="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml"><img src="https://github.com/ChrBoebel/optical-context-mcp/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
45
|
+
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg" alt="MIT License"></a>
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
Optical Context MCP is built for one specific problem: giving agents a practical way to work with **large, visually structured PDFs** without sending every page individually to a vision model.
|
|
49
|
+
|
|
50
|
+
It reads a local PDF, runs OCR with Mistral, recomposes the extracted text and figures into a much smaller set of packed images, and exposes those artifacts over MCP for batch retrieval.
|
|
51
|
+
|
|
52
|
+
## What It Does
|
|
53
|
+
|
|
54
|
+
- reads a local PDF from the MCP host machine
|
|
55
|
+
- extracts page markdown and embedded images with Mistral OCR
|
|
56
|
+
- packs that content into dense PNGs that preserve visual grouping
|
|
57
|
+
- stores a manifest and job artifacts for follow-up retrieval
|
|
58
|
+
- lets an agent pull only the packed images it needs
|
|
59
|
+
|
|
60
|
+
## Where It Fits
|
|
61
|
+
|
|
62
|
+
Use it for:
|
|
63
|
+
|
|
64
|
+
- operating manuals
|
|
65
|
+
- scanned handbooks
|
|
66
|
+
- product catalogs
|
|
67
|
+
- PDF slide decks
|
|
68
|
+
- visually structured OCR-heavy documents
|
|
69
|
+
|
|
70
|
+
Skip it for:
|
|
71
|
+
|
|
72
|
+
- tiny PDFs
|
|
73
|
+
- clean text-native PDFs where normal extraction is enough
|
|
74
|
+
- workflows that require exact page-faithful rendering
|
|
75
|
+
- cases where OCR cost is not justified
|
|
76
|
+
|
|
77
|
+
## Example Result
|
|
78
|
+
|
|
79
|
+
The image below shows a real local validation run on a public research paper with dense text, figures, charts, and page-level visual structure. The packed image on the right consolidates the seven source pages shown on the left.
|
|
80
|
+
|
|
81
|
+
<p align="center">
|
|
82
|
+
<img src="./assets/original-vs-packed-comparison-straight-arrow.png" alt="Side-by-side comparison of original pages and the generated packed output" width="980">
|
|
83
|
+
</p>
|
|
84
|
+
|
|
85
|
+
Example local run facts from the generated manifest:
|
|
86
|
+
|
|
87
|
+
- source paper pages: 22
|
|
88
|
+
- previewed source page range: 15 to 21
|
|
89
|
+
- extracted images: 30
|
|
90
|
+
- packed output images: 6
|
|
91
|
+
- example packed image size: `986x1084`
|
|
92
|
+
- example packed image file size: `536,697 bytes`
|
|
93
|
+
|
|
94
|
+
This example shows the intended workflow: take a long, visually structured PDF and compress it into a smaller set of retrievable packed images that still preserve the visual structure of the source.
|
|
95
|
+
|
|
96
|
+
## Install
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
python -m pip install "git+https://github.com/ChrBoebel/optical-context-mcp.git@v0.1.1"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Run directly from GitHub with `uvx`:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
- `MISTRAL_API_KEY` is required for `compress_pdf`
|
|
109
|
+
|
|
110
|
+
## Run
|
|
111
|
+
|
|
112
|
+
Default transport is `stdio`:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
optical-context-mcp
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Claude Code
|
|
119
|
+
|
|
120
|
+
Register the server in a project:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
claude mcp add -s project optical-context -- uvx --from git+https://github.com/ChrBoebel/optical-context-mcp@v0.1.1 optical-context-mcp
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Typical use:
|
|
127
|
+
|
|
128
|
+
1. call `compress_pdf`
|
|
129
|
+
2. inspect the returned manifest
|
|
130
|
+
3. fetch packed images with `get_packed_images`
|
|
131
|
+
|
|
132
|
+
## MCP Tools
|
|
133
|
+
|
|
134
|
+
- `compress_pdf`: run OCR plus recomposition and create a stored job
|
|
135
|
+
- `get_job_manifest`: load metadata for an existing job
|
|
136
|
+
- `get_packed_images`: fetch one or more packed PNGs from an existing job
|
|
137
|
+
|
|
138
|
+
## How It Works
|
|
139
|
+
|
|
140
|
+
```mermaid
|
|
141
|
+
flowchart LR
|
|
142
|
+
A["Local PDF"] --> B["Mistral OCR"]
|
|
143
|
+
B --> C["Page markdown + embedded images"]
|
|
144
|
+
C --> D["Recomposition engine"]
|
|
145
|
+
D --> E["Dense packed PNG images"]
|
|
146
|
+
E --> F["Stored job artifacts"]
|
|
147
|
+
F --> G["Agent fetches manifest or image batches over MCP"]
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Why Packed Images Instead Of Just OCR Text
|
|
151
|
+
|
|
152
|
+
- section grouping
|
|
153
|
+
- table-like layout
|
|
154
|
+
- captions near figures
|
|
155
|
+
- visual adjacency between text and embedded graphics
|
|
156
|
+
|
|
157
|
+
Packed images keep layout cues like these, which a flat OCR text dump discards. For many vision-capable agents, that is a better intermediate format than a plain OCR dump.
|
|
158
|
+
|
|
159
|
+
## Current Scope
|
|
160
|
+
|
|
161
|
+
- depends on Mistral OCR
|
|
162
|
+
- currently handles local file paths, not remote uploads
|
|
163
|
+
- optimized for compression and retrieval, not final polished markdown generation
|
|
164
|
+
- quality depends on OCR quality and the visual density of the source document
|
|
165
|
+
|
|
166
|
+
## Roadmap
|
|
167
|
+
|
|
168
|
+
- make the OCR layer provider-agnostic so different OCR backends can be swapped behind the same MCP workflow
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
uv venv --python 3.11 .venv
|
|
174
|
+
uv pip install --python .venv/bin/python -e ".[dev]"
|
|
175
|
+
.venv/bin/python -m pytest
|
|
176
|
+
```
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
optical_context_mcp.egg-info/PKG-INFO
|
|
5
|
+
optical_context_mcp.egg-info/SOURCES.txt
|
|
6
|
+
optical_context_mcp.egg-info/dependency_links.txt
|
|
7
|
+
optical_context_mcp.egg-info/entry_points.txt
|
|
8
|
+
optical_context_mcp.egg-info/requires.txt
|
|
9
|
+
optical_context_mcp.egg-info/top_level.txt
|
|
10
|
+
optical_mcp/__init__.py
|
|
11
|
+
optical_mcp/__main__.py
|
|
12
|
+
optical_mcp/mistral_client.py
|
|
13
|
+
optical_mcp/models.py
|
|
14
|
+
optical_mcp/recomposition.py
|
|
15
|
+
optical_mcp/server.py
|
|
16
|
+
optical_mcp/service.py
|
|
17
|
+
optical_mcp/storage.py
|
|
18
|
+
tests/test_server_tools.py
|
|
19
|
+
tests/test_storage.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
optical_mcp
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Optical Context MCP package."""
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Mistral OCR client for document extraction.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import base64
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from mistralai import Mistral
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MistralOCRClient:
    """Client for the Mistral OCR API.

    Local files are read from disk, base64-encoded and passed inline to
    ``client.ocr.process`` as ``data:`` URLs.
    """

    # Model used when the caller does not choose one.
    DEFAULT_MODEL = "mistral-ocr-latest"

    # File suffix (lower-case, with dot) -> MIME type used by extract_image.
    _IMAGE_MIME_TYPES = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    # MIME type assumed for image suffixes missing from the table above.
    _FALLBACK_IMAGE_MIME_TYPE = "image/png"

    def __init__(self, api_key: str, *, model: str = DEFAULT_MODEL):
        """Create the underlying ``Mistral`` client.

        Args:
            api_key: API key handed to ``Mistral``; must be non-empty.
            model: OCR model name sent with every request. Defaults to
                ``"mistral-ocr-latest"``.

        Raises:
            ValueError: If ``api_key`` is empty or otherwise falsy.
        """
        if not api_key:
            raise ValueError("Mistral API key not provided")
        self.client = Mistral(api_key=api_key)
        self.model = model

    @staticmethod
    def _to_data_url(path: Path | str, mime_type: str) -> str:
        """Read *path* and return its bytes as a base64 ``data:`` URL."""
        encoded = base64.b64encode(Path(path).read_bytes()).decode("ascii")
        return f"data:{mime_type};base64,{encoded}"

    def extract_pdf(self, pdf_path: Path | str):
        """Extract text and images from a PDF.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            Whatever ``client.ocr.process`` returns, unchanged.

        Raises:
            OSError: If the file cannot be read.
        """
        return self.client.ocr.process(
            model=self.model,
            document={
                "type": "document_url",
                "document_url": self._to_data_url(pdf_path, "application/pdf"),
            },
            include_image_base64=True,
        )

    def extract_image(self, image_path: Path | str):
        """Extract text and elements from a single image.

        The MIME type is chosen from the file suffix (case-insensitive);
        unrecognised suffixes are sent as ``image/png``.

        Args:
            image_path: Path of the image file to read.

        Returns:
            Whatever ``client.ocr.process`` returns, unchanged.

        Raises:
            OSError: If the file cannot be read.
        """
        path = Path(image_path)
        mime_type = self._IMAGE_MIME_TYPES.get(
            path.suffix.lower(), self._FALLBACK_IMAGE_MIME_TYPE
        )
        return self.client.ocr.process(
            model=self.model,
            document={
                "type": "image_url",
                "image_url": self._to_data_url(path, mime_type),
            },
            include_image_base64=True,
        )
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict, dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass(slots=True)
|
|
7
|
+
class PackedImageArtifact:
|
|
8
|
+
index: int
|
|
9
|
+
path: str
|
|
10
|
+
width: int
|
|
11
|
+
height: int
|
|
12
|
+
size_bytes: int
|
|
13
|
+
|
|
14
|
+
def to_dict(self) -> dict[str, object]:
|
|
15
|
+
return asdict(self)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(slots=True)
|
|
19
|
+
class CompressionJobManifest:
|
|
20
|
+
job_id: str
|
|
21
|
+
source_pdf: str
|
|
22
|
+
output_dir: str
|
|
23
|
+
created_at: str
|
|
24
|
+
chars_per_image: int
|
|
25
|
+
page_count: int
|
|
26
|
+
extracted_image_count: int
|
|
27
|
+
packed_image_count: int
|
|
28
|
+
ocr_markdown_path: str
|
|
29
|
+
packed_images: list[PackedImageArtifact]
|
|
30
|
+
|
|
31
|
+
def to_dict(self) -> dict[str, object]:
|
|
32
|
+
return {
|
|
33
|
+
"job_id": self.job_id,
|
|
34
|
+
"source_pdf": self.source_pdf,
|
|
35
|
+
"output_dir": self.output_dir,
|
|
36
|
+
"created_at": self.created_at,
|
|
37
|
+
"chars_per_image": self.chars_per_image,
|
|
38
|
+
"page_count": self.page_count,
|
|
39
|
+
"extracted_image_count": self.extracted_image_count,
|
|
40
|
+
"packed_image_count": self.packed_image_count,
|
|
41
|
+
"ocr_markdown_path": self.ocr_markdown_path,
|
|
42
|
+
"packed_images": [image.to_dict() for image in self.packed_images],
|
|
43
|
+
}
|