pdf-porter 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(tail:*)",
5
+ "Bash(head:*)",
6
+ "Bash(source:*)",
7
+ "Bash(zsh:*)"
8
+ ]
9
+ }
10
+ }
@@ -0,0 +1,2 @@
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
@@ -0,0 +1,181 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ # Cursor
177
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
178
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
179
+ # refer to https://docs.cursor.com/context/ignore-files
180
+ .cursorignore
181
+ .cursorindexingignore
@@ -0,0 +1,85 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project
6
+ `pdf-porter`: a minimal MCP server exposing one tool — `pdf_to_markdown(path, save_output)` — for layout-aware PDF→Markdown conversion via Docling. Pitch: one tool, zero config, five minutes.
7
+
8
+ **Note:** `docling-mcp` (the obvious name) is taken by IBM Research Zurich, the Docling authors themselves, at v1.3.2. This package is intentionally minimal — one tool, no config. The README should reference IBM's `docling-mcp` as the full-featured alternative: "if you want the full Docling toolset, see docling-mcp."
9
+
10
+ ## Stack
11
+ - Python 3.11+
12
+ - `mcp[cli]` (FastMCP)
13
+ - `docling`
14
+ - `uv` for packaging and running
15
+ - `reportlab` (dev only, for generating test fixtures)
16
+
17
+ ## Structure
18
+ ```
19
+ pdf_porter/
20
+ __init__.py # __version__ = "0.1.0"
21
+ server.py # FastMCP app, tool definitions, module-level converter singleton
22
+ tests/
23
+ test_server.py # integration tests (no mocking)
24
+ fixtures/ # simple.pdf, two_column.pdf, table.pdf (generated by generate_fixtures.py)
25
+ generate_fixtures.py # standalone script to regenerate test PDFs via reportlab
26
+ pyproject.toml
27
+ README.md
28
+ ```
29
+
30
+ ## Commands
31
+ ```bash
32
+ uv run python -m pdf_porter # start server
33
+ uv run pytest # run tests
34
+ uv run mcp dev pdf_porter/server.py # MCP inspector
35
+ uv run pytest tests/test_server.py::test_table_pdf # run a single test
36
+ python generate_fixtures.py # regenerate test fixture PDFs
37
+ uv build # build wheel + sdist into dist/
38
+ ```
39
+
40
+ ## Publishing to PyPI
41
+
42
+ `PYPI_TOKEN` must be set in the environment before publishing. Claude Code's Bash tool does **not** inherit variables exported in the user's interactive shell — the user must publish manually from their own terminal:
43
+
44
+ ```bash
45
+ # In your terminal (not via Claude Code):
46
+ uv build
47
+ uv publish --token $PYPI_TOKEN
48
+ ```
49
+
50
+ After publishing, verify the release:
51
+ ```bash
52
+ curl -s https://pypi.org/pypi/pdf-porter/json | python -m json.tool | grep '"version"'
53
+ uvx pdf-porter & # should start cleanly; it runs in the background, so stop it with `kill %1` (Ctrl-C will not reach it)
54
+ ```
55
+
56
+ ## Architecture
57
+
58
+ `server.py` holds a module-level `DocumentConverter` singleton (instantiated once, not per-call) and a boolean flag to log "Loading Docling models..." to stderr on the first conversion only. The `pdf_to_markdown` tool validates path existence and `.pdf` extension before calling Docling, and wraps all exceptions as `"Error: {e}"` strings — never raw tracebacks.
59
+
60
+ Entry point: `pdf_porter.server:main` → calls `mcp.run()`. Also handles `if __name__ == "__main__"`.
61
+
62
+ ## Tool signature
63
+ ```python
64
+ pdf_to_markdown(path: str, save_output: bool = False) -> str
65
+ ```
66
+ - Returns markdown string directly, or `"Saved to: {output_path}"` when `save_output=True`
67
+ - Returns `"Error: ..."` on any failure (bad path, wrong extension, conversion error)
68
+
69
+ ## Rules
70
+ - No global state beyond the converter singleton and the first-run flag — document both in server.py.
71
+ - All errors surface as readable strings, never raw tracebacks to the LLM.
72
+ - Tests are integration tests — do not mock `DocumentConverter`.
73
+ - Tests must cover: simple PDF, two-column PDF, table PDF (asserts `|` present), bad path, non-PDF extension, `save_output=True`.
74
+ - Do not add dependencies beyond `mcp` and `docling` (runtime), plus `pytest` and `reportlab` (both dev-only).
75
+ - README must include a working `claude_desktop_config.json` snippet.
76
+
77
+ ## Known limitations (inform users)
78
+ - First run downloads Docling model weights (~500MB); subsequent runs are fast.
79
+ - OCR mode not enabled in v0.1 — scanned/image-only PDFs will return empty text.
80
+ - Windows path handling not validated.
81
+
82
+
83
+ ## PATHS
84
+
85
+ NEVER HARD CODE PATHS. /path/to/repo should NEVER APPEAR in the codebase.
@@ -0,0 +1,212 @@
1
+ # Claude Code Prompt Sequence
2
+ ## mcp-docling — Autonomous Build Session
3
+
4
+ Run these prompts in order in Claude Code. Each phase should complete fully before the next begins. If a phase fails its acceptance criteria, fix before proceeding.
5
+
6
+ ---
7
+
8
+ ## Prompt 0 — Bootstrap
9
+
10
+ ```
11
+ Initialize a new Python project called mcp-docling.
12
+
13
+ Create this directory structure:
14
+ mcp_docling/__init__.py
15
+ mcp_docling/server.py
16
+ tests/__init__.py
17
+ tests/test_server.py
18
+ tests/fixtures/ (empty for now)
19
+ pyproject.toml
20
+ README.md
21
+ CLAUDE.md
22
+ LICENSE
23
+
24
+ pyproject.toml requirements:
25
+ - name: mcp-docling
26
+ - version: 0.1.0
27
+ - description: "MCP server for layout-aware PDF to Markdown conversion via Docling"
28
+ - requires-python: ">=3.11"
29
+ - dependencies: ["mcp[cli]", "docling"]
30
+ - optional-dependencies.dev: ["pytest"]
31
+ - [project.scripts] mcp-docling = "mcp_docling.server:main"
32
+ - [project.urls] homepage = "https://github.com/benmazzotta/mcp-docling"
33
+
34
+ In mcp_docling/__init__.py, just set __version__ = "0.1.0".
35
+
36
+ Do not write server.py or tests yet. Confirm the scaffold is in place.
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Prompt 1 — Server
42
+
43
+ ```
44
+ Write mcp_docling/server.py.
45
+
46
+ Use FastMCP from the mcp package. Create a module-level DocumentConverter singleton
47
+ from docling.document_converter. Do not reinstantiate it on every call.
48
+
49
+ Expose one tool:
50
+
51
+ pdf_to_markdown(path: str, save_output: bool = False) -> str
52
+
53
+ Behavior:
54
+ - If path does not exist: return "Error: file not found: {path}"
55
+ - If path does not end in .pdf (case-insensitive): return "Error: not a PDF file: {path}"
56
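+ - Convert using the module-level DocumentConverter singleton (call its .convert(path); do not construct a new DocumentConverter() per call), then call result.document.export_to_markdown()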
57
+ - If save_output is True: write markdown to same directory, same stem, .md extension.
58
+ Return "Saved to: {output_path}"
59
+ - If save_output is False: return the markdown string directly
60
+ - Wrap everything in try/except Exception as e and return f"Error: {e}" on failure
61
+
62
+ Add a main() function:
63
+ def main():
64
+ mcp.run()
65
+
66
+ Add if __name__ == "__main__": main() at the bottom.
67
+
68
+ Log "Loading Docling models..." to stderr before the first conversion (not at import time —
69
+ use a module-level flag).
70
+ ```
71
+
72
+ ---
73
+
74
+ ## Prompt 2 — Fixtures and Tests
75
+
76
+ ```
77
+ Generate test fixture PDFs in tests/fixtures/ using reportlab.
78
+ Add reportlab as a dev dependency if not present.
79
+
80
+ Generate three PDFs:
81
+
82
+ 1. tests/fixtures/simple.pdf
83
+ - Single column
84
+ - Three paragraphs of Lorem Ipsum
85
+ - One H1 heading
86
+
87
+ 2. tests/fixtures/two_column.pdf
88
+ - Two-column layout
89
+ - Different text in each column, clearly labeled "Column A" and "Column B"
90
+
91
+ 3. tests/fixtures/table.pdf
92
+ - A table with 3 columns and 5 rows
93
+ - Headers: Name, Value, Notes
94
+ - Fill with sample data
95
+
96
+ Write a standalone script generate_fixtures.py at the project root that generates
97
+ all three. Run it. Confirm all three files exist and are valid PDFs.
98
+
99
+ Then write tests/test_server.py:
100
+
101
+ Import pdf_to_markdown directly from mcp_docling.server (not through MCP).
102
+
103
+ Tests:
104
+ - test_simple_pdf: result is str, len > 100, no "Error:" prefix
105
+ - test_two_column_pdf: result is str, len > 100, no "Error:" prefix
106
+ - test_table_pdf: result contains "|" character
107
+ - test_bad_path: result starts with "Error:"
108
+ - test_non_pdf: create a temp .txt file, result starts with "Error:"
109
+ - test_save_output: copy simple.pdf to tmp_path, call with save_output=True,
110
+ assert .md file exists and is non-empty
111
+
112
+ Run pytest. All tests must pass before proceeding.
113
+ ```
114
+
115
+ ---
116
+
117
+ ## Prompt 3 — README and Packaging
118
+
119
+ ```
120
+ Write README.md with these exact sections:
121
+
122
+ # mcp-docling
123
+
124
+ One sentence: what it does and why it's better than pypdf.
125
+
126
+ ## Installation
127
+
128
+ ### Use with Claude Desktop (recommended)
129
+ Add to claude_desktop_config.json:
130
+
131
+ {
132
+ "mcpServers": {
133
+ "pdf-tools": {
134
+ "command": "uvx",
135
+ "args": ["mcp-docling"]
136
+ }
137
+ }
138
+ }
139
+
140
+ Then restart Claude Desktop.
141
+
142
+ ### Development
143
+ git clone https://github.com/benmazzotta/mcp-docling
144
+ cd mcp-docling
145
+ uv sync --dev
146
+ uv run pytest
147
+
148
+ ## Tool Reference
149
+
150
+ ### pdf_to_markdown
151
+ - path (str): absolute or relative path to a PDF file
152
+ - save_output (bool, default False): if True, writes .md file next to the PDF and returns the path
153
+
154
+ ## Known Limitations
155
+ - First run downloads Docling model weights (~500MB). Subsequent runs are fast.
156
+ - Scanned PDFs: OCR mode is not enabled in v0.1. Text in image-only PDFs will be empty.
157
+ - Tested on macOS and Linux. Windows path handling not validated.
158
+
159
+ ## License
160
+ MIT. See LICENSE.
161
+
162
+ ---
163
+
164
+ Write LICENSE as MIT, author Ben Mazzotta, year 2025.
165
+
166
+ Then run:
167
+ uv build
168
+
169
+ Confirm dist/ contains a .whl and a .tar.gz. Report the filenames.
170
+ ```
171
+
172
+ ---
173
+
174
+ ## Prompt 4 — Publish
175
+
176
+ ```
177
+ Publish to PyPI.
178
+
179
+ First, check whether PYPI_TOKEN is set in the environment.
180
+ If not set: print the following and stop:
181
+
182
+ "To publish, set your PyPI API token:
183
+ export PYPI_TOKEN=pypi-...
184
+ Then re-run this prompt."
185
+
186
+ If set, run:
187
+ uv publish --token $PYPI_TOKEN
188
+
189
+ After publishing, wait 60 seconds, then verify:
190
+ curl -s https://pypi.org/pypi/mcp-docling/json | python -m json.tool | grep '"version"'
191
+
192
+ Then test the live package in a temporary directory:
193
+ cd /tmp
194
+ uvx mcp-docling &
195
+ sleep 5
196
+ kill %1
197
+
198
+ Confirm it starts without error. Report success or failure clearly.
199
+ ```
200
+
201
+ ---
202
+
203
+ ## Final Checklist
204
+
205
+ After all four prompts complete, verify:
206
+
207
+ - [ ] `uv run pytest` — all tests green
208
+ - [ ] `uv build` — dist/ has wheel and sdist
209
+ - [ ] `uvx mcp-docling` — server starts
210
+ - [ ] Package visible at pypi.org/project/mcp-docling
211
+ - [ ] README on PyPI renders correctly
212
+ - [ ] claude_desktop_config.json snippet in README is accurate
@@ -0,0 +1,181 @@
1
+ # mcp-docling: Concept Note & Implementation Plan
2
+
3
+ ## Problem
4
+
5
+ Claude's native PDF ingestion is shallow. It reads text streams, not document structure. Tables become noise. Two-column papers read in the wrong order. Scanned PDFs return nothing. The result is that users paste PDFs into Claude and get confused or degraded responses — not because Claude is weak, but because the input is scrambled before Claude ever sees it.
6
+
7
+ ## Solution
8
+
9
+ A local MCP server that wraps Docling — a layout-aware document converter — and exposes a single tool to Claude Desktop: `pdf_to_markdown(path)`. Claude calls it, gets clean structured markdown back, and reasons over that instead.
10
+
11
+ No upload. No cloud dependency. No background service. The server process lives only during a Claude Desktop session.
12
+
13
+ ## Scope
14
+
15
+ **In:** PDF to markdown. Local file paths. Optional output-to-file.
16
+ **Out:** Other document formats, cloud storage integration, batch processing, GUI. These are v2 concerns.
17
+
18
+ ---
19
+
20
+ ## Phase 1: Build
21
+
22
+ ### Goal
23
+ A working MCP server that converts a local PDF to markdown and returns it to Claude.
24
+
25
+ ### Prompt for Claude Code
26
+ ```
27
+ Create a Python MCP server using FastMCP (mcp[cli] package) in mcp_docling/server.py.
28
+
29
+ Expose one tool: pdf_to_markdown(path: str, save_output: bool = False) -> str
30
+
31
+ Behavior:
32
+ - Validate that path exists and is a .pdf file. Return a clear error string if not.
33
+ - Use DocumentConverter from docling to convert the PDF.
34
+ - Call result.document.export_to_markdown().
35
+ - If save_output is True, write the markdown to the same directory as the PDF,
36
+ with the same filename and .md extension. Return the output path as a string.
37
+ - If save_output is False, return the markdown content directly.
38
+ - Catch all exceptions and return them as readable error strings prefixed with "Error: ".
39
+ - Cache the DocumentConverter as a module-level singleton to avoid reloading weights on every call.
40
+
41
+ Set up pyproject.toml with:
42
+ - name: mcp-docling
43
+ - version: 0.1.0
44
+ - dependencies: mcp[cli], docling
45
+ - dev dependencies: pytest
46
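+ - entry point: mcp-docling = mcp_docling.server:main (a main() function that calls mcp.run(); a console script must point at a callable, and the script name must match `uvx mcp-docling`)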
47
+ - Python >= 3.11
48
+
49
+ Do not add any other dependencies.
50
+ ```
51
+
52
+ ### Acceptance Criteria
53
+ - [ ] `uv run python -m mcp_docling` starts without error
54
+ - [ ] `uv run mcp dev mcp_docling/server.py` opens MCP inspector
55
+ - [ ] Tool appears in inspector with correct signature
56
+ - [ ] Calling with a valid PDF path returns non-empty markdown string
57
+ - [ ] Calling with a bad path returns a readable error string, not a traceback
58
+ - [ ] `save_output=True` writes a `.md` file next to the PDF
59
+
60
+ ---
61
+
62
+ ## Phase 2: Test
63
+
64
+ ### Goal
65
+ Confidence across the real-world PDF types that matter. This is where pypdf fails and docling earns its keep.
66
+
67
+ ### Prompt for Claude Code
68
+ ```
69
+ Create tests/test_server.py using pytest.
70
+
71
+ Download or generate four fixture PDFs into tests/fixtures/:
72
+ 1. simple.pdf — single column, clean text (generate with reportlab if needed)
73
+ 2. two_column.pdf — two-column layout (generate with reportlab)
74
+ 3. table.pdf — PDF containing a markdown-renderable table (generate with reportlab)
75
+ 4. scanned.pdf — use any freely licensed scanned PDF sample from a public URL,
76
+ or skip if download is unreliable and note it
77
+
78
+ For each fixture, write a test that:
79
+ - Calls pdf_to_markdown(path) directly (import the function, don't go through MCP)
80
+ - Asserts the result is a non-empty string
81
+ - Asserts no "Error:" prefix in result
82
+
83
+ Additional tests:
84
+ - Bad path returns string starting with "Error:"
85
+ - Non-PDF extension returns string starting with "Error:"
86
+ - save_output=True creates the expected .md file (use tmp_path fixture, copy a fixture there first)
87
+
88
+ Do not mock DocumentConverter. These are integration tests. They should call docling for real.
89
+ ```
90
+
91
+ ### Acceptance Criteria
92
+ - [ ] `uv run pytest` passes all tests
93
+ - [ ] Two-column test returns markdown where paragraphs are not interleaved
94
+ - [ ] Table test returns output containing a markdown table (`|` characters)
95
+ - [ ] Error path tests pass
96
+ - [ ] save_output test confirms file exists and is non-empty
97
+
98
+ ---
99
+
100
+ ## Phase 3: Package
101
+
102
+ ### Goal
103
+ Anyone can run this with a single `uvx` command. No cloning required.
104
+
105
+ ### Prompt for Claude Code
106
+ ```
107
+ Prepare mcp-docling for PyPI publication.
108
+
109
+ 1. Verify pyproject.toml is complete:
110
+ - name, version, description, license (MIT), author, homepage, readme
111
+ - Correct entry point so `uvx mcp-docling` starts the server
112
+
113
+ 2. Write README.md with these sections:
114
+ - One-sentence description
115
+ - Installation (two methods: uvx one-liner, and uv add for development)
116
+ - claude_desktop_config.json snippet showing exact configuration
117
+ - Tool reference: pdf_to_markdown parameters and return values
118
+ - Known limitations: large files, scanned PDFs need OCR mode, first-run weight download
119
+ - License
120
+
121
+ 3. Add LICENSE (MIT, author: Ben Mazzotta, year 2025)
122
+
123
+ 4. Build the package:
124
+ uv build
125
+
126
+ 5. Confirm dist/ contains a .whl and .tar.gz
127
+
128
+ 6. Dry-run install from the wheel locally to confirm the entry point works:
129
+ uv run --with dist/mcp_docling-*.whl mcp-docling --help
130
+ (adjust if FastMCP handles --help differently)
131
+ ```
132
+
133
+ ### Acceptance Criteria
134
+ - [ ] `uv build` completes without error
135
+ - [ ] `dist/` contains both wheel and sdist
136
+ - [ ] README contains working config snippet (verified by reading it)
137
+ - [ ] LICENSE file present
138
+ - [ ] Entry point is callable from the wheel
139
+
140
+ ---
141
+
142
+ ## Phase 4: Publish
143
+
144
+ ### Goal
145
+ Package is live on PyPI. Users can configure it in two lines.
146
+
147
+ ### Prompt for Claude Code
148
+ ```
149
+ Publish mcp-docling to PyPI using uv.
150
+
151
+ Steps:
152
+ 1. Ensure PYPI_TOKEN is available as environment variable (do not hardcode it)
153
+ 2. Run: uv publish --token $PYPI_TOKEN
154
+ 3. Wait ~60 seconds, then verify publication:
155
+ curl https://pypi.org/pypi/mcp-docling/json | python -m json.tool | grep version
156
+ 4. Test the live package:
157
+ uvx mcp-docling
158
+ (confirm it starts and prints MCP server startup message)
159
+ 5. Update README if the PyPI badge URL or install command needs adjusting.
160
+
161
+ If PYPI_TOKEN is not set, stop and print instructions for the user to set it.
162
+ Do not attempt to publish without a token.
163
+ ```
164
+
165
+ ### Acceptance Criteria
166
+ - [ ] Package appears at pypi.org/project/mcp-docling
167
+ - [ ] `uvx mcp-docling` works in a clean environment (test in a new uv venv)
168
+ - [ ] Version on PyPI matches pyproject.toml
169
+ - [ ] README on PyPI renders correctly (check the PyPI project page)
170
+
171
+ ---
172
+
173
+ ## Risk Notes
174
+
175
+ **Docling weight download on first run.** Users will see a pause of 30–90 seconds the first time `pdf_to_markdown` is called. The README must warn them. Consider printing a log message from the server: `"Loading Docling models (first run only)..."`.
176
+
177
+ **Scanned PDFs.** Docling handles these but OCR mode must be enabled explicitly. v0.1 can document this as a known limitation and expose it as a future `ocr=True` parameter.
178
+
179
+ **Path handling on Windows.** Docling and MCP both support Windows but path separators can bite. Test on the platform you care about.
180
+
181
+ **PyPI name collision.** Check that `mcp-docling` is unclaimed before Phase 3. If taken, `mcp-pdf-docling` or `docling-mcp` are reasonable alternatives.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Benjamin Mazzotta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.