pdf2mcp 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(python -m pytest tests/test_config.py -v 2>&1)",
5
+ "Bash(python -c \"from __future__ import annotations; from pydantic_settings import BaseSettings; print\\('ok'\\)\" 2>&1)",
6
+ "Bash(python -c \"\nimport os\nos.environ['OPENAI_API_KEY'] = 'sk-test'\nfrom pdf2mcp.config import Settings\ns = Settings\\(\\)\nprint\\(s.model_fields.keys\\(\\)\\)\nprint\\(hasattr\\(s, 'openai_base_url'\\)\\)\nprint\\(s.model_dump\\(\\)\\)\n\" 2>&1)",
7
+ "Bash(python -c \"\nimport os\nos.environ['OPENAI_API_KEY'] = 'sk-test'\nfrom pdf2mcp.config import Settings\ns = Settings\\(\\)\nprint\\('has attr:', hasattr\\(s, 'openai_base_url'\\)\\)\nprint\\('value:', s.openai_base_url\\)\n\" 2>&1)",
8
+ "Bash(python -c \"\nfrom pdf2mcp.config import Settings\nprint\\(Settings.model_fields.keys\\(\\)\\)\n\" 2>&1)",
9
+ "Bash(python -c \"import pdf2mcp.config; print\\(pdf2mcp.config.__file__\\)\" 2>&1 && find /Users/aissam/SynologyDrive/Work/Aisobotics/Dev/pdf2mcp -path '*/pdf2mcp/config.py' -not -path '*/.venv/*' 2>&1)",
10
+ "Bash(pip install:*)",
11
+ "Bash(python -c \"\nimport os\nos.environ['OPENAI_API_KEY'] = 'sk-test'\nfrom pdf2mcp.config import Settings\nprint\\(Settings.model_fields.keys\\(\\)\\)\ns = Settings\\(\\)\nprint\\('base_url:', s.openai_base_url\\)\n\" 2>&1)",
12
+ "Bash(python -c \"import pdf2mcp.config; print\\(pdf2mcp.config.__file__\\)\" 2>&1)",
13
+ "Bash(pip uninstall:*)",
14
+ "Bash(uv pip:*)",
15
+ "Bash(python -c \"\nimport os; os.environ['OPENAI_API_KEY'] = 'sk-test'\nfrom pdf2mcp.config import Settings\nprint\\(Settings.model_fields.keys\\(\\)\\)\ns = Settings\\(\\)\nprint\\('base_url:', s.openai_base_url\\)\n\" 2>&1)",
16
+ "Bash(python -m pytest 2>&1)",
17
+ "Bash(python -c \"from pdf2mcp.cli import _ENV_TEMPLATE; print\\(_ENV_TEMPLATE\\)\")",
18
+ "Bash(which pdf2mcp:*)",
19
+ "Bash(git add:*)",
20
+ "Bash(git commit:*)",
21
+ "Bash(git push:*)",
22
+ "mcp__plugin_serena_serena__list_dir",
23
+ "Bash(python -m pytest tests/test_search.py tests/test_server.py -v 2>&1)",
24
+ "Bash(python -m pytest -q 2>&1)",
25
+ "Bash(uv run:*)",
26
+ "Bash(python -m pytest tests/test_store.py tests/test_embeddings.py tests/test_search.py tests/test_ingest.py -v 2>&1 | head -120)",
27
+ "Bash(python -m pytest -v 2>&1 | tail -30)",
28
+ "Bash(python -c \"import lancedb; help\\(lancedb.table.Table.create_index\\)\" 2>&1 | head -60)",
29
+ "Bash(gh release:*)",
30
+ "Bash(gh repo:*)",
31
+ "Bash(pip index:*)",
32
+ "Bash(python -m build 2>&1)",
33
+ "Bash(twine check:*)"
34
+ ]
35
+ }
36
+ }
@@ -0,0 +1,19 @@
1
+ # Required: OpenAI API key for embeddings
2
+ OPENAI_API_KEY=sk-your-api-key-here
3
+
4
+ # Optional: OpenAI base URL (for Azure, local proxies, or compatible providers)
5
+ # PDF2MCP_OPENAI_BASE_URL=https://api.openai.com/v1
6
+
7
+ # Optional: Override defaults
8
+ # PDF2MCP_DOCS_DIR=docs
9
+ # PDF2MCP_DATA_DIR=data
10
+ # PDF2MCP_EMBEDDING_MODEL=text-embedding-3-small
11
+ # PDF2MCP_CHUNK_SIZE=500
12
+ # PDF2MCP_CHUNK_OVERLAP=50
13
+ # PDF2MCP_DEFAULT_NUM_RESULTS=5
14
+ # PDF2MCP_SERVER_NAME=pdf-docs
15
+
16
+ # Server transport settings
17
+ # PDF2MCP_SERVER_TRANSPORT=streamable-http
18
+ # PDF2MCP_SERVER_HOST=127.0.0.1
19
+ # PDF2MCP_SERVER_PORT=8000
@@ -0,0 +1,36 @@
1
+ # Python
2
+ **/__pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ *.egg
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+
14
+ # Type checking / linting caches
15
+ .mypy_cache/
16
+ .ruff_cache/
17
+
18
+ # Testing
19
+ .pytest_cache/
20
+ .coverage
21
+ htmlcov/
22
+
23
+ # Environment
24
+ .env
25
+
26
+ # Project-specific
27
+ data/
28
+ docs/*.pdf
29
+
30
+ # IDE
31
+ .vscode/
32
+ .idea/
33
+
34
+ # OS
35
+ .DS_Store
36
+ Thumbs.db
@@ -0,0 +1 @@
1
+ 3.12
pdf2mcp-0.2.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 iSamBa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pdf2mcp-0.2.2/PKG-INFO ADDED
@@ -0,0 +1,230 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf2mcp
3
+ Version: 0.2.2
4
+ Summary: Turn any PDF folder into a searchable MCP server
5
+ Project-URL: Homepage, https://github.com/iSamBa/pdf2mcp
6
+ Project-URL: Repository, https://github.com/iSamBa/pdf2mcp
7
+ Project-URL: Issues, https://github.com/iSamBa/pdf2mcp/issues
8
+ Author-email: iSamBa <bahou.aissam@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: embeddings,lancedb,mcp,pdf,rag,search
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Text Processing :: Indexing
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: lancedb>=0.6
24
+ Requires-Dist: mcp[cli]>=1.0
25
+ Requires-Dist: openai>=1.0
26
+ Requires-Dist: pydantic-settings>=2.0
27
+ Requires-Dist: pymupdf4llm>=0.0.17
28
+ Requires-Dist: python-dotenv>=1.0
29
+ Requires-Dist: tenacity>=8.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: mypy>=1.10; extra == 'dev'
32
+ Requires-Dist: pytest>=8.0; extra == 'dev'
33
+ Requires-Dist: ruff>=0.4; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # pdf2mcp
37
+
38
+ ```
39
+ ██████╗ ██████╗ ███████╗██████╗ ███╗ ███╗ ██████╗██████╗
40
+ ██╔══██╗██╔══██╗██╔════╝╚════██╗████╗ ████║██╔════╝██╔══██╗
41
+ ██████╔╝██║ ██║█████╗ █████╔╝██╔████╔██║██║ ██████╔╝
42
+ ██╔═══╝ ██║ ██║██╔══╝ ██╔═══╝ ██║╚██╔╝██║██║ ██╔═══╝
43
+ ██║ ██████╔╝██║ ███████╗██║ ╚═╝ ██║╚██████╗██║
44
+ ╚═╝ ╚═════╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝╚═╝
45
+ ```
46
+
47
+ Turn any PDF folder into a searchable MCP server.
48
+
49
+ ## Installation
50
+
51
+ Clone the repo, then install globally with `uv tool`:
52
+
53
+ ```bash
54
+ git clone https://github.com/iSamBa/pdf2mcp.git
55
+ uv tool install ./pdf2mcp
56
+ ```
57
+
58
+ This makes `pdf2mcp` available as a command anywhere on your system.
59
+
60
+ To update after pulling new changes:
61
+
62
+ ```bash
63
+ uv tool install --force ./pdf2mcp
64
+ ```
65
+
66
+ To run directly from source without installing:
67
+
68
+ ```bash
69
+ cd ./pdf2mcp
70
+ uv run pdf2mcp --help
71
+ ```
72
+
73
+ ### Verify
74
+
75
+ ```bash
76
+ pdf2mcp --version
77
+ ```
78
+
79
+ ## Quick Start
80
+
81
+ ```bash
82
+ # 1. Scaffold a project (creates docs/ and .env)
83
+ pdf2mcp init ./my-project
84
+ cd my-project
85
+
86
+ # 2. Add your PDFs to docs/ and set OPENAI_API_KEY in .env
87
+
88
+ # 3. Ingest
89
+ pdf2mcp ingest
90
+
91
+ # 4. Start the server
92
+ pdf2mcp serve
93
+
94
+ # 5. Get config snippets for your MCP client
95
+ pdf2mcp config
96
+ ```
97
+
98
+ ## Architecture
99
+
100
+ pdf2mcp separates **server** and **client** concerns:
101
+
102
+ - **Server** (`pdf2mcp serve`) — runs independently, handles PDF ingestion, embedding, and search. Configured via `PDF2MCP_*` environment variables.
103
+ - **Client** (Claude Code, Cursor, VS Code, etc.) — connects to a running server over HTTP. Only needs the server URL.
104
+
105
+ The default transport is `streamable-http`. With the default host and port, the server listens on `http://127.0.0.1:8000/mcp`; it shuts down gracefully on SIGINT/SIGTERM.
106
+
107
+ ## Commands
108
+
109
+ | Command | Description |
110
+ |---------|-------------|
111
+ | `pdf2mcp init [dir]` | Scaffold a working directory with `docs/` and `.env` |
112
+ | `pdf2mcp ingest` | Parse PDFs, chunk, embed, and store in vector DB |
113
+ | `pdf2mcp serve` | Start the MCP server (HTTP by default) |
114
+ | `pdf2mcp config` | Print ready-to-paste config for MCP clients |
115
+
116
+ ### Common Flags
117
+
118
+ ```bash
119
+ # Override docs directory
120
+ pdf2mcp ingest --docs-dir ./my-pdfs
121
+ pdf2mcp serve --docs-dir ./my-pdfs
122
+
123
+ # Use stdio transport (for clients that spawn the server)
124
+ pdf2mcp serve --transport stdio
125
+
126
+ # Custom host/port
127
+ pdf2mcp serve --host 0.0.0.0 --port 9000
128
+
129
+ # Custom server name
130
+ pdf2mcp serve --name my-docs
131
+
132
+ # Config for a specific client
133
+ pdf2mcp config --client cursor
134
+ pdf2mcp config --client claude-desktop --transport stdio
135
+ ```
136
+
137
+ ## Client Configuration
138
+
139
+ `pdf2mcp config` generates ready-to-paste JSON for all supported clients. The default is HTTP — clients just need the server URL:
140
+
141
+ ```json
142
+ {
143
+ "mcpServers": {
144
+ "pdf-docs": {
145
+ "type": "http",
146
+ "url": "http://127.0.0.1:8000/mcp"
147
+ }
148
+ }
149
+ }
150
+ ```
151
+
152
+ | Client | Config File | Top-level Key | HTTP Support |
153
+ |--------|------------|--------------|--------------|
154
+ | Claude Code | `.mcp.json` | `mcpServers` | Yes |
155
+ | Claude Desktop | `claude_desktop_config.json` | `mcpServers` | No (stdio only) |
156
+ | Cursor | `.cursor/mcp.json` | `mcpServers` | Yes |
157
+ | VS Code / Copilot | `.vscode/mcp.json` | `servers` | Yes |
158
+
159
+ Use `--transport stdio` for clients that need to spawn the server process (e.g., Claude Desktop):
160
+
161
+ ```json
162
+ {
163
+ "mcpServers": {
164
+ "pdf-docs": {
165
+ "command": "uv",
166
+ "args": ["run", "pdf2mcp", "serve", "--transport", "stdio"]
167
+ }
168
+ }
169
+ }
170
+ ```
171
+
172
+ ## Environment Variables
173
+
174
+ ### Server settings (`PDF2MCP_*`)
175
+
176
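+ These configure the server process. All of them use the `PDF2MCP_` prefix except `OPENAI_API_KEY`, which is read under its standard, unprefixed name. MCP clients never need these.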
177
+
178
+ | Variable | Default | Description |
179
+ |----------|---------|-------------|
180
+ | `OPENAI_API_KEY` | (required) | OpenAI API key for embeddings |
181
+ | `PDF2MCP_OPENAI_BASE_URL` | `https://api.openai.com/v1` | OpenAI API base URL (for Azure, local proxies, or compatible providers) |
182
+ | `PDF2MCP_DOCS_DIR` | `docs` | Directory containing PDF files |
183
+ | `PDF2MCP_DATA_DIR` | `data` | Directory for vector database |
184
+ | `PDF2MCP_EMBEDDING_MODEL` | `text-embedding-3-small` | OpenAI embedding model |
185
+ | `PDF2MCP_CHUNK_SIZE` | `500` | Target chunk size in tokens |
186
+ | `PDF2MCP_CHUNK_OVERLAP` | `50` | Overlap between chunks in tokens |
187
+ | `PDF2MCP_DEFAULT_NUM_RESULTS` | `5` | Default search results count |
188
+ | `PDF2MCP_SERVER_NAME` | `pdf-docs` | MCP server name |
189
+ | `PDF2MCP_SERVER_TRANSPORT` | `streamable-http` | Transport protocol |
190
+ | `PDF2MCP_SERVER_HOST` | `127.0.0.1` | Host to bind to |
191
+ | `PDF2MCP_SERVER_PORT` | `8000` | Port to bind to |
192
+
193
+ ### Client settings (`PDF2MCP_CLIENT_*`)
194
+
195
+ These configure how a client connects to the server. No secrets needed.
196
+
197
+ | Variable | Default | Description |
198
+ |----------|---------|-------------|
199
+ | `PDF2MCP_CLIENT_SERVER_NAME` | `pdf-docs` | Server name in client config |
200
+ | `PDF2MCP_CLIENT_SERVER_URL` | `http://127.0.0.1:8000/mcp` | Server URL |
201
+ | `PDF2MCP_CLIENT_TRANSPORT` | `streamable-http` | Transport protocol |
202
+
203
+ ## MCP Tools
204
+
205
+ The server exposes six tools:
206
+
207
+ | Tool | Description |
208
+ |------|-------------|
209
+ | `search_docs(query)` | Semantic search across **all** ingested PDFs |
210
+ | `search_in_doc(query, filename)` | Semantic search scoped to a **single** document |
211
+ | `list_docs()` | List all ingested documents with chunk counts |
212
+ | `get_sections(filename)` | Get section headings for a specific document |
213
+ | `read_page(filename, page)` | Read the full content of a specific page |
214
+ | `read_section(filename, section_title)` | Read the full content of a named section |
215
+
216
+ ### Typical workflow
217
+
218
+ 1. **`list_docs`** — discover available documents
219
+ 2. **`get_sections`** — browse a document's structure
220
+ 3. **`read_section`** or **`read_page`** — read specific content
221
+ 4. **`search_docs`** or **`search_in_doc`** — find information by query
222
+
223
+ ## Development
224
+
225
+ ```bash
226
+ uv sync --all-extras
227
+ uv run pytest
228
+ uv run ruff check src/
229
+ uv run mypy src/
230
+ ```
@@ -0,0 +1,195 @@
1
+ # pdf2mcp
2
+
3
+ ```
4
+ ██████╗ ██████╗ ███████╗██████╗ ███╗ ███╗ ██████╗██████╗
5
+ ██╔══██╗██╔══██╗██╔════╝╚════██╗████╗ ████║██╔════╝██╔══██╗
6
+ ██████╔╝██║ ██║█████╗ █████╔╝██╔████╔██║██║ ██████╔╝
7
+ ██╔═══╝ ██║ ██║██╔══╝ ██╔═══╝ ██║╚██╔╝██║██║ ██╔═══╝
8
+ ██║ ██████╔╝██║ ███████╗██║ ╚═╝ ██║╚██████╗██║
9
+ ╚═╝ ╚═════╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚═════╝╚═╝
10
+ ```
11
+
12
+ Turn any PDF folder into a searchable MCP server.
13
+
14
+ ## Installation
15
+
16
+ Clone the repo, then install globally with `uv tool`:
17
+
18
+ ```bash
19
+ git clone https://github.com/iSamBa/pdf2mcp.git
20
+ uv tool install ./pdf2mcp
21
+ ```
22
+
23
+ This makes `pdf2mcp` available as a command anywhere on your system.
24
+
25
+ To update after pulling new changes:
26
+
27
+ ```bash
28
+ uv tool install --force ./pdf2mcp
29
+ ```
30
+
31
+ To run directly from source without installing:
32
+
33
+ ```bash
34
+ cd ./pdf2mcp
35
+ uv run pdf2mcp --help
36
+ ```
37
+
38
+ ### Verify
39
+
40
+ ```bash
41
+ pdf2mcp --version
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ```bash
47
+ # 1. Scaffold a project (creates docs/ and .env)
48
+ pdf2mcp init ./my-project
49
+ cd my-project
50
+
51
+ # 2. Add your PDFs to docs/ and set OPENAI_API_KEY in .env
52
+
53
+ # 3. Ingest
54
+ pdf2mcp ingest
55
+
56
+ # 4. Start the server
57
+ pdf2mcp serve
58
+
59
+ # 5. Get config snippets for your MCP client
60
+ pdf2mcp config
61
+ ```
62
+
63
+ ## Architecture
64
+
65
+ pdf2mcp separates **server** and **client** concerns:
66
+
67
+ - **Server** (`pdf2mcp serve`) — runs independently, handles PDF ingestion, embedding, and search. Configured via `PDF2MCP_*` environment variables.
68
+ - **Client** (Claude Code, Cursor, VS Code, etc.) — connects to a running server over HTTP. Only needs the server URL.
69
+
70
+ The default transport is `streamable-http`. With the default host and port, the server listens on `http://127.0.0.1:8000/mcp`; it shuts down gracefully on SIGINT/SIGTERM.
71
+
72
+ ## Commands
73
+
74
+ | Command | Description |
75
+ |---------|-------------|
76
+ | `pdf2mcp init [dir]` | Scaffold a working directory with `docs/` and `.env` |
77
+ | `pdf2mcp ingest` | Parse PDFs, chunk, embed, and store in vector DB |
78
+ | `pdf2mcp serve` | Start the MCP server (HTTP by default) |
79
+ | `pdf2mcp config` | Print ready-to-paste config for MCP clients |
80
+
81
+ ### Common Flags
82
+
83
+ ```bash
84
+ # Override docs directory
85
+ pdf2mcp ingest --docs-dir ./my-pdfs
86
+ pdf2mcp serve --docs-dir ./my-pdfs
87
+
88
+ # Use stdio transport (for clients that spawn the server)
89
+ pdf2mcp serve --transport stdio
90
+
91
+ # Custom host/port
92
+ pdf2mcp serve --host 0.0.0.0 --port 9000
93
+
94
+ # Custom server name
95
+ pdf2mcp serve --name my-docs
96
+
97
+ # Config for a specific client
98
+ pdf2mcp config --client cursor
99
+ pdf2mcp config --client claude-desktop --transport stdio
100
+ ```
101
+
102
+ ## Client Configuration
103
+
104
+ `pdf2mcp config` generates ready-to-paste JSON for all supported clients. The default is HTTP — clients just need the server URL:
105
+
106
+ ```json
107
+ {
108
+ "mcpServers": {
109
+ "pdf-docs": {
110
+ "type": "http",
111
+ "url": "http://127.0.0.1:8000/mcp"
112
+ }
113
+ }
114
+ }
115
+ ```
116
+
117
+ | Client | Config File | Top-level Key | HTTP Support |
118
+ |--------|------------|--------------|--------------|
119
+ | Claude Code | `.mcp.json` | `mcpServers` | Yes |
120
+ | Claude Desktop | `claude_desktop_config.json` | `mcpServers` | No (stdio only) |
121
+ | Cursor | `.cursor/mcp.json` | `mcpServers` | Yes |
122
+ | VS Code / Copilot | `.vscode/mcp.json` | `servers` | Yes |
123
+
124
+ Use `--transport stdio` for clients that need to spawn the server process (e.g., Claude Desktop):
125
+
126
+ ```json
127
+ {
128
+ "mcpServers": {
129
+ "pdf-docs": {
130
+ "command": "uv",
131
+ "args": ["run", "pdf2mcp", "serve", "--transport", "stdio"]
132
+ }
133
+ }
134
+ }
135
+ ```
136
+
137
+ ## Environment Variables
138
+
139
+ ### Server settings (`PDF2MCP_*`)
140
+
141
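+ These configure the server process. All of them use the `PDF2MCP_` prefix except `OPENAI_API_KEY`, which is read under its standard, unprefixed name. MCP clients never need these.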
142
+
143
+ | Variable | Default | Description |
144
+ |----------|---------|-------------|
145
+ | `OPENAI_API_KEY` | (required) | OpenAI API key for embeddings |
146
+ | `PDF2MCP_OPENAI_BASE_URL` | `https://api.openai.com/v1` | OpenAI API base URL (for Azure, local proxies, or compatible providers) |
147
+ | `PDF2MCP_DOCS_DIR` | `docs` | Directory containing PDF files |
148
+ | `PDF2MCP_DATA_DIR` | `data` | Directory for vector database |
149
+ | `PDF2MCP_EMBEDDING_MODEL` | `text-embedding-3-small` | OpenAI embedding model |
150
+ | `PDF2MCP_CHUNK_SIZE` | `500` | Target chunk size in tokens |
151
+ | `PDF2MCP_CHUNK_OVERLAP` | `50` | Overlap between chunks in tokens |
152
+ | `PDF2MCP_DEFAULT_NUM_RESULTS` | `5` | Default search results count |
153
+ | `PDF2MCP_SERVER_NAME` | `pdf-docs` | MCP server name |
154
+ | `PDF2MCP_SERVER_TRANSPORT` | `streamable-http` | Transport protocol |
155
+ | `PDF2MCP_SERVER_HOST` | `127.0.0.1` | Host to bind to |
156
+ | `PDF2MCP_SERVER_PORT` | `8000` | Port to bind to |
157
+
158
+ ### Client settings (`PDF2MCP_CLIENT_*`)
159
+
160
+ These configure how a client connects to the server. No secrets needed.
161
+
162
+ | Variable | Default | Description |
163
+ |----------|---------|-------------|
164
+ | `PDF2MCP_CLIENT_SERVER_NAME` | `pdf-docs` | Server name in client config |
165
+ | `PDF2MCP_CLIENT_SERVER_URL` | `http://127.0.0.1:8000/mcp` | Server URL |
166
+ | `PDF2MCP_CLIENT_TRANSPORT` | `streamable-http` | Transport protocol |
167
+
168
+ ## MCP Tools
169
+
170
+ The server exposes six tools:
171
+
172
+ | Tool | Description |
173
+ |------|-------------|
174
+ | `search_docs(query)` | Semantic search across **all** ingested PDFs |
175
+ | `search_in_doc(query, filename)` | Semantic search scoped to a **single** document |
176
+ | `list_docs()` | List all ingested documents with chunk counts |
177
+ | `get_sections(filename)` | Get section headings for a specific document |
178
+ | `read_page(filename, page)` | Read the full content of a specific page |
179
+ | `read_section(filename, section_title)` | Read the full content of a named section |
180
+
181
+ ### Typical workflow
182
+
183
+ 1. **`list_docs`** — discover available documents
184
+ 2. **`get_sections`** — browse a document's structure
185
+ 3. **`read_section`** or **`read_page`** — read specific content
186
+ 4. **`search_docs`** or **`search_in_doc`** — find information by query
187
+
188
+ ## Development
189
+
190
+ ```bash
191
+ uv sync --all-extras
192
+ uv run pytest
193
+ uv run ruff check src/
194
+ uv run mypy src/
195
+ ```
@@ -0,0 +1,68 @@
1
+ [project]
2
+ name = "pdf2mcp"
3
+ version = "0.2.2"
4
+ description = "Turn any PDF folder into a searchable MCP server"
5
+ readme = "README.md"
6
+ license = { text = "MIT" }
7
+ authors = [
8
+ { name = "iSamBa", email = "bahou.aissam@gmail.com" }
9
+ ]
10
+ keywords = ["pdf", "mcp", "search", "embeddings", "rag", "lancedb"]
11
+ classifiers = [
12
+ "Development Status :: 4 - Beta",
13
+ "Intended Audience :: Developers",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ "Topic :: Text Processing :: Indexing",
22
+ ]
23
+ requires-python = ">=3.10"
24
+ dependencies = [
25
+ "mcp[cli]>=1.0",
26
+ "pymupdf4llm>=0.0.17",
27
+ "lancedb>=0.6",
28
+ "openai>=1.0",
29
+ "pydantic-settings>=2.0",
30
+ "python-dotenv>=1.0",
31
+ "tenacity>=8.0",
32
+ ]
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/iSamBa/pdf2mcp"
36
+ Repository = "https://github.com/iSamBa/pdf2mcp"
37
+ Issues = "https://github.com/iSamBa/pdf2mcp/issues"
38
+
39
+ [project.scripts]
40
+ pdf2mcp = "pdf2mcp.cli:main"
41
+
42
+ [build-system]
43
+ requires = ["hatchling"]
44
+ build-backend = "hatchling.build"
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/pdf2mcp"]
48
+
49
+ [project.optional-dependencies]
50
+ dev = [
51
+ "pytest>=8.0",
52
+ "ruff>=0.4",
53
+ "mypy>=1.10",
54
+ ]
55
+
56
+ [tool.ruff]
57
+ line-length = 88
58
+ target-version = "py310"
59
+
60
+ [tool.ruff.lint]
61
+ select = ["E", "F", "I", "N", "W", "UP"]
62
+
63
+ [tool.mypy]
64
+ python_version = "3.10"
65
+ strict = true
66
+
67
+ [tool.pytest.ini_options]
68
+ testpaths = ["tests"]
@@ -0,0 +1,5 @@
1
+ """pdf2mcp — Turn any PDF folder into a searchable MCP server."""
2
+
3
+ __all__ = ["__version__"]
4
+
5
+ __version__ = "0.2.2"