pageserve 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pageserve-0.1.0/.gitignore +221 -0
- pageserve-0.1.0/PKG-INFO +208 -0
- pageserve-0.1.0/README.md +168 -0
- pageserve-0.1.0/pageserve/__init__.py +68 -0
- pageserve-0.1.0/pageserve/_async_client.py +338 -0
- pageserve-0.1.0/pageserve/_auth.py +20 -0
- pageserve-0.1.0/pageserve/_client.py +543 -0
- pageserve-0.1.0/pageserve/_exceptions.py +90 -0
- pageserve-0.1.0/pageserve/_models.py +244 -0
- pageserve-0.1.0/pageserve/_sse.py +59 -0
- pageserve-0.1.0/pageserve/cli/__init__.py +6 -0
- pageserve-0.1.0/pageserve/cli/_commands.py +319 -0
- pageserve-0.1.0/pageserve/mcp/__init__.py +16 -0
- pageserve-0.1.0/pageserve/mcp/_server.py +252 -0
- pageserve-0.1.0/pyproject.toml +98 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
*.lcov
|
|
51
|
+
.hypothesis/
|
|
52
|
+
.pytest_cache/
|
|
53
|
+
cover/
|
|
54
|
+
|
|
55
|
+
# Translations
|
|
56
|
+
*.mo
|
|
57
|
+
*.pot
|
|
58
|
+
|
|
59
|
+
# Django stuff:
|
|
60
|
+
*.log
|
|
61
|
+
local_settings.py
|
|
62
|
+
db.sqlite3
|
|
63
|
+
db.sqlite3-journal
|
|
64
|
+
|
|
65
|
+
# Flask stuff:
|
|
66
|
+
instance/
|
|
67
|
+
.webassets-cache
|
|
68
|
+
|
|
69
|
+
# Scrapy stuff:
|
|
70
|
+
.scrapy
|
|
71
|
+
|
|
72
|
+
# Sphinx documentation
|
|
73
|
+
docs/_build/
|
|
74
|
+
docs_internal
|
|
75
|
+
|
|
76
|
+
# PyBuilder
|
|
77
|
+
.pybuilder/
|
|
78
|
+
target/
|
|
79
|
+
|
|
80
|
+
# Jupyter Notebook
|
|
81
|
+
.ipynb_checkpoints
|
|
82
|
+
|
|
83
|
+
# IPython
|
|
84
|
+
profile_default/
|
|
85
|
+
ipython_config.py
|
|
86
|
+
|
|
87
|
+
# pyenv
|
|
88
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
89
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
90
|
+
# .python-version
|
|
91
|
+
|
|
92
|
+
# pipenv
|
|
93
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
94
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
95
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
96
|
+
# install all needed dependencies.
|
|
97
|
+
# Pipfile.lock
|
|
98
|
+
|
|
99
|
+
# UV
|
|
100
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
101
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
102
|
+
# commonly ignored for libraries.
|
|
103
|
+
# uv.lock
|
|
104
|
+
|
|
105
|
+
# poetry
|
|
106
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
107
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
108
|
+
# commonly ignored for libraries.
|
|
109
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
110
|
+
# poetry.lock
|
|
111
|
+
# poetry.toml
|
|
112
|
+
|
|
113
|
+
# pdm
|
|
114
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
115
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
116
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
117
|
+
# pdm.lock
|
|
118
|
+
# pdm.toml
|
|
119
|
+
.pdm-python
|
|
120
|
+
.pdm-build/
|
|
121
|
+
|
|
122
|
+
# pixi
|
|
123
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
124
|
+
# pixi.lock
|
|
125
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
126
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
127
|
+
.pixi/*
|
|
128
|
+
!.pixi/config.toml
|
|
129
|
+
|
|
130
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
131
|
+
__pypackages__/
|
|
132
|
+
|
|
133
|
+
# Celery stuff
|
|
134
|
+
celerybeat-schedule*
|
|
135
|
+
celerybeat.pid
|
|
136
|
+
|
|
137
|
+
# Redis
|
|
138
|
+
*.rdb
|
|
139
|
+
*.aof
|
|
140
|
+
*.pid
|
|
141
|
+
|
|
142
|
+
# RabbitMQ
|
|
143
|
+
mnesia/
|
|
144
|
+
rabbitmq/
|
|
145
|
+
rabbitmq-data/
|
|
146
|
+
|
|
147
|
+
# ActiveMQ
|
|
148
|
+
activemq-data/
|
|
149
|
+
|
|
150
|
+
# SageMath parsed files
|
|
151
|
+
*.sage.py
|
|
152
|
+
|
|
153
|
+
# Environments
|
|
154
|
+
.env
|
|
155
|
+
.envrc
|
|
156
|
+
.venv
|
|
157
|
+
env/
|
|
158
|
+
venv/
|
|
159
|
+
ENV/
|
|
160
|
+
env.bak/
|
|
161
|
+
venv.bak/
|
|
162
|
+
|
|
163
|
+
# Spyder project settings
|
|
164
|
+
.spyderproject
|
|
165
|
+
.spyproject
|
|
166
|
+
|
|
167
|
+
# Rope project settings
|
|
168
|
+
.ropeproject
|
|
169
|
+
|
|
170
|
+
# mkdocs documentation
|
|
171
|
+
/site
|
|
172
|
+
|
|
173
|
+
# mypy
|
|
174
|
+
.mypy_cache/
|
|
175
|
+
.dmypy.json
|
|
176
|
+
dmypy.json
|
|
177
|
+
|
|
178
|
+
# Pyre type checker
|
|
179
|
+
.pyre/
|
|
180
|
+
|
|
181
|
+
# pytype static type analyzer
|
|
182
|
+
.pytype/
|
|
183
|
+
|
|
184
|
+
# Cython debug symbols
|
|
185
|
+
cython_debug/
|
|
186
|
+
|
|
187
|
+
# PyCharm
|
|
188
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
189
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
190
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
191
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
192
|
+
# .idea/
|
|
193
|
+
|
|
194
|
+
# Abstra
|
|
195
|
+
# Abstra is an AI-powered process automation framework.
|
|
196
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
197
|
+
# Learn more at https://abstra.io/docs
|
|
198
|
+
.abstra/
|
|
199
|
+
|
|
200
|
+
# Visual Studio Code
|
|
201
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
202
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
203
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
204
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
205
|
+
# .vscode/
|
|
206
|
+
# Temporary file for partial code execution
|
|
207
|
+
tempCodeRunnerFile.py
|
|
208
|
+
|
|
209
|
+
# Ruff stuff:
|
|
210
|
+
.ruff_cache/
|
|
211
|
+
|
|
212
|
+
# PyPI configuration file
|
|
213
|
+
.pypirc
|
|
214
|
+
|
|
215
|
+
# Marimo
|
|
216
|
+
marimo/_static/
|
|
217
|
+
marimo/_lsp/
|
|
218
|
+
__marimo__/
|
|
219
|
+
|
|
220
|
+
# Streamlit
|
|
221
|
+
.streamlit/secrets.toml
|
pageserve-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pageserve
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK and MCP server for self-hosted PageIndex RAG service
|
|
5
|
+
Project-URL: Homepage, https://github.com/pageserve/pageserve
|
|
6
|
+
Project-URL: Repository, https://github.com/pageserve/pageserve
|
|
7
|
+
Project-URL: Issues, https://github.com/pageserve/pageserve/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/pageserve/pageserve#readme
|
|
9
|
+
Author-email: pageserve <pageserve03@gmail.com>
|
|
10
|
+
License: Apache-2.0
|
|
11
|
+
Keywords: document-ai,llm,mcp,pageindex,rag,self-host,vector-free
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Requires-Dist: pydantic>=2.0
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: click>=8.0; extra == 'all'
|
|
26
|
+
Requires-Dist: mcp>=1.9.4; extra == 'all'
|
|
27
|
+
Provides-Extra: cli
|
|
28
|
+
Requires-Dist: click>=8.0; extra == 'cli'
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: hatch; extra == 'dev'
|
|
31
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
36
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
37
|
+
Provides-Extra: mcp
|
|
38
|
+
Requires-Dist: mcp>=1.9.4; extra == 'mcp'
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
# pageserve
|
|
42
|
+
|
|
43
|
+
Python SDK and MCP server for the self-hosted [PageIndex](https://github.com/VectifyAI/PageIndex) RAG service.
|
|
44
|
+
|
|
45
|
+
PageIndex is a reasoning-based RAG engine that navigates document structure rather than doing vector similarity search — it reads the table of contents, picks the right sections, and synthesizes answers with page-level citations.
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install pageserve # SDK only (sync + async)
|
|
51
|
+
pip install "pageserve[mcp]" # + MCP server for agent frameworks
|
|
52
|
+
pip install "pageserve[cli]" # + CLI commands
|
|
53
|
+
pip install "pageserve[all]" # everything
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Quick start
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from pageserve import PageServeClient
|
|
60
|
+
|
|
61
|
+
client = PageServeClient(
|
|
62
|
+
base_url = "https://pageindex.company.com",
|
|
63
|
+
public_key = "<your-public-key>",
|
|
64
|
+
secret_key = "<your-secret-key>",
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# List indexed documents
|
|
68
|
+
docs = client.list_documents()
|
|
69
|
+
|
|
70
|
+
# Ask a question
|
|
71
|
+
result = client.query(docs[0].doc_id, "What are the probation terms?")
|
|
72
|
+
print(result.answer)
|
|
73
|
+
print(result.citation) # "Employment Contract p.5, 6"
|
|
74
|
+
print(result.page_refs) # [5, 6]
|
|
75
|
+
|
|
76
|
+
# Upload and wait for indexing to complete
|
|
77
|
+
upload = client.upload("./contract.pdf", wait=True)
|
|
78
|
+
|
|
79
|
+
# Read specific pages (no LLM, instant)
|
|
80
|
+
pages = client.get_pages(docs[0].doc_id, "22-24")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Async usage
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import asyncio
|
|
87
|
+
from pageserve import AsyncPageServeClient
|
|
88
|
+
|
|
89
|
+
async def main():
|
|
90
|
+
async with AsyncPageServeClient(
|
|
91
|
+
base_url = "https://pageindex.company.com",
|
|
92
|
+
public_key = "<your-public-key>",
|
|
93
|
+
secret_key = "<your-secret-key>",
|
|
94
|
+
) as client:
|
|
95
|
+
docs = await client.list_documents()
|
|
96
|
+
|
|
97
|
+
# Query multiple documents concurrently
|
|
98
|
+
results = await client.query_many([
|
|
99
|
+
(docs[0].doc_id, "What are the probation terms?"),
|
|
100
|
+
(docs[1].doc_id, "What does the law say about probation?"),
|
|
101
|
+
])
|
|
102
|
+
for r in results:
|
|
103
|
+
print(r.answer)
|
|
104
|
+
|
|
105
|
+
asyncio.run(main())
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Streaming
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
for event in client.query_stream(doc_id, "What are the key clauses?"):
|
|
112
|
+
if event.type == "token":
|
|
113
|
+
print(event.content, end="", flush=True)
|
|
114
|
+
elif event.type == "sources":
|
|
115
|
+
for src in event.sources:
|
|
116
|
+
print(f"\nSource: {src.citation}")
|
|
117
|
+
elif event.type == "done":
|
|
118
|
+
break
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## MCP server (Claude Desktop / Cursor)
|
|
122
|
+
|
|
123
|
+
To let Claude or any MCP-compatible agent query your documents, register the MCP server in your client's configuration (e.g. Claude Desktop or Cursor):
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"mcpServers": {
|
|
128
|
+
"pageindex": {
|
|
129
|
+
"command": "pageserve",
|
|
130
|
+
"args": ["mcp"],
|
|
131
|
+
"env": {
|
|
132
|
+
"PAGESERVE_URL": "https://pageindex.company.com",
|
|
133
|
+
"PAGESERVE_PUBLIC_KEY": "<your-public-key>",
|
|
134
|
+
"PAGESERVE_SECRET_KEY": "<your-secret-key>"
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The MCP server exposes five tools:
|
|
142
|
+
- `list_documents` — see what documents are available
|
|
143
|
+
- `query_document` — ask a question against one document
|
|
144
|
+
- `query_multiple_documents` — cross-reference multiple documents
|
|
145
|
+
- `get_page_content` — read raw page text (no LLM, instant)
|
|
146
|
+
- `get_document_structure` — browse the table of contents
|
|
147
|
+
|
|
148
|
+
API keys are supplied through environment variables and never appear in tool arguments or the model's context window.
|
|
149
|
+
|
|
150
|
+
## CLI
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
export PAGESERVE_URL=https://pageindex.company.com
|
|
154
|
+
export PAGESERVE_PUBLIC_KEY=<your-public-key>
|
|
155
|
+
export PAGESERVE_SECRET_KEY=<your-secret-key>
|
|
156
|
+
|
|
157
|
+
pageserve list # list documents
|
|
158
|
+
pageserve query <doc_id> "question" # ask a question
|
|
159
|
+
pageserve query <doc_id> "question" --stream # streaming output
|
|
160
|
+
pageserve upload ./report.pdf --watch # upload + progress bar
|
|
161
|
+
pageserve health # service status
|
|
162
|
+
pageserve keys list # list API keys
|
|
163
|
+
pageserve keys create "My App" # create a key
|
|
164
|
+
pageserve mcp # run MCP server (stdio)
|
|
165
|
+
pageserve mcp --transport sse --port 3000 # MCP over SSE
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Error handling
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from pageserve import (
|
|
172
|
+
AuthError,
|
|
173
|
+
NotFoundError,
|
|
174
|
+
DocumentNotReadyError,
|
|
175
|
+
FileTooLargeError,
|
|
176
|
+
RateLimitError,
|
|
177
|
+
ServiceUnavailableError,
|
|
178
|
+
ServiceError,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
try:
|
|
182
|
+
result = client.query(doc_id, "question")
|
|
183
|
+
except AuthError:
|
|
184
|
+
print("Invalid or expired API key")
|
|
185
|
+
except NotFoundError:
|
|
186
|
+
print("Document not found")
|
|
187
|
+
except RateLimitError as e:
|
|
188
|
+
import time; time.sleep(e.retry_after)
|
|
189
|
+
except ServiceError as e:
|
|
190
|
+
print(f"Server error [{e.status_code}]")
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Documentation
|
|
194
|
+
|
|
195
|
+
- [Getting Started](docs/getting-started.md)
|
|
196
|
+
- [Authentication](docs/authentication.md)
|
|
197
|
+
- [Sync Client Reference](docs/sync-client.md)
|
|
198
|
+
- [Async Client Reference](docs/async-client.md)
|
|
199
|
+
- [Streaming (SSE)](docs/streaming.md)
|
|
200
|
+
- [MCP Server](docs/mcp-server.md)
|
|
201
|
+
- [CLI Reference](docs/cli.md)
|
|
202
|
+
- [Data Models](docs/models.md)
|
|
203
|
+
- [Error Handling](docs/error-handling.md)
|
|
204
|
+
|
|
205
|
+
## Related
|
|
206
|
+
|
|
207
|
+
- [PageIndex OSS](https://github.com/VectifyAI/PageIndex) — the self-hosted service this SDK wraps
|
|
208
|
+
- [PageIndex Cloud](https://pageindex.ai) — hosted version
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# pageserve
|
|
2
|
+
|
|
3
|
+
Python SDK and MCP server for the self-hosted [PageIndex](https://github.com/VectifyAI/PageIndex) RAG service.
|
|
4
|
+
|
|
5
|
+
PageIndex is a reasoning-based RAG engine that navigates document structure rather than doing vector similarity search — it reads the table of contents, picks the right sections, and synthesizes answers with page-level citations.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install pageserve # SDK only (sync + async)
|
|
11
|
+
pip install "pageserve[mcp]" # + MCP server for agent frameworks
|
|
12
|
+
pip install "pageserve[cli]" # + CLI commands
|
|
13
|
+
pip install "pageserve[all]" # everything
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quick start
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from pageserve import PageServeClient
|
|
20
|
+
|
|
21
|
+
client = PageServeClient(
|
|
22
|
+
base_url = "https://pageindex.company.com",
|
|
23
|
+
public_key = "<your-public-key>",
|
|
24
|
+
secret_key = "<your-secret-key>",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# List indexed documents
|
|
28
|
+
docs = client.list_documents()
|
|
29
|
+
|
|
30
|
+
# Ask a question
|
|
31
|
+
result = client.query(docs[0].doc_id, "What are the probation terms?")
|
|
32
|
+
print(result.answer)
|
|
33
|
+
print(result.citation) # "Employment Contract p.5, 6"
|
|
34
|
+
print(result.page_refs) # [5, 6]
|
|
35
|
+
|
|
36
|
+
# Upload and wait for indexing to complete
|
|
37
|
+
upload = client.upload("./contract.pdf", wait=True)
|
|
38
|
+
|
|
39
|
+
# Read specific pages (no LLM, instant)
|
|
40
|
+
pages = client.get_pages(docs[0].doc_id, "22-24")
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Async usage
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import asyncio
|
|
47
|
+
from pageserve import AsyncPageServeClient
|
|
48
|
+
|
|
49
|
+
async def main():
|
|
50
|
+
async with AsyncPageServeClient(
|
|
51
|
+
base_url = "https://pageindex.company.com",
|
|
52
|
+
public_key = "<your-public-key>",
|
|
53
|
+
secret_key = "<your-secret-key>",
|
|
54
|
+
) as client:
|
|
55
|
+
docs = await client.list_documents()
|
|
56
|
+
|
|
57
|
+
# Query multiple documents concurrently
|
|
58
|
+
results = await client.query_many([
|
|
59
|
+
(docs[0].doc_id, "What are the probation terms?"),
|
|
60
|
+
(docs[1].doc_id, "What does the law say about probation?"),
|
|
61
|
+
])
|
|
62
|
+
for r in results:
|
|
63
|
+
print(r.answer)
|
|
64
|
+
|
|
65
|
+
asyncio.run(main())
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Streaming
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
for event in client.query_stream(doc_id, "What are the key clauses?"):
|
|
72
|
+
if event.type == "token":
|
|
73
|
+
print(event.content, end="", flush=True)
|
|
74
|
+
elif event.type == "sources":
|
|
75
|
+
for src in event.sources:
|
|
76
|
+
print(f"\nSource: {src.citation}")
|
|
77
|
+
elif event.type == "done":
|
|
78
|
+
break
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## MCP server (Claude Desktop / Cursor)
|
|
82
|
+
|
|
83
|
+
To let Claude or any MCP-compatible agent query your documents, register the MCP server in your client's configuration (e.g. Claude Desktop or Cursor):
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"mcpServers": {
|
|
88
|
+
"pageindex": {
|
|
89
|
+
"command": "pageserve",
|
|
90
|
+
"args": ["mcp"],
|
|
91
|
+
"env": {
|
|
92
|
+
"PAGESERVE_URL": "https://pageindex.company.com",
|
|
93
|
+
"PAGESERVE_PUBLIC_KEY": "<your-public-key>",
|
|
94
|
+
"PAGESERVE_SECRET_KEY": "<your-secret-key>"
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
The MCP server exposes five tools:
|
|
102
|
+
- `list_documents` — see what documents are available
|
|
103
|
+
- `query_document` — ask a question against one document
|
|
104
|
+
- `query_multiple_documents` — cross-reference multiple documents
|
|
105
|
+
- `get_page_content` — read raw page text (no LLM, instant)
|
|
106
|
+
- `get_document_structure` — browse the table of contents
|
|
107
|
+
|
|
108
|
+
API keys are supplied through environment variables and never appear in tool arguments or the model's context window.
|
|
109
|
+
|
|
110
|
+
## CLI
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
export PAGESERVE_URL=https://pageindex.company.com
|
|
114
|
+
export PAGESERVE_PUBLIC_KEY=<your-public-key>
|
|
115
|
+
export PAGESERVE_SECRET_KEY=<your-secret-key>
|
|
116
|
+
|
|
117
|
+
pageserve list # list documents
|
|
118
|
+
pageserve query <doc_id> "question" # ask a question
|
|
119
|
+
pageserve query <doc_id> "question" --stream # streaming output
|
|
120
|
+
pageserve upload ./report.pdf --watch # upload + progress bar
|
|
121
|
+
pageserve health # service status
|
|
122
|
+
pageserve keys list # list API keys
|
|
123
|
+
pageserve keys create "My App" # create a key
|
|
124
|
+
pageserve mcp # run MCP server (stdio)
|
|
125
|
+
pageserve mcp --transport sse --port 3000 # MCP over SSE
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Error handling
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from pageserve import (
|
|
132
|
+
AuthError,
|
|
133
|
+
NotFoundError,
|
|
134
|
+
DocumentNotReadyError,
|
|
135
|
+
FileTooLargeError,
|
|
136
|
+
RateLimitError,
|
|
137
|
+
ServiceUnavailableError,
|
|
138
|
+
ServiceError,
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
result = client.query(doc_id, "question")
|
|
143
|
+
except AuthError:
|
|
144
|
+
print("Invalid or expired API key")
|
|
145
|
+
except NotFoundError:
|
|
146
|
+
print("Document not found")
|
|
147
|
+
except RateLimitError as e:
|
|
148
|
+
import time; time.sleep(e.retry_after)
|
|
149
|
+
except ServiceError as e:
|
|
150
|
+
print(f"Server error [{e.status_code}]")
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Documentation
|
|
154
|
+
|
|
155
|
+
- [Getting Started](docs/getting-started.md)
|
|
156
|
+
- [Authentication](docs/authentication.md)
|
|
157
|
+
- [Sync Client Reference](docs/sync-client.md)
|
|
158
|
+
- [Async Client Reference](docs/async-client.md)
|
|
159
|
+
- [Streaming (SSE)](docs/streaming.md)
|
|
160
|
+
- [MCP Server](docs/mcp-server.md)
|
|
161
|
+
- [CLI Reference](docs/cli.md)
|
|
162
|
+
- [Data Models](docs/models.md)
|
|
163
|
+
- [Error Handling](docs/error-handling.md)
|
|
164
|
+
|
|
165
|
+
## Related
|
|
166
|
+
|
|
167
|
+
- [PageIndex OSS](https://github.com/VectifyAI/PageIndex) — the self-hosted service this SDK wraps
|
|
168
|
+
- [PageIndex Cloud](https://pageindex.ai) — hosted version
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from pageserve._async_client import AsyncPageServeClient
|
|
2
|
+
from pageserve._client import PageServeClient
|
|
3
|
+
from pageserve._exceptions import (
|
|
4
|
+
AuthError,
|
|
5
|
+
DocumentNotReadyError,
|
|
6
|
+
FileTooLargeError,
|
|
7
|
+
InsufficientStorageError,
|
|
8
|
+
NotFoundError,
|
|
9
|
+
PageServeError,
|
|
10
|
+
RateLimitError,
|
|
11
|
+
ServiceError,
|
|
12
|
+
ServiceUnavailableError,
|
|
13
|
+
TimeoutError,
|
|
14
|
+
)
|
|
15
|
+
from pageserve._models import (
|
|
16
|
+
ApiKey,
|
|
17
|
+
CreatedApiKey,
|
|
18
|
+
Document,
|
|
19
|
+
DocumentList,
|
|
20
|
+
HealthResult,
|
|
21
|
+
IndexProgress,
|
|
22
|
+
Page,
|
|
23
|
+
QueryResult,
|
|
24
|
+
QuerySource,
|
|
25
|
+
SSEEvent,
|
|
26
|
+
Stats,
|
|
27
|
+
StructureNode,
|
|
28
|
+
UploadResult,
|
|
29
|
+
Webhook,
|
|
30
|
+
WebhookTestResult,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
__version__ = "0.1.0"
|
|
34
|
+
__author__ = "pageserve"
|
|
35
|
+
__license__ = "Apache-2.0"
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
# Clients
|
|
39
|
+
"PageServeClient",
|
|
40
|
+
"AsyncPageServeClient",
|
|
41
|
+
# Models
|
|
42
|
+
"Document",
|
|
43
|
+
"DocumentList",
|
|
44
|
+
"UploadResult",
|
|
45
|
+
"StructureNode",
|
|
46
|
+
"Page",
|
|
47
|
+
"QueryResult",
|
|
48
|
+
"QuerySource",
|
|
49
|
+
"SSEEvent",
|
|
50
|
+
"IndexProgress",
|
|
51
|
+
"ApiKey",
|
|
52
|
+
"CreatedApiKey",
|
|
53
|
+
"Stats",
|
|
54
|
+
"Webhook",
|
|
55
|
+
"WebhookTestResult",
|
|
56
|
+
"HealthResult",
|
|
57
|
+
# Exceptions
|
|
58
|
+
"PageServeError",
|
|
59
|
+
"AuthError",
|
|
60
|
+
"NotFoundError",
|
|
61
|
+
"DocumentNotReadyError",
|
|
62
|
+
"FileTooLargeError",
|
|
63
|
+
"ServiceUnavailableError",
|
|
64
|
+
"InsufficientStorageError",
|
|
65
|
+
"RateLimitError",
|
|
66
|
+
"ServiceError",
|
|
67
|
+
"TimeoutError",
|
|
68
|
+
]
|