docpull 2.4.0__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docpull-2.4.0/src/docpull.egg-info → docpull-2.5.1}/PKG-INFO +33 -7
- {docpull-2.4.0 → docpull-2.5.1}/README.md +32 -6
- {docpull-2.4.0 → docpull-2.5.1}/pyproject.toml +1 -1
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/__init__.py +1 -1
- docpull-2.5.1/src/docpull/mcp/server.py +621 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/mcp/sources.py +18 -1
- docpull-2.5.1/src/docpull/mcp/tools.py +839 -0
- {docpull-2.4.0 → docpull-2.5.1/src/docpull.egg-info}/PKG-INFO +33 -7
- docpull-2.5.1/tests/test_mcp_tools.py +679 -0
- docpull-2.4.0/src/docpull/mcp/server.py +0 -200
- docpull-2.4.0/src/docpull/mcp/tools.py +0 -360
- docpull-2.4.0/tests/test_mcp_tools.py +0 -189
- {docpull-2.4.0 → docpull-2.5.1}/LICENSE +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/setup.cfg +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/__main__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cache/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cache/manager.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cache/streaming_dedup.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cli.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/concurrency/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/concurrency/manager.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/chunking.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/extractor.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/markdown.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/protocols.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/special_cases.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/trafilatura_extractor.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/core/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/core/fetcher.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/composite.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/crawler.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/filters.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/enhanced.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/protocols.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/static.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/protocols.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/sitemap.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/doctor.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/client.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/protocols.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/rate_limiter.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/logging_config.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/mcp/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/metadata_extractor.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/config.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/events.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/profiles.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/base.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/chunk.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/convert.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/dedup.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/fetch.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/metadata.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_json.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_ndjson.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_sqlite.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/validate.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/py.typed +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/security/__init__.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/security/robots.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull/security/url_validator.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/SOURCES.txt +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/dependency_links.txt +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/entry_points.txt +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/requires.txt +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/top_level.txt +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_cache_conditional_get.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_chunking.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_cli.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_convert_step_new.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_fixes_v2_3_0.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_link_extractors.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_naming.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_save_ndjson.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_security_hardening.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_special_cases.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_conversion.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_discovery.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_integration.py +0 -0
- {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docpull
|
|
3
|
-
Version: 2.4.0
|
|
3
|
+
Version: 2.5.1
|
|
4
4
|
Summary: Pull documentation from the web and convert to clean markdown
|
|
5
5
|
Author-email: Zachary Roth <support@raintree.technology>
|
|
6
6
|
Maintainer-email: Raintree Technology <support@raintree.technology>
|
|
@@ -222,7 +222,7 @@ pip install 'docpull[mcp]'
|
|
|
222
222
|
docpull mcp # starts the stdio server
|
|
223
223
|
```
|
|
224
224
|
|
|
225
|
-
Add to Claude Desktop or Claude Code:
|
|
225
|
+
Add to Claude Desktop or Claude Code manually:
|
|
226
226
|
|
|
227
227
|
```json
|
|
228
228
|
{
|
|
@@ -235,13 +235,39 @@ Add to Claude Desktop or Claude Code:
|
|
|
235
235
|
}
|
|
236
236
|
```
|
|
237
237
|
|
|
238
|
-
|
|
238
|
+
Or, if you use Claude Code, install the plugin instead — it bundles the MCP
|
|
239
|
+
server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
|
|
240
|
+
`/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
|
|
241
|
+
when to reach for docpull automatically:
|
|
239
242
|
|
|
240
|
-
|
|
241
|
-
|
|
243
|
+
```bash
|
|
244
|
+
# 1. Install docpull with the MCP extra (required for the plugin)
|
|
245
|
+
pip install 'docpull[mcp]'
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
# 2. Then in Claude Code:
|
|
250
|
+
/plugin marketplace add raintree-technology/docpull
|
|
251
|
+
/plugin install docpull@docpull
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
See [plugin/README.md](plugin/README.md) for details.
|
|
255
|
+
|
|
256
|
+
Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
|
|
257
|
+
|
|
258
|
+
Read:
|
|
259
|
+
- `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
|
|
242
260
|
- `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
|
|
243
|
-
- `list_indexed()` — what has been fetched locally
|
|
244
|
-
- `grep_docs(pattern, library?)` — regex search across fetched Markdown
|
|
261
|
+
- `list_indexed()` — what has been fetched locally, with last-fetched age
|
|
262
|
+
- `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
|
|
263
|
+
- `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
|
|
264
|
+
|
|
265
|
+
Write:
|
|
266
|
+
- `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
|
|
267
|
+
- `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
|
|
268
|
+
- `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
|
|
269
|
+
|
|
270
|
+
All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
|
|
245
271
|
|
|
246
272
|
User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
|
|
247
273
|
|
|
@@ -140,7 +140,7 @@ pip install 'docpull[mcp]'
|
|
|
140
140
|
docpull mcp # starts the stdio server
|
|
141
141
|
```
|
|
142
142
|
|
|
143
|
-
Add to Claude Desktop or Claude Code:
|
|
143
|
+
Add to Claude Desktop or Claude Code manually:
|
|
144
144
|
|
|
145
145
|
```json
|
|
146
146
|
{
|
|
@@ -153,13 +153,39 @@ Add to Claude Desktop or Claude Code:
|
|
|
153
153
|
}
|
|
154
154
|
```
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
Or, if you use Claude Code, install the plugin instead — it bundles the MCP
|
|
157
|
+
server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
|
|
158
|
+
`/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
|
|
159
|
+
when to reach for docpull automatically:
|
|
157
160
|
|
|
158
|
-
|
|
159
|
-
|
|
161
|
+
```bash
|
|
162
|
+
# 1. Install docpull with the MCP extra (required for the plugin)
|
|
163
|
+
pip install 'docpull[mcp]'
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
# 2. Then in Claude Code:
|
|
168
|
+
/plugin marketplace add raintree-technology/docpull
|
|
169
|
+
/plugin install docpull@docpull
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
See [plugin/README.md](plugin/README.md) for details.
|
|
173
|
+
|
|
174
|
+
Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
|
|
175
|
+
|
|
176
|
+
Read:
|
|
177
|
+
- `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
|
|
160
178
|
- `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
|
|
161
|
-
- `list_indexed()` — what has been fetched locally
|
|
162
|
-
- `grep_docs(pattern, library?)` — regex search across fetched Markdown
|
|
179
|
+
- `list_indexed()` — what has been fetched locally, with last-fetched age
|
|
180
|
+
- `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
|
|
181
|
+
- `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
|
|
182
|
+
|
|
183
|
+
Write:
|
|
184
|
+
- `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
|
|
185
|
+
- `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
|
|
186
|
+
- `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
|
|
187
|
+
|
|
188
|
+
All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
|
|
163
189
|
|
|
164
190
|
User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
|
|
165
191
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "docpull"
|
|
7
|
-
version = "2.4.0"
|
|
7
|
+
version = "2.5.1"
|
|
8
8
|
dynamic = []
|
|
9
9
|
description = "Pull documentation from the web and convert to clean markdown"
|
|
10
10
|
readme = {file = "README.md", content-type = "text/markdown"}
|