docpull 2.5.0__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docpull-2.5.0/src/docpull.egg-info → docpull-2.5.1}/PKG-INFO +33 -7
- {docpull-2.5.0 → docpull-2.5.1}/README.md +32 -6
- {docpull-2.5.0 → docpull-2.5.1}/pyproject.toml +1 -1
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/__init__.py +1 -1
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/server.py +12 -6
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/tools.py +18 -9
- {docpull-2.5.0 → docpull-2.5.1/src/docpull.egg-info}/PKG-INFO +33 -7
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_mcp_tools.py +63 -1
- {docpull-2.5.0 → docpull-2.5.1}/LICENSE +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/setup.cfg +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/__main__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cache/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cache/manager.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cache/streaming_dedup.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cli.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/concurrency/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/concurrency/manager.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/chunking.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/extractor.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/markdown.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/protocols.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/special_cases.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/trafilatura_extractor.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/core/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/core/fetcher.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/composite.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/crawler.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/filters.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/enhanced.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/protocols.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/static.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/protocols.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/sitemap.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/doctor.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/client.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/protocols.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/rate_limiter.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/logging_config.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/sources.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/metadata_extractor.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/config.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/events.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/profiles.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/base.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/chunk.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/convert.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/dedup.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/fetch.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/metadata.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_json.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_ndjson.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_sqlite.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/validate.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/py.typed +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/security/__init__.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/security/robots.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull/security/url_validator.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/SOURCES.txt +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/dependency_links.txt +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/entry_points.txt +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/requires.txt +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/top_level.txt +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_cache_conditional_get.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_chunking.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_cli.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_convert_step_new.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_fixes_v2_3_0.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_link_extractors.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_naming.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_save_ndjson.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_security_hardening.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_special_cases.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_conversion.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_discovery.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_integration.py +0 -0
- {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_pipeline.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docpull
|
|
3
|
-
Version: 2.5.0
|
|
3
|
+
Version: 2.5.1
|
|
4
4
|
Summary: Pull documentation from the web and convert to clean markdown
|
|
5
5
|
Author-email: Zachary Roth <support@raintree.technology>
|
|
6
6
|
Maintainer-email: Raintree Technology <support@raintree.technology>
|
|
@@ -222,7 +222,7 @@ pip install 'docpull[mcp]'
|
|
|
222
222
|
docpull mcp # starts the stdio server
|
|
223
223
|
```
|
|
224
224
|
|
|
225
|
-
Add to Claude Desktop or Claude Code:
|
|
225
|
+
Add to Claude Desktop or Claude Code manually:
|
|
226
226
|
|
|
227
227
|
```json
|
|
228
228
|
{
|
|
@@ -235,13 +235,39 @@ Add to Claude Desktop or Claude Code:
|
|
|
235
235
|
}
|
|
236
236
|
```
|
|
237
237
|
|
|
238
|
-
|
|
238
|
+
Or, if you use Claude Code, install the plugin instead — it bundles the MCP
|
|
239
|
+
server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
|
|
240
|
+
`/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
|
|
241
|
+
when to reach for docpull automatically:
|
|
239
242
|
|
|
240
|
-
|
|
241
|
-
|
|
243
|
+
```bash
|
|
244
|
+
# 1. Install docpull with the MCP extra (required for the plugin)
|
|
245
|
+
pip install 'docpull[mcp]'
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
# 2. Then in Claude Code:
|
|
250
|
+
/plugin marketplace add raintree-technology/docpull
|
|
251
|
+
/plugin install docpull@docpull
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
See [plugin/README.md](plugin/README.md) for details.
|
|
255
|
+
|
|
256
|
+
Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
|
|
257
|
+
|
|
258
|
+
Read:
|
|
259
|
+
- `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
|
|
242
260
|
- `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
|
|
243
|
-
- `list_indexed()` — what has been fetched locally
|
|
244
|
-
- `grep_docs(pattern, library?)` — regex search across fetched Markdown
|
|
261
|
+
- `list_indexed()` — what has been fetched locally, with last-fetched age
|
|
262
|
+
- `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
|
|
263
|
+
- `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
|
|
264
|
+
|
|
265
|
+
Write:
|
|
266
|
+
- `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
|
|
267
|
+
- `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
|
|
268
|
+
- `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
|
|
269
|
+
|
|
270
|
+
All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
|
|
245
271
|
|
|
246
272
|
User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
|
|
247
273
|
|
|
@@ -140,7 +140,7 @@ pip install 'docpull[mcp]'
|
|
|
140
140
|
docpull mcp # starts the stdio server
|
|
141
141
|
```
|
|
142
142
|
|
|
143
|
-
Add to Claude Desktop or Claude Code:
|
|
143
|
+
Add to Claude Desktop or Claude Code manually:
|
|
144
144
|
|
|
145
145
|
```json
|
|
146
146
|
{
|
|
@@ -153,13 +153,39 @@ Add to Claude Desktop or Claude Code:
|
|
|
153
153
|
}
|
|
154
154
|
```
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
Or, if you use Claude Code, install the plugin instead — it bundles the MCP
|
|
157
|
+
server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
|
|
158
|
+
`/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
|
|
159
|
+
when to reach for docpull automatically:
|
|
157
160
|
|
|
158
|
-
|
|
159
|
-
|
|
161
|
+
```bash
|
|
162
|
+
# 1. Install docpull with the MCP extra (required for the plugin)
|
|
163
|
+
pip install 'docpull[mcp]'
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
# 2. Then in Claude Code:
|
|
168
|
+
/plugin marketplace add raintree-technology/docpull
|
|
169
|
+
/plugin install docpull@docpull
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
See [plugin/README.md](plugin/README.md) for details.
|
|
173
|
+
|
|
174
|
+
Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
|
|
175
|
+
|
|
176
|
+
Read:
|
|
177
|
+
- `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
|
|
160
178
|
- `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
|
|
161
|
-
- `list_indexed()` — what has been fetched locally
|
|
162
|
-
- `grep_docs(pattern, library?)` — regex search across fetched Markdown
|
|
179
|
+
- `list_indexed()` — what has been fetched locally, with last-fetched age
|
|
180
|
+
- `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
|
|
181
|
+
- `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
|
|
182
|
+
|
|
183
|
+
Write:
|
|
184
|
+
- `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
|
|
185
|
+
- `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
|
|
186
|
+
- `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
|
|
187
|
+
|
|
188
|
+
All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
|
|
163
189
|
|
|
164
190
|
User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
|
|
165
191
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "docpull"
|
|
7
|
-
version = "2.5.0"
|
|
7
|
+
version = "2.5.1"
|
|
8
8
|
dynamic = []
|
|
9
9
|
description = "Pull documentation from the web and convert to clean markdown"
|
|
10
10
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
@@ -103,7 +103,11 @@ _GREP_DOCS_OUTPUT_SCHEMA = {
|
|
|
103
103
|
"items": {
|
|
104
104
|
"type": "object",
|
|
105
105
|
"properties": {
|
|
106
|
-
"
|
|
106
|
+
"library": {"type": "string"},
|
|
107
|
+
"path": {
|
|
108
|
+
"type": "string",
|
|
109
|
+
"description": "Relative to the library root; pass directly to read_doc",
|
|
110
|
+
},
|
|
107
111
|
"match_count": {"type": "integer"},
|
|
108
112
|
"matches": {
|
|
109
113
|
"type": "array",
|
|
@@ -119,7 +123,7 @@ _GREP_DOCS_OUTPUT_SCHEMA = {
|
|
|
119
123
|
},
|
|
120
124
|
},
|
|
121
125
|
},
|
|
122
|
-
"required": ["path", "match_count", "matches"],
|
|
126
|
+
"required": ["library", "path", "match_count", "matches"],
|
|
123
127
|
},
|
|
124
128
|
},
|
|
125
129
|
"truncated": {"type": "boolean"},
|
|
@@ -333,8 +337,9 @@ async def _run_stdio() -> int:
|
|
|
333
337
|
description=(
|
|
334
338
|
"Regex search through fetched Markdown. Results are ranked by "
|
|
335
339
|
"match density (most matches per file first) and rendered with "
|
|
336
|
-
"lines of surrounding context.
|
|
337
|
-
"
|
|
340
|
+
"lines of surrounding context. Each result returns the library "
|
|
341
|
+
"and a path relative to the library root, so you can feed both "
|
|
342
|
+
"fields straight into read_doc. Use ensure_docs first."
|
|
338
343
|
),
|
|
339
344
|
annotations=ToolAnnotations(
|
|
340
345
|
title="Regex-search cached docs",
|
|
@@ -370,8 +375,9 @@ async def _run_stdio() -> int:
|
|
|
370
375
|
name="read_doc",
|
|
371
376
|
description=(
|
|
372
377
|
"Read a Markdown file from a fetched library, optionally sliced "
|
|
373
|
-
"by line range. The natural follow-up to grep_docs: pass
|
|
374
|
-
"library
|
|
378
|
+
"by line range. The natural follow-up to grep_docs: pass each "
|
|
379
|
+
"result's library and path (path is already relative to the "
|
|
380
|
+
"library root) to pull more surrounding context."
|
|
375
381
|
),
|
|
376
382
|
annotations=ToolAnnotations(
|
|
377
383
|
title="Read a cached doc file",
|
|
@@ -392,9 +392,14 @@ class _FileHits:
|
|
|
392
392
|
|
|
393
393
|
Each match is ``(lineno, before_lines, hit_line, after_lines)`` where
|
|
394
394
|
``before_lines`` / ``after_lines`` are 0..context lines of context.
|
|
395
|
+
|
|
396
|
+
``library`` and ``path`` are split so that ``path`` is relative to the
|
|
397
|
+
library root and can be passed straight into ``read_doc`` alongside
|
|
398
|
+
``library``. Human-readable rendering still uses ``library/path``.
|
|
395
399
|
"""
|
|
396
400
|
|
|
397
|
-
|
|
401
|
+
library: str
|
|
402
|
+
path: str
|
|
398
403
|
matches: list[tuple[int, list[str], str, list[str]]]
|
|
399
404
|
|
|
400
405
|
|
|
@@ -483,7 +488,8 @@ def grep_docs(
|
|
|
483
488
|
if matches:
|
|
484
489
|
file_hits.append(
|
|
485
490
|
_FileHits(
|
|
486
|
-
|
|
491
|
+
library=root.name,
|
|
492
|
+
path=str(file.relative_to(root)),
|
|
487
493
|
matches=matches,
|
|
488
494
|
)
|
|
489
495
|
)
|
|
@@ -505,7 +511,7 @@ def grep_docs(
|
|
|
505
511
|
)
|
|
506
512
|
|
|
507
513
|
# Rank by raw count; tie-break alphabetically so output is stable.
|
|
508
|
-
file_hits.sort(key=lambda fh: (-len(fh.matches), fh.
|
|
514
|
+
file_hits.sort(key=lambda fh: (-len(fh.matches), fh.library, fh.path))
|
|
509
515
|
|
|
510
516
|
blocks: list[str] = []
|
|
511
517
|
files_payload: list[dict[str, Any]] = []
|
|
@@ -513,7 +519,8 @@ def grep_docs(
|
|
|
513
519
|
for fh in file_hits:
|
|
514
520
|
if rendered >= limit:
|
|
515
521
|
break
|
|
516
|
-
|
|
522
|
+
qualified = f"{fh.library}/{fh.path}"
|
|
523
|
+
block_lines = [f"## {qualified} ({len(fh.matches)} matches)"]
|
|
517
524
|
rendered_matches: list[dict[str, Any]] = []
|
|
518
525
|
for lineno, before, hit, after in fh.matches:
|
|
519
526
|
if rendered >= limit:
|
|
@@ -532,7 +539,8 @@ def grep_docs(
|
|
|
532
539
|
blocks.append("\n\n".join(block_lines))
|
|
533
540
|
files_payload.append(
|
|
534
541
|
{
|
|
535
|
-
"
|
|
542
|
+
"library": fh.library,
|
|
543
|
+
"path": fh.path,
|
|
536
544
|
"match_count": len(fh.matches),
|
|
537
545
|
"matches": rendered_matches,
|
|
538
546
|
}
|
|
@@ -568,10 +576,11 @@ def read_doc(
|
|
|
568
576
|
) -> ToolResult:
|
|
569
577
|
"""Read a Markdown file from a fetched library, optionally line-sliced.
|
|
570
578
|
|
|
571
|
-
The natural follow-up to ``grep_docs``:
|
|
572
|
-
and
|
|
573
|
-
|
|
574
|
-
|
|
579
|
+
The natural follow-up to ``grep_docs``: each grep result returns
|
|
580
|
+
``library`` and ``path`` (path relative to the library root), so
|
|
581
|
+
``read_doc(library=..., path=..., line_start=N-20, line_end=N+20)``
|
|
582
|
+
pulls the surrounding context. Path is validated against
|
|
583
|
+
``docs_dir / library`` to block traversal.
|
|
575
584
|
"""
|
|
576
585
|
docs_dir = docs_dir or default_docs_dir()
|
|
577
586
|
if not is_safe_library_name(library):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docpull
|
|
3
|
-
Version: 2.5.0
|
|
3
|
+
Version: 2.5.1
|
|
4
4
|
Summary: Pull documentation from the web and convert to clean markdown
|
|
5
5
|
Author-email: Zachary Roth <support@raintree.technology>
|
|
6
6
|
Maintainer-email: Raintree Technology <support@raintree.technology>
|
|
@@ -222,7 +222,7 @@ pip install 'docpull[mcp]'
|
|
|
222
222
|
docpull mcp # starts the stdio server
|
|
223
223
|
```
|
|
224
224
|
|
|
225
|
-
Add to Claude Desktop or Claude Code:
|
|
225
|
+
Add to Claude Desktop or Claude Code manually:
|
|
226
226
|
|
|
227
227
|
```json
|
|
228
228
|
{
|
|
@@ -235,13 +235,39 @@ Add to Claude Desktop or Claude Code:
|
|
|
235
235
|
}
|
|
236
236
|
```
|
|
237
237
|
|
|
238
|
-
|
|
238
|
+
Or, if you use Claude Code, install the plugin instead — it bundles the MCP
|
|
239
|
+
server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
|
|
240
|
+
`/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
|
|
241
|
+
when to reach for docpull automatically:
|
|
239
242
|
|
|
240
|
-
|
|
241
|
-
|
|
243
|
+
```bash
|
|
244
|
+
# 1. Install docpull with the MCP extra (required for the plugin)
|
|
245
|
+
pip install 'docpull[mcp]'
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
# 2. Then in Claude Code:
|
|
250
|
+
/plugin marketplace add raintree-technology/docpull
|
|
251
|
+
/plugin install docpull@docpull
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
See [plugin/README.md](plugin/README.md) for details.
|
|
255
|
+
|
|
256
|
+
Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
|
|
257
|
+
|
|
258
|
+
Read:
|
|
259
|
+
- `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
|
|
242
260
|
- `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
|
|
243
|
-
- `list_indexed()` — what has been fetched locally
|
|
244
|
-
- `grep_docs(pattern, library?)` — regex search across fetched Markdown
|
|
261
|
+
- `list_indexed()` — what has been fetched locally, with last-fetched age
|
|
262
|
+
- `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
|
|
263
|
+
- `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
|
|
264
|
+
|
|
265
|
+
Write:
|
|
266
|
+
- `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
|
|
267
|
+
- `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
|
|
268
|
+
- `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
|
|
269
|
+
|
|
270
|
+
All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
|
|
245
271
|
|
|
246
272
|
User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
|
|
247
273
|
|
|
@@ -576,11 +576,73 @@ def test_grep_docs_structured_payload(tmp_path):
|
|
|
576
576
|
assert result.data["truncated"] is False
|
|
577
577
|
assert result.data["timed_out"] is False
|
|
578
578
|
files = result.data["files"]
|
|
579
|
-
|
|
579
|
+
# path is library-relative (no library prefix), library is its own field
|
|
580
|
+
assert files[0]["library"] == "lib"
|
|
581
|
+
assert files[0]["path"] == "a.md"
|
|
580
582
|
assert files[0]["matches"][0]["lineno"] == 2
|
|
581
583
|
assert files[0]["matches"][0]["line"] == "TARGET"
|
|
582
584
|
|
|
583
585
|
|
|
586
|
+
def test_grep_docs_path_is_library_relative_in_subdir(tmp_path):
|
|
587
|
+
"""Files nested inside a library should still report library-relative path."""
|
|
588
|
+
lib = tmp_path / "hono"
|
|
589
|
+
(lib / "middleware").mkdir(parents=True)
|
|
590
|
+
(lib / "middleware" / "basic-auth.md").write_text("alpha\nbasicAuth\nbravo")
|
|
591
|
+
result = grep_docs("basicAuth", docs_dir=tmp_path)
|
|
592
|
+
files = result.data["files"]
|
|
593
|
+
assert files[0]["library"] == "hono"
|
|
594
|
+
assert files[0]["path"] == "middleware/basic-auth.md"
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def test_grep_to_read_doc_roundtrip(tmp_path):
|
|
598
|
+
"""Regression: grep_docs results must be directly usable as read_doc inputs.
|
|
599
|
+
|
|
600
|
+
The contract (per the read_doc tool description) is that an agent can
|
|
601
|
+
pass each grep result's `library` and `path` straight into read_doc.
|
|
602
|
+
Prior to this fix grep returned `<library>/<path>` and read_doc
|
|
603
|
+
re-prepended the library, producing a doubled prefix.
|
|
604
|
+
"""
|
|
605
|
+
lib = tmp_path / "hono"
|
|
606
|
+
(lib / "middleware").mkdir(parents=True)
|
|
607
|
+
(lib / "middleware" / "basic-auth.md").write_text(
|
|
608
|
+
"intro\nbasicAuth example\nmore text\nfinal line"
|
|
609
|
+
)
|
|
610
|
+
grep_result = grep_docs("basicAuth", docs_dir=tmp_path)
|
|
611
|
+
file_hit = grep_result.data["files"][0]
|
|
612
|
+
|
|
613
|
+
# Pass library + path verbatim — no manual munging.
|
|
614
|
+
read_result = read_doc(
|
|
615
|
+
file_hit["library"], file_hit["path"], docs_dir=tmp_path
|
|
616
|
+
)
|
|
617
|
+
assert read_result.is_error is False, read_result.text
|
|
618
|
+
assert "basicAuth example" in read_result.data["text"]
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def test_grep_to_read_doc_roundtrip_with_line_slice(tmp_path):
|
|
622
|
+
"""Roundtrip should also work with line-range slicing around the hit."""
|
|
623
|
+
lib = tmp_path / "react"
|
|
624
|
+
lib.mkdir()
|
|
625
|
+
body = "\n".join(f"line{i}" for i in range(1, 21)) # line1..line20
|
|
626
|
+
body = body.replace("line10", "needle here")
|
|
627
|
+
(lib / "hooks.md").write_text(body)
|
|
628
|
+
|
|
629
|
+
grep_result = grep_docs("needle", docs_dir=tmp_path)
|
|
630
|
+
hit = grep_result.data["files"][0]
|
|
631
|
+
match = hit["matches"][0]
|
|
632
|
+
|
|
633
|
+
read_result = read_doc(
|
|
634
|
+
hit["library"],
|
|
635
|
+
hit["path"],
|
|
636
|
+
docs_dir=tmp_path,
|
|
637
|
+
line_start=match["lineno"] - 2,
|
|
638
|
+
line_end=match["lineno"] + 2,
|
|
639
|
+
)
|
|
640
|
+
assert read_result.is_error is False
|
|
641
|
+
assert "needle here" in read_result.data["text"]
|
|
642
|
+
assert read_result.data["line_start"] == 8
|
|
643
|
+
assert read_result.data["line_end"] == 12
|
|
644
|
+
|
|
645
|
+
|
|
584
646
|
def test_grep_docs_no_matches_structured_payload(tmp_path):
|
|
585
647
|
lib = tmp_path / "lib"
|
|
586
648
|
lib.mkdir()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|