docpull 2.4.0__tar.gz → 2.5.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {docpull-2.4.0/src/docpull.egg-info → docpull-2.5.1}/PKG-INFO +33 -7
  2. {docpull-2.4.0 → docpull-2.5.1}/README.md +32 -6
  3. {docpull-2.4.0 → docpull-2.5.1}/pyproject.toml +1 -1
  4. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/__init__.py +1 -1
  5. docpull-2.5.1/src/docpull/mcp/server.py +621 -0
  6. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/mcp/sources.py +18 -1
  7. docpull-2.5.1/src/docpull/mcp/tools.py +839 -0
  8. {docpull-2.4.0 → docpull-2.5.1/src/docpull.egg-info}/PKG-INFO +33 -7
  9. docpull-2.5.1/tests/test_mcp_tools.py +679 -0
  10. docpull-2.4.0/src/docpull/mcp/server.py +0 -200
  11. docpull-2.4.0/src/docpull/mcp/tools.py +0 -360
  12. docpull-2.4.0/tests/test_mcp_tools.py +0 -189
  13. {docpull-2.4.0 → docpull-2.5.1}/LICENSE +0 -0
  14. {docpull-2.4.0 → docpull-2.5.1}/setup.cfg +0 -0
  15. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/__main__.py +0 -0
  16. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cache/__init__.py +0 -0
  17. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cache/manager.py +0 -0
  18. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cache/streaming_dedup.py +0 -0
  19. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/cli.py +0 -0
  20. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/concurrency/__init__.py +0 -0
  21. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/concurrency/manager.py +0 -0
  22. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/__init__.py +0 -0
  23. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/chunking.py +0 -0
  24. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/extractor.py +0 -0
  25. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/markdown.py +0 -0
  26. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/protocols.py +0 -0
  27. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/special_cases.py +0 -0
  28. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/conversion/trafilatura_extractor.py +0 -0
  29. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/core/__init__.py +0 -0
  30. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/core/fetcher.py +0 -0
  31. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/__init__.py +0 -0
  32. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/composite.py +0 -0
  33. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/crawler.py +0 -0
  34. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/filters.py +0 -0
  35. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/__init__.py +0 -0
  36. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/enhanced.py +0 -0
  37. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/protocols.py +0 -0
  38. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/static.py +0 -0
  39. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/protocols.py +0 -0
  40. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/discovery/sitemap.py +0 -0
  41. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/doctor.py +0 -0
  42. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/__init__.py +0 -0
  43. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/client.py +0 -0
  44. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/protocols.py +0 -0
  45. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/http/rate_limiter.py +0 -0
  46. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/logging_config.py +0 -0
  47. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/mcp/__init__.py +0 -0
  48. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/metadata_extractor.py +0 -0
  49. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/__init__.py +0 -0
  50. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/config.py +0 -0
  51. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/events.py +0 -0
  52. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/models/profiles.py +0 -0
  53. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/__init__.py +0 -0
  54. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/base.py +0 -0
  55. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/__init__.py +0 -0
  56. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/chunk.py +0 -0
  57. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/convert.py +0 -0
  58. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/dedup.py +0 -0
  59. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/fetch.py +0 -0
  60. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/metadata.py +0 -0
  61. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save.py +0 -0
  62. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_json.py +0 -0
  63. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_ndjson.py +0 -0
  64. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_sqlite.py +0 -0
  65. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/pipeline/steps/validate.py +0 -0
  66. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/py.typed +0 -0
  67. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/security/__init__.py +0 -0
  68. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/security/robots.py +0 -0
  69. {docpull-2.4.0 → docpull-2.5.1}/src/docpull/security/url_validator.py +0 -0
  70. {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/SOURCES.txt +0 -0
  71. {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/dependency_links.txt +0 -0
  72. {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/entry_points.txt +0 -0
  73. {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/requires.txt +0 -0
  74. {docpull-2.4.0 → docpull-2.5.1}/src/docpull.egg-info/top_level.txt +0 -0
  75. {docpull-2.4.0 → docpull-2.5.1}/tests/test_cache_conditional_get.py +0 -0
  76. {docpull-2.4.0 → docpull-2.5.1}/tests/test_chunking.py +0 -0
  77. {docpull-2.4.0 → docpull-2.5.1}/tests/test_cli.py +0 -0
  78. {docpull-2.4.0 → docpull-2.5.1}/tests/test_convert_step_new.py +0 -0
  79. {docpull-2.4.0 → docpull-2.5.1}/tests/test_fixes_v2_3_0.py +0 -0
  80. {docpull-2.4.0 → docpull-2.5.1}/tests/test_link_extractors.py +0 -0
  81. {docpull-2.4.0 → docpull-2.5.1}/tests/test_naming.py +0 -0
  82. {docpull-2.4.0 → docpull-2.5.1}/tests/test_save_ndjson.py +0 -0
  83. {docpull-2.4.0 → docpull-2.5.1}/tests/test_security_hardening.py +0 -0
  84. {docpull-2.4.0 → docpull-2.5.1}/tests/test_special_cases.py +0 -0
  85. {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_conversion.py +0 -0
  86. {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_discovery.py +0 -0
  87. {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_integration.py +0 -0
  88. {docpull-2.4.0 → docpull-2.5.1}/tests/test_v2_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docpull
3
- Version: 2.4.0
3
+ Version: 2.5.1
4
4
  Summary: Pull documentation from the web and convert to clean markdown
5
5
  Author-email: Zachary Roth <support@raintree.technology>
6
6
  Maintainer-email: Raintree Technology <support@raintree.technology>
@@ -222,7 +222,7 @@ pip install 'docpull[mcp]'
222
222
  docpull mcp # starts the stdio server
223
223
  ```
224
224
 
225
- Add to Claude Desktop or Claude Code:
225
+ Add to Claude Desktop or Claude Code manually:
226
226
 
227
227
  ```json
228
228
  {
@@ -235,13 +235,39 @@ Add to Claude Desktop or Claude Code:
235
235
  }
236
236
  ```
237
237
 
238
- Tools exposed:
238
+ Or, if you use Claude Code, install the plugin instead — it bundles the MCP
239
+ server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
240
+ `/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
241
+ when to reach for docpull automatically:
239
242
 
240
- - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl
241
- - `ensure_docs(source, force?)` fetch a named library (cached 7 days)
243
+ ```bash
244
+ # 1. Install docpull with the MCP extra (required for the plugin)
245
+ pip install 'docpull[mcp]'
246
+ ```
247
+
248
+ ```
249
+ # 2. Then in Claude Code:
250
+ /plugin marketplace add raintree-technology/docpull
251
+ /plugin install docpull@docpull
252
+ ```
253
+
254
+ See [plugin/README.md](plugin/README.md) for details.
255
+
256
+ Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
257
+
258
+ Read:
259
+ - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
242
260
  - `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
243
- - `list_indexed()` — what has been fetched locally
244
- - `grep_docs(pattern, library?)` — regex search across fetched Markdown
261
+ - `list_indexed()` — what has been fetched locally, with last-fetched age
262
+ - `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
263
+ - `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
264
+
265
+ Write:
266
+ - `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
267
+ - `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
268
+ - `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
269
+
270
+ All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
245
271
 
246
272
  User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
247
273
 
@@ -140,7 +140,7 @@ pip install 'docpull[mcp]'
140
140
  docpull mcp # starts the stdio server
141
141
  ```
142
142
 
143
- Add to Claude Desktop or Claude Code:
143
+ Add to Claude Desktop or Claude Code manually:
144
144
 
145
145
  ```json
146
146
  {
@@ -153,13 +153,39 @@ Add to Claude Desktop or Claude Code:
153
153
  }
154
154
  ```
155
155
 
156
- Tools exposed:
156
+ Or, if you use Claude Code, install the plugin instead — it bundles the MCP
157
+ server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
158
+ `/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
159
+ when to reach for docpull automatically:
157
160
 
158
- - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl
159
- - `ensure_docs(source, force?)` fetch a named library (cached 7 days)
161
+ ```bash
162
+ # 1. Install docpull with the MCP extra (required for the plugin)
163
+ pip install 'docpull[mcp]'
164
+ ```
165
+
166
+ ```
167
+ # 2. Then in Claude Code:
168
+ /plugin marketplace add raintree-technology/docpull
169
+ /plugin install docpull@docpull
170
+ ```
171
+
172
+ See [plugin/README.md](plugin/README.md) for details.
173
+
174
+ Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
175
+
176
+ Read:
177
+ - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
160
178
  - `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
161
- - `list_indexed()` — what has been fetched locally
162
- - `grep_docs(pattern, library?)` — regex search across fetched Markdown
179
+ - `list_indexed()` — what has been fetched locally, with last-fetched age
180
+ - `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
181
+ - `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
182
+
183
+ Write:
184
+ - `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
185
+ - `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
186
+ - `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
187
+
188
+ All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
163
189
 
164
190
  User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
165
191
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "docpull"
7
- version = "2.4.0"
7
+ version = "2.5.1"
8
8
  dynamic = []
9
9
  description = "Pull documentation from the web and convert to clean markdown"
10
10
  readme = {file = "README.md", content-type = "text/markdown"}
@@ -14,7 +14,7 @@ Usage:
14
14
  print(event)
15
15
  """
16
16
 
17
- __version__ = "2.4.0"
17
+ __version__ = "2.5.1"
18
18
 
19
19
  from .cache import CacheManager, StreamingDeduplicator
20
20
  from .conversion.chunking import Chunk, TokenCounter, chunk_markdown