docpull 2.5.0__tar.gz → 2.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {docpull-2.5.0/src/docpull.egg-info → docpull-2.5.1}/PKG-INFO +33 -7
  2. {docpull-2.5.0 → docpull-2.5.1}/README.md +32 -6
  3. {docpull-2.5.0 → docpull-2.5.1}/pyproject.toml +1 -1
  4. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/__init__.py +1 -1
  5. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/server.py +12 -6
  6. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/tools.py +18 -9
  7. {docpull-2.5.0 → docpull-2.5.1/src/docpull.egg-info}/PKG-INFO +33 -7
  8. {docpull-2.5.0 → docpull-2.5.1}/tests/test_mcp_tools.py +63 -1
  9. {docpull-2.5.0 → docpull-2.5.1}/LICENSE +0 -0
  10. {docpull-2.5.0 → docpull-2.5.1}/setup.cfg +0 -0
  11. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/__main__.py +0 -0
  12. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cache/__init__.py +0 -0
  13. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cache/manager.py +0 -0
  14. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cache/streaming_dedup.py +0 -0
  15. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/cli.py +0 -0
  16. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/concurrency/__init__.py +0 -0
  17. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/concurrency/manager.py +0 -0
  18. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/__init__.py +0 -0
  19. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/chunking.py +0 -0
  20. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/extractor.py +0 -0
  21. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/markdown.py +0 -0
  22. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/protocols.py +0 -0
  23. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/special_cases.py +0 -0
  24. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/conversion/trafilatura_extractor.py +0 -0
  25. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/core/__init__.py +0 -0
  26. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/core/fetcher.py +0 -0
  27. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/__init__.py +0 -0
  28. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/composite.py +0 -0
  29. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/crawler.py +0 -0
  30. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/filters.py +0 -0
  31. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/__init__.py +0 -0
  32. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/enhanced.py +0 -0
  33. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/protocols.py +0 -0
  34. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/link_extractors/static.py +0 -0
  35. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/protocols.py +0 -0
  36. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/discovery/sitemap.py +0 -0
  37. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/doctor.py +0 -0
  38. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/__init__.py +0 -0
  39. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/client.py +0 -0
  40. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/protocols.py +0 -0
  41. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/http/rate_limiter.py +0 -0
  42. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/logging_config.py +0 -0
  43. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/__init__.py +0 -0
  44. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/mcp/sources.py +0 -0
  45. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/metadata_extractor.py +0 -0
  46. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/__init__.py +0 -0
  47. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/config.py +0 -0
  48. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/events.py +0 -0
  49. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/models/profiles.py +0 -0
  50. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/__init__.py +0 -0
  51. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/base.py +0 -0
  52. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/__init__.py +0 -0
  53. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/chunk.py +0 -0
  54. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/convert.py +0 -0
  55. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/dedup.py +0 -0
  56. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/fetch.py +0 -0
  57. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/metadata.py +0 -0
  58. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save.py +0 -0
  59. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_json.py +0 -0
  60. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_ndjson.py +0 -0
  61. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/save_sqlite.py +0 -0
  62. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/pipeline/steps/validate.py +0 -0
  63. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/py.typed +0 -0
  64. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/security/__init__.py +0 -0
  65. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/security/robots.py +0 -0
  66. {docpull-2.5.0 → docpull-2.5.1}/src/docpull/security/url_validator.py +0 -0
  67. {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/SOURCES.txt +0 -0
  68. {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/dependency_links.txt +0 -0
  69. {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/entry_points.txt +0 -0
  70. {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/requires.txt +0 -0
  71. {docpull-2.5.0 → docpull-2.5.1}/src/docpull.egg-info/top_level.txt +0 -0
  72. {docpull-2.5.0 → docpull-2.5.1}/tests/test_cache_conditional_get.py +0 -0
  73. {docpull-2.5.0 → docpull-2.5.1}/tests/test_chunking.py +0 -0
  74. {docpull-2.5.0 → docpull-2.5.1}/tests/test_cli.py +0 -0
  75. {docpull-2.5.0 → docpull-2.5.1}/tests/test_convert_step_new.py +0 -0
  76. {docpull-2.5.0 → docpull-2.5.1}/tests/test_fixes_v2_3_0.py +0 -0
  77. {docpull-2.5.0 → docpull-2.5.1}/tests/test_link_extractors.py +0 -0
  78. {docpull-2.5.0 → docpull-2.5.1}/tests/test_naming.py +0 -0
  79. {docpull-2.5.0 → docpull-2.5.1}/tests/test_save_ndjson.py +0 -0
  80. {docpull-2.5.0 → docpull-2.5.1}/tests/test_security_hardening.py +0 -0
  81. {docpull-2.5.0 → docpull-2.5.1}/tests/test_special_cases.py +0 -0
  82. {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_conversion.py +0 -0
  83. {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_discovery.py +0 -0
  84. {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_integration.py +0 -0
  85. {docpull-2.5.0 → docpull-2.5.1}/tests/test_v2_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docpull
3
- Version: 2.5.0
3
+ Version: 2.5.1
4
4
  Summary: Pull documentation from the web and convert to clean markdown
5
5
  Author-email: Zachary Roth <support@raintree.technology>
6
6
  Maintainer-email: Raintree Technology <support@raintree.technology>
@@ -222,7 +222,7 @@ pip install 'docpull[mcp]'
222
222
  docpull mcp # starts the stdio server
223
223
  ```
224
224
 
225
- Add to Claude Desktop or Claude Code:
225
+ Add to Claude Desktop or Claude Code manually:
226
226
 
227
227
  ```json
228
228
  {
@@ -235,13 +235,39 @@ Add to Claude Desktop or Claude Code:
235
235
  }
236
236
  ```
237
237
 
238
- Tools exposed:
238
+ Or, if you use Claude Code, install the plugin instead — it bundles the MCP
239
+ server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
240
+ `/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
241
+ when to reach for docpull automatically:
239
242
 
240
- - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl
241
- - `ensure_docs(source, force?)` fetch a named library (cached 7 days)
243
+ ```bash
244
+ # 1. Install docpull with the MCP extra (required for the plugin)
245
+ pip install 'docpull[mcp]'
246
+ ```
247
+
248
+ ```
249
+ # 2. Then in Claude Code:
250
+ /plugin marketplace add raintree-technology/docpull
251
+ /plugin install docpull@docpull
252
+ ```
253
+
254
+ See [plugin/README.md](plugin/README.md) for details.
255
+
256
+ Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
257
+
258
+ Read:
259
+ - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
242
260
  - `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
243
- - `list_indexed()` — what has been fetched locally
244
- - `grep_docs(pattern, library?)` — regex search across fetched Markdown
261
+ - `list_indexed()` — what has been fetched locally, with last-fetched age
262
+ - `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
263
+ - `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
264
+
265
+ Write:
266
+ - `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
267
+ - `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
268
+ - `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
269
+
270
+ All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
245
271
 
246
272
  User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
247
273
 
@@ -140,7 +140,7 @@ pip install 'docpull[mcp]'
140
140
  docpull mcp # starts the stdio server
141
141
  ```
142
142
 
143
- Add to Claude Desktop or Claude Code:
143
+ Add to Claude Desktop or Claude Code manually:
144
144
 
145
145
  ```json
146
146
  {
@@ -153,13 +153,39 @@ Add to Claude Desktop or Claude Code:
153
153
  }
154
154
  ```
155
155
 
156
- Tools exposed:
156
+ Or, if you use Claude Code, install the plugin instead — it bundles the MCP
157
+ server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
158
+ `/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
159
+ when to reach for docpull automatically:
157
160
 
158
- - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl
159
- - `ensure_docs(source, force?)` fetch a named library (cached 7 days)
161
+ ```bash
162
+ # 1. Install docpull with the MCP extra (required for the plugin)
163
+ pip install 'docpull[mcp]'
164
+ ```
165
+
166
+ ```
167
+ # 2. Then in Claude Code:
168
+ /plugin marketplace add raintree-technology/docpull
169
+ /plugin install docpull@docpull
170
+ ```
171
+
172
+ See [plugin/README.md](plugin/README.md) for details.
173
+
174
+ Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
175
+
176
+ Read:
177
+ - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
160
178
  - `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
161
- - `list_indexed()` — what has been fetched locally
162
- - `grep_docs(pattern, library?)` — regex search across fetched Markdown
179
+ - `list_indexed()` — what has been fetched locally, with last-fetched age
180
+ - `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
181
+ - `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
182
+
183
+ Write:
184
+ - `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
185
+ - `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
186
+ - `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
187
+
188
+ All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
163
189
 
164
190
  User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
165
191
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "docpull"
7
- version = "2.5.0"
7
+ version = "2.5.1"
8
8
  dynamic = []
9
9
  description = "Pull documentation from the web and convert to clean markdown"
10
10
  readme = {file = "README.md", content-type = "text/markdown"}
@@ -14,7 +14,7 @@ Usage:
14
14
  print(event)
15
15
  """
16
16
 
17
- __version__ = "2.5.0"
17
+ __version__ = "2.5.1"
18
18
 
19
19
  from .cache import CacheManager, StreamingDeduplicator
20
20
  from .conversion.chunking import Chunk, TokenCounter, chunk_markdown
@@ -103,7 +103,11 @@ _GREP_DOCS_OUTPUT_SCHEMA = {
103
103
  "items": {
104
104
  "type": "object",
105
105
  "properties": {
106
- "path": {"type": "string"},
106
+ "library": {"type": "string"},
107
+ "path": {
108
+ "type": "string",
109
+ "description": "Relative to the library root; pass directly to read_doc",
110
+ },
107
111
  "match_count": {"type": "integer"},
108
112
  "matches": {
109
113
  "type": "array",
@@ -119,7 +123,7 @@ _GREP_DOCS_OUTPUT_SCHEMA = {
119
123
  },
120
124
  },
121
125
  },
122
- "required": ["path", "match_count", "matches"],
126
+ "required": ["library", "path", "match_count", "matches"],
123
127
  },
124
128
  },
125
129
  "truncated": {"type": "boolean"},
@@ -333,8 +337,9 @@ async def _run_stdio() -> int:
333
337
  description=(
334
338
  "Regex search through fetched Markdown. Results are ranked by "
335
339
  "match density (most matches per file first) and rendered with "
336
- "lines of surrounding context. Use ensure_docs first; then "
337
- "read_doc to pull more context around a hit."
340
+ "lines of surrounding context. Each result returns the library "
341
+ "and a path relative to the library root, so you can feed both "
342
+ "fields straight into read_doc. Use ensure_docs first."
338
343
  ),
339
344
  annotations=ToolAnnotations(
340
345
  title="Regex-search cached docs",
@@ -370,8 +375,9 @@ async def _run_stdio() -> int:
370
375
  name="read_doc",
371
376
  description=(
372
377
  "Read a Markdown file from a fetched library, optionally sliced "
373
- "by line range. The natural follow-up to grep_docs: pass the "
374
- "library + path it returned to pull more surrounding context."
378
+ "by line range. The natural follow-up to grep_docs: pass each "
379
+ "result's library and path (path is already relative to the "
380
+ "library root) to pull more surrounding context."
375
381
  ),
376
382
  annotations=ToolAnnotations(
377
383
  title="Read a cached doc file",
@@ -392,9 +392,14 @@ class _FileHits:
392
392
 
393
393
  Each match is ``(lineno, before_lines, hit_line, after_lines)`` where
394
394
  ``before_lines`` / ``after_lines`` are 0..context lines of context.
395
+
396
+ ``library`` and ``path`` are split so that ``path`` is relative to the
397
+ library root and can be passed straight into ``read_doc`` alongside
398
+ ``library``. Human-readable rendering still uses ``library/path``.
395
399
  """
396
400
 
397
- rel_path: str
401
+ library: str
402
+ path: str
398
403
  matches: list[tuple[int, list[str], str, list[str]]]
399
404
 
400
405
 
@@ -483,7 +488,8 @@ def grep_docs(
483
488
  if matches:
484
489
  file_hits.append(
485
490
  _FileHits(
486
- rel_path=str(file.relative_to(docs_dir)),
491
+ library=root.name,
492
+ path=str(file.relative_to(root)),
487
493
  matches=matches,
488
494
  )
489
495
  )
@@ -505,7 +511,7 @@ def grep_docs(
505
511
  )
506
512
 
507
513
  # Rank by raw count; tie-break alphabetically so output is stable.
508
- file_hits.sort(key=lambda fh: (-len(fh.matches), fh.rel_path))
514
+ file_hits.sort(key=lambda fh: (-len(fh.matches), fh.library, fh.path))
509
515
 
510
516
  blocks: list[str] = []
511
517
  files_payload: list[dict[str, Any]] = []
@@ -513,7 +519,8 @@ def grep_docs(
513
519
  for fh in file_hits:
514
520
  if rendered >= limit:
515
521
  break
516
- block_lines = [f"## {fh.rel_path} ({len(fh.matches)} matches)"]
522
+ qualified = f"{fh.library}/{fh.path}"
523
+ block_lines = [f"## {qualified} ({len(fh.matches)} matches)"]
517
524
  rendered_matches: list[dict[str, Any]] = []
518
525
  for lineno, before, hit, after in fh.matches:
519
526
  if rendered >= limit:
@@ -532,7 +539,8 @@ def grep_docs(
532
539
  blocks.append("\n\n".join(block_lines))
533
540
  files_payload.append(
534
541
  {
535
- "path": fh.rel_path,
542
+ "library": fh.library,
543
+ "path": fh.path,
536
544
  "match_count": len(fh.matches),
537
545
  "matches": rendered_matches,
538
546
  }
@@ -568,10 +576,11 @@ def read_doc(
568
576
  ) -> ToolResult:
569
577
  """Read a Markdown file from a fetched library, optionally line-sliced.
570
578
 
571
- The natural follow-up to ``grep_docs``: once you have ``library/path.md``
572
- and a line number, ``read_doc(library, path, line_start=N-20, line_end=N+20)``
573
- pulls the surrounding context without filesystem access. Path is validated
574
- against ``docs_dir / library`` to block traversal.
579
+ The natural follow-up to ``grep_docs``: each grep result returns
580
+ ``library`` and ``path`` (path relative to the library root), so
581
+ ``read_doc(library=..., path=..., line_start=N-20, line_end=N+20)``
582
+ pulls the surrounding context. Path is validated against
583
+ ``docs_dir / library`` to block traversal.
575
584
  """
576
585
  docs_dir = docs_dir or default_docs_dir()
577
586
  if not is_safe_library_name(library):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docpull
3
- Version: 2.5.0
3
+ Version: 2.5.1
4
4
  Summary: Pull documentation from the web and convert to clean markdown
5
5
  Author-email: Zachary Roth <support@raintree.technology>
6
6
  Maintainer-email: Raintree Technology <support@raintree.technology>
@@ -222,7 +222,7 @@ pip install 'docpull[mcp]'
222
222
  docpull mcp # starts the stdio server
223
223
  ```
224
224
 
225
- Add to Claude Desktop or Claude Code:
225
+ Add to Claude Desktop or Claude Code manually:
226
226
 
227
227
  ```json
228
228
  {
@@ -235,13 +235,39 @@ Add to Claude Desktop or Claude Code:
235
235
  }
236
236
  ```
237
237
 
238
- Tools exposed:
238
+ Or, if you use Claude Code, install the plugin instead — it bundles the MCP
239
+ server, five slash commands (`/docs-add`, `/docs-search`, `/docs-list`,
240
+ `/docs-refresh`, `/docs-remove`), and a meta-skill that teaches Claude
241
+ when to reach for docpull automatically:
239
242
 
240
- - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl
241
- - `ensure_docs(source, force?)` fetch a named library (cached 7 days)
243
+ ```bash
244
+ # 1. Install docpull with the MCP extra (required for the plugin)
245
+ pip install 'docpull[mcp]'
246
+ ```
247
+
248
+ ```
249
+ # 2. Then in Claude Code:
250
+ /plugin marketplace add raintree-technology/docpull
251
+ /plugin install docpull@docpull
252
+ ```
253
+
254
+ See [plugin/README.md](plugin/README.md) for details.
255
+
256
+ Tools exposed (8 total — read tools advertise `readOnlyHint` so hosts that auto-approve safe tools won't prompt):
257
+
258
+ Read:
259
+ - `fetch_url(url, max_tokens?)` — one-shot fetch, no crawl. HTTPS-only, SSRF-validated.
242
260
  - `list_sources(category?)` — show available aliases (react, nextjs, fastapi, …)
243
- - `list_indexed()` — what has been fetched locally
244
- - `grep_docs(pattern, library?)` — regex search across fetched Markdown
261
+ - `list_indexed()` — what has been fetched locally, with last-fetched age
262
+ - `grep_docs(pattern, library?, limit?, context?)` — regex search across fetched Markdown (length-capped + wall-clock budgeted to mitigate ReDoS)
263
+ - `read_doc(library, path, line_start?, line_end?)` — read a specific cached file, optionally line-sliced
264
+
265
+ Write:
266
+ - `ensure_docs(source, force?, profile?)` — fetch a named library (cached 7 days). Forwards progress to clients that supply a `progressToken`.
267
+ - `add_source(name, url, description?, category?, max_pages?, force?)` — register a user alias (HTTPS-only, atomic write to `sources.yaml`).
268
+ - `remove_source(name, delete_cache?)` — drop a user alias and (optionally) its cached docs.
269
+
270
+ All tools that carry data also return `structuredContent` validated against an `outputSchema` for clients that prefer typed output.
245
271
 
246
272
  User-defined sources live in `~/.config/docpull-mcp/sources.yaml`:
247
273
 
@@ -576,11 +576,73 @@ def test_grep_docs_structured_payload(tmp_path):
576
576
  assert result.data["truncated"] is False
577
577
  assert result.data["timed_out"] is False
578
578
  files = result.data["files"]
579
- assert files[0]["path"].endswith("a.md")
579
+ # path is library-relative (no library prefix), library is its own field
580
+ assert files[0]["library"] == "lib"
581
+ assert files[0]["path"] == "a.md"
580
582
  assert files[0]["matches"][0]["lineno"] == 2
581
583
  assert files[0]["matches"][0]["line"] == "TARGET"
582
584
 
583
585
 
586
+ def test_grep_docs_path_is_library_relative_in_subdir(tmp_path):
587
+ """Files nested inside a library should still report library-relative path."""
588
+ lib = tmp_path / "hono"
589
+ (lib / "middleware").mkdir(parents=True)
590
+ (lib / "middleware" / "basic-auth.md").write_text("alpha\nbasicAuth\nbravo")
591
+ result = grep_docs("basicAuth", docs_dir=tmp_path)
592
+ files = result.data["files"]
593
+ assert files[0]["library"] == "hono"
594
+ assert files[0]["path"] == "middleware/basic-auth.md"
595
+
596
+
597
+ def test_grep_to_read_doc_roundtrip(tmp_path):
598
+ """Regression: grep_docs results must be directly usable as read_doc inputs.
599
+
600
+ The contract (per the read_doc tool description) is that an agent can
601
+ pass each grep result's `library` and `path` straight into read_doc.
602
+ Prior to this fix grep returned `<library>/<path>` and read_doc
603
+ re-prepended the library, producing a doubled prefix.
604
+ """
605
+ lib = tmp_path / "hono"
606
+ (lib / "middleware").mkdir(parents=True)
607
+ (lib / "middleware" / "basic-auth.md").write_text(
608
+ "intro\nbasicAuth example\nmore text\nfinal line"
609
+ )
610
+ grep_result = grep_docs("basicAuth", docs_dir=tmp_path)
611
+ file_hit = grep_result.data["files"][0]
612
+
613
+ # Pass library + path verbatim — no manual munging.
614
+ read_result = read_doc(
615
+ file_hit["library"], file_hit["path"], docs_dir=tmp_path
616
+ )
617
+ assert read_result.is_error is False, read_result.text
618
+ assert "basicAuth example" in read_result.data["text"]
619
+
620
+
621
+ def test_grep_to_read_doc_roundtrip_with_line_slice(tmp_path):
622
+ """Roundtrip should also work with line-range slicing around the hit."""
623
+ lib = tmp_path / "react"
624
+ lib.mkdir()
625
+ body = "\n".join(f"line{i}" for i in range(1, 21)) # line1..line20
626
+ body = body.replace("line10", "needle here")
627
+ (lib / "hooks.md").write_text(body)
628
+
629
+ grep_result = grep_docs("needle", docs_dir=tmp_path)
630
+ hit = grep_result.data["files"][0]
631
+ match = hit["matches"][0]
632
+
633
+ read_result = read_doc(
634
+ hit["library"],
635
+ hit["path"],
636
+ docs_dir=tmp_path,
637
+ line_start=match["lineno"] - 2,
638
+ line_end=match["lineno"] + 2,
639
+ )
640
+ assert read_result.is_error is False
641
+ assert "needle here" in read_result.data["text"]
642
+ assert read_result.data["line_start"] == 8
643
+ assert read_result.data["line_end"] == 12
644
+
645
+
584
646
  def test_grep_docs_no_matches_structured_payload(tmp_path):
585
647
  lib = tmp_path / "lib"
586
648
  lib.mkdir()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes