doculayer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doculayer/__init__.py ADDED
@@ -0,0 +1,142 @@
1
+ """DocuLayer: live documentation access layer for AI agents.
2
+
3
+ Zero hallucination — content is fetched verbatim from source, never generated.
4
+ No disk storage — ephemeral in-memory TTL cache only.
5
+ Quick access — llms.txt-first with BM25 section search.
6
+
7
+ Quick start
8
+ -----------
9
+ import asyncio
10
+ from doculayer import search, fetch
11
+
12
+ results = asyncio.run(search("dependency injection", "fastapi"))
13
+ for r in results:
14
+ print(r.section.source_url, r.score)
15
+ print(r.section.trimmed(200))
16
+
17
+ content = asyncio.run(fetch("httpx", section="AsyncClient"))
18
+ print(content)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from .config import DocuLayerConfig, get_config
24
+ from .core.citation import attr_header
25
+ from .core.fetcher import DocFetcher, FetchResult
26
+ from .core.parser import DocParser, DocSection
27
+ from .core.search import DocSearcher, SearchResult
28
+ from .sources.generic import GenericDocSource
29
+ from .sources.registry import resolve_identifier
30
+
31
+ __version__ = "0.1.0"
32
+ __all__ = [
33
+ "DocuLayerConfig",
34
+ "get_config",
35
+ "DocFetcher",
36
+ "FetchResult",
37
+ "DocParser",
38
+ "DocSection",
39
+ "DocSearcher",
40
+ "SearchResult",
41
+ "GenericDocSource",
42
+ "resolve_identifier",
43
+ "search",
44
+ "fetch",
45
+ ]
46
+
47
+ # ── module-level singleton fetcher ────────────────────────────────────────────
48
+
49
+ _fetcher: DocFetcher | None = None
50
+
51
+
52
def _get_fetcher() -> DocFetcher:
    """Return the shared module-level ``DocFetcher``, building it on first use.

    The fetcher is configured from ``get_config()`` the first time this is
    called; every later call returns that same instance.
    """
    global _fetcher
    if _fetcher is not None:
        return _fetcher

    settings = get_config()
    _fetcher = DocFetcher(
        ttl=settings.cache_ttl,
        max_size=settings.max_cache_size,
        timeout=settings.fetch_timeout,
        max_bytes=settings.max_fetch_bytes,
    )
    return _fetcher
63
+
64
+
65
+ # ── high-level async API ──────────────────────────────────────────────────────
66
+
67
async def search(
    query: str,
    identifier: str,
    top_k: int = 5,
) -> list[SearchResult]:
    """Run *query* against the live documentation identified by *identifier*.

    Args:
        query:      Free-text query, e.g. ``"dependency injection"``.
        identifier: Package name or URL — ``"fastapi"``, ``"pypi:httpx"``,
                    ``"npm:react"``, ``"https://docs.example.com"``.
        top_k:      Upper bound on the number of results requested.

    Returns:
        Whatever ``GenericDocSource.search`` yields for the resolved URL
        (a list of :class:`SearchResult`).

    Raises:
        ValueError: if *identifier* does not resolve to a URL.
    """
    resolved = await resolve_identifier(identifier)
    if resolved:
        source = GenericDocSource(resolved, _get_fetcher(), label=identifier)
        return await source.search(query, top_k)
    raise ValueError(f"Cannot resolve documentation source: {identifier!r}")
93
+
94
+
95
async def fetch(
    identifier: str,
    section: str | None = None,
) -> str:
    """Fetch documentation for *identifier*.

    Args:
        identifier: Package name or URL.
        section:    Optional heading to extract — e.g. ``"Installation"``.

    Returns:
        Markdown string with a source attribution header. When *section* is
        given, that section's cited content (or a "not found" message).
        Otherwise the page's sections joined in order, capped at
        ``max_section_words * 4`` words in total.

    Raises:
        ValueError: if *identifier* cannot be resolved.
    """
    url = await resolve_identifier(identifier)
    if not url:
        raise ValueError(f"Cannot resolve documentation source: {identifier!r}")

    src = GenericDocSource(url, _get_fetcher(), label=identifier)

    if section:
        found = await src.fetch_section(section)
        if found:
            return found.cited_content
        return f"Section '{section}' not found in {identifier} docs."

    sections = await src.fetch_page()
    if not sections:
        return f"No content found at {url}"

    first = sections[0]
    header = attr_header(first.source_url, first.fetched_at, first.from_cache) + "\n\n"
    # Configuration is only needed on the whole-page path, so look it up here.
    limit = get_config().max_section_words * 4
    parts: list[str] = []
    total = 0
    for s in sections:
        wc = len(s.content.split())
        if total + wc > limit:
            if not parts:
                # The very first section is already over budget. Emit its
                # leading words rather than returning a header followed by
                # nothing but a truncation note.
                parts.append(s.trimmed(limit))
            parts.append("*(truncated — use `section=` for targeted access)*")
            break
        parts.append(s.content)
        total += wc

    return header + "\n\n".join(parts)
doculayer/cli.py ADDED
@@ -0,0 +1,132 @@
1
+ """DocuLayer CLI.
2
+
3
+ Usage
4
+ -----
5
+ doculayer setup # auto-detect IDE and install MCP server
6
+ doculayer search "dependency injection" --source fastapi
7
+ doculayer fetch httpx --section AsyncClient
8
+ doculayer resolve pydantic
9
+ doculayer sources
10
+ doculayer mcp # start MCP server on stdio
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import sys
17
+
18
+ import click
19
+
20
+ from . import fetch as _fetch
21
+ from . import search as _search
22
+ from .setup_wizard import setup as _setup_cmd
23
+ from .sources.registry import resolve_identifier
24
+
25
+
26
@click.group()
@click.version_option(package_name="doculayer")
def main() -> None:
    """DocuLayer: live documentation access for AI agents."""
    # Intentionally empty: this is only the root click group. Subcommands are
    # attached with ``@main.command()`` / ``main.add_command`` elsewhere.
30
+
31
+
32
+
33
+ main.add_command(_setup_cmd)
34
+
35
+ # ── search ─────────────────────────────────────────────────────────────────────
36
+
37
@main.command()
@click.argument("query")
@click.option("--source", "-s", required=True, help="Package name or URL to search")
@click.option("--results", "-n", default=3, show_default=True, help="Max results")
def search(query: str, source: str, results: int) -> None:
    """Search QUERY in live documentation for SOURCE."""
    # Only the lookup itself is asynchronous; rendering happens afterwards.
    try:
        hits = asyncio.run(_search(query, source, top_k=results))
    except ValueError as exc:
        # Unresolvable source: report on stderr and exit non-zero.
        click.echo(str(exc), err=True)
        sys.exit(1)

    if not hits:
        click.echo(f"No results for '{query}' in {source}.", err=True)
        sys.exit(1)

    rule = "─" * 60
    for rank, hit in enumerate(hits, start=1):
        doc = hit.section
        click.echo(f"\n{rule}")
        click.echo(f" {rank}. {doc.title} (score {hit.score:.2f})")
        click.echo(f" {doc.source_url}")
        click.echo(f"{rule}")
        click.echo(doc.trimmed(200))
63
+
64
+
65
+ # ── fetch ──────────────────────────────────────────────────────────────────────
66
+
67
@main.command()
@click.argument("identifier")
@click.option("--section", "-S", default=None, help="Section heading to extract")
def fetch(identifier: str, section: str | None) -> None:
    """Fetch documentation for IDENTIFIER."""
    try:
        content = asyncio.run(_fetch(identifier, section=section))
    except ValueError as exc:
        # Unresolvable identifier: report on stderr and exit non-zero.
        click.echo(str(exc), err=True)
        sys.exit(1)
    click.echo(content)
82
+
83
+
84
+ # ── resolve ────────────────────────────────────────────────────────────────────
85
+
86
@main.command()
@click.argument("identifier")
def resolve(identifier: str) -> None:
    """Print the resolved documentation URL for IDENTIFIER."""
    url = asyncio.run(resolve_identifier(identifier))
    if not url:
        # Nothing resolved: message goes to stderr, exit status 1.
        click.echo(f"Cannot resolve: {identifier}", err=True)
        sys.exit(1)
    click.echo(url)
100
+
101
+
102
+ # ── sources ────────────────────────────────────────────────────────────────────
103
+
104
@main.command()
def sources() -> None:
    """List packages with built-in llms.txt support."""
    # Imported lazily so the registry tables are only loaded for this command.
    from .sources.registry import KNOWN_LLMS_TXT, KNOWN_URLS

    emit = click.echo

    emit("Packages with llms.txt support (fast targeted access):")
    for package in sorted(KNOWN_LLMS_TXT):
        emit(f" {package}")

    if KNOWN_URLS:
        emit()
        emit("Packages with direct URL shortcuts:")
        for package, doc_url in sorted(KNOWN_URLS.items()):
            emit(f" {package:<16} {doc_url}")

    # NOTE(review): assumed to run unconditionally (outside the KNOWN_URLS
    # branch); the original indentation was not recoverable — confirm.
    emit()
    emit("Identifier formats: bare-name · pypi: · npm: · gh: · https://")
119
+
120
+
121
+ # ── mcp ────────────────────────────────────────────────────────────────────────
122
+
123
@main.command("mcp")
def mcp_serve() -> None:
    """Start DocuLayer as an MCP server (stdio transport)."""
    # Deferred import: the MCP server module is only loaded when this
    # subcommand actually runs.
    from . import mcp_server

    mcp_server.main()
129
+
130
+
131
+ if __name__ == "__main__":
132
+ main()
doculayer/config.py ADDED
@@ -0,0 +1,31 @@
1
+ """DocuLayer configuration.
2
+
3
+ All settings read from environment variables with sane defaults.
4
+ No config files — no disk reads.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
import os
from dataclasses import dataclass, field
11
+
12
+
13
@dataclass
class DocuLayerConfig:
    """Runtime settings for DocuLayer, sourced from environment variables.

    Each field's default is computed when an instance is created (via
    ``default_factory``), not when the class is defined. A
    ``DocuLayerConfig()`` built after the environment has changed therefore
    sees the new values. Any field can still be overridden by passing it
    explicitly to the constructor.

    Raises:
        ValueError: at instantiation, if an environment variable holds a
            value that cannot be converted to the field's numeric type.
    """

    # ── cache (in-memory only; no disk persistence) ──────────────────────────
    cache_ttl: int = field(  # seconds
        default_factory=lambda: int(os.getenv("DOCULAYER_CACHE_TTL", "3600"))
    )
    max_cache_size: int = field(  # entries
        default_factory=lambda: int(os.getenv("DOCULAYER_MAX_CACHE", "256"))
    )

    # ── HTTP fetch ────────────────────────────────────────────────────────────
    fetch_timeout: float = field(
        default_factory=lambda: float(os.getenv("DOCULAYER_FETCH_TIMEOUT", "12.0"))
    )
    max_fetch_bytes: int = field(
        default_factory=lambda: int(os.getenv("DOCULAYER_MAX_BYTES", str(512 * 1024)))
    )

    # ── output shaping ────────────────────────────────────────────────────────
    max_section_words: int = field(
        default_factory=lambda: int(os.getenv("DOCULAYER_MAX_WORDS", "400"))
    )
25
+
26
+
27
+ _default = DocuLayerConfig()
28
+
29
+
30
def get_config() -> DocuLayerConfig:
    """Return the module-level ``DocuLayerConfig`` instance."""
    shared = _default
    return shared
@@ -0,0 +1 @@
1
+ # doculayer core
@@ -0,0 +1,73 @@
1
+ """Process-local TTL cache.
2
+
3
+ Zero disk I/O by design. Cache entries evaporate when the process exits.
4
+ This is the storage guarantee DocuLayer makes: no docs are persisted.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from dataclasses import dataclass
11
+ from typing import Generic, Optional, TypeVar
12
+
13
+ T = TypeVar("T")
14
+
15
+
16
@dataclass
class _Entry(Generic[T]):
    """A cached value paired with the monotonic-clock instant it expires."""

    value: T
    expires_at: float  # monotonic clock

    @property
    def alive(self) -> bool:
        """True while the monotonic clock has not yet reached ``expires_at``."""
        return self.expires_at > time.monotonic()
24
+
25
+
26
class TTLCache(Generic[T]):
    """In-memory TTL cache.

    Eviction policy: expired-first, then earliest-expiring on overflow.
    Thread safety: single-threaded (asyncio); no locks needed.

    A non-positive ``max_size`` disables caching: ``set`` stores nothing and
    every ``get`` misses.
    """

    def __init__(self, ttl: int, max_size: int = 256) -> None:
        """Create an empty cache.

        Args:
            ttl:      Lifetime of each entry, in seconds, from the moment it is set.
            max_size: Maximum number of entries held at once.
        """
        self._ttl = ttl
        self._max = max_size
        self._store: dict[str, _Entry[T]] = {}

    # ── public ────────────────────────────────────────────────────────────────

    def get(self, key: str) -> Optional[T]:
        """Return the live value for *key*, or ``None`` if absent or expired.

        An expired entry is removed as a side effect of the lookup.
        """
        entry = self._store.get(key)
        if entry is None:
            return None
        if not entry.alive:
            del self._store[key]
            return None
        return entry.value

    def set(self, key: str, value: T) -> None:
        """Store *value* under *key* with a fresh TTL, evicting if full."""
        if self._max <= 0:
            # Zero capacity: there is no room to store anything, and evicting
            # from an empty store used to raise ValueError from min().
            return
        # Overwriting an existing key never grows the store, so only a new
        # key can trigger eviction.
        if key not in self._store and len(self._store) >= self._max:
            self._evict()
        self._store[key] = _Entry(value=value, expires_at=time.monotonic() + self._ttl)

    def invalidate(self, key: str) -> None:
        """Drop *key* if present; a missing key is not an error."""
        self._store.pop(key, None)

    def clear(self) -> None:
        """Remove every entry."""
        self._store.clear()

    @property
    def stats(self) -> dict[str, int]:
        """Counts of live entries (``alive``) and all stored entries (``total``)."""
        live = sum(1 for v in self._store.values() if v.alive)
        return {"alive": live, "total": len(self._store)}

    # ── private ───────────────────────────────────────────────────────────────

    def _evict(self) -> None:
        """Make room: purge expired entries, then the earliest-expiring one."""
        dead = [k for k, v in self._store.items() if not v.alive]
        for k in dead:
            del self._store[k]
        # Guard against an empty store so min() is never called on nothing.
        if self._store and len(self._store) >= self._max:
            victim = min(self._store, key=lambda k: self._store[k].expires_at)
            del self._store[victim]
@@ -0,0 +1,48 @@
1
+ """Shared citation formatting for DocuLayer.
2
+
3
+ Every piece of documentation returned to a caller MUST include provenance.
4
+ This module provides a single source of truth for that formatting so no
5
+ output path can silently omit a source attribution.
6
+
7
+ Design guarantee
8
+ ----------------
9
+ ``attr_header`` and ``cited_block`` are the only functions that produce
10
+ attribution strings. Import them here — never duplicate the format.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import time
16
+
17
+
18
def attr_header(url: str, fetched_at: float, from_cache: bool) -> str:
    """Return a two-line Markdown blockquote attributing content to *url*.

    The first line names the source and ends with two spaces (a Markdown hard
    line break) so the second line renders on its own row. The second line
    gives the age of the fetch in whole seconds, minutes or hours, followed
    by ``*(cached)*`` when *from_cache* is true. For example (the two
    trailing spaces on the first line are not visible here)::

        > **Source**: https://fastapi.tiangolo.com/tutorial/
        > **Fetched**: 42s ago *(cached)*

    Args:
        url:        The canonical URL the content was fetched from.
        fetched_at: Unix timestamp of the fetch (``time.time()``).
        from_cache: Whether the result was served from the in-memory cache.
    """
    # Clamp at zero: a timestamp slightly in the future (clock adjustment or
    # skew) must not be rendered as a negative age such as "-3s ago".
    age = max(0.0, time.time() - fetched_at)
    if age < 60:
        age_s = f"{int(age)}s"
    elif age < 3600:
        age_s = f"{int(age / 60)}m"
    else:
        age_s = f"{int(age / 3600)}h"
    note = " *(cached)*" if from_cache else ""
    return f"> **Source**: {url}  \n> **Fetched**: {age_s} ago{note}"
40
+
41
+
42
def cited_block(content: str, url: str, fetched_at: float, from_cache: bool) -> str:
    """Prefix *content* with its attribution header.

    The header comes from ``attr_header`` and is separated from the content
    by one blank line, so provenance always precedes the text.
    """
    header = attr_header(url, fetched_at, from_cache)
    return f"{header}\n\n{content}"
@@ -0,0 +1,102 @@
1
+ """Async HTTP fetcher backed by TTLCache.
2
+
3
+ Guarantees:
4
+ - No file-system writes.
5
+ - Stale content is never served past the TTL.
6
+ - Hard byte cap prevents oversized responses from swamping memory.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import time
13
+ from dataclasses import dataclass
14
+
15
+ import httpx
16
+
17
+ from .cache import TTLCache
18
+
19
+
20
@dataclass
class FetchResult:
    """Outcome of fetching one documentation URL."""

    url: str  # final URL after redirects
    content: str
    content_type: str
    fetched_at: float  # Unix timestamp
    from_cache: bool

    def age_label(self) -> str:
        """Return the time since ``fetched_at`` as ``"<n>s"``, ``"<n>m"`` or ``"<n>h"``.

        The value is truncated to a whole number of the chosen unit. A
        ``fetched_at`` that lies in the future is reported as ``"0s"``
        instead of a negative age.
        """
        # Clamp so wall-clock adjustments cannot produce labels like "-3s".
        secs = max(0.0, time.time() - self.fetched_at)
        if secs < 60:
            return f"{int(secs)}s"
        if secs < 3600:
            return f"{int(secs / 60)}m"
        return f"{int(secs / 3600)}h"
35
+
36
+
37
class DocFetcher:
    """Fetch documentation pages with TTL in-memory caching."""

    _HEADERS = {
        "User-Agent": "DocuLayer/1.0 (live-doc-access-layer; no-hallucination)",
        "Accept": "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }

    def __init__(
        self,
        ttl: int = 3600,
        max_size: int = 256,
        timeout: float = 12.0,
        max_bytes: int = 524_288,
    ) -> None:
        """Configure the fetcher.

        Args:
            ttl:       Cache lifetime of a fetched page, in seconds.
            max_size:  Maximum number of cached pages.
            timeout:   Timeout passed to the HTTP client.
            max_bytes: Upper bound on the number of body bytes kept per page.
        """
        self._cache: TTLCache[FetchResult] = TTLCache(ttl, max_size)
        self._ttl = ttl
        self._timeout = timeout
        self._max_bytes = max_bytes

    @staticmethod
    def _key(url: str) -> str:
        """Return the cache key for *url*: first 20 hex chars of its SHA-256."""
        return hashlib.sha256(url.encode()).hexdigest()[:20]

    async def fetch(self, url: str, force: bool = False) -> FetchResult:
        """Fetch *url*, returning a cached result when still live.

        Args:
            url:   Page to retrieve; redirects are followed.
            force: When true, skip the cache lookup and always hit the network.

        Returns:
            A ``FetchResult``. ``from_cache`` is true only for cache hits; the
            stored ``fetched_at`` is preserved so callers can show the real age.

        Raises:
            httpx.HTTPStatusError: for 4xx/5xx responses (``raise_for_status``).
        """
        key = self._key(url)

        if not force:
            hit = self._cache.get(key)
            if hit is not None:
                # Return a copy flagged as cached; the stored entry keeps
                # from_cache=False.
                return FetchResult(
                    url=hit.url,
                    content=hit.content,
                    content_type=hit.content_type,
                    fetched_at=hit.fetched_at,
                    from_cache=True,
                )

        async with httpx.AsyncClient(
            follow_redirects=True, timeout=self._timeout
        ) as client:
            # Stream the body so the byte cap is enforced while downloading.
            # Reading ``resp.text`` first would buffer and decode the whole
            # response before any truncation could happen.
            async with client.stream("GET", url, headers=self._HEADERS) as resp:
                resp.raise_for_status()
                raw = bytearray()
                async for chunk in resp.aiter_bytes():
                    raw += chunk
                    if len(raw) >= self._max_bytes:
                        break
                final_url = str(resp.url)
                ct = resp.headers.get("content-type", "text/html")
                encoding = resp.encoding or "utf-8"

        body = bytes(raw[: self._max_bytes])
        try:
            # errors="replace" also covers a multi-byte character cut in half
            # by the cap.
            content = body.decode(encoding, errors="replace")
        except LookupError:
            # Server advertised a charset Python does not know.
            content = body.decode("utf-8", errors="replace")

        result = FetchResult(
            url=final_url,
            content=content,
            content_type=ct,
            fetched_at=time.time(),
            from_cache=False,
        )
        self._cache.set(key, result)
        return result

    @property
    def cache_stats(self) -> dict[str, int]:
        """Live/total entry counts of the underlying cache."""
        return self._cache.stats

    @property
    def ttl(self) -> int:
        """The cache TTL, in seconds, this fetcher was created with."""
        return self._ttl
@@ -0,0 +1,156 @@
1
+ """Parse HTML or Markdown documentation into a flat list of DocSection objects.
2
+
3
+ Sections map 1-to-1 with headings. The parser never generates text —
4
+ every word in a DocSection came verbatim from the fetched source.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ import time
11
+ from dataclasses import dataclass, field
12
+ from typing import Optional
13
+
14
+ import markdownify
15
+ from bs4 import BeautifulSoup
16
+
17
+ from .citation import attr_header
18
+
19
+
20
@dataclass
class DocSection:
    """One heading-delimited section of a documentation page."""

    title: str
    content: str  # Markdown, verbatim from source
    level: int  # heading depth: 0 = pre-heading preamble, 1-6 = h1-h6
    anchor: Optional[str]
    source_url: str
    fetched_at: float = field(default_factory=time.time)
    from_cache: bool = False

    def trimmed(self, max_words: int) -> str:
        """Return content, hard-truncated to *max_words* whitespace-separated words.

        The kept prefix is sliced from the original string, so newlines,
        indentation and code-block layout inside it are preserved exactly.
        When truncation happens, ``*(content truncated)*`` is appended after
        a blank line. A non-positive *max_words* keeps no content.
        """
        words = list(re.finditer(r"\S+", self.content))
        if len(words) <= max_words:
            return self.content
        # Cut right after the last word that fits, keeping the source's own
        # whitespace instead of re-joining the words with single spaces.
        cut = words[max_words - 1].end() if max_words > 0 else 0
        return self.content[:cut] + "\n\n*(content truncated)*"

    @property
    def cited_content(self) -> str:
        """Content with a Markdown blockquote citation prepended.

        Use this property whenever returning documentation to a caller so
        provenance is structurally inseparable from the text. Example::

            > **Source**: https://fastapi.tiangolo.com/tutorial/
            > **Fetched**: 12s ago

            ## Dependency Injection
            ...
        """
        return f"{attr_header(self.source_url, self.fetched_at, self.from_cache)}\n\n{self.content}"
51
+
52
+
53
+ # ── HTML noise selectors ──────────────────────────────────────────────────────
54
+
55
+ _NOISE_SEL = [
56
+ "nav", "header", "footer", "aside",
57
+ ".sidebar", ".nav", ".navbar", ".header", ".footer",
58
+ ".toc", ".table-of-contents", "#sidebar", ".breadcrumb",
59
+ ".pagination", ".cookie-banner", ".advertisement",
60
+ "[role=navigation]", "[role=banner]", "[role=complementary]",
61
+ "script", "style", "noscript", "iframe",
62
+ ]
63
+
64
+ # Heading pattern: lines starting with 1-6 # characters
65
+ _HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
66
+ # Pandoc-style heading anchors: {#my-anchor}
67
+ _ANCHOR_RE = re.compile(r"\{#([^}]+)\}")
68
+
69
+
70
class DocParser:
    """Stateless parser for HTML and Markdown documentation pages."""

    # A fence line of a fenced code block: up to three spaces of indent, then
    # a run of at least three backticks or tildes, then the rest of the line
    # (the info string on an opening fence).
    _FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$", re.MULTILINE)

    def parse(
        self,
        content: str,
        url: str,
        content_type: str = "text/html",
        fetched_at: float = 0.0,
        from_cache: bool = False,
    ) -> list[DocSection]:
        """Parse *content* into sections.

        HTML (any *content_type* containing ``html``) is stripped of page
        chrome and converted to Markdown first; anything else is treated as
        Markdown already. The Markdown is then split at heading boundaries.

        Args:
            content:      Raw page body.
            url:          Source URL recorded on every section.
            content_type: Content-Type of the page.
            fetched_at:   Unix timestamp of the fetch; a falsy value means "now".
            from_cache:   Recorded on every section.
        """
        ts = fetched_at or time.time()
        if "html" in content_type.lower():
            md = self._html_to_md(content)
        else:
            md = content
        return self._split_md(md, url, ts, from_cache)

    # ── private ───────────────────────────────────────────────────────────────

    @staticmethod
    def _html_to_md(html: str) -> str:
        """Convert an HTML page to Markdown, keeping only the main content."""
        soup = BeautifulSoup(html, "lxml")

        # Strip navigation / chrome
        for sel in _NOISE_SEL:
            for el in soup.select(sel):
                el.decompose()

        # Find main content region; the first candidate that exists wins.
        main = (
            soup.find("main")
            or soup.find(id=re.compile(r"^(content|main|body)$", re.I))
            or soup.find(
                class_=re.compile(r"\b(content|main|body|article|post|docs)\b", re.I)
            )
            or soup.find("article")
            or soup.body
        )

        return markdownify.markdownify(
            str(main or soup), heading_style="ATX", strip=["img"]
        )

    @staticmethod
    def _fenced_spans(md: str) -> list[tuple[int, int]]:
        """Return ``(start, end)`` offsets of fenced code blocks in *md*.

        A block opens at a fence line and closes at the next fence line that
        uses the same character, is at least as long, and carries no info
        string. An unclosed fence runs to the end of the text.
        """
        spans: list[tuple[int, int]] = []
        open_at: Optional[int] = None
        open_marker = ""
        for m in DocParser._FENCE_RE.finditer(md):
            marker, rest = m.group(1), m.group(2)
            if open_at is None:
                open_at, open_marker = m.start(), marker
            elif (
                marker[0] == open_marker[0]
                and len(marker) >= len(open_marker)
                and not rest.strip()
            ):
                spans.append((open_at, m.end()))
                open_at = None
        if open_at is not None:
            spans.append((open_at, len(md)))
        return spans

    @staticmethod
    def _split_md(
        md: str, url: str, fetched_at: float, from_cache: bool
    ) -> list[DocSection]:
        """Split Markdown into one ``DocSection`` per ATX heading.

        Text before the first heading becomes a level-0 "Overview" section;
        text with no headings at all becomes a single level-0 "Documentation"
        section. Lines starting with ``#`` inside fenced code blocks (shell or
        Python comments, for instance) are not treated as headings.
        """
        md = re.sub(r"\n{3,}", "\n\n", md.strip())
        fences = DocParser._fenced_spans(md)
        matches = [
            m
            for m in _HEADING_RE.finditer(md)
            if not any(start <= m.start() < end for start, end in fences)
        ]
        sections: list[DocSection] = []

        if not matches:
            if md.strip():
                sections.append(
                    DocSection("Documentation", md.strip(), 0, None, url, fetched_at, from_cache)
                )
            return sections

        # Content that appears before the first heading
        preamble = md[: matches[0].start()].strip()
        if preamble:
            sections.append(DocSection("Overview", preamble, 0, None, url, fetched_at, from_cache))

        for i, m in enumerate(matches):
            level = len(m.group(1))
            raw_title = m.group(2).strip()

            # Pull a Pandoc-style ``{#anchor}`` out of the title if present.
            anchor_m = _ANCHOR_RE.search(raw_title)
            anchor = anchor_m.group(1) if anchor_m else None
            title = _ANCHOR_RE.sub("", raw_title).strip()

            # The body runs up to the next real heading (or end of text).
            body_start = m.end()
            body_end = matches[i + 1].start() if i + 1 < len(matches) else len(md)
            body = md[body_start:body_end].strip()

            full = f"{'#' * level} {title}\n\n{body}" if body else f"{'#' * level} {title}"
            sections.append(
                DocSection(title, full, level, anchor, url, fetched_at, from_cache)
            )

        return sections