msaas-docs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docs/__init__.py ADDED
@@ -0,0 +1,34 @@
1
+ """Knowledge base and documentation system for SaaS products."""
2
+
3
+ from docs.config import DocsConfig, get_docs, init_docs
4
+ from docs.models import (
5
+ DocCategory,
6
+ DocPage,
7
+ DocSearchResult,
8
+ DocTree,
9
+ DocVersion,
10
+ PageStatus,
11
+ )
12
+ from docs.renderer import DocRenderer
13
+ from docs.router import create_docs_router
14
+ from docs.search import DocSearch
15
+ from docs.service import DocsService
16
+ from docs.store import DocsStore, InMemoryStore
17
+
18
+ __all__ = [
19
+ "DocCategory",
20
+ "DocPage",
21
+ "DocRenderer",
22
+ "DocSearch",
23
+ "DocSearchResult",
24
+ "DocTree",
25
+ "DocVersion",
26
+ "DocsConfig",
27
+ "DocsService",
28
+ "DocsStore",
29
+ "InMemoryStore",
30
+ "PageStatus",
31
+ "create_docs_router",
32
+ "get_docs",
33
+ "init_docs",
34
+ ]
docs/config.py ADDED
@@ -0,0 +1,51 @@
1
+ """Global configuration and singleton access for the docs module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from docs.service import DocsService
10
+
11
+ _instance: DocsService | None = None
12
+
13
+
14
+ @dataclass(frozen=True, slots=True)
15
+ class DocsConfig:
16
+ """Configuration for the docs module."""
17
+
18
+ base_path: str = "/docs"
19
+ enable_versioning: bool = True
20
+ enable_search: bool = True
21
+ default_locale: str = "en"
22
+ max_versions: int = 100
23
+ search_snippet_length: int = 160
24
+ extra: dict[str, object] = field(default_factory=dict)
25
+
26
+
27
def init_docs(config: DocsConfig | None = None) -> DocsService:
    """Build the module-wide ``DocsService`` and remember it as the singleton.

    Args:
        config: Settings to use; a default ``DocsConfig()`` is created when omitted.

    Returns:
        The newly created service. Any previously stored instance is replaced.
    """
    global _instance  # noqa: PLW0603
    # Imported lazily so importing this module does not pull in the whole package.
    from docs.search import DocSearch
    from docs.service import DocsService
    from docs.store import InMemoryStore

    settings = config or DocsConfig()
    backing_store = InMemoryStore()
    if settings.enable_search:
        index = DocSearch(snippet_length=settings.search_snippet_length)
    else:
        index = None

    _instance = DocsService(config=settings, store=backing_store, search=index)
    return _instance
39
+
40
+
41
def get_docs() -> DocsService:
    """Return the singleton created by ``init_docs``.

    Raises:
        RuntimeError: If ``init_docs`` has not been called (or the singleton was reset).
    """
    service = _instance
    if service is not None:
        return service
    raise RuntimeError("docs module not initialized -- call init_docs() first")
46
+
47
+
48
def reset_docs() -> None:
    """Forget the stored singleton so the next ``get_docs`` call raises again.

    Intended for test isolation.
    """
    global _instance  # noqa: PLW0603
    _instance = None
docs/models.py ADDED
@@ -0,0 +1,162 @@
1
+ """Domain models for the docs module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import uuid
7
+ from datetime import datetime, timezone
8
+ from enum import StrEnum
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+
13
+ class PageStatus(StrEnum):
14
+ """Publication lifecycle status for a documentation page."""
15
+
16
+ DRAFT = "draft"
17
+ PUBLISHED = "published"
18
+ ARCHIVED = "archived"
19
+
20
+
21
def _generate_id() -> str:
    """Return a random 16-character lowercase hex identifier (prefix of a UUID4)."""
    full_hex = uuid.uuid4().hex
    return full_hex[:16]
23
+
24
+
25
def _now() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
27
+
28
+
29
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, hyphen-separated URL slug.

    Punctuation is dropped, runs of whitespace/underscores become a single
    hyphen, repeated hyphens are collapsed, and edge hyphens are trimmed.
    """
    normalized = text.lower().strip()
    without_punctuation = re.sub(r"[^\w\s-]", "", normalized)
    hyphenated = re.sub(r"[\s_]+", "-", without_punctuation)
    collapsed = re.sub(r"-+", "-", hyphenated)
    return collapsed.strip("-")
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Core models
39
+ # ---------------------------------------------------------------------------
40
+
41
+
42
class DocCategory(BaseModel):
    """A category that groups documentation pages."""

    # Identity: a random id is generated when the caller supplies none.
    id: str = Field(default_factory=_generate_id)
    name: str
    # Left empty, the slug is derived from ``name`` after validation.
    slug: str = ""
    description: str = ""
    order: int = 0
    # Id of the enclosing category, or None when no parent is set.
    parent_id: str | None = None

    def model_post_init(self, _context: object) -> None:
        # An explicitly provided slug always wins.
        if self.slug:
            return
        self.slug = slugify(self.name)
55
+
56
+
57
class DocPage(BaseModel):
    """A single documentation page."""

    # -- identity ----------------------------------------------------------
    id: str = Field(default_factory=_generate_id)
    # Left empty, the slug is derived from ``title`` after validation.
    slug: str = ""
    title: str

    # -- content -----------------------------------------------------------
    content_markdown: str = ""
    content_html: str = ""

    # -- placement ---------------------------------------------------------
    parent_id: str | None = None
    order: int = 0
    category: str = ""
    tags: list[str] = Field(default_factory=list)

    # -- authorship / lifecycle --------------------------------------------
    author_id: str = ""
    status: PageStatus = PageStatus.DRAFT
    version: int = 1
    locale: str = "en"

    # -- timestamps (UTC) --------------------------------------------------
    created_at: datetime = Field(default_factory=_now)
    updated_at: datetime = Field(default_factory=_now)
    published_at: datetime | None = None

    def model_post_init(self, _context: object) -> None:
        # An explicitly provided slug always wins.
        if self.slug:
            return
        self.slug = slugify(self.title)
80
+
81
+
82
class DocVersion(BaseModel):
    """An immutable snapshot of a page at a specific version."""

    # Which page, and which revision number of it, this snapshot records.
    page_id: str
    version: int
    # Markdown source as it was at that revision.
    content_markdown: str
    author_id: str = ""
    # Optional change note stored with the snapshot.
    message: str = ""
    created_at: datetime = Field(default_factory=_now)
91
+
92
+
93
class DocSearchResult(BaseModel):
    """A single search hit."""

    # The matched page.
    page_id: str
    title: str
    slug: str
    # Excerpt of the page text shown with the hit.
    snippet: str = ""
    # Relevance score assigned by the search index.
    score: float = 0.0
    category: str = ""
102
+
103
+
104
class DocTreeNode(BaseModel):
    """A node in the navigation tree (page with optional children)."""

    page_id: str
    title: str
    slug: str
    # Sort key among siblings.
    order: int = 0
    # Nested pages; a fresh empty list per node.
    children: list[DocTreeNode] = Field(default_factory=list)
112
+
113
+
114
class DocTree(BaseModel):
    """Hierarchical navigation tree, optionally scoped to a category."""

    # Empty string when the tree is not restricted to one category.
    category: str = ""
    # Top-level nodes; descendants hang off each node's ``children``.
    pages: list[DocTreeNode] = Field(default_factory=list)
119
+
120
+
121
+ # ---------------------------------------------------------------------------
122
+ # API request / response helpers
123
+ # ---------------------------------------------------------------------------
124
+
125
+
126
class CreatePageRequest(BaseModel):
    # Request body for creating a page; only ``title`` is required.
    title: str
    content_markdown: str = ""
    category: str = ""
    parent_id: str | None = None
    tags: list[str] = Field(default_factory=list)
    author_id: str = ""
    locale: str = "en"
134
+
135
+
136
class UpdatePageRequest(BaseModel):
    # Request body for a partial page update: a field left as None is not
    # applied to the page (None values are dropped before the update).
    title: str | None = None
    content_markdown: str | None = None
    category: str | None = None
    parent_id: str | None = None
    tags: list[str] | None = None
    order: int | None = None
    locale: str | None = None
    # Change note recorded with the version snapshot created by the update.
    version_message: str = ""
145
+
146
+
147
class RevertRequest(BaseModel):
    # Request body for a revert: the version number whose content to restore.
    version: int
149
+
150
+
151
class CreateCategoryRequest(BaseModel):
    # Request body for creating a category; only ``name`` is required.
    name: str
    description: str = ""
    order: int = 0
    parent_id: str | None = None
156
+
157
+
158
class PaginatedPages(BaseModel):
    # One page of a page listing plus the paging metadata it was built with.
    items: list[DocPage]
    # Total count reported by the store alongside ``items``.
    total: int
    # Page number and page size that were requested.
    page: int
    per_page: int
docs/renderer.py ADDED
@@ -0,0 +1,247 @@
1
+ """Markdown-to-HTML renderer and HTML utilities.
2
+
3
+ Provides a lightweight, dependency-free renderer covering the most common
4
+ Markdown constructs: headings, paragraphs, bold, italic, inline code,
5
+ code blocks, links, images, unordered/ordered lists, blockquotes,
6
+ horizontal rules, and tables.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import html
12
+ import re
13
+ from dataclasses import dataclass, field
14
+
15
+
16
@dataclass
class TocEntry:
    """One heading of a rendered document, as listed in its table of contents."""

    # Heading depth taken from the ``<hN>`` tag.
    level: int
    # Heading text with markup removed.
    text: str
    # Value of the heading's ``id`` attribute.
    anchor: str
23
+
24
+
25
+ @dataclass
26
+ class DocRenderer:
27
+ """Stateless Markdown renderer with TOC/text extraction helpers."""
28
+
29
+ heading_prefix: str = "doc-"
30
+ _anchor_counts: dict[str, int] = field(default_factory=dict, repr=False)
31
+
32
+ # ------------------------------------------------------------------
33
+ # Public API
34
+ # ------------------------------------------------------------------
35
+
36
+ def render_markdown(self, content: str) -> str:
37
+ """Convert Markdown text to HTML."""
38
+ self._anchor_counts = {}
39
+ lines = content.split("\n")
40
+ html_parts: list[str] = []
41
+ i = 0
42
+ while i < len(lines):
43
+ line = lines[i]
44
+
45
+ # Fenced code block
46
+ if line.strip().startswith("```"):
47
+ block, i = self._parse_code_block(lines, i)
48
+ html_parts.append(block)
49
+ continue
50
+
51
+ # Table
52
+ if i + 1 < len(lines) and re.match(r"^\|.*\|$", line.strip()) and re.match(
53
+ r"^\|[\s\-:|]+\|$", lines[i + 1].strip()
54
+ ):
55
+ table, i = self._parse_table(lines, i)
56
+ html_parts.append(table)
57
+ continue
58
+
59
+ # Heading
60
+ if m := re.match(r"^(#{1,6})\s+(.+)$", line):
61
+ level = len(m.group(1))
62
+ text = self._inline(m.group(2))
63
+ anchor = self._make_anchor(text)
64
+ html_parts.append(
65
+ f'<h{level} id="{anchor}">{text}</h{level}>'
66
+ )
67
+ i += 1
68
+ continue
69
+
70
+ # Horizontal rule
71
+ if re.match(r"^(\*{3,}|-{3,}|_{3,})\s*$", line.strip()):
72
+ html_parts.append("<hr>")
73
+ i += 1
74
+ continue
75
+
76
+ # Blockquote
77
+ if line.strip().startswith(">"):
78
+ block, i = self._parse_blockquote(lines, i)
79
+ html_parts.append(block)
80
+ continue
81
+
82
+ # Unordered list
83
+ if re.match(r"^[\s]*[-*+]\s+", line):
84
+ block, i = self._parse_unordered_list(lines, i)
85
+ html_parts.append(block)
86
+ continue
87
+
88
+ # Ordered list
89
+ if re.match(r"^[\s]*\d+\.\s+", line):
90
+ block, i = self._parse_ordered_list(lines, i)
91
+ html_parts.append(block)
92
+ continue
93
+
94
+ # Blank line
95
+ if not line.strip():
96
+ i += 1
97
+ continue
98
+
99
+ # Paragraph
100
+ para_lines: list[str] = []
101
+ while i < len(lines) and lines[i].strip() and not self._is_block_start(lines, i):
102
+ para_lines.append(lines[i])
103
+ i += 1
104
+ html_parts.append(f"<p>{self._inline(' '.join(para_lines))}</p>")
105
+
106
+ return "\n".join(html_parts)
107
+
108
+ def extract_toc(self, rendered_html: str) -> list[TocEntry]:
109
+ """Extract table-of-contents entries from rendered HTML."""
110
+ entries: list[TocEntry] = []
111
+ for m in re.finditer(r'<h(\d)\s+id="([^"]+)">(.*?)</h\1>', rendered_html):
112
+ entries.append(
113
+ TocEntry(
114
+ level=int(m.group(1)),
115
+ text=self._strip_tags(m.group(3)),
116
+ anchor=m.group(2),
117
+ )
118
+ )
119
+ return entries
120
+
121
+ def extract_text(self, rendered_html: str) -> str:
122
+ """Strip all HTML tags and return plain text for indexing."""
123
+ text = re.sub(r"<[^>]+>", " ", rendered_html)
124
+ text = html.unescape(text)
125
+ return re.sub(r"\s+", " ", text).strip()
126
+
127
+ # ------------------------------------------------------------------
128
+ # Block parsers
129
+ # ------------------------------------------------------------------
130
+
131
+ def _parse_code_block(self, lines: list[str], start: int) -> tuple[str, int]:
132
+ opening = lines[start].strip()
133
+ lang = opening.lstrip("`").strip()
134
+ i = start + 1
135
+ code_lines: list[str] = []
136
+ while i < len(lines):
137
+ if lines[i].strip() == "```":
138
+ i += 1
139
+ break
140
+ code_lines.append(html.escape(lines[i]))
141
+ i += 1
142
+ lang_attr = f' class="language-{lang}"' if lang else ""
143
+ code = "\n".join(code_lines)
144
+ return f"<pre><code{lang_attr}>{code}</code></pre>", i
145
+
146
+ def _parse_table(self, lines: list[str], start: int) -> tuple[str, int]:
147
+ header_cells = [c.strip() for c in lines[start].strip().strip("|").split("|")]
148
+ i = start + 2 # skip separator
149
+ rows: list[list[str]] = []
150
+ while i < len(lines) and re.match(r"^\|.*\|$", lines[i].strip()):
151
+ cells = [c.strip() for c in lines[i].strip().strip("|").split("|")]
152
+ rows.append(cells)
153
+ i += 1
154
+ parts = ["<table>", "<thead><tr>"]
155
+ for cell in header_cells:
156
+ parts.append(f"<th>{self._inline(cell)}</th>")
157
+ parts.append("</tr></thead>")
158
+ if rows:
159
+ parts.append("<tbody>")
160
+ for row in rows:
161
+ parts.append("<tr>")
162
+ for cell in row:
163
+ parts.append(f"<td>{self._inline(cell)}</td>")
164
+ parts.append("</tr>")
165
+ parts.append("</tbody>")
166
+ parts.append("</table>")
167
+ return "".join(parts), i
168
+
169
+ def _parse_blockquote(self, lines: list[str], start: int) -> tuple[str, int]:
170
+ i = start
171
+ content_lines: list[str] = []
172
+ while i < len(lines) and lines[i].strip().startswith(">"):
173
+ content_lines.append(re.sub(r"^>\s?", "", lines[i]))
174
+ i += 1
175
+ inner = self._inline(" ".join(content_lines))
176
+ return f"<blockquote><p>{inner}</p></blockquote>", i
177
+
178
+ def _parse_unordered_list(self, lines: list[str], start: int) -> tuple[str, int]:
179
+ i = start
180
+ items: list[str] = []
181
+ while i < len(lines) and re.match(r"^[\s]*[-*+]\s+", lines[i]):
182
+ text = re.sub(r"^[\s]*[-*+]\s+", "", lines[i])
183
+ items.append(f"<li>{self._inline(text)}</li>")
184
+ i += 1
185
+ return "<ul>" + "".join(items) + "</ul>", i
186
+
187
+ def _parse_ordered_list(self, lines: list[str], start: int) -> tuple[str, int]:
188
+ i = start
189
+ items: list[str] = []
190
+ while i < len(lines) and re.match(r"^[\s]*\d+\.\s+", lines[i]):
191
+ text = re.sub(r"^[\s]*\d+\.\s+", "", lines[i])
192
+ items.append(f"<li>{self._inline(text)}</li>")
193
+ i += 1
194
+ return "<ol>" + "".join(items) + "</ol>", i
195
+
196
+ # ------------------------------------------------------------------
197
+ # Inline formatting
198
+ # ------------------------------------------------------------------
199
+
200
+ def _inline(self, text: str) -> str:
201
+ """Apply inline Markdown transformations."""
202
+ # Inline code (must come before bold/italic to avoid conflicts)
203
+ text = re.sub(r"`([^`]+)`", r"<code>\1</code>", text)
204
+ # Images
205
+ text = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", r'<img src="\2" alt="\1">', text)
206
+ # Links
207
+ text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r'<a href="\2">\1</a>', text)
208
+ # Bold + italic
209
+ text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<strong><em>\1</em></strong>", text)
210
+ # Bold
211
+ text = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", text)
212
+ # Italic
213
+ text = re.sub(r"\*(.+?)\*", r"<em>\1</em>", text)
214
+ return text
215
+
216
+ # ------------------------------------------------------------------
217
+ # Helpers
218
+ # ------------------------------------------------------------------
219
+
220
+ def _is_block_start(self, lines: list[str], i: int) -> bool:
221
+ line = lines[i]
222
+ if re.match(r"^#{1,6}\s+", line):
223
+ return True
224
+ if line.strip().startswith("```"):
225
+ return True
226
+ if re.match(r"^[\s]*[-*+]\s+", line):
227
+ return True
228
+ if re.match(r"^[\s]*\d+\.\s+", line):
229
+ return True
230
+ if line.strip().startswith(">"):
231
+ return True
232
+ if re.match(r"^(\*{3,}|-{3,}|_{3,})\s*$", line.strip()):
233
+ return True
234
+ return False
235
+
236
+ def _make_anchor(self, text: str) -> str:
237
+ plain = self._strip_tags(text).lower()
238
+ slug = re.sub(r"[^\w\s-]", "", plain)
239
+ slug = re.sub(r"[\s]+", "-", slug).strip("-")
240
+ base = f"{self.heading_prefix}{slug}"
241
+ count = self._anchor_counts.get(base, 0)
242
+ self._anchor_counts[base] = count + 1
243
+ return base if count == 0 else f"{base}-{count}"
244
+
245
+ @staticmethod
246
+ def _strip_tags(text: str) -> str:
247
+ return re.sub(r"<[^>]+>", "", text)
docs/router.py ADDED
@@ -0,0 +1,153 @@
1
+ """FastAPI router factory for the docs module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from docs.config import get_docs
8
+ from docs.models import (
9
+ CreateCategoryRequest,
10
+ CreatePageRequest,
11
+ DocCategory,
12
+ DocPage,
13
+ DocSearchResult,
14
+ DocTree,
15
+ DocVersion,
16
+ PaginatedPages,
17
+ RevertRequest,
18
+ UpdatePageRequest,
19
+ )
20
+ from errors import BusinessLogicError, NotFoundError
21
+ from api_core.responses import ApiResponse, PaginatedResponse
22
+
23
+ if TYPE_CHECKING:
24
+ pass
25
+
26
+
27
def create_docs_router():  # noqa: ANN201 -- avoids hard dep on fastapi at import time
    """Create and return a FastAPI ``APIRouter`` exposing the docs HTTP API.

    Routes are mounted under the ``/docs`` prefix and cover pages (create,
    list, fetch, update, publish, archive, versions, revert), the navigation
    tree, categories, and search. Every handler resolves the service through
    ``get_docs()`` at request time, so the router can be built before
    ``init_docs()`` has run.
    """
    from fastapi import APIRouter, Query

    router = APIRouter(prefix="/docs", tags=["docs"])

    # ------------------------------------------------------------------
    # Pages
    # ------------------------------------------------------------------

    @router.post("/pages", response_model=DocPage, status_code=201)
    async def create_page(body: CreatePageRequest) -> DocPage:
        svc = get_docs()
        return await svc.create_page(
            title=body.title,
            content_markdown=body.content_markdown,
            category=body.category,
            parent_id=body.parent_id,
            tags=body.tags,
            author_id=body.author_id,
            locale=body.locale,
        )

    @router.get("/pages", response_model=PaginatedPages)
    async def list_pages(
        category: str | None = Query(None),
        status: str | None = Query(None),
        page: int = Query(1, ge=1),
        per_page: int = Query(20, ge=1, le=100),
    ) -> PaginatedPages:
        svc = get_docs()
        from docs.models import PageStatus

        # An unknown status value is a client error; report it through the
        # module's own error type instead of leaking a bare ValueError.
        try:
            page_status = PageStatus(status) if status else None
        except ValueError as exc:
            allowed = ", ".join(member.value for member in PageStatus)
            raise BusinessLogicError(
                f"Invalid status {status!r}; expected one of: {allowed}"
            ) from exc
        return await svc.list_pages(
            category=category, status=page_status, page=page, per_page=per_page
        )

    @router.get("/pages/{slug}", response_model=DocPage)
    async def get_page(slug: str) -> DocPage:
        svc = get_docs()
        # The service accepts either a page id or a slug here.
        result = await svc.get_page(slug)
        if result is None:
            raise NotFoundError("Page not found")
        return result

    @router.put("/pages/{page_id}", response_model=DocPage)
    async def update_page(page_id: str, body: UpdatePageRequest) -> DocPage:
        svc = get_docs()
        # Fields left as None mean "do not change" and are dropped.
        fields = body.model_dump(exclude_none=True)
        try:
            return await svc.update_page(page_id, **fields)
        except ValueError as exc:
            raise NotFoundError(str(exc)) from exc

    @router.post("/pages/{page_id}/publish", response_model=DocPage)
    async def publish_page(page_id: str) -> DocPage:
        svc = get_docs()
        try:
            return await svc.publish(page_id)
        except ValueError as exc:
            raise NotFoundError(str(exc)) from exc

    @router.post("/pages/{page_id}/archive", response_model=DocPage)
    async def archive_page(page_id: str) -> DocPage:
        svc = get_docs()
        try:
            return await svc.archive(page_id)
        except ValueError as exc:
            raise NotFoundError(str(exc)) from exc

    @router.get("/pages/{page_id}/versions", response_model=list[DocVersion])
    async def get_versions(page_id: str) -> list[DocVersion]:
        svc = get_docs()
        return await svc.get_versions(page_id)

    @router.post("/pages/{page_id}/revert", response_model=DocPage)
    async def revert_page(page_id: str, body: RevertRequest) -> DocPage:
        svc = get_docs()
        try:
            return await svc.revert(page_id, body.version)
        except ValueError as exc:
            raise NotFoundError(str(exc)) from exc

    # ------------------------------------------------------------------
    # Tree
    # ------------------------------------------------------------------

    @router.get("/tree", response_model=DocTree)
    async def get_tree(category: str | None = Query(None)) -> DocTree:
        svc = get_docs()
        return await svc.get_tree(category=category)

    # ------------------------------------------------------------------
    # Categories
    # ------------------------------------------------------------------

    @router.get("/categories", response_model=list[DocCategory])
    async def list_categories() -> list[DocCategory]:
        svc = get_docs()
        return await svc.list_categories()

    @router.post("/categories", response_model=DocCategory, status_code=201)
    async def create_category(body: CreateCategoryRequest) -> DocCategory:
        svc = get_docs()
        return await svc.create_category(
            name=body.name,
            description=body.description,
            order=body.order,
            parent_id=body.parent_id,
        )

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    @router.get("/search", response_model=list[DocSearchResult])
    async def search_docs(
        q: str = Query(..., min_length=1),
        limit: int = Query(20, ge=1, le=100),
    ) -> list[DocSearchResult]:
        svc = get_docs()
        # The service carries no index when search is disabled in the config.
        if svc.search is None:
            raise BusinessLogicError("Search not enabled")
        return svc.search.search(q, limit=limit)

    return router
docs/search.py ADDED
@@ -0,0 +1,159 @@
1
+ """In-memory full-text search with BM25-lite scoring."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import re
7
+ from dataclasses import dataclass, field
8
+
9
+ from docs.models import DocPage, DocSearchResult
10
+ from docs.renderer import DocRenderer
11
+
12
+
13
def _tokenize(text: str) -> list[str]:
    """Return the lowercase ASCII alphanumeric words of ``text``, in order.

    Single-character words are discarded.
    """
    words = re.findall(r"[a-z0-9]+", text.lower())
    return [word for word in words if len(word) > 1]
16
+
17
+
18
@dataclass
class _IndexedDoc:
    """What the search index remembers about one page."""

    # Page attributes copied from the page at indexing time.
    page_id: str
    title: str
    slug: str
    category: str
    # Tokens of title + tags + body text, and how many there are.
    tokens: list[str]
    token_count: int
    # Plain body text, used to build result snippets.
    text: str
29
+
30
+
31
@dataclass
class DocSearch:
    """In-memory inverted index that ranks pages with a BM25-style score."""

    # Length of the text window used for result snippets.
    snippet_length: int = 160
    # Scoring parameters used in the term-frequency / length normalisation.
    _k1: float = 1.5
    _b: float = 0.75
    # page id -> indexed document
    _documents: dict[str, _IndexedDoc] = field(default_factory=dict, repr=False)
    # token -> ids of the pages containing it
    _inverted: dict[str, set[str]] = field(default_factory=dict, repr=False)
    # Mean token count over all indexed documents.
    _avg_dl: float = field(default=0.0, repr=False)
    _renderer: DocRenderer = field(default_factory=DocRenderer, repr=False)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def index_page(self, page: DocPage) -> None:
        """Index ``page``, replacing any entry already stored under its id."""
        self.remove_page(page.id)

        # The body is rendered and stripped back to text so Markdown syntax
        # does not end up in the index.
        body_text = self._renderer.extract_text(
            self._renderer.render_markdown(page.content_markdown)
        )
        tag_text = " ".join(page.tags)
        tokens = _tokenize(f"{page.title} {tag_text} {body_text}")

        self._documents[page.id] = _IndexedDoc(
            page_id=page.id,
            title=page.title,
            slug=page.slug,
            category=page.category,
            tokens=tokens,
            token_count=len(tokens),
            text=body_text,
        )
        for term in set(tokens):
            self._inverted.setdefault(term, set()).add(page.id)

        self._recalc_avg_dl()

    def remove_page(self, page_id: str) -> None:
        """Drop ``page_id`` from the index; unknown ids are ignored."""
        entry = self._documents.pop(page_id, None)
        if entry is None:
            return
        for term in set(entry.tokens):
            holders = self._inverted.get(term)
            if not holders:
                continue
            holders.discard(page_id)
            # Do not keep empty posting sets around.
            if not holders:
                del self._inverted[term]
        self._recalc_avg_dl()

    def search(self, query: str, *, limit: int = 20) -> list[DocSearchResult]:
        """Return up to ``limit`` hits for ``query``, highest score first."""
        terms = _tokenize(query)
        if not terms:
            return []
        corpus_size = len(self._documents)
        if corpus_size == 0:
            return []

        avg_len = self._avg_dl or 1.0
        totals: dict[str, float] = {}
        for term in terms:
            holders = self._inverted.get(term)
            if not holders:
                continue
            df = len(holders)
            idf = math.log((corpus_size - df + 0.5) / (df + 0.5) + 1.0)
            for pid in holders:
                entry = self._documents[pid]
                tf = entry.tokens.count(term)
                numerator = tf * (self._k1 + 1)
                denominator = tf + self._k1 * (
                    1 - self._b + self._b * entry.token_count / avg_len
                )
                totals[pid] = totals.get(pid, 0.0) + idf * numerator / denominator

        best = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)[:limit]
        hits: list[DocSearchResult] = []
        for pid, score in best:
            entry = self._documents[pid]
            hits.append(
                DocSearchResult(
                    page_id=entry.page_id,
                    title=entry.title,
                    slug=entry.slug,
                    snippet=self._build_snippet(entry.text, terms),
                    score=round(score, 4),
                    category=entry.category,
                )
            )
        return hits

    @property
    def document_count(self) -> int:
        """Number of pages currently in the index."""
        return len(self._documents)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _recalc_avg_dl(self) -> None:
        """Recompute the mean document length (0.0 for an empty index)."""
        doc_count = len(self._documents)
        token_total = sum(entry.token_count for entry in self._documents.values())
        self._avg_dl = token_total / max(doc_count, 1)

    def _build_snippet(self, text: str, query_tokens: list[str]) -> str:
        """Cut a window of ``text`` around the earliest query-token match.

        Falls back to the start of ``text`` when nothing matches. An ellipsis
        marks each side that was truncated.
        """
        haystack = text.lower()
        positions = (haystack.find(token) for token in query_tokens)
        # len(text) doubles as the "no match" value.
        first_hit = min([len(text), *(pos for pos in positions if pos != -1)])
        if first_hit == len(text):
            return text[: self.snippet_length]

        # Keep a little leading context before the match.
        start = max(0, first_hit - 40)
        end = start + self.snippet_length
        prefix = "..." if start > 0 else ""
        suffix = "..." if end < len(text) else ""
        return f"{prefix}{text[start:end]}{suffix}"
docs/service.py ADDED
@@ -0,0 +1,232 @@
1
+ """Business logic layer for the docs module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+
7
+ from docs.config import DocsConfig
8
+ from docs.models import (
9
+ DocCategory,
10
+ DocPage,
11
+ DocTree,
12
+ DocTreeNode,
13
+ DocVersion,
14
+ PageStatus,
15
+ PaginatedPages,
16
+ slugify,
17
+ )
18
+ from docs.renderer import DocRenderer
19
+ from docs.search import DocSearch
20
+ from docs.store import DocsStore
21
+ from errors import NotFoundError, ValidationError
22
+
23
+
24
class DocsService:
    """Orchestrates page CRUD, versioning, search, and rendering.

    The service owns no data itself: pages, categories and versions live in
    the injected store, and the optional search index is kept in step with
    the store by the page-mutating methods. Archived pages are kept out of
    the search index.
    """

    def __init__(
        self,
        *,
        config: DocsConfig,
        store: DocsStore,
        search: DocSearch | None = None,
    ) -> None:
        """Wire the service.

        Args:
            config: Module settings (versioning switch, default locale, ...).
            store: Persistence backend.
            search: Search index, or None when search is disabled.
        """
        self.config = config
        self.store = store
        self.search = search
        self.renderer = DocRenderer()

    # ------------------------------------------------------------------
    # Pages
    # ------------------------------------------------------------------

    async def create_page(
        self,
        title: str,
        content_markdown: str = "",
        category: str = "",
        *,
        parent_id: str | None = None,
        tags: list[str] | None = None,
        author_id: str = "",
        locale: str | None = None,
    ) -> DocPage:
        """Create, persist and index a new page.

        The Markdown is rendered to HTML up front. ``locale`` falls back to
        the configured default. When versioning is enabled an "Initial
        version" snapshot is stored.

        Returns:
            The page as returned by the store.
        """
        page = DocPage(
            title=title,
            content_markdown=content_markdown,
            content_html=self.renderer.render_markdown(content_markdown),
            category=category,
            parent_id=parent_id,
            tags=tags or [],
            author_id=author_id,
            locale=locale or self.config.default_locale,
        )
        saved = await self.store.save_page(page)

        if self.config.enable_versioning:
            await self._save_version(saved, message="Initial version")
        self._sync_search_index(saved)
        return saved

    async def update_page(self, page_id: str, /, **fields: object) -> DocPage:
        """Apply ``fields`` to a page and store it as a new version.

        ``version_message`` is consumed as the snapshot note. Every other
        keyword whose value is not None and that names an existing page
        attribute is assigned as-is. A new title also regenerates the slug;
        new Markdown also regenerates the HTML. The version counter is bumped
        on every call.

        Raises:
            NotFoundError: If no page has the given id.
        """
        page = await self._require_page(page_id)
        version_message = str(fields.pop("version_message", ""))

        for key, value in fields.items():
            if value is not None and hasattr(page, key):
                setattr(page, key, value)

        if fields.get("title") is not None:
            page.slug = slugify(str(fields["title"]))
        if fields.get("content_markdown") is not None:
            page.content_html = self.renderer.render_markdown(page.content_markdown)

        page.version += 1
        page.updated_at = datetime.now(timezone.utc)
        saved = await self.store.save_page(page)

        if self.config.enable_versioning:
            await self._save_version(saved, message=version_message)
        # Editing an archived page must not bring it back into search results.
        self._sync_search_index(saved)
        return saved

    async def publish(self, page_id: str) -> DocPage:
        """Mark a page as published and make sure it is searchable.

        Raises:
            NotFoundError: If no page has the given id.
        """
        page = await self._require_page(page_id)
        now = datetime.now(timezone.utc)
        page.status = PageStatus.PUBLISHED
        page.published_at = now
        page.updated_at = now
        saved = await self.store.save_page(page)
        # Archiving removed the page from the index; publishing restores it.
        self._sync_search_index(saved)
        return saved

    async def archive(self, page_id: str) -> DocPage:
        """Mark a page as archived and remove it from the search index.

        Raises:
            NotFoundError: If no page has the given id.
        """
        page = await self._require_page(page_id)
        page.status = PageStatus.ARCHIVED
        page.updated_at = datetime.now(timezone.utc)
        saved = await self.store.save_page(page)
        self._sync_search_index(saved)
        return saved

    async def get_page(self, page_id_or_slug: str) -> DocPage | None:
        """Look a page up by id first, then by slug; None when neither matches."""
        page = await self.store.get_page_by_id(page_id_or_slug)
        if page:
            return page
        return await self.store.get_page_by_slug(page_id_or_slug)

    async def list_pages(
        self,
        *,
        category: str | None = None,
        status: PageStatus | None = None,
        page: int = 1,
        per_page: int = 20,
    ) -> PaginatedPages:
        """Return one page of the store's listing, wrapped with paging metadata."""
        items, total = await self.store.list_pages(
            category=category, status=status, page=page, per_page=per_page
        )
        return PaginatedPages(items=items, total=total, page=page, per_page=per_page)

    # ------------------------------------------------------------------
    # Versioning
    # ------------------------------------------------------------------

    async def get_versions(self, page_id: str) -> list[DocVersion]:
        """Return the stored version snapshots of a page."""
        return await self.store.get_versions(page_id)

    async def revert(self, page_id: str, version: int) -> DocPage:
        """Restore the Markdown of an earlier version as a new version.

        History is not rewritten: the revert goes through ``update_page``.

        Raises:
            NotFoundError: If the version snapshot or the page does not exist.
        """
        target = await self.store.get_version(page_id, version)
        if target is None:
            raise NotFoundError(f"Version {version} not found for page {page_id}")
        return await self.update_page(
            page_id,
            content_markdown=target.content_markdown,
            version_message=f"Reverted to version {version}",
        )

    # ------------------------------------------------------------------
    # Ordering
    # ------------------------------------------------------------------

    async def reorder(self, page_id: str, new_order: int) -> DocPage:
        """Change a page's sort position (recorded as a regular update)."""
        return await self.update_page(page_id, order=new_order, version_message="Reordered")

    # ------------------------------------------------------------------
    # Navigation tree
    # ------------------------------------------------------------------

    async def get_tree(self, category: str | None = None) -> DocTree:
        """Build the navigation tree of published pages.

        A page whose parent is missing from the fetched set (for example an
        unpublished parent) is placed at the top level. Siblings are sorted
        by ``order`` at every depth. At most 10,000 pages are requested.
        """
        pages, _ = await self.store.list_pages(
            category=category, status=PageStatus.PUBLISHED, per_page=10_000
        )
        nodes_by_id = {
            p.id: DocTreeNode(page_id=p.id, title=p.title, slug=p.slug, order=p.order)
            for p in pages
        }

        roots: list[DocTreeNode] = []
        for p in pages:
            node = nodes_by_id[p.id]
            if p.parent_id and p.parent_id in nodes_by_id:
                nodes_by_id[p.parent_id].children.append(node)
            else:
                roots.append(node)

        def sort_nodes(nodes: list[DocTreeNode]) -> None:
            nodes.sort(key=lambda n: n.order)
            for n in nodes:
                sort_nodes(n.children)

        sort_nodes(roots)
        return DocTree(category=category or "", pages=roots)

    # ------------------------------------------------------------------
    # Categories
    # ------------------------------------------------------------------

    async def create_category(
        self,
        name: str,
        *,
        description: str = "",
        order: int = 0,
        parent_id: str | None = None,
    ) -> DocCategory:
        """Create and persist a category."""
        cat = DocCategory(name=name, description=description, order=order, parent_id=parent_id)
        return await self.store.save_category(cat)

    async def list_categories(self) -> list[DocCategory]:
        """Return all categories from the store."""
        return await self.store.list_categories()

    async def get_category(self, category_id: str) -> DocCategory | None:
        """Return the category with the given id, or None."""
        return await self.store.get_category_by_id(category_id)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    async def _require_page(self, page_id: str) -> DocPage:
        """Fetch a page by id or raise ``NotFoundError``."""
        page = await self.store.get_page_by_id(page_id)
        if page is None:
            raise NotFoundError(f"Page not found: {page_id}")
        return page

    def _sync_search_index(self, page: DocPage) -> None:
        """Bring the search index in line with ``page`` (no-op without search)."""
        if self.search is None:
            return
        if page.status == PageStatus.ARCHIVED:
            self.search.remove_page(page.id)
        else:
            self.search.index_page(page)

    async def _save_version(self, page: DocPage, *, message: str = "") -> DocVersion:
        """Store a snapshot of the page's current Markdown under its version number."""
        version = DocVersion(
            page_id=page.id,
            version=page.version,
            content_markdown=page.content_markdown,
            author_id=page.author_id,
            message=message,
        )
        return await self.store.save_version(version)
docs/store.py ADDED
@@ -0,0 +1,153 @@
1
+ """Persistence abstraction and in-memory implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from copy import deepcopy
7
+
8
+ from docs.models import (
9
+ DocCategory,
10
+ DocPage,
11
+ DocVersion,
12
+ PageStatus,
13
+ )
14
+
15
+
16
class DocsStore(ABC):
    """Abstract persistence interface for pages, categories and versions.

    Concrete backends must implement every coroutine declared here.
    """

    # ---- pages ----

    @abstractmethod
    async def save_page(self, page: DocPage) -> DocPage:
        """Store *page* and return a ``DocPage``."""

    @abstractmethod
    async def get_page_by_id(self, page_id: str) -> DocPage | None:
        """Return the page with id *page_id*, or ``None``."""

    @abstractmethod
    async def get_page_by_slug(self, slug: str) -> DocPage | None:
        """Return the page whose slug is *slug*, or ``None``."""

    @abstractmethod
    async def list_pages(
        self,
        *,
        category: str | None = None,
        status: PageStatus | None = None,
        parent_id: str | None = None,
        page: int = 1,
        per_page: int = 20,
    ) -> tuple[list[DocPage], int]:
        """Return one page of results plus an integer count.

        ``category``, ``status`` and ``parent_id`` are optional filters;
        ``page`` and ``per_page`` select the window.
        """

    @abstractmethod
    async def delete_page(self, page_id: str) -> bool:
        """Delete the page with id *page_id*; the result is a ``bool``."""

    # ---- categories ----

    @abstractmethod
    async def save_category(self, category: DocCategory) -> DocCategory:
        """Store *category* and return a ``DocCategory``."""

    @abstractmethod
    async def get_category_by_id(self, category_id: str) -> DocCategory | None:
        """Return the category with id *category_id*, or ``None``."""

    @abstractmethod
    async def get_category_by_slug(self, slug: str) -> DocCategory | None:
        """Return the category whose slug is *slug*, or ``None``."""

    @abstractmethod
    async def list_categories(self) -> list[DocCategory]:
        """Return the stored categories as a list."""

    # ---- versions ----

    @abstractmethod
    async def save_version(self, version: DocVersion) -> DocVersion:
        """Store *version* and return a ``DocVersion``."""

    @abstractmethod
    async def get_versions(self, page_id: str) -> list[DocVersion]:
        """Return the versions recorded for *page_id* as a list."""

    @abstractmethod
    async def get_version(self, page_id: str, version: int) -> DocVersion | None:
        """Return version number *version* of *page_id*, or ``None``."""
68
+
69
+
70
class InMemoryStore(DocsStore):
    """Dict-backed implementation for testing and lightweight use.

    Every object is deep-copied on the way in and on the way out, so callers
    cannot mutate stored state through a reference they still hold.

    No locks are taken. The methods contain no ``await`` points, so each call
    runs to completion within one event loop, but sharing an instance across
    OS threads requires external synchronisation.
    """

    def __init__(self) -> None:
        self._pages: dict[str, DocPage] = {}
        self._categories: dict[str, DocCategory] = {}
        # Versions are kept per page id, in insertion order.
        self._versions: dict[str, list[DocVersion]] = {}

    # -- pages ---------------------------------------------------------------

    async def save_page(self, page: DocPage) -> DocPage:
        """Insert or overwrite the page keyed by ``page.id``; return a copy."""
        self._pages[page.id] = deepcopy(page)
        return deepcopy(page)

    async def get_page_by_id(self, page_id: str) -> DocPage | None:
        """Return a copy of the page with id *page_id*, or ``None``."""
        page = self._pages.get(page_id)
        return deepcopy(page) if page is not None else None

    async def get_page_by_slug(self, slug: str) -> DocPage | None:
        """Return a copy of the first stored page whose slug matches, or ``None``."""
        match = next((p for p in self._pages.values() if p.slug == slug), None)
        return deepcopy(match) if match is not None else None

    async def list_pages(
        self,
        *,
        category: str | None = None,
        status: PageStatus | None = None,
        parent_id: str | None = None,
        page: int = 1,
        per_page: int = 20,
    ) -> tuple[list[DocPage], int]:
        """Return one window of matching pages and the total match count.

        Args:
            category: Keep only pages with this category when not ``None``.
            status: Keep only pages with this status when not ``None``.
            parent_id: Keep only pages with this parent id when not ``None``.
            page: 1-based page number; values below 1 are treated as 1.
            per_page: Window size; negative values are treated as 0.

        Returns:
            ``(pages, total)`` where ``pages`` are copies sorted by
            ``(order, created_at)`` and ``total`` is the number of matches
            before pagination.
        """
        matches = [
            p
            for p in self._pages.values()
            if (category is None or p.category == category)
            and (status is None or p.status == status)
            and (parent_id is None or p.parent_id == parent_id)
        ]
        matches.sort(key=lambda p: (p.order, p.created_at))

        # Clamp so out-of-range arguments can never produce negative slice
        # bounds, which would silently select an unrelated window.
        page = max(page, 1)
        per_page = max(per_page, 0)
        start = (page - 1) * per_page
        window = matches[start : start + per_page]
        return [deepcopy(p) for p in window], len(matches)

    async def delete_page(self, page_id: str) -> bool:
        """Remove the page; return ``True`` only if it existed."""
        return self._pages.pop(page_id, None) is not None

    # -- categories ----------------------------------------------------------

    async def save_category(self, category: DocCategory) -> DocCategory:
        """Insert or overwrite the category keyed by ``category.id``; return a copy."""
        self._categories[category.id] = deepcopy(category)
        return deepcopy(category)

    async def get_category_by_id(self, category_id: str) -> DocCategory | None:
        """Return a copy of the category with id *category_id*, or ``None``."""
        cat = self._categories.get(category_id)
        return deepcopy(cat) if cat is not None else None

    async def get_category_by_slug(self, slug: str) -> DocCategory | None:
        """Return a copy of the first stored category whose slug matches, or ``None``."""
        match = next((c for c in self._categories.values() if c.slug == slug), None)
        return deepcopy(match) if match is not None else None

    async def list_categories(self) -> list[DocCategory]:
        """Return copies of all categories sorted by ``(order, name)``."""
        ordered = sorted(self._categories.values(), key=lambda c: (c.order, c.name))
        return [deepcopy(c) for c in ordered]

    # -- versions ------------------------------------------------------------

    async def save_version(self, version: DocVersion) -> DocVersion:
        """Append a copy of *version* to its page's history; return a copy."""
        self._versions.setdefault(version.page_id, []).append(deepcopy(version))
        return deepcopy(version)

    async def get_versions(self, page_id: str) -> list[DocVersion]:
        """Return copies of the page's versions, highest version number first."""
        history = self._versions.get(page_id, [])
        newest_first = sorted(history, key=lambda v: v.version, reverse=True)
        return [deepcopy(v) for v in newest_first]

    async def get_version(self, page_id: str, version: int) -> DocVersion | None:
        """Return a copy of the first stored entry numbered *version*, or ``None``."""
        match = next(
            (v for v in self._versions.get(page_id, []) if v.version == version),
            None,
        )
        return deepcopy(match) if match is not None else None
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: msaas-docs
3
+ Version: 0.1.0
4
+ Summary: Knowledge base and documentation system for SaaS products
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: msaas-api-core
7
+ Requires-Dist: msaas-errors
8
+ Requires-Dist: pydantic>=2.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: fastapi>=0.110.0; extra == 'dev'
11
+ Requires-Dist: httpx>=0.27; extra == 'dev'
12
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
13
+ Requires-Dist: pytest>=8.0; extra == 'dev'
14
+ Requires-Dist: ruff>=0.8; extra == 'dev'
15
+ Provides-Extra: fastapi
16
+ Requires-Dist: fastapi>=0.110.0; extra == 'fastapi'
@@ -0,0 +1,11 @@
1
+ docs/__init__.py,sha256=3L_bpn0jJPmCcRwCheQm8B9T_Lm98_U4wfIyBDiyPgY,739
2
+ docs/config.py,sha256=Da5btnLcX3NpR4qtjp0qLF8CcpyASiQWu46-NiZhHPs,1532
3
+ docs/models.py,sha256=dmLx27Uu-q26DcuQy8WRtD_FBjKX18yj4OmEQCgXNN4,3880
4
+ docs/renderer.py,sha256=81koPdYk2jML0HISZ7jc8UFCwxuh82j0o81ArupaIdI,9073
5
+ docs/router.py,sha256=2QUFCOR_qLfwtZOti1e0iY5gCVzi0DeKlKoOMhEw_sc,5324
6
+ docs/search.py,sha256=hdqjZF0brqHeqjVmY9rG3McUI2BSX8VcGl8mC1uFjMs,5242
7
+ docs/service.py,sha256=yedmJjUrLMSSYOPPLSmV5k1ht35FRovvkmgxB-JnOVo,7914
8
+ docs/store.py,sha256=f07Mtswg6LYtoRCH-GlK_ZFkBNCBX_ye4JnFzCiyP44,5223
9
+ msaas_docs-0.1.0.dist-info/METADATA,sha256=5FJlMmzbl0goONQRsRYqARQculG_uVcd8PQMpqUq0Rc,555
10
+ msaas_docs-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
11
+ msaas_docs-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any