msaas-docs 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ node_modules/
2
+ dist/
3
+ .next/
4
+ .turbo/
5
+ *.pyc
6
+ __pycache__/
7
+ .venv/
8
+ *.egg-info/
9
+ .pytest_cache/
10
+ .ruff_cache/
11
+ .env
12
+ .env.local
13
+ .env.*.local
14
+ .DS_Store
15
+ coverage/
16
+
17
+ # Runtime artifacts
18
+ logs_llm/
19
+ vectors.db
20
+ vectors.db-shm
21
+ vectors.db-wal
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: msaas-docs
3
+ Version: 0.1.0
4
+ Summary: Knowledge base and documentation system for SaaS products
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: msaas-api-core
7
+ Requires-Dist: msaas-errors
8
+ Requires-Dist: pydantic>=2.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: fastapi>=0.110.0; extra == 'dev'
11
+ Requires-Dist: httpx>=0.27; extra == 'dev'
12
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
13
+ Requires-Dist: pytest>=8.0; extra == 'dev'
14
+ Requires-Dist: ruff>=0.8; extra == 'dev'
15
+ Provides-Extra: fastapi
16
+ Requires-Dist: fastapi>=0.110.0; extra == 'fastapi'
@@ -0,0 +1,35 @@
1
+ [project]
2
+ name = "msaas-docs"
3
+ version = "0.1.0"
4
+ description = "Knowledge base and documentation system for SaaS products"
5
+ requires-python = ">=3.12"
6
+ dependencies = [
7
+ "msaas-api-core",
8
+ "msaas-errors","pydantic>=2.0"
9
+ ]
10
+
11
+ [project.optional-dependencies]
12
+ fastapi = ["fastapi>=0.110.0"]
13
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.24", "httpx>=0.27", "fastapi>=0.110.0", "ruff>=0.8"]
14
+
15
+ [build-system]
16
+ requires = ["hatchling"]
17
+ build-backend = "hatchling.build"
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+ packages = ["src/docs"]
21
+
22
+ [tool.ruff]
23
+ target-version = "py312"
24
+ line-length = 100
25
+
26
+ [tool.ruff.lint]
27
+ select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "TCH"]
28
+
29
+ [tool.pytest.ini_options]
30
+ testpaths = ["tests"]
31
+ asyncio_mode = "auto"
32
+
33
+ [tool.uv.sources]
34
+ msaas-api-core = { workspace = true }
35
+ msaas-errors = { workspace = true }
@@ -0,0 +1,34 @@
1
+ """Knowledge base and documentation system for SaaS products."""
2
+
3
+ from docs.config import DocsConfig, get_docs, init_docs
4
+ from docs.models import (
5
+ DocCategory,
6
+ DocPage,
7
+ DocSearchResult,
8
+ DocTree,
9
+ DocVersion,
10
+ PageStatus,
11
+ )
12
+ from docs.renderer import DocRenderer
13
+ from docs.router import create_docs_router
14
+ from docs.search import DocSearch
15
+ from docs.service import DocsService
16
+ from docs.store import DocsStore, InMemoryStore
17
+
18
+ __all__ = [
19
+ "DocCategory",
20
+ "DocPage",
21
+ "DocRenderer",
22
+ "DocSearch",
23
+ "DocSearchResult",
24
+ "DocTree",
25
+ "DocVersion",
26
+ "DocsConfig",
27
+ "DocsService",
28
+ "DocsStore",
29
+ "InMemoryStore",
30
+ "PageStatus",
31
+ "create_docs_router",
32
+ "get_docs",
33
+ "init_docs",
34
+ ]
@@ -0,0 +1,51 @@
1
+ """Global configuration and singleton access for the docs module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from docs.service import DocsService
10
+
11
+ _instance: DocsService | None = None
12
+
13
+
14
+ @dataclass(frozen=True, slots=True)
15
+ class DocsConfig:
16
+ """Configuration for the docs module."""
17
+
18
+ base_path: str = "/docs"
19
+ enable_versioning: bool = True
20
+ enable_search: bool = True
21
+ default_locale: str = "en"
22
+ max_versions: int = 100
23
+ search_snippet_length: int = 160
24
+ extra: dict[str, object] = field(default_factory=dict)
25
+
26
+
27
def init_docs(config: DocsConfig | None = None) -> DocsService:
    """Build a ``DocsService`` and install it as the module-wide singleton.

    Args:
        config: Settings to use; a default ``DocsConfig`` is created when omitted.

    Returns:
        The newly created service. Any previously installed instance is replaced.
    """
    global _instance  # noqa: PLW0603
    # Imported lazily so that importing this module does not pull in the service layer.
    from docs.search import DocSearch
    from docs.service import DocsService
    from docs.store import InMemoryStore

    cfg = config or DocsConfig()
    store = InMemoryStore()
    # Search is optional: the service receives ``None`` when it is disabled.
    search = None
    if cfg.enable_search:
        search = DocSearch(snippet_length=cfg.search_snippet_length)
    service = DocsService(config=cfg, store=store, search=search)
    _instance = service
    return service
39
+
40
+
41
def get_docs() -> DocsService:
    """Return the installed ``DocsService`` singleton.

    Raises:
        RuntimeError: If ``init_docs()`` has not been called (or the singleton was reset).
    """
    service = _instance
    if service is not None:
        return service
    raise RuntimeError("docs module not initialized -- call init_docs() first")
46
+
47
+
48
def reset_docs() -> None:
    """Drop the module-wide singleton so the next ``get_docs()`` raises (handy in tests)."""
    global _instance  # noqa: PLW0603
    _instance = None
@@ -0,0 +1,162 @@
1
+ """Domain models for the docs module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import uuid
7
+ from datetime import datetime, timezone
8
+ from enum import StrEnum
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+
13
+ class PageStatus(StrEnum):
14
+ """Publication lifecycle status for a documentation page."""
15
+
16
+ DRAFT = "draft"
17
+ PUBLISHED = "published"
18
+ ARCHIVED = "archived"
19
+
20
+
21
def _generate_id() -> str:
    """Return a random 16-character lowercase hex identifier (first half of a UUID4)."""
    full_hex = uuid.uuid4().hex
    return full_hex[:16]
23
+
24
+
25
def _now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
27
+
28
+
29
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, hyphen-separated URL slug.

    Punctuation is dropped, runs of whitespace/underscores become a single hyphen,
    and leading/trailing hyphens are removed. Text with no word characters yields ``""``.
    """
    lowered = text.lower().strip()
    # Keep only word characters, whitespace and hyphens.
    cleaned = re.sub(r"[^\w\s-]", "", lowered)
    hyphenated = re.sub(r"[\s_]+", "-", cleaned)
    collapsed = re.sub(r"-+", "-", hyphenated)
    return collapsed.strip("-")
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Core models
39
+ # ---------------------------------------------------------------------------
40
+
41
+
42
class DocCategory(BaseModel):
    """A named grouping of documentation pages.

    When ``slug`` is left empty it is derived from ``name`` after validation.
    """

    id: str = Field(default_factory=_generate_id)
    name: str
    slug: str = ""
    description: str = ""
    order: int = 0
    # Identifier of an enclosing category, if any.
    parent_id: str | None = None

    def model_post_init(self, _context: object) -> None:
        """Fill in a slug from the category name when none was supplied."""
        if self.slug:
            return
        self.slug = slugify(self.name)
55
+
56
+
57
class DocPage(BaseModel):
    """One documentation page, holding both its Markdown source and rendered HTML.

    When ``slug`` is left empty it is derived from ``title`` after validation.
    New pages start as version 1 in ``DRAFT`` status with ``locale`` ``"en"``.
    """

    id: str = Field(default_factory=_generate_id)
    slug: str = ""
    title: str
    content_markdown: str = ""
    content_html: str = ""
    # Identifier of the parent page, if any.
    parent_id: str | None = None
    order: int = 0
    category: str = ""
    tags: list[str] = Field(default_factory=list)
    author_id: str = ""
    status: PageStatus = PageStatus.DRAFT
    version: int = 1
    locale: str = "en"
    # Both timestamps default to the creation moment (UTC).
    created_at: datetime = Field(default_factory=_now)
    updated_at: datetime = Field(default_factory=_now)
    published_at: datetime | None = None

    def model_post_init(self, _context: object) -> None:
        """Fill in a slug from the page title when none was supplied."""
        if self.slug:
            return
        self.slug = slugify(self.title)
80
+
81
+
82
class DocVersion(BaseModel):
    """Snapshot of a page's Markdown content at one version number."""

    page_id: str
    version: int
    content_markdown: str
    author_id: str = ""
    # Optional note describing the change.
    message: str = ""
    created_at: datetime = Field(default_factory=_now)
91
+
92
+
93
class DocSearchResult(BaseModel):
    """One hit returned by a documentation search."""

    page_id: str
    title: str
    slug: str
    # Text excerpt for the hit; empty by default.
    snippet: str = ""
    score: float = 0.0
    category: str = ""
102
+
103
+
104
class DocTreeNode(BaseModel):
    """Navigation-tree entry for a page, with any child entries nested beneath it."""

    page_id: str
    title: str
    slug: str
    order: int = 0
    # Recursive: each child is itself a ``DocTreeNode``.
    children: list[DocTreeNode] = Field(default_factory=list)
112
+
113
+
114
class DocTree(BaseModel):
    """Navigation tree of pages; ``category`` is empty when the tree is not scoped."""

    category: str = ""
    # Top-level nodes; descendants hang off each node's ``children``.
    pages: list[DocTreeNode] = Field(default_factory=list)
119
+
120
+
121
+ # ---------------------------------------------------------------------------
122
+ # API request / response helpers
123
+ # ---------------------------------------------------------------------------
124
+
125
+
126
class CreatePageRequest(BaseModel):
    """Request payload for creating a page; only ``title`` is required."""

    title: str
    content_markdown: str = ""
    category: str = ""
    parent_id: str | None = None
    tags: list[str] = Field(default_factory=list)
    author_id: str = ""
    locale: str = "en"
134
+
135
+
136
class UpdatePageRequest(BaseModel):
    """Request payload for updating a page.

    Every page field is optional and defaults to ``None``.
    """

    title: str | None = None
    content_markdown: str | None = None
    category: str | None = None
    parent_id: str | None = None
    tags: list[str] | None = None
    order: int | None = None
    locale: str | None = None
    # Optional note to attach to the update.
    version_message: str = ""
145
+
146
+
147
class RevertRequest(BaseModel):
    """Request payload naming the version number to revert to."""

    version: int
149
+
150
+
151
class CreateCategoryRequest(BaseModel):
    """Request payload for creating a category; only ``name`` is required."""

    name: str
    description: str = ""
    order: int = 0
    parent_id: str | None = None
156
+
157
+
158
class PaginatedPages(BaseModel):
    """One page of ``DocPage`` results together with paging metadata."""

    items: list[DocPage]
    # Overall result count (presumably across all pages -- confirm against the caller).
    total: int
    page: int
    per_page: int
@@ -0,0 +1,247 @@
1
+ """Markdown-to-HTML renderer and HTML utilities.
2
+
3
+ Provides a lightweight, dependency-free renderer covering the most common
4
+ Markdown constructs: headings, paragraphs, bold, italic, inline code,
5
+ code blocks, links, images, unordered/ordered lists, blockquotes,
6
+ horizontal rules, and tables.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import html
12
+ import re
13
+ from dataclasses import dataclass, field
14
+
15
+
16
@dataclass
class TocEntry:
    """One heading as it appears in a table of contents."""

    # Heading depth taken from the ``<hN>`` tag.
    level: int
    # Heading text with HTML tags removed.
    text: str
    # Value of the heading's ``id`` attribute.
    anchor: str
23
+
24
+
25
@dataclass
class DocRenderer:
    """Lightweight Markdown renderer with TOC and plain-text extraction helpers.

    ``heading_prefix`` is prepended to every generated heading anchor.
    ``_anchor_counts`` records how often each anchor was issued so repeated headings
    get unique ids. It is reset at the start of every ``render_markdown`` call, so one
    instance can render many documents in sequence.

    Raw HTML in the Markdown source is passed through unchanged; only fenced code,
    inline code spans and generated attribute values are escaped.
    """

    heading_prefix: str = "doc-"
    _anchor_counts: dict[str, int] = field(default_factory=dict, repr=False)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def render_markdown(self, content: str) -> str:
        """Convert Markdown text to HTML.

        Args:
            content: Markdown source; lines are separated by ``"\\n"``.

        Returns:
            The rendered block elements joined with newlines.
        """
        self._anchor_counts = {}
        lines = content.split("\n")
        html_parts: list[str] = []
        i = 0
        while i < len(lines):
            line = lines[i]
            stripped = line.strip()

            # Fenced code block
            if stripped.startswith("```"):
                block, i = self._parse_code_block(lines, i)
                html_parts.append(block)
                continue

            # Table (header row followed by a separator row)
            if self._is_table_start(lines, i):
                table, i = self._parse_table(lines, i)
                html_parts.append(table)
                continue

            # Heading
            if m := re.match(r"^(#{1,6})\s+(.+)$", line):
                level = len(m.group(1))
                text = self._inline(m.group(2))
                anchor = self._make_anchor(text)
                html_parts.append(f'<h{level} id="{anchor}">{text}</h{level}>')
                i += 1
                continue

            # Horizontal rule
            if re.match(r"^(\*{3,}|-{3,}|_{3,})\s*$", stripped):
                html_parts.append("<hr>")
                i += 1
                continue

            # Blockquote
            if stripped.startswith(">"):
                block, i = self._parse_blockquote(lines, i)
                html_parts.append(block)
                continue

            # Unordered list
            if re.match(r"^[\s]*[-*+]\s+", line):
                block, i = self._parse_unordered_list(lines, i)
                html_parts.append(block)
                continue

            # Ordered list
            if re.match(r"^[\s]*\d+\.\s+", line):
                block, i = self._parse_ordered_list(lines, i)
                html_parts.append(block)
                continue

            # Blank line
            if not stripped:
                i += 1
                continue

            # Paragraph. The current line is consumed unconditionally: it already failed
            # every block test above, and re-testing it with the looser ``_is_block_start``
            # (e.g. for "# " with no heading text) would consume nothing and loop forever.
            para_lines: list[str] = [line]
            i += 1
            while i < len(lines) and lines[i].strip() and not self._is_block_start(lines, i):
                para_lines.append(lines[i])
                i += 1
            html_parts.append(f"<p>{self._inline(' '.join(para_lines))}</p>")

        return "\n".join(html_parts)

    def extract_toc(self, rendered_html: str) -> list[TocEntry]:
        """Extract table-of-contents entries from rendered HTML.

        Only headings of the form ``<hN id="...">...</hN>`` with a non-empty id are
        picked up; entry text has inner tags removed.
        """
        return [
            TocEntry(
                level=int(m.group(1)),
                text=self._strip_tags(m.group(3)),
                anchor=m.group(2),
            )
            for m in re.finditer(r'<h(\d)\s+id="([^"]+)">(.*?)</h\1>', rendered_html)
        ]

    def extract_text(self, rendered_html: str) -> str:
        """Strip all HTML tags and return whitespace-normalised plain text for indexing."""
        text = re.sub(r"<[^>]+>", " ", rendered_html)
        text = html.unescape(text)
        return re.sub(r"\s+", " ", text).strip()

    # ------------------------------------------------------------------
    # Block parsers
    # ------------------------------------------------------------------
    # Each parser takes the full line list and a start index, and returns the rendered
    # HTML together with the index of the first line it did not consume.

    def _parse_code_block(self, lines: list[str], start: int) -> tuple[str, int]:
        """Render a fenced code block; an unterminated fence runs to the end of input."""
        opening = lines[start].strip()
        fence_len = len(opening) - len(opening.lstrip("`"))
        info = opening[fence_len:].strip()
        # Only the first word of the info string names the language. It is escaped
        # because it ends up inside an attribute value.
        lang = html.escape(info.split()[0]) if info else ""
        i = start + 1
        code_lines: list[str] = []
        while i < len(lines):
            candidate = lines[i].strip()
            # A closing fence is all backticks and at least as long as the opening one,
            # so a longer fence can wrap content that itself contains ``` lines.
            if len(candidate) >= fence_len and set(candidate) == {"`"}:
                i += 1
                break
            code_lines.append(html.escape(lines[i]))
            i += 1
        lang_attr = f' class="language-{lang}"' if lang else ""
        code = "\n".join(code_lines)
        return f"<pre><code{lang_attr}>{code}</code></pre>", i

    def _parse_table(self, lines: list[str], start: int) -> tuple[str, int]:
        """Render a pipe table whose header is at ``start`` and separator at ``start + 1``."""
        header_cells = [c.strip() for c in lines[start].strip().strip("|").split("|")]
        i = start + 2  # skip separator
        rows: list[list[str]] = []
        while i < len(lines) and re.match(r"^\|.*\|$", lines[i].strip()):
            rows.append([c.strip() for c in lines[i].strip().strip("|").split("|")])
            i += 1
        parts = ["<table>", "<thead><tr>"]
        parts.extend(f"<th>{self._inline(cell)}</th>" for cell in header_cells)
        parts.append("</tr></thead>")
        if rows:
            parts.append("<tbody>")
            for row in rows:
                parts.append("<tr>")
                parts.extend(f"<td>{self._inline(cell)}</td>" for cell in row)
                parts.append("</tr>")
            parts.append("</tbody>")
        parts.append("</table>")
        return "".join(parts), i

    def _parse_blockquote(self, lines: list[str], start: int) -> tuple[str, int]:
        """Render consecutive ``>`` lines as one blockquote paragraph."""
        i = start
        content_lines: list[str] = []
        while i < len(lines) and lines[i].strip().startswith(">"):
            # Allow leading whitespace before the marker: detection uses the stripped
            # line, so removal must as well or the ">" leaks into the output.
            content_lines.append(re.sub(r"^\s*>\s?", "", lines[i]))
            i += 1
        inner = self._inline(" ".join(content_lines))
        return f"<blockquote><p>{inner}</p></blockquote>", i

    def _parse_unordered_list(self, lines: list[str], start: int) -> tuple[str, int]:
        """Render consecutive ``-``/``*``/``+`` items as a flat ``<ul>``."""
        i = start
        items: list[str] = []
        while i < len(lines) and re.match(r"^[\s]*[-*+]\s+", lines[i]):
            text = re.sub(r"^[\s]*[-*+]\s+", "", lines[i])
            items.append(f"<li>{self._inline(text)}</li>")
            i += 1
        return "<ul>" + "".join(items) + "</ul>", i

    def _parse_ordered_list(self, lines: list[str], start: int) -> tuple[str, int]:
        """Render consecutive ``N.`` items as a flat ``<ol>``."""
        i = start
        items: list[str] = []
        while i < len(lines) and re.match(r"^[\s]*\d+\.\s+", lines[i]):
            text = re.sub(r"^[\s]*\d+\.\s+", "", lines[i])
            items.append(f"<li>{self._inline(text)}</li>")
            i += 1
        return "<ol>" + "".join(items) + "</ol>", i

    # ------------------------------------------------------------------
    # Inline formatting
    # ------------------------------------------------------------------

    def _inline(self, text: str) -> str:
        """Apply inline Markdown transformations (code, images, links, emphasis)."""
        # NUL delimits the code-span placeholders below; it is never valid in HTML,
        # so drop any that arrive in the input to keep placeholders unambiguous.
        text = text.replace("\x00", "")

        # Inline code is escaped and parked behind a placeholder so that the link and
        # emphasis rules below cannot rewrite its contents.
        code_spans: list[str] = []

        def _stash(match: re.Match[str]) -> str:
            code_spans.append(f"<code>{html.escape(match.group(1), quote=False)}</code>")
            return f"\x00{len(code_spans) - 1}\x00"

        text = re.sub(r"`([^`]+)`", _stash, text)
        # Images
        text = re.sub(
            r"!\[([^\]]*)\]\(([^)]+)\)",
            lambda m: (
                f'<img src="{self._escape_attr(m.group(2))}" '
                f'alt="{self._escape_attr(m.group(1))}">'
            ),
            text,
        )
        # Links
        text = re.sub(
            r"\[([^\]]+)\]\(([^)]+)\)",
            lambda m: f'<a href="{self._escape_attr(m.group(2))}">{m.group(1)}</a>',
            text,
        )
        # Bold + italic
        text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<strong><em>\1</em></strong>", text)
        # Bold
        text = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", text)
        # Italic
        text = re.sub(r"\*(.+?)\*", r"<em>\1</em>", text)
        # Put the protected code spans back.
        return re.sub(r"\x00(\d+)\x00", lambda m: code_spans[int(m.group(1))], text)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_attr(value: str) -> str:
        """Escape double quotes so ``value`` cannot terminate its attribute early."""
        return value.replace('"', "&quot;")

    @staticmethod
    def _is_table_start(lines: list[str], i: int) -> bool:
        """Return True when line ``i`` is a table header followed by a separator row."""
        if i + 1 >= len(lines):
            return False
        return bool(
            re.match(r"^\|.*\|$", lines[i].strip())
            and re.match(r"^\|[\s\-:|]+\|$", lines[i + 1].strip())
        )

    def _is_block_start(self, lines: list[str], i: int) -> bool:
        """Return True when line ``i`` begins a block that must end the current paragraph."""
        line = lines[i]
        stripped = line.strip()
        if re.match(r"^#{1,6}\s+", line):
            return True
        if stripped.startswith("```"):
            return True
        if re.match(r"^[\s]*[-*+]\s+", line):
            return True
        if re.match(r"^[\s]*\d+\.\s+", line):
            return True
        if stripped.startswith(">"):
            return True
        if re.match(r"^(\*{3,}|-{3,}|_{3,})\s*$", stripped):
            return True
        # Without this a table directly under paragraph text is swallowed into the <p>.
        return self._is_table_start(lines, i)

    def _make_anchor(self, text: str) -> str:
        """Build a unique heading id: the first use is bare, repeats get ``-1``, ``-2``, ..."""
        plain = self._strip_tags(text).lower()
        slug = re.sub(r"[^\w\s-]", "", plain)
        slug = re.sub(r"[\s]+", "-", slug).strip("-")
        base = f"{self.heading_prefix}{slug}"
        count = self._anchor_counts.get(base, 0)
        self._anchor_counts[base] = count + 1
        return base if count == 0 else f"{base}-{count}"

    @staticmethod
    def _strip_tags(text: str) -> str:
        """Remove anything that looks like an HTML tag from ``text``."""
        return re.sub(r"<[^>]+>", "", text)