msaas-docs 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msaas_docs-0.1.0/.gitignore +21 -0
- msaas_docs-0.1.0/PKG-INFO +16 -0
- msaas_docs-0.1.0/pyproject.toml +35 -0
- msaas_docs-0.1.0/src/docs/__init__.py +34 -0
- msaas_docs-0.1.0/src/docs/config.py +51 -0
- msaas_docs-0.1.0/src/docs/models.py +162 -0
- msaas_docs-0.1.0/src/docs/renderer.py +247 -0
- msaas_docs-0.1.0/src/docs/router.py +153 -0
- msaas_docs-0.1.0/src/docs/search.py +159 -0
- msaas_docs-0.1.0/src/docs/service.py +232 -0
- msaas_docs-0.1.0/src/docs/store.py +153 -0
- msaas_docs-0.1.0/tests/__init__.py +0 -0
- msaas_docs-0.1.0/tests/conftest.py +97 -0
- msaas_docs-0.1.0/tests/test_models.py +202 -0
- msaas_docs-0.1.0/tests/test_renderer.py +194 -0
- msaas_docs-0.1.0/tests/test_router.py +195 -0
- msaas_docs-0.1.0/tests/test_search.py +187 -0
- msaas_docs-0.1.0/tests/test_service.py +273 -0
- msaas_docs-0.1.0/tests/test_store.py +184 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
node_modules/
|
|
2
|
+
dist/
|
|
3
|
+
.next/
|
|
4
|
+
.turbo/
|
|
5
|
+
*.pyc
|
|
6
|
+
__pycache__/
|
|
7
|
+
.venv/
|
|
8
|
+
*.egg-info/
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.ruff_cache/
|
|
11
|
+
.env
|
|
12
|
+
.env.local
|
|
13
|
+
.env.*.local
|
|
14
|
+
.DS_Store
|
|
15
|
+
coverage/
|
|
16
|
+
|
|
17
|
+
# Runtime artifacts
|
|
18
|
+
logs_llm/
|
|
19
|
+
vectors.db
|
|
20
|
+
vectors.db-shm
|
|
21
|
+
vectors.db-wal
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: msaas-docs
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Knowledge base and documentation system for SaaS products
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: msaas-api-core
|
|
7
|
+
Requires-Dist: msaas-errors
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: fastapi>=0.110.0; extra == 'dev'
|
|
11
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
14
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
15
|
+
Provides-Extra: fastapi
|
|
16
|
+
Requires-Dist: fastapi>=0.110.0; extra == 'fastapi'
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "msaas-docs"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Knowledge base and documentation system for SaaS products"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"msaas-api-core",
|
|
8
|
+
"msaas-errors","pydantic>=2.0"
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.optional-dependencies]
|
|
12
|
+
fastapi = ["fastapi>=0.110.0"]
|
|
13
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.24", "httpx>=0.27", "fastapi>=0.110.0", "ruff>=0.8"]
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["hatchling"]
|
|
17
|
+
build-backend = "hatchling.build"
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["src/docs"]
|
|
21
|
+
|
|
22
|
+
[tool.ruff]
|
|
23
|
+
target-version = "py312"
|
|
24
|
+
line-length = 100
|
|
25
|
+
|
|
26
|
+
[tool.ruff.lint]
|
|
27
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "TC"]
|
|
28
|
+
|
|
29
|
+
[tool.pytest.ini_options]
|
|
30
|
+
testpaths = ["tests"]
|
|
31
|
+
asyncio_mode = "auto"
|
|
32
|
+
|
|
33
|
+
[tool.uv.sources]
|
|
34
|
+
msaas-api-core = { workspace = true }
|
|
35
|
+
msaas-errors = { workspace = true }
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Knowledge base and documentation system for SaaS products."""
|
|
2
|
+
|
|
3
|
+
from docs.config import DocsConfig, get_docs, init_docs
|
|
4
|
+
from docs.models import (
|
|
5
|
+
DocCategory,
|
|
6
|
+
DocPage,
|
|
7
|
+
DocSearchResult,
|
|
8
|
+
DocTree,
|
|
9
|
+
DocVersion,
|
|
10
|
+
PageStatus,
|
|
11
|
+
)
|
|
12
|
+
from docs.renderer import DocRenderer
|
|
13
|
+
from docs.router import create_docs_router
|
|
14
|
+
from docs.search import DocSearch
|
|
15
|
+
from docs.service import DocsService
|
|
16
|
+
from docs.store import DocsStore, InMemoryStore
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"DocCategory",
|
|
20
|
+
"DocPage",
|
|
21
|
+
"DocRenderer",
|
|
22
|
+
"DocSearch",
|
|
23
|
+
"DocSearchResult",
|
|
24
|
+
"DocTree",
|
|
25
|
+
"DocVersion",
|
|
26
|
+
"DocsConfig",
|
|
27
|
+
"DocsService",
|
|
28
|
+
"DocsStore",
|
|
29
|
+
"InMemoryStore",
|
|
30
|
+
"PageStatus",
|
|
31
|
+
"create_docs_router",
|
|
32
|
+
"get_docs",
|
|
33
|
+
"init_docs",
|
|
34
|
+
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Global configuration and singleton access for the docs module."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from docs.service import DocsService
|
|
10
|
+
|
|
11
|
+
_instance: DocsService | None = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True, slots=True)
class DocsConfig:
    """Configuration for the docs module.

    Frozen so a single instance can be shared safely once init_docs()
    has wired it into the global DocsService.
    """

    base_path: str = "/docs"  # URL prefix the docs routes are mounted under
    enable_versioning: bool = True  # keep per-page version history
    enable_search: bool = True  # when False, init_docs() builds no DocSearch index
    default_locale: str = "en"
    max_versions: int = 100  # cap on retained versions per page -- TODO confirm enforcement in service
    search_snippet_length: int = 160  # forwarded to DocSearch(snippet_length=...)
    extra: dict[str, object] = field(default_factory=dict)  # free-form extension values
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def init_docs(config: DocsConfig | None = None) -> DocsService:
    """Initialize the global DocsService singleton and return it.

    Any previously initialized instance is replaced.
    """
    global _instance  # noqa: PLW0603
    # Imported lazily to avoid circular imports at module load time.
    from docs.search import DocSearch
    from docs.service import DocsService
    from docs.store import InMemoryStore

    effective = DocsConfig() if config is None else config
    index = None
    if effective.enable_search:
        index = DocSearch(snippet_length=effective.search_snippet_length)
    _instance = DocsService(config=effective, store=InMemoryStore(), search=index)
    return _instance
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_docs() -> DocsService:
    """Return the global DocsService. Raises if not initialized."""
    instance = _instance
    if instance is None:
        raise RuntimeError("docs module not initialized -- call init_docs() first")
    return instance
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def reset_docs() -> None:
    """Reset the global singleton (useful for tests)."""
    global _instance  # noqa: PLW0603
    # After this, get_docs() raises until init_docs() is called again.
    _instance = None
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Domain models for the docs module."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import uuid
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PageStatus(StrEnum):
    """Publication lifecycle status for a documentation page."""

    DRAFT = "draft"  # default status for a newly created DocPage
    PUBLISHED = "published"
    ARCHIVED = "archived"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _generate_id() -> str:
|
|
22
|
+
return uuid.uuid4().hex[:16]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _now() -> datetime:
|
|
26
|
+
return datetime.now(timezone.utc)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def slugify(text: str) -> str:
    """Convert text to a URL-friendly slug.

    Lowercases, drops punctuation, turns whitespace/underscore runs into
    single hyphens, and trims leading/trailing hyphens.
    """
    lowered = text.lower().strip()
    no_punct = re.sub(r"[^\w\s-]", "", lowered)
    hyphenated = re.sub(r"[\s_]+", "-", no_punct)
    collapsed = re.sub(r"-+", "-", hyphenated)
    return collapsed.strip("-")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Core models
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class DocCategory(BaseModel):
    """A category that groups documentation pages."""

    id: str = Field(default_factory=_generate_id)  # 16-char random hex id
    name: str  # human-readable display name
    slug: str = ""  # URL segment; derived from name when left empty
    description: str = ""
    order: int = 0  # sort position among siblings
    parent_id: str | None = None  # id of the parent category when nested

    def model_post_init(self, _context: object) -> None:
        # Auto-derive the slug from the name when the caller did not set one.
        if not self.slug:
            self.slug = slugify(self.name)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class DocPage(BaseModel):
    """A single documentation page."""

    id: str = Field(default_factory=_generate_id)  # 16-char random hex id
    slug: str = ""  # URL segment; derived from title when left empty
    title: str
    content_markdown: str = ""  # authored Markdown source
    content_html: str = ""  # rendered counterpart of content_markdown -- filled elsewhere, TODO confirm
    parent_id: str | None = None  # id of the parent page for tree nesting
    order: int = 0  # sort position among siblings
    category: str = ""
    tags: list[str] = Field(default_factory=list)
    author_id: str = ""
    status: PageStatus = PageStatus.DRAFT
    version: int = 1  # current version number; snapshots are stored as DocVersion
    locale: str = "en"
    created_at: datetime = Field(default_factory=_now)
    updated_at: datetime = Field(default_factory=_now)
    published_at: datetime | None = None  # presumably set on publish -- confirm in DocsService

    def model_post_init(self, _context: object) -> None:
        # Auto-derive the slug from the title when the caller did not set one.
        if not self.slug:
            self.slug = slugify(self.title)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DocVersion(BaseModel):
    """An immutable snapshot of a page at a specific version."""

    page_id: str  # id of the DocPage this snapshot belongs to
    version: int
    content_markdown: str  # Markdown source as of this version
    author_id: str = ""
    message: str = ""  # optional commit-style description of the change
    created_at: datetime = Field(default_factory=_now)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class DocSearchResult(BaseModel):
    """A single search hit."""

    page_id: str
    title: str
    slug: str
    snippet: str = ""  # short excerpt around the match -- length presumably set by DocSearch, confirm
    score: float = 0.0  # relevance value; higher presumably ranks first -- confirm in DocSearch
    category: str = ""
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class DocTreeNode(BaseModel):
    """A node in the navigation tree (page with optional children)."""

    page_id: str
    title: str
    slug: str
    order: int = 0  # sort position among siblings
    children: list[DocTreeNode] = Field(default_factory=list)  # recursive: nested sub-pages
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class DocTree(BaseModel):
    """Hierarchical navigation tree, optionally scoped to a category."""

    category: str = ""  # category the tree is scoped to; "" presumably means unscoped -- confirm
    pages: list[DocTreeNode] = Field(default_factory=list)  # root-level nodes
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
# API request / response helpers
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class CreatePageRequest(BaseModel):
    """Request body for creating a new documentation page."""

    title: str
    content_markdown: str = ""
    category: str = ""
    parent_id: str | None = None
    tags: list[str] = Field(default_factory=list)
    author_id: str = ""
    locale: str = "en"
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class UpdatePageRequest(BaseModel):
    """Request body for a partial page update.

    NOTE(review): fields left as None presumably mean "leave unchanged" --
    confirm against the DocsService update handling.
    """

    title: str | None = None
    content_markdown: str | None = None
    category: str | None = None
    parent_id: str | None = None
    tags: list[str] | None = None
    order: int | None = None
    locale: str | None = None
    version_message: str = ""  # presumably stored on the resulting DocVersion.message -- confirm
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class RevertRequest(BaseModel):
    """Request body for reverting a page to an earlier version."""

    version: int  # target version number to revert to
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class CreateCategoryRequest(BaseModel):
    """Request body for creating a category."""

    name: str
    description: str = ""
    order: int = 0
    parent_id: str | None = None
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class PaginatedPages(BaseModel):
    """One page-numbered slice of DocPage results."""

    items: list[DocPage]  # the pages in this slice
    total: int  # total matching pages across all slices
    page: int  # page number -- indexing convention (0- or 1-based) not visible here, confirm in service
    per_page: int
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""Markdown-to-HTML renderer and HTML utilities.
|
|
2
|
+
|
|
3
|
+
Provides a lightweight, dependency-free renderer covering the most common
|
|
4
|
+
Markdown constructs: headings, paragraphs, bold, italic, inline code,
|
|
5
|
+
code blocks, links, images, unordered/ordered lists, blockquotes,
|
|
6
|
+
horizontal rules, and tables.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import html
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class TocEntry:
    """A single heading in a table of contents."""

    level: int  # heading level, 1..6
    text: str  # heading text with HTML tags stripped
    anchor: str  # the id attribute of the rendered <hN> element
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class DocRenderer:
    """Markdown renderer with TOC/text extraction helpers.

    The only mutable state is the anchor-deduplication map, which is
    reset at the start of every render_markdown() call.
    """

    heading_prefix: str = "doc-"  # prepended to every generated heading anchor id
    # Per-render count of each anchor base seen, used to de-duplicate heading ids.
    _anchor_counts: dict[str, int] = field(default_factory=dict, repr=False)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def render_markdown(self, content: str) -> str:
        """Convert Markdown text to HTML.

        Block constructs are tried in a fixed order per line; anything
        that matches nothing becomes part of a paragraph.
        """
        self._anchor_counts = {}  # fresh anchor de-dup state per render
        lines = content.split("\n")
        html_parts: list[str] = []
        i = 0
        while i < len(lines):
            line = lines[i]

            # Fenced code block
            if line.strip().startswith("```"):
                block, i = self._parse_code_block(lines, i)
                html_parts.append(block)
                continue

            # Table: a |...| row immediately followed by a |---|---| separator row
            if i + 1 < len(lines) and re.match(r"^\|.*\|$", line.strip()) and re.match(
                r"^\|[\s\-:|]+\|$", lines[i + 1].strip()
            ):
                table, i = self._parse_table(lines, i)
                html_parts.append(table)
                continue

            # Heading (# through ######)
            if m := re.match(r"^(#{1,6})\s+(.+)$", line):
                level = len(m.group(1))
                text = self._inline(m.group(2))
                anchor = self._make_anchor(text)
                html_parts.append(
                    f'<h{level} id="{anchor}">{text}</h{level}>'
                )
                i += 1
                continue

            # Horizontal rule (***, ---, or ___ of length >= 3)
            if re.match(r"^(\*{3,}|-{3,}|_{3,})\s*$", line.strip()):
                html_parts.append("<hr>")
                i += 1
                continue

            # Blockquote
            if line.strip().startswith(">"):
                block, i = self._parse_blockquote(lines, i)
                html_parts.append(block)
                continue

            # Unordered list
            if re.match(r"^[\s]*[-*+]\s+", line):
                block, i = self._parse_unordered_list(lines, i)
                html_parts.append(block)
                continue

            # Ordered list
            if re.match(r"^[\s]*\d+\.\s+", line):
                block, i = self._parse_ordered_list(lines, i)
                html_parts.append(block)
                continue

            # Blank line: paragraph separator, emits nothing
            if not line.strip():
                i += 1
                continue

            # Paragraph: accumulate consecutive non-blank lines until the
            # next block construct, joining them with single spaces.
            para_lines: list[str] = []
            while i < len(lines) and lines[i].strip() and not self._is_block_start(lines, i):
                para_lines.append(lines[i])
                i += 1
            html_parts.append(f"<p>{self._inline(' '.join(para_lines))}</p>")

        return "\n".join(html_parts)

    def extract_toc(self, rendered_html: str) -> list[TocEntry]:
        """Extract table-of-contents entries from rendered HTML."""
        entries: list[TocEntry] = []
        # Matches the <hN id="..."> elements emitted by render_markdown.
        for m in re.finditer(r'<h(\d)\s+id="([^"]+)">(.*?)</h\1>', rendered_html):
            entries.append(
                TocEntry(
                    level=int(m.group(1)),
                    text=self._strip_tags(m.group(3)),
                    anchor=m.group(2),
                )
            )
        return entries

    def extract_text(self, rendered_html: str) -> str:
        """Strip all HTML tags and return plain text for indexing."""
        text = re.sub(r"<[^>]+>", " ", rendered_html)
        text = html.unescape(text)
        # Collapse whitespace runs left behind by removed tags.
        return re.sub(r"\s+", " ", text).strip()

    # ------------------------------------------------------------------
    # Block parsers -- each returns (rendered HTML, next line index)
    # ------------------------------------------------------------------

    def _parse_code_block(self, lines: list[str], start: int) -> tuple[str, int]:
        opening = lines[start].strip()
        lang = opening.lstrip("`").strip()  # language tag after the opening fence, e.g. "python"
        i = start + 1
        code_lines: list[str] = []
        while i < len(lines):
            # Only a bare ``` line closes the fence; an unterminated block runs to EOF.
            if lines[i].strip() == "```":
                i += 1
                break
            code_lines.append(html.escape(lines[i]))
            i += 1
        lang_attr = f' class="language-{lang}"' if lang else ""
        code = "\n".join(code_lines)
        return f"<pre><code{lang_attr}>{code}</code></pre>", i

    def _parse_table(self, lines: list[str], start: int) -> tuple[str, int]:
        header_cells = [c.strip() for c in lines[start].strip().strip("|").split("|")]
        i = start + 2  # skip separator
        rows: list[list[str]] = []
        while i < len(lines) and re.match(r"^\|.*\|$", lines[i].strip()):
            cells = [c.strip() for c in lines[i].strip().strip("|").split("|")]
            rows.append(cells)
            i += 1
        parts = ["<table>", "<thead><tr>"]
        for cell in header_cells:
            parts.append(f"<th>{self._inline(cell)}</th>")
        parts.append("</tr></thead>")
        # <tbody> is emitted only when there is at least one body row.
        if rows:
            parts.append("<tbody>")
            for row in rows:
                parts.append("<tr>")
                for cell in row:
                    parts.append(f"<td>{self._inline(cell)}</td>")
                parts.append("</tr>")
            parts.append("</tbody>")
        parts.append("</table>")
        return "".join(parts), i

    def _parse_blockquote(self, lines: list[str], start: int) -> tuple[str, int]:
        i = start
        content_lines: list[str] = []
        while i < len(lines) and lines[i].strip().startswith(">"):
            content_lines.append(re.sub(r"^>\s?", "", lines[i]))
            i += 1
        # Consecutive quoted lines collapse into one paragraph.
        inner = self._inline(" ".join(content_lines))
        return f"<blockquote><p>{inner}</p></blockquote>", i

    def _parse_unordered_list(self, lines: list[str], start: int) -> tuple[str, int]:
        i = start
        items: list[str] = []
        while i < len(lines) and re.match(r"^[\s]*[-*+]\s+", lines[i]):
            text = re.sub(r"^[\s]*[-*+]\s+", "", lines[i])
            items.append(f"<li>{self._inline(text)}</li>")
            i += 1
        # NOTE(review): leading indentation is discarded, so nested lists
        # flatten into one level -- confirm this is intended.
        return "<ul>" + "".join(items) + "</ul>", i

    def _parse_ordered_list(self, lines: list[str], start: int) -> tuple[str, int]:
        i = start
        items: list[str] = []
        while i < len(lines) and re.match(r"^[\s]*\d+\.\s+", lines[i]):
            text = re.sub(r"^[\s]*\d+\.\s+", "", lines[i])
            items.append(f"<li>{self._inline(text)}</li>")
            i += 1
        return "<ol>" + "".join(items) + "</ol>", i

    # ------------------------------------------------------------------
    # Inline formatting
    # ------------------------------------------------------------------

    def _inline(self, text: str) -> str:
        """Apply inline Markdown transformations (order matters)."""
        # Inline code (must come before bold/italic to avoid conflicts)
        text = re.sub(r"`([^`]+)`", r"<code>\1</code>", text)
        # Images -- before links, since ![...](...) contains a link-shaped tail
        text = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", r'<img src="\2" alt="\1">', text)
        # Links
        text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r'<a href="\2">\1</a>', text)
        # Bold + italic (***...***) before the shorter delimiters
        text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<strong><em>\1</em></strong>", text)
        # Bold
        text = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", text)
        # Italic
        text = re.sub(r"\*(.+?)\*", r"<em>\1</em>", text)
        return text

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _is_block_start(self, lines: list[str], i: int) -> bool:
        # True when the line begins a non-paragraph block construct; used to
        # terminate paragraph accumulation in render_markdown.
        # NOTE(review): table rows are not detected here, so a table placed
        # directly after a paragraph is absorbed into that paragraph -- confirm.
        line = lines[i]
        if re.match(r"^#{1,6}\s+", line):
            return True
        if line.strip().startswith("```"):
            return True
        if re.match(r"^[\s]*[-*+]\s+", line):
            return True
        if re.match(r"^[\s]*\d+\.\s+", line):
            return True
        if line.strip().startswith(">"):
            return True
        if re.match(r"^(\*{3,}|-{3,}|_{3,})\s*$", line.strip()):
            return True
        return False

    def _make_anchor(self, text: str) -> str:
        # Slugify the heading text and de-duplicate repeats: the second
        # occurrence of "intro" becomes "doc-intro-1", the third "doc-intro-2".
        plain = self._strip_tags(text).lower()
        slug = re.sub(r"[^\w\s-]", "", plain)
        slug = re.sub(r"[\s]+", "-", slug).strip("-")
        base = f"{self.heading_prefix}{slug}"
        count = self._anchor_counts.get(base, 0)
        self._anchor_counts[base] = count + 1
        return base if count == 0 else f"{base}-{count}"

    @staticmethod
    def _strip_tags(text: str) -> str:
        # Remove every HTML tag, keeping only the text content.
        return re.sub(r"<[^>]+>", "", text)
|