mkforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mkforge/__init__.py +76 -0
- mkforge/_metadata.py +4 -0
- mkforge/assets.py +387 -0
- mkforge/content/__init__.py +62 -0
- mkforge/content/_base.py +26 -0
- mkforge/content/code.py +39 -0
- mkforge/content/image.py +42 -0
- mkforge/content/lists.py +80 -0
- mkforge/content/misc.py +47 -0
- mkforge/content/paragraph.py +70 -0
- mkforge/content/table.py +161 -0
- mkforge/content/text.py +118 -0
- mkforge/document.py +314 -0
- mkforge/errors.py +88 -0
- mkforge/input_checks.py +93 -0
- mkforge/py.typed +1 -0
- mkforge/rendering.py +513 -0
- mkforge/validation/__init__.py +21 -0
- mkforge/validation/markdown_contracts.py +667 -0
- mkforge/verification/__init__.py +25 -0
- mkforge/verification/api.py +134 -0
- mkforge/verification/diagnostic_pattern.py +41 -0
- mkforge/verification/policy.py +121 -0
- mkforge/verification/registry.py +76 -0
- mkforge/verification/rules/__init__.py +1 -0
- mkforge/verification/rules/gfm/__init__.py +1 -0
- mkforge/verification/rules/gfm/gfm001_table_delimiter.py +97 -0
- mkforge/verification/rules/gfm/gfm002_table_column_count.py +132 -0
- mkforge/verification/rules/gfm/gfm003_task_list_marker.py +52 -0
- mkforge/verification/rules/markdown/__init__.py +1 -0
- mkforge/verification/rules/markdown/_shared.py +607 -0
- mkforge/verification/rules/markdown/markdownlint_remaining.py +139 -0
- mkforge/verification/rules/markdown/md001_heading_increment.py +36 -0
- mkforge/verification/rules/markdown/md002_first_heading_level.py +42 -0
- mkforge/verification/rules/markdown/md003_heading_style.py +40 -0
- mkforge/verification/rules/markdown/md004_unordered_list_style.py +43 -0
- mkforge/verification/rules/markdown/md005_md006_md007_list_indentation.py +112 -0
- mkforge/verification/rules/markdown/md009_trailing_spaces.py +40 -0
- mkforge/verification/rules/markdown/md010_hard_tabs.py +37 -0
- mkforge/verification/rules/markdown/md011_reversed_link_syntax.py +36 -0
- mkforge/verification/rules/markdown/md012_multiple_blank_lines.py +38 -0
- mkforge/verification/rules/markdown/md013_line_length.py +40 -0
- mkforge/verification/rules/markdown/md014_command_prompt.py +42 -0
- mkforge/verification/rules/markdown/md018_atx_heading_space.py +36 -0
- mkforge/verification/rules/markdown/md019_md021_atx_closed_spaces.py +81 -0
- mkforge/verification/rules/markdown/md020_closed_atx_heading_space.py +39 -0
- mkforge/verification/rules/markdown/md022_md023_heading_blanks.py +85 -0
- mkforge/verification/rules/markdown/md024_md025_duplicate_headings.py +84 -0
- mkforge/verification/rules/markdown/md026_heading_punctuation.py +36 -0
- mkforge/verification/rules/markdown/md027_md028_blockquote.py +78 -0
- mkforge/verification/rules/markdown/md029_md030_list_prefix.py +87 -0
- mkforge/verification/rules/markdown/md031_md032_fence_list_blanks.py +100 -0
- mkforge/verification/rules/markdown/md033_inline_html.py +41 -0
- mkforge/verification/rules/markdown/md034_bare_url.py +78 -0
- mkforge/verification/rules/markdown/md035_horizontal_rule.py +49 -0
- mkforge/verification/rules/markdown/md036_emphasis_heading.py +41 -0
- mkforge/verification/rules/markdown/md037_emphasis_marker_space.py +38 -0
- mkforge/verification/rules/markdown/md038_code_span_space.py +36 -0
- mkforge/verification/rules/markdown/md039_link_text_space.py +36 -0
- mkforge/verification/rules/markdown/md040_md041_md046_md047_blocks.py +134 -0
- mkforge/verification/rules/markdown/mkf001_local_resource_exists.py +101 -0
- mkforge/verification/settings.py +217 -0
- mkforge/verification/source_scan.py +49 -0
- mkforge-0.1.0.dist-info/METADATA +158 -0
- mkforge-0.1.0.dist-info/RECORD +67 -0
- mkforge-0.1.0.dist-info/WHEEL +4 -0
- mkforge-0.1.0.dist-info/licenses/LICENSE +22 -0
mkforge/__init__.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Programmatic Markdown report generation for Python."""
|
|
2
|
+
|
|
3
|
+
from mkforge._metadata import PROJECT_DESCRIPTION, PROJECT_NAME
|
|
4
|
+
from mkforge.content import (
|
|
5
|
+
BlockQuote,
|
|
6
|
+
BulletList,
|
|
7
|
+
CodeBlock,
|
|
8
|
+
HorizontalRule,
|
|
9
|
+
Image,
|
|
10
|
+
LineBreak,
|
|
11
|
+
Link,
|
|
12
|
+
NumberedList,
|
|
13
|
+
Paragraph,
|
|
14
|
+
Table,
|
|
15
|
+
Text,
|
|
16
|
+
)
|
|
17
|
+
from mkforge.document import Chapter, Report, Section
|
|
18
|
+
from mkforge.errors import (
|
|
19
|
+
DownloadAssetError,
|
|
20
|
+
InvalidChildError,
|
|
21
|
+
InvalidTableError,
|
|
22
|
+
MissingAssetError,
|
|
23
|
+
ReportDepthError,
|
|
24
|
+
)
|
|
25
|
+
from mkforge.validation import (
|
|
26
|
+
validate_markdown_chapters,
|
|
27
|
+
validate_markdown_headings,
|
|
28
|
+
validate_markdown_images,
|
|
29
|
+
validate_markdown_yaml,
|
|
30
|
+
)
|
|
31
|
+
from mkforge.verification import (
|
|
32
|
+
Diagnostic,
|
|
33
|
+
MarkdownLine,
|
|
34
|
+
MarkdownRule,
|
|
35
|
+
MarkdownSource,
|
|
36
|
+
VerificationReport,
|
|
37
|
+
VerificationSettings,
|
|
38
|
+
verify_markdown,
|
|
39
|
+
verify_markdown_file,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
"PROJECT_DESCRIPTION",
|
|
44
|
+
"PROJECT_NAME",
|
|
45
|
+
"BlockQuote",
|
|
46
|
+
"BulletList",
|
|
47
|
+
"Chapter",
|
|
48
|
+
"CodeBlock",
|
|
49
|
+
"Diagnostic",
|
|
50
|
+
"DownloadAssetError",
|
|
51
|
+
"HorizontalRule",
|
|
52
|
+
"Image",
|
|
53
|
+
"InvalidChildError",
|
|
54
|
+
"InvalidTableError",
|
|
55
|
+
"LineBreak",
|
|
56
|
+
"Link",
|
|
57
|
+
"MarkdownLine",
|
|
58
|
+
"MarkdownRule",
|
|
59
|
+
"MarkdownSource",
|
|
60
|
+
"MissingAssetError",
|
|
61
|
+
"NumberedList",
|
|
62
|
+
"Paragraph",
|
|
63
|
+
"Report",
|
|
64
|
+
"ReportDepthError",
|
|
65
|
+
"Section",
|
|
66
|
+
"Table",
|
|
67
|
+
"Text",
|
|
68
|
+
"VerificationReport",
|
|
69
|
+
"VerificationSettings",
|
|
70
|
+
"validate_markdown_chapters",
|
|
71
|
+
"validate_markdown_headings",
|
|
72
|
+
"validate_markdown_images",
|
|
73
|
+
"validate_markdown_yaml",
|
|
74
|
+
"verify_markdown",
|
|
75
|
+
"verify_markdown_file",
|
|
76
|
+
]
|
mkforge/_metadata.py
ADDED
mkforge/assets.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
"""Asset management for report save operations.
|
|
2
|
+
|
|
3
|
+
Handles collection, verification, and copying of image assets referenced in
|
|
4
|
+
a report tree. Local images are verified to exist on disk. When
|
|
5
|
+
``copy_assets`` is requested, both local and remote images are copied into
|
|
6
|
+
an ``assets/`` directory next to the output file and Markdown links are
|
|
7
|
+
rewritten.
|
|
8
|
+
|
|
9
|
+
Remote images (URLs containing ``://``, protocol-relative ``//`` paths, or
|
|
10
|
+
``www.`` prefixes) are downloaded via ``urllib.request``. A download failure
|
|
11
|
+
raises ``DownloadAssetError``.
|
|
12
|
+
|
|
13
|
+
This module is used exclusively by ``rendering.save_report``.
|
|
14
|
+
|
|
15
|
+
Import strategy
|
|
16
|
+
---------------
|
|
17
|
+
``Image``, ``Chapter``, ``Section``, and ``Report`` are imported inside
|
|
18
|
+
functions rather than at module level (annotated ``# noqa: PLC0415``). This
|
|
19
|
+
breaks the circular import that would otherwise arise from:
|
|
20
|
+
|
|
21
|
+
assets → content.image → (transitive) → rendering → assets
|
|
22
|
+
|
|
23
|
+
Deferred imports are the standard Python idiom for this pattern. Do not
|
|
24
|
+
move them to the module level without verifying that the import cycle is
|
|
25
|
+
resolved by other means.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import ipaddress
|
|
31
|
+
import shutil
|
|
32
|
+
import socket
|
|
33
|
+
import urllib.parse
|
|
34
|
+
import urllib.request
|
|
35
|
+
import warnings
|
|
36
|
+
from collections.abc import Sequence
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from urllib.error import URLError
|
|
39
|
+
|
|
40
|
+
from mkforge.errors import DownloadAssetError, MissingAssetError
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _is_remote(path: str) -> bool:
|
|
44
|
+
"""Return True if the path is a remote reference.
|
|
45
|
+
|
|
46
|
+
A path is remote when it contains a URI scheme (``://``), uses a
|
|
47
|
+
protocol-relative URL (``//``), or starts with ``www.``. Everything
|
|
48
|
+
else is treated as a local filesystem path.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
path: Image path string to classify.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
True when the path is a remote reference.
|
|
55
|
+
"""
|
|
56
|
+
return "://" in path or path.startswith(("//", "www."))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def collect_local_image_paths(report: object) -> list[Path]:
    """Gather the resolved filesystem path of every local image in a report.

    The tree is traversed depth-first in document order. Only ``Chapter``
    and ``Section`` nodes are descended into; an ``Image`` is recorded when
    its path is not classified as remote. Each path is resolved against the
    working directory in effect when this function runs.

    Args:
        report: Report instance to inspect. Any other object yields an
            empty list.

    Returns:
        Resolved ``Path`` objects, one per local ``Image`` occurrence.
        Repeated references are kept so every occurrence can be reported.
    """
    # Deferred imports: see the module docstring (import-cycle avoidance).
    from mkforge.content.image import Image  # noqa: PLC0415
    from mkforge.document import Chapter, Report, Section  # noqa: PLC0415

    def _gather(nodes: Sequence[object]) -> list[Path]:
        """Return local image paths found under ``nodes``, in order.

        Args:
            nodes: Sequence of Chapter, Section, or content element nodes.
        """
        found: list[Path] = []
        for node in nodes:
            if isinstance(node, (Chapter, Section)):
                found.extend(_gather(node.children))
            elif isinstance(node, Image) and not _is_remote(node.path):
                found.append(Path(node.path).resolve())
        return found

    if isinstance(report, Report):
        return _gather(report.children)
    return []
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def collect_remote_image_urls(report: object) -> list[str]:
    """Gather the URL of every remote image referenced in a report.

    The tree is traversed depth-first in document order. Only ``Chapter``
    and ``Section`` nodes are descended into; an ``Image`` is recorded when
    its path is classified as remote. URL strings are returned unchanged.

    Args:
        report: Report instance to inspect. Any other object yields an
            empty list.

    Returns:
        Remote URL strings, one per remote ``Image`` occurrence. Repeated
        references are kept.
    """
    # Deferred imports: see the module docstring (import-cycle avoidance).
    from mkforge.content.image import Image  # noqa: PLC0415
    from mkforge.document import Chapter, Report, Section  # noqa: PLC0415

    def _gather(nodes: Sequence[object]) -> list[str]:
        """Return remote image URLs found under ``nodes``, in order.

        Args:
            nodes: Sequence of Chapter, Section, or content element nodes.
        """
        found: list[str] = []
        for node in nodes:
            if isinstance(node, (Chapter, Section)):
                found.extend(_gather(node.children))
            elif isinstance(node, Image) and _is_remote(node.path):
                found.append(node.path)
        return found

    if isinstance(report, Report):
        return _gather(report.children)
    return []
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def verify_assets(paths: list[Path]) -> None:
    """Check that every local asset path is present on disk.

    All paths are examined before anything is raised, so a single error
    reports the complete set of missing files.

    Args:
        paths: Resolved local image paths to check.

    Raises:
        MissingAssetError: When one or more paths do not exist; the error
            is constructed with the list of all missing paths.
    """
    absent: list[Path] = []
    for candidate in paths:
        if candidate.exists():
            continue
        absent.append(candidate)

    if not absent:
        return
    raise MissingAssetError(absent)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _unique_dest_name(
|
|
149
|
+
base_name: str,
|
|
150
|
+
used_names: dict[str, int],
|
|
151
|
+
stem: str,
|
|
152
|
+
suffix: str,
|
|
153
|
+
) -> str:
|
|
154
|
+
"""Compute a unique destination filename, renaming on collision.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
base_name: Original filename (stem + suffix).
|
|
158
|
+
used_names: Mutable map from base_name to next collision counter.
|
|
159
|
+
stem: Filename stem without extension.
|
|
160
|
+
suffix: Filename extension including the dot.
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
Unique destination filename, possibly suffixed with a counter.
|
|
164
|
+
"""
|
|
165
|
+
if base_name in used_names:
|
|
166
|
+
counter = used_names[base_name]
|
|
167
|
+
used_names[base_name] = counter + 1
|
|
168
|
+
dest_name = f"{stem}_{counter}{suffix}"
|
|
169
|
+
msg = f"Asset name collision: {base_name!r} renamed to {dest_name!r}."
|
|
170
|
+
warnings.warn(msg, UserWarning, stacklevel=4)
|
|
171
|
+
return dest_name
|
|
172
|
+
used_names[base_name] = 1
|
|
173
|
+
return base_name
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def copy_assets_to_dir(
    paths: list[Path],
    assets_dir: Path,
) -> dict[Path, str]:
    """Copy local image files into ``assets_dir`` and report their new names.

    The destination directory is created on demand. A source path that was
    already copied during this call is skipped. When two different sources
    share a filename, the later one is renamed by ``_unique_dest_name``
    (e.g. ``chart.png`` → ``chart_1.png``), which emits a ``UserWarning``.

    Args:
        paths: Resolved local image paths to copy (duplicates skipped).
        assets_dir: Destination directory; created if it does not exist.

    Returns:
        Mapping from each original resolved path to the relative
        ``assets/<filename>`` string used when rewriting Markdown links.
    """
    assets_dir.mkdir(parents=True, exist_ok=True)

    taken: dict[str, int] = {}
    rewritten: dict[Path, str] = {}
    for source in paths:
        if source not in rewritten:
            filename = _unique_dest_name(
                source.name,
                taken,
                source.stem,
                source.suffix,
            )
            shutil.copy2(source, assets_dir / filename)
            rewritten[source] = f"assets/{filename}"

    return rewritten
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# URL schemes that _fetch_url accepts; a URL with any other scheme is
# rejected with DownloadAssetError.
_ALLOWED_SCHEMES: frozenset[str] = frozenset({"http", "https", "ftp", "ftps"})
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _ip_is_non_routable(
|
|
218
|
+
addr: ipaddress.IPv4Address | ipaddress.IPv6Address,
|
|
219
|
+
) -> bool:
|
|
220
|
+
"""Return True when the IP address is non-routable.
|
|
221
|
+
|
|
222
|
+
Covers loopback, link-local, private, unspecified, and multicast ranges.
|
|
223
|
+
|
|
224
|
+
Args:
|
|
225
|
+
addr: Resolved IP address to classify.
|
|
226
|
+
|
|
227
|
+
Returns:
|
|
228
|
+
True when the address must not be contacted.
|
|
229
|
+
"""
|
|
230
|
+
return (
|
|
231
|
+
addr.is_loopback
|
|
232
|
+
or addr.is_link_local
|
|
233
|
+
or addr.is_private
|
|
234
|
+
or addr.is_unspecified
|
|
235
|
+
or addr.is_multicast
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _is_private_host(hostname: str) -> bool:
    """Tell whether a hostname resolves to any non-routable IP address.

    The name is resolved with ``socket.getaddrinfo`` and each returned
    address is classified by ``_ip_is_non_routable``. One non-routable
    result is enough to reject the host; this is the SSRF guard that keeps
    downloads away from internal services such as cloud metadata endpoints
    (e.g. 169.254.169.254).

    A name that fails to resolve is reported as not private, and address
    strings that ``ipaddress`` cannot parse are ignored.

    Args:
        hostname: DNS name or IP address string to check.

    Returns:
        True when any resolved IP is non-routable.
    """
    try:
        resolved = socket.getaddrinfo(hostname, None)
    except socket.gaierror:
        return False

    # Each entry is (family, type, proto, canonname, sockaddr); the IP text
    # is the first element of sockaddr.
    for _family, _kind, _proto, _canonname, sockaddr in resolved:
        try:
            candidate = ipaddress.ip_address(sockaddr[0])
        except ValueError:
            continue
        if _ip_is_non_routable(candidate):
            return True
    return False
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _fetch_url(url: str, dest_path: Path) -> None:
    """Validate and download a single URL to a local path.

    Two checks run before the initial request and again before every HTTP
    redirect is followed:

    1. Scheme validation — only schemes in ``_ALLOWED_SCHEMES`` are
       accepted; all others raise ``DownloadAssetError``.
    2. Host validation — the hostname is passed to ``_is_private_host`` and
       any private, loopback, or otherwise non-routable resolution raises
       ``DownloadAssetError`` to prevent SSRF against internal services.

    Redirects are re-validated because a public URL may answer with a
    ``Location`` header pointing at an internal address; a download helper
    that follows redirects blindly would bypass the host check.

    Scheme-less inputs that the asset collector treats as remote are
    normalised first: ``//host/path`` becomes ``https://host/path`` and
    ``www.host/path`` becomes ``https://www.host/path``. Without this they
    could never pass scheme validation.

    NOTE(review): the host check and the actual connection perform separate
    DNS lookups, so a name whose resolution changes between the two is not
    covered by this function.

    Args:
        url: Remote URL to download.
        dest_path: Local path where the downloaded file will be written.

    Raises:
        DownloadAssetError: If the scheme is not allowed, the host (or a
            redirect target's host) resolves to a private address, the
            download fails, or fewer bytes arrive than the server declared.
    """

    def _check_target(target: str) -> None:
        """Raise DownloadAssetError unless ``target`` may be contacted."""
        parsed = urllib.parse.urlparse(target)
        if parsed.scheme not in _ALLOWED_SCHEMES:
            msg = f"Unsupported URL scheme {parsed.scheme!r}."
            raise DownloadAssetError(url, msg)
        hostname = parsed.hostname or ""
        if not hostname:
            raise DownloadAssetError(url, "URL has no host.")
        if _is_private_host(hostname):
            msg = f"Host {hostname!r} resolves to a private or loopback address."
            raise DownloadAssetError(url, msg)

    class _CheckedRedirectHandler(urllib.request.HTTPRedirectHandler):
        """Redirect handler that validates every redirect target."""

        def redirect_request(self, req, fp, code, msg, headers, newurl):  # noqa: ANN001, ANN202
            """Validate ``newurl`` and then defer to the default handling."""
            _check_target(newurl)
            return super().redirect_request(req, fp, code, msg, headers, newurl)

    # Give protocol-relative and bare "www." references an explicit scheme.
    if url.startswith("//"):
        target_url = f"https:{url}"
    elif url.startswith("www."):
        target_url = f"https://{url}"
    else:
        target_url = url

    _check_target(target_url)

    # Passing a subclass of a default handler replaces that default, so all
    # redirects go through _CheckedRedirectHandler.
    opener = urllib.request.build_opener(_CheckedRedirectHandler)
    try:
        with opener.open(target_url) as response, dest_path.open("wb") as sink:
            declared = response.headers.get("Content-Length", "") or ""
            shutil.copyfileobj(response, sink)
            received = sink.tell()
    except URLError as exc:
        raise DownloadAssetError(url, str(exc)) from exc

    # Report truncated transfers instead of leaving a partial file behind
    # as if it were a complete download.
    if declared.isdigit() and received < int(declared):
        msg = f"Retrieval incomplete: got only {received} out of {declared} bytes."
        raise DownloadAssetError(url, msg)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def download_assets_to_dir(
    urls: list[str],
    assets_dir: Path,
) -> dict[str, str]:
    """Download remote image URLs into the assets directory.

    Files that share a derived filename with an already-downloaded file are
    renamed by ``_unique_dest_name``, which appends a counter suffix and
    emits a ``UserWarning`` for each renamed file.

    The filename is the last segment of the URL's *path component*. The
    query string, the fragment, and the host are never part of it, so
    ``https://example.com`` does not produce a file named ``example.com``
    and ``a.png#top`` is stored as ``a.png``. If that segment has no
    extension or consists only of dots (``.`` / ``..``), ``image_<n>`` is
    used instead.

    Each URL is validated by ``_fetch_url`` before it is downloaded (scheme
    allow-list and private-host rejection).

    Args:
        urls: Remote image URL strings to download (duplicates skipped).
        assets_dir: Destination directory; created if it does not exist.

    Returns:
        Mapping from each original URL to its new relative
        ``assets/<filename>`` string for use in Markdown link rewriting.

    Raises:
        DownloadAssetError: If any URL cannot be fetched or fails validation.
    """
    assets_dir.mkdir(parents=True, exist_ok=True)
    url_map: dict[str, str] = {}
    used_names: dict[str, int] = {}
    fallback_counter = 0

    for url in urls:
        if url in url_map:
            continue

        # A bare "www.host/path" has no "//", so urlsplit would treat the
        # host as part of the path; prefix it so the host lands in netloc.
        splittable = f"//{url}" if url.startswith("www.") else url
        url_path = urllib.parse.urlsplit(splittable).path
        raw_name = url_path.rstrip("/").rsplit("/", 1)[-1]

        # No extension, or only dots ("." / ".."): not a usable filename.
        if "." not in raw_name or not raw_name.strip("."):
            fallback_counter += 1
            raw_name = f"image_{fallback_counter}"

        name_parts = Path(raw_name)
        dest_name = _unique_dest_name(
            raw_name,
            used_names,
            name_parts.stem,
            name_parts.suffix,
        )
        _fetch_url(url, assets_dir / dest_name)
        url_map[url] = f"assets/{dest_name}"

    return url_map
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def rewrite_image_paths(
    markdown: str,
    local_map: dict[Path, str],
    remote_map: dict[str, str] | None = None,
) -> str:
    """Rewrite image paths in rendered Markdown using the copy/download maps.

    Only link *targets* are rewritten, i.e. the text between ``](`` and the
    closing ``)`` (an optional ``"title"`` is left in place). Each target is
    looked up in this order:

    1. verbatim in ``remote_map``;
    2. resolved against the current working directory and looked up in
       ``local_map`` — the same resolution used when the paths were
       collected;
    3. as a bare filename matching the ``name`` of a ``local_map`` key
       (first key wins), kept for compatibility with maps whose keys were
       resolved against a different directory.

    Targets that match nothing are left untouched. Replacing targets in a
    single pass avoids re-substituting text that was just inserted
    (``assets/assets/x.png``), mangling relative paths
    (``img/assets/x.png``), and altering alt text or prose that happens to
    contain a filename.

    Args:
        markdown: Rendered Markdown document string.
        local_map: Mapping from resolved local path to new relative path,
            as returned by ``copy_assets_to_dir``.
        remote_map: Mapping from remote URL to new relative path, as
            returned by ``download_assets_to_dir``. ``None`` is treated
            as an empty map.

    Returns:
        Markdown string with image paths rewritten to ``assets/`` locations.
    """
    import re  # noqa: PLC0415

    remote = remote_map or {}

    # Compatibility lookup for bare filenames; the first key with a given
    # filename wins.
    by_name: dict[str, str] = {}
    for src_path, dest_rel in local_map.items():
        by_name.setdefault(src_path.name, dest_rel)

    # Target = shortest run after "](" that is followed by an optional
    # quoted title and the closing parenthesis.
    target_pattern = re.compile(r'(?<=\]\()[^)\n]*?(?=(?:[ \t]+"[^"\n]*")?\))')

    def _swap(match: re.Match[str]) -> str:
        """Return the replacement for one link target (or the target itself)."""
        target = match.group(0)
        if not target:
            return target
        if target in remote:
            return remote[target]
        try:
            resolved = Path(target).resolve()
        except (OSError, ValueError, RuntimeError):
            # Not something the filesystem can interpret; leave it alone.
            return target
        if resolved in local_map:
            return local_map[resolved]
        return by_name.get(target, target)

    return target_pattern.sub(_swap, markdown)
|
mkforge/content/__init__.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Markdown content elements for report composition.
|
|
2
|
+
|
|
3
|
+
Exports every inline and block element that can appear inside a Chapter or
|
|
4
|
+
Section. Each element type is defined in its own module together with its
|
|
5
|
+
construction-time validation and ``render() -> str`` method.
|
|
6
|
+
|
|
7
|
+
All content types are immutable frozen dataclasses so that report trees are
|
|
8
|
+
deterministic and side-effect free.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from mkforge.content._base import Renderable
|
|
14
|
+
from mkforge.content.code import CodeBlock
|
|
15
|
+
from mkforge.content.image import Image
|
|
16
|
+
from mkforge.content.lists import BulletList, NumberedList
|
|
17
|
+
from mkforge.content.misc import BlockQuote, HorizontalRule
|
|
18
|
+
from mkforge.content.paragraph import Paragraph
|
|
19
|
+
from mkforge.content.table import Table
|
|
20
|
+
from mkforge.content.text import LineBreak, Link, Text, TextStyle
|
|
21
|
+
|
|
22
|
+
type ContentElement = (
|
|
23
|
+
Paragraph
|
|
24
|
+
| Text
|
|
25
|
+
| CodeBlock
|
|
26
|
+
| Table
|
|
27
|
+
| BulletList
|
|
28
|
+
| NumberedList
|
|
29
|
+
| Image
|
|
30
|
+
| HorizontalRule
|
|
31
|
+
| BlockQuote
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
CONTENT_TYPES = (
|
|
35
|
+
Paragraph,
|
|
36
|
+
Text,
|
|
37
|
+
CodeBlock,
|
|
38
|
+
Table,
|
|
39
|
+
BulletList,
|
|
40
|
+
NumberedList,
|
|
41
|
+
Image,
|
|
42
|
+
HorizontalRule,
|
|
43
|
+
BlockQuote,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
__all__ = [
|
|
47
|
+
"CONTENT_TYPES",
|
|
48
|
+
"BlockQuote",
|
|
49
|
+
"BulletList",
|
|
50
|
+
"CodeBlock",
|
|
51
|
+
"ContentElement",
|
|
52
|
+
"HorizontalRule",
|
|
53
|
+
"Image",
|
|
54
|
+
"LineBreak",
|
|
55
|
+
"Link",
|
|
56
|
+
"NumberedList",
|
|
57
|
+
"Paragraph",
|
|
58
|
+
"Renderable",
|
|
59
|
+
"Table",
|
|
60
|
+
"Text",
|
|
61
|
+
"TextStyle",
|
|
62
|
+
]
|
mkforge/content/_base.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Base protocol and shared constants for Markdown content elements.
|
|
2
|
+
|
|
3
|
+
Defines the ``Renderable`` protocol that every content element must satisfy.
|
|
4
|
+
Any class with a ``render() -> str`` method implicitly satisfies this protocol.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Protocol, runtime_checkable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
class Renderable(Protocol):
    """Structural type for anything that can emit its own Markdown.

    A class does not need to inherit from this protocol: exposing a
    ``render() -> str`` method is sufficient. Because the protocol is
    runtime-checkable, ``isinstance(obj, Renderable)`` tests for the
    presence of that method.
    """

    def render(self) -> str:
        """Produce the Markdown text for this element.

        Returns:
            Markdown representation of the element.
        """
        ...  # pragma: no cover — Protocol stub; never executed at runtime
|
mkforge/content/code.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Code block content element.
|
|
2
|
+
|
|
3
|
+
A fenced code block with an optional language info string for syntax
|
|
4
|
+
highlighting. Both the code body and the language hint are validated as
|
|
5
|
+
strings at construction time.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
from mkforge.input_checks import require_string
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class CodeBlock:
    """Fenced code block with an optional language info string.

    Attributes:
        code: Source code text (may be empty).
        language: Language hint for syntax highlighting (may be empty).
    """

    code: str
    language: str = ""

    def __post_init__(self) -> None:
        """Validate code block fields."""
        require_string(self.code, "CodeBlock code", allow_empty=True)
        require_string(self.language, "CodeBlock language", allow_empty=True)

    def render(self) -> str:
        """Render the code block as a Markdown fenced block.

        The fence is three backticks unless a line of ``code`` itself
        starts (after optional indentation) with three or more backticks.
        In that case the fence is made one backtick longer than the longest
        such run, so the embedded fence cannot terminate the block early.

        Returns:
            Fenced code block string, with language hint when set.
        """
        longest_run = 0
        for line in self.code.splitlines():
            stripped = line.lstrip()
            # Number of backticks at the start of the line.
            run = len(stripped) - len(stripped.lstrip("`"))
            longest_run = max(longest_run, run)

        fence = "`" * max(3, longest_run + 1)
        return f"{fence}{self.language}\n{self.code}\n{fence}"
|
mkforge/content/image.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Image content element.
|
|
2
|
+
|
|
3
|
+
A Markdown image reference with a path, alternative text, and an optional
|
|
4
|
+
hover title. The path is validated as a non-empty string; alt and title may
|
|
5
|
+
be empty. The element does not check that the path exists, and it never
copies or modifies the path.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
12
|
+
from mkforge.input_checks import require_string
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class Image:
    """Markdown image reference.

    Attributes:
        path: Non-empty image path or URL.
        alt: Alternative text (may be empty).
        title: Optional hover title (may be empty).
    """

    path: str
    alt: str = ""
    title: str = ""

    def __post_init__(self) -> None:
        """Validate image fields."""
        require_string(self.path, "Image path", allow_empty=False)
        require_string(self.alt, "Image alt", allow_empty=True)
        require_string(self.title, "Image title", allow_empty=True)

    def render(self) -> str:
        """Render the image as a Markdown image reference.

        Produces ``![alt](path)``, or ``![alt](path "title")`` when a title
        is set. The fields are inserted verbatim.

        Returns:
            Markdown image syntax with optional title attribute.
        """
        title = f' "{self.title}"' if self.title else ""
        return f"![{self.alt}]({self.path}{title})"
|