okf-schema 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- okf_schema/__init__.py +5 -0
- okf_schema/_internal/__init__.py +1 -0
- okf_schema/_internal/models.py +94 -0
- okf_schema/_internal/utils.py +93 -0
- okf_schema/_internal/yaml.py +98 -0
- okf_schema/api.py +563 -0
- okf_schema/cli.py +434 -0
- okf_schema/formatter.py +234 -0
- okf_schema/schemas/__init__.py +24 -0
- okf_schema/validator.py +374 -0
- okf_schema-0.2.0.dist-info/METADATA +281 -0
- okf_schema-0.2.0.dist-info/RECORD +15 -0
- okf_schema-0.2.0.dist-info/WHEEL +4 -0
- okf_schema-0.2.0.dist-info/entry_points.txt +2 -0
- okf_schema-0.2.0.dist-info/licenses/LICENSE +21 -0
okf_schema/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Internal infrastructure modules for okf-schema."""
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Data models for okf-schema validation and reporting."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Finding:
    """One issue raised during validation, stored as an error or a warning."""

    # Code string identifying the finding, as supplied by the reporter.
    code: str
    # Human-readable description of the issue.
    message: str
    # Filesystem path the finding refers to; ``None`` when none was given.
    path: Path | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Report:
    """Validation outcome for an OKF bundle: collected errors and warnings."""

    # Each instance gets its own fresh lists (default_factory), never shared.
    errors: list[Finding] = field(default_factory=list)
    warnings: list[Finding] = field(default_factory=list)

    @property
    def is_conformant(self) -> bool:
        """Tell whether the error list is empty; warnings are not considered."""
        return not self.errors

    def add_error(self, code: str, message: str, path: Path | None = None) -> None:
        """Record a new error built from *code*, *message* and optional *path*."""
        entry = Finding(code=code, message=message, path=path)
        self.errors.append(entry)

    def add_warning(self, code: str, message: str, path: Path | None = None) -> None:
        """Record a new warning built from *code*, *message* and optional *path*."""
        entry = Finding(code=code, message=message, path=path)
        self.warnings.append(entry)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class ConceptInfo:
    """Title, description and type metadata taken from an OKF concept file."""

    # Display title of the concept.
    title: str
    # Free-text description of the concept.
    description: str
    # Concept type string.
    type: str
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class SearchResult:
    """A single hit produced by searching an OKF bundle."""

    # Location of the matching file, kept as a string.
    path: str
    # Type string of the matched concept.
    type: str
    # Title of the matched concept.
    title: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class BundleStats:
    """Aggregate numbers describing an OKF bundle."""

    # Count of files in the bundle.
    total_files: int
    # Count of concepts in the bundle.
    total_concepts: int
    # Count of files that have no frontmatter block.
    files_without_frontmatter: int
    # Combined size of the bundle (presumably bytes; confirm with the producer).
    total_size: int
    # Count of links found.
    total_links: int
    # Count of links that are broken.
    broken_links: int
    # Number of occurrences keyed by concept type.
    types_distribution: dict[str, int]
    # Number of occurrences keyed by tag.
    tags_distribution: dict[str, int]
    # Count of directories.
    directories: int
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class ConceptSummary:
    """Short description of one concept inside an OKF bundle."""

    # Location of the concept file, kept as a string.
    path: str
    # Type string of the concept.
    type: str
    # Title of the concept.
    title: str
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class ConceptDetail:
    """Full view of one concept file: its frontmatter mapping and its body."""

    # Frontmatter of the file as a mapping.
    frontmatter: dict
    # Text of the file body.
    body: str
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class IndexUpdate:
    """Describes one update operation applied to an ``index.md`` file."""

    # Location of the affected file, kept as a string.
    path: str
    # Name of the operation performed (allowed values are not defined here).
    action: str
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Shared utilities for OKF bundle processing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import re
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from okf_schema._internal.models import ConceptInfo
|
|
11
|
+
from okf_schema._internal.yaml import extract_frontmatter, parse_yaml
|
|
12
|
+
|
|
13
|
+
# Reserved file names.
RESERVED_FILES = {"index.md", "log.md"}
# Matches a bare ``YYYY-MM-DD`` string: digits only, no calendar range check.
ISO8601_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")
# Matches markdown links and images: optional ``!``, ``[text]``, ``(target)``.
# Group 1 captures the link text, group 2 the target.
MARKDOWN_LINK_RE = re.compile(r"!?\[([^\]]*)\]\(([^)]+)\)")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def collect_markdown_files(bundle: Path) -> Iterable[Path]:
    """Lazily produce each regular ``.md`` file found anywhere below *bundle*.

    All matches of the recursive glob are sorted first; entries that are
    not files (for example a directory whose name ends in ``.md``) are
    dropped.
    """
    candidates = sorted(bundle.rglob("*.md"))
    yield from (entry for entry in candidates if entry.is_file())
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def resolve_link(target: str, source: Path, bundle_root: Path) -> Path | None:
    """Resolve a markdown link target to an absolute path.

    Returns ``None`` for external targets, i.e. anything containing
    ``://`` or starting with ``mailto:``.

    A trailing ``#fragment`` is ignored, because it addresses a location
    inside the linked document rather than a different file. A target
    consisting only of a fragment (``#heading``) resolves to *source*
    itself.

    Targets starting with ``/`` are resolved relative to *bundle_root*;
    all other targets are resolved relative to *source*'s parent directory.

    Args:
        target: Raw link destination as written in the markdown.
        source: Path of the markdown file that contains the link.
        bundle_root: Root directory of the bundle.

    Returns:
        The resolved path (which may not exist on disk), or ``None`` for
        external links.
    """
    if "://" in target or target.startswith("mailto:"):
        return None

    # Separate the file part from an optional in-document anchor.
    path_part, has_fragment, _fragment = target.partition("#")

    if has_fragment and not path_part:
        # "#heading" points into the document that contains the link.
        resolved = source
    elif path_part.startswith("/"):
        resolved = bundle_root / path_part.lstrip("/")
    else:
        resolved = source.parent / path_part

    # Best effort: keep the unresolved path if the OS refuses to resolve it.
    with contextlib.suppress(OSError):
        resolved = resolved.resolve()

    return resolved
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def find_broken_links(body: str, source: Path, bundle_root: Path) -> list[str]:
    """Collect the link targets in *body* whose resolved path is missing.

    Every markdown link or image matched by ``MARKDOWN_LINK_RE`` is
    resolved with :func:`resolve_link`. Targets that resolve to ``None``
    (external links) are ignored. Any existing path, file or directory,
    counts as valid. Targets are returned as written, in order of
    appearance.
    """
    missing: list[str] = []
    for match in MARKDOWN_LINK_RE.finditer(body):
        link_target = match.group(2)
        location = resolve_link(link_target, source, bundle_root)
        # ``None`` means an external link, which cannot be checked on disk.
        if location is not None and not location.exists():
            missing.append(link_target)
    return missing
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def has_markdown_files(dir_path: Path) -> bool:
    """Report whether a regular ``.md`` file exists at or below *dir_path*.

    Gives ``False`` when *dir_path* is not a directory.
    """
    if dir_path.is_dir():
        for candidate in dir_path.rglob("*.md"):
            if candidate.is_file():
                return True
    return False
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_concept_info(path: Path) -> ConceptInfo:
    """Read a concept file and return its title, description and type.

    The file is read as UTF-8. The defaults are an empty description, an
    empty type, and a title built from the file stem with ``-`` and ``_``
    turned into spaces and then title-cased. Each of the three
    frontmatter keys overrides its default only when it is present with a
    truthy value; values are converted with ``str`` and stripped.
    """
    raw = path.read_text(encoding="utf-8")
    yaml_block, _rest = extract_frontmatter(raw)

    stem_words = path.stem.replace("-", " ").replace("_", " ")
    result = ConceptInfo(title=stem_words.title(), description="", type="")

    if yaml_block is None:
        return result

    meta = parse_yaml(yaml_block)
    if meta is None:
        return result

    for key in ("title", "description", "type"):
        if meta.get(key):
            setattr(result, key, str(meta[key]).strip())

    return result
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""YAML helpers using ruamel.yaml for OKF frontmatter handling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import date, datetime
|
|
6
|
+
from typing import cast
|
|
7
|
+
|
|
8
|
+
from ruamel.yaml import YAML
|
|
9
|
+
from ruamel.yaml.error import YAMLError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def make_yaml() -> YAML:
    """Build the ruamel.yaml instance shared by the helpers in this module.

    A default-constructed ``YAML`` object is returned with
    ``preserve_quotes`` switched on and ``default_flow_style`` switched
    off, so load and dump use the same settings.
    """
    configured = YAML()
    # Keep the quoting style of scalars as written in the input.
    configured.preserve_quotes = True
    # Emit collections in block style rather than inline flow style.
    configured.default_flow_style = False
    return configured
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _normalize_yaml_value(value: object) -> object:
    """Return *value* with every date or datetime replaced by an ISO 8601 string.

    Dicts and lists are walked recursively and rebuilt as plain ``dict``
    and ``list`` objects. Any other value is returned unchanged. The
    conversion exists so that unquoted YAML dates still satisfy JSON
    Schema ``type: string`` checks.
    """
    # ``datetime`` is a subclass of ``date``; both expose ``isoformat``.
    if isinstance(value, (datetime, date)):
        return value.isoformat()
    if isinstance(value, dict):
        return {key: _normalize_yaml_value(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_normalize_yaml_value(element) for element in value]
    return value
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def extract_frontmatter(text: str) -> tuple[str | None, str]:
    """Split markdown *text* into its YAML frontmatter and the remaining body.

    The text must begin with ``---``. The frontmatter ends at the first
    later occurrence of a newline followed by ``---``. Whatever follows
    on that closing line is not inspected. One newline directly after the
    closing marker is dropped from the body.

    Returns ``(frontmatter_yaml, body)``. When the text does not start
    with ``---`` or has no closing marker, returns ``(None, text)``.
    """
    opener = "---"
    closer = "\n---"

    if not text.startswith(opener):
        return None, text

    close_at = text.find(closer, len(opener))
    if close_at < 0:
        return None, text

    header = text[len(opener) : close_at]
    remainder = text[close_at + len(closer) :]
    # Drop only the single newline that ends the closing marker line.
    trimmed = remainder[1:] if remainder.startswith("\n") else remainder
    return header, trimmed
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def parse_yaml(yaml_text: str) -> dict | None:
    """Load *yaml_text* and return it as a plain, normalized ``dict``.

    The text is loaded with the instance from :func:`make_yaml`. Returns
    ``None`` when loading raises ``YAMLError`` or when the top-level
    value is not a mapping.

    The result goes through :func:`_normalize_yaml_value`, so date and
    datetime values become ISO 8601 strings and JSON Schema
    ``type: string`` checks accept unquoted YAML dates.
    """
    loader = make_yaml()
    try:
        loaded = loader.load(yaml_text)
    except YAMLError:
        return None

    if isinstance(loaded, dict):
        # Copy the loader's mapping type into a plain dict, then convert
        # native date/datetime values to strings.
        return cast(dict, _normalize_yaml_value(dict(loaded)))
    return None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def dump_yaml(data: dict) -> str:
    """Render *data* as YAML text using the :func:`make_yaml` configuration.

    The document is written to an in-memory buffer and returned as a
    string. Note that :func:`parse_yaml` returns a plain ``dict``, so
    only information present in that dict can be written back.
    """
    from io import StringIO

    with StringIO() as stream:
        make_yaml().dump(data, stream)
        return stream.getvalue()
|