okf-schema 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
okf_schema/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """okf-schema — CLI tool and Python library for OKF bundle management."""
2
+
3
from importlib.metadata import PackageNotFoundError, version

# Resolve the installed distribution's version at import time.  When the
# package is imported without being installed (source checkout, vendored
# copy), no distribution metadata exists and ``version()`` raises
# ``PackageNotFoundError``; fall back to a placeholder instead of making
# ``import okf_schema`` itself fail.
try:
    __version__ = version("okf_schema")
except PackageNotFoundError:
    __version__ = "0.0.0+unknown"
@@ -0,0 +1 @@
1
+ """Internal infrastructure modules for okf-schema."""
@@ -0,0 +1,94 @@
1
+ """Data models for okf-schema validation and reporting."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+
9
@dataclass
class Finding:
    """One issue recorded during validation.

    ``Report`` stores instances of this class in its ``errors`` and
    ``warnings`` lists; the class itself does not encode severity.
    """

    # Short code identifying the kind of finding.
    code: str
    # Human-readable description of the finding.
    message: str
    # File the finding relates to, or ``None`` when no file was given.
    path: Path | None = None
16
+
17
+
18
@dataclass
class Report:
    """Collects the errors and warnings found while validating a bundle."""

    # Findings that make the bundle non-conformant.
    errors: list[Finding] = field(default_factory=list)
    # Findings that do not affect conformance.
    warnings: list[Finding] = field(default_factory=list)

    @property
    def is_conformant(self) -> bool:
        """Whether the report is free of errors (warnings are ignored)."""
        return not self.errors

    def add_error(self, code: str, message: str, path: Path | None = None) -> None:
        """Record a new error built from *code*, *message* and *path*."""
        finding = Finding(code, message, path)
        self.errors.append(finding)

    def add_warning(self, code: str, message: str, path: Path | None = None) -> None:
        """Record a new warning built from *code*, *message* and *path*."""
        finding = Finding(code, message, path)
        self.warnings.append(finding)
37
+
38
+
39
@dataclass
class ConceptInfo:
    """Title, description and type read from a concept file.

    The instance is mutable: ``get_concept_info`` creates it with fallback
    values and then overwrites fields found in the frontmatter.
    """

    # Display title of the concept.
    title: str
    # Free-text description; an empty string when none is available.
    description: str
    # Concept type; an empty string when none is available.
    type: str
46
+
47
+
48
@dataclass
class SearchResult:
    """A single hit produced by a bundle search."""

    # Location of the matching file (the producer decides the path format).
    path: str
    # Concept type of the match.
    type: str
    # Title of the match.
    title: str
55
+
56
+
57
@dataclass
class BundleStats:
    """Aggregate counters describing an OKF bundle.

    All fields are required and positional order is part of the interface.
    """

    # Number of files counted in the bundle.
    total_files: int
    # Number of files counted as concepts.
    total_concepts: int
    # Number of files that had no frontmatter block.
    files_without_frontmatter: int
    # Combined size of the counted files (presumably bytes; confirm with the producer).
    total_size: int
    # Number of links found.
    total_links: int
    # Number of links whose target could not be found.
    broken_links: int
    # Occurrence count per concept type.
    types_distribution: dict[str, int]
    # Occurrence count per tag.
    tags_distribution: dict[str, int]
    # Number of directories counted.
    directories: int
70
+
71
+
72
@dataclass
class ConceptSummary:
    """Short listing entry for one concept in a bundle."""

    # Location of the concept file (the producer decides the path format).
    path: str
    # Concept type.
    type: str
    # Concept title.
    title: str
79
+
80
+
81
@dataclass
class ConceptDetail:
    """Full content of one concept file, split into its two parts."""

    # Parsed frontmatter mapping.
    frontmatter: dict
    # Markdown text that follows the frontmatter.
    body: str
87
+
88
+
89
@dataclass
class IndexUpdate:
    """Describes one change applied to an ``index.md`` file."""

    # Location of the affected index file.
    path: str
    # What was done to it (presumably a short verb; confirm with the producer).
    action: str
@@ -0,0 +1,93 @@
1
+ """Shared utilities for OKF bundle processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import re
7
+ from collections.abc import Iterable
8
+ from pathlib import Path
9
+
10
+ from okf_schema._internal.models import ConceptInfo
11
+ from okf_schema._internal.yaml import extract_frontmatter, parse_yaml
12
+
13
# File names with a reserved meaning inside a bundle.
RESERVED_FILES = {"index.md", "log.md"}

# A bare calendar date of the form YYYY-MM-DD (no time component).
ISO8601_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")

# Inline markdown links and images: group 1 is the link text, group 2 is
# everything between the parentheses (the target, unprocessed).
MARKDOWN_LINK_RE = re.compile(r"!?\[([^\]]*)\]\(([^)]+)\)")
16
+
17
+
18
def collect_markdown_files(bundle: Path) -> Iterable[Path]:
    """Lazily produce the ``.md`` files found anywhere below *bundle*.

    Paths are yielded in sorted order.  Entries that match ``*.md`` but are
    not regular files (for example a directory named ``notes.md``) are
    skipped.
    """
    ordered = sorted(bundle.rglob("*.md"))
    yield from (candidate for candidate in ordered if candidate.is_file())
23
+
24
+
25
def resolve_link(target: str, source: Path, bundle_root: Path) -> Path | None:
    """Resolve a markdown link target to an absolute path.

    Args:
        target: Raw link target as written between the parentheses of a
            markdown link.  Surrounding whitespace is ignored.
        source: File that contains the link.
        bundle_root: Root directory of the bundle.

    Returns:
        ``None`` for external targets (anything containing ``://`` or
        starting with ``mailto:``).  Otherwise the resolved path:

        * a ``#fragment`` suffix is dropped before resolution, so
          ``page.md#section`` resolves to ``page.md``;
        * an empty or fragment-only target (``#section``) refers to
          *source* itself;
        * targets starting with ``/`` are resolved relative to
          *bundle_root*;
        * all other targets are resolved relative to *source*'s parent
          directory.

        The returned path is not guaranteed to exist.
    """
    target = target.strip()
    if "://" in target or target.startswith("mailto:"):
        return None

    # The fragment addresses a location inside the document, not a file on
    # disk, so it must not take part in path resolution.
    path_part = target.partition("#")[0]

    if not path_part:
        # Pure in-page anchor: the link points at the containing file.
        resolved = source
    elif path_part.startswith("/"):
        resolved = bundle_root / path_part.lstrip("/")
    else:
        resolved = source.parent / path_part

    # Best effort: keep the unresolved path if normalisation fails.
    # Before Python 3.13 a symlink loop raises RuntimeError, not OSError.
    with contextlib.suppress(OSError, RuntimeError):
        resolved = resolved.resolve()

    return resolved
45
+
46
+
47
def find_broken_links(body: str, source: Path, bundle_root: Path) -> list[str]:
    """Collect the link targets in *body* that point at nothing on disk.

    Every markdown link or image in *body* is resolved with
    ``resolve_link``.  External targets (those for which it returns
    ``None``) are ignored because they cannot be checked locally.  A target
    counts as valid when the resolved path exists, whether it is a file or
    a directory.

    Returns the raw targets of the broken links, in document order.
    """

    def _is_broken(target: str) -> bool:
        location = resolve_link(target, source, bundle_root)
        return location is not None and not location.exists()

    return [
        match.group(2)
        for match in MARKDOWN_LINK_RE.finditer(body)
        if _is_broken(match.group(2))
    ]
61
+
62
+
63
def has_markdown_files(dir_path: Path) -> bool:
    """Tell whether at least one ``.md`` file lives at or below *dir_path*.

    Returns ``False`` when *dir_path* is not a directory.  Only regular
    files count; a directory whose name ends in ``.md`` does not.
    """
    if dir_path.is_dir():
        for candidate in dir_path.rglob("*.md"):
            if candidate.is_file():
                return True
    return False
68
+
69
+
70
def get_concept_info(path: Path) -> ConceptInfo:
    """Read *path* and return its title, description and type.

    The file is read as UTF-8 and its frontmatter, if any, is parsed.  Each
    of the ``title``, ``description`` and ``type`` keys that holds a truthy
    value is converted to ``str`` and stripped of surrounding whitespace.

    Defaults used when a key is absent or falsy, or when there is no
    parseable frontmatter: the title is derived from the file stem
    (``-`` and ``_`` become spaces, then title-cased); description and type
    are empty strings.
    """
    raw = path.read_text(encoding="utf-8")
    fm_text, _ = extract_frontmatter(raw)

    stem_words = path.stem.replace("-", " ").replace("_", " ")
    info = ConceptInfo(title=stem_words.title(), description="", type="")

    frontmatter = parse_yaml(fm_text) if fm_text is not None else None
    if frontmatter is None:
        return info

    for key in ("title", "description", "type"):
        value = frontmatter.get(key)
        if value:
            setattr(info, key, str(value).strip())
    return info
@@ -0,0 +1,98 @@
1
+ """YAML helpers using ruamel.yaml for OKF frontmatter handling."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import date, datetime
6
+ from typing import cast
7
+
8
+ from ruamel.yaml import YAML
9
+ from ruamel.yaml.error import YAMLError
10
+
11
+
12
def make_yaml() -> YAML:
    """Build the ``ruamel.yaml`` handler shared by the helpers in this module.

    The handler is created with the library defaults and then adjusted:
    ``preserve_quotes`` is switched on and ``default_flow_style`` is
    switched off (block style output).
    """
    handler = YAML()
    handler.default_flow_style = False
    handler.preserve_quotes = True
    return handler
22
+
23
+
24
def _normalize_yaml_value(value: object) -> object:
    """Return *value* with every date-like object replaced by its ISO string.

    ``date`` and ``datetime`` instances become the result of their
    ``isoformat()`` call.  Dicts and lists are rebuilt recursively as plain
    ``dict`` / ``list`` objects (dict keys are left untouched).  Anything
    else is returned unchanged.

    This keeps unquoted YAML dates usable where a string is expected, for
    example by JSON Schema ``type: string`` checks.
    """
    # ``datetime`` is a subclass of ``date``, so one check covers both.
    if isinstance(value, date):
        return value.isoformat()
    if isinstance(value, list):
        return [_normalize_yaml_value(element) for element in value]
    if isinstance(value, dict):
        return {key: _normalize_yaml_value(inner) for key, inner in value.items()}
    return value
41
+
42
+
43
def extract_frontmatter(text: str) -> tuple[str | None, str]:
    """Extract YAML frontmatter from markdown text.

    Frontmatter starts with a ``---`` line at the very beginning of the
    file and ends at the next line that consists of ``---`` only.  Trailing
    whitespace on either delimiter line is tolerated, which also covers
    Windows (CRLF) line endings.

    Returns:
        ``(frontmatter_yaml, body)`` when a block is found.
        ``frontmatter_yaml`` is the raw text between the two ``---``
        markers (it normally begins with the newline that follows the
        opening marker); ``body`` is everything after the closing
        delimiter line.  ``(None, text)`` when no valid block is found.
    """
    marker = "---"
    if not text.startswith(marker):
        return None, text

    # The opening delimiter must be a line of its own: "----" (a thematic
    # break) or "--- something" does not start a frontmatter block.
    first_eol = text.find("\n")
    if first_eol == -1 or text[len(marker) : first_eol].strip():
        return None, text

    closing = "\n" + marker
    search_from = len(marker)
    while True:
        end_idx = text.find(closing, search_from)
        if end_idx == -1:
            return None, text
        rest_start = end_idx + len(closing)
        line_end = text.find("\n", rest_start)
        if line_end == -1:
            line_end = len(text)
        # Accept only a delimiter that stands alone on its line; skip
        # look-alikes such as "----" or "---foo" and keep searching.
        if not text[rest_start:line_end].strip():
            break
        search_from = end_idx + 1

    fm_text = text[len(marker) : end_idx]
    # Skip the delimiter line's own terminator (if any).
    body = text[line_end + 1 :]
    return fm_text, body
63
+
64
+
65
def parse_yaml(yaml_text: str) -> dict | None:
    """Parse YAML text into a plain dict.

    Uses :func:`make_yaml` for consistent configuration.

    Returns:
        The parsed mapping as a plain ``dict`` with ``date`` / ``datetime``
        values converted to ISO 8601 strings (so that JSON Schema
        ``type: string`` validation works for unquoted YAML dates), or
        ``None`` when the text cannot be loaded or does not parse to a
        mapping.
    """
    loader = make_yaml()
    try:
        data = loader.load(yaml_text)
    except (YAMLError, ValueError):
        # ValueError: a scalar shaped like a timestamp but with
        # out-of-range fields (e.g. ``2024-13-45``) can fail inside the
        # date/datetime constructor instead of raising a YAMLError.  Treat
        # it like any other unparseable document.
        return None
    if not isinstance(data, dict):
        return None
    # ruamel.yaml returns CommentedMap; convert to a plain dict and
    # normalize native date/datetime values to strings.
    return cast(dict, _normalize_yaml_value(dict(data)))
85
+
86
+
87
def dump_yaml(data: dict) -> str:
    """Render *data* as YAML text.

    Serialization goes through a handler from :func:`make_yaml`, so output
    uses the same configuration as the parsing helpers.  The document is
    written to an in-memory buffer whose content is returned.
    """
    from io import StringIO

    stream = StringIO()
    make_yaml().dump(data, stream)
    return stream.getvalue()