bookwright-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bookwright/__init__.py +3 -0
- bookwright/__main__.py +6 -0
- bookwright/cli.py +19 -0
- bookwright/commands/__init__.py +0 -0
- bookwright/commands/_envelope.py +36 -0
- bookwright/commands/check.py +75 -0
- bookwright/commands/graph/__init__.py +23 -0
- bookwright/commands/graph/build.py +157 -0
- bookwright/commands/graph/envelope.py +26 -0
- bookwright/commands/graph/query.py +98 -0
- bookwright/commands/init/__init__.py +5 -0
- bookwright/commands/init/conflict.py +107 -0
- bookwright/commands/init/envelope.py +322 -0
- bookwright/commands/init/git.py +96 -0
- bookwright/commands/init/main.py +263 -0
- bookwright/commands/init/resolve.py +193 -0
- bookwright/commands/init/scaffold.py +242 -0
- bookwright/commands/init/validate.py +172 -0
- bookwright/commands/integration/__init__.py +22 -0
- bookwright/commands/integration/use.py +120 -0
- bookwright/commands/validate.py +160 -0
- bookwright/commands/version.py +35 -0
- bookwright/core/__init__.py +35 -0
- bookwright/core/_blocks.py +239 -0
- bookwright/core/_build.py +154 -0
- bookwright/core/_research_block.py +56 -0
- bookwright/core/_translate.py +90 -0
- bookwright/core/errors.py +127 -0
- bookwright/core/iso639_1.py +200 -0
- bookwright/core/manifest.py +343 -0
- bookwright/errors.py +47 -0
- bookwright/golem/__init__.py +71 -0
- bookwright/golem/base.py +200 -0
- bookwright/golem/errors.py +29 -0
- bookwright/golem/modules/__init__.py +1 -0
- bookwright/golem/modules/character.py +109 -0
- bookwright/golem/modules/event.py +91 -0
- bookwright/golem/modules/feature.py +161 -0
- bookwright/golem/modules/inference.py +41 -0
- bookwright/golem/modules/narrative.py +55 -0
- bookwright/golem/modules/provenance.py +197 -0
- bookwright/golem/modules/relationship.py +38 -0
- bookwright/golem/modules/setting.py +30 -0
- bookwright/golem/namespaces.py +332 -0
- bookwright/golem/serialize.py +25 -0
- bookwright/golem/slug.py +22 -0
- bookwright/indexers/__init__.py +47 -0
- bookwright/indexers/base.py +55 -0
- bookwright/indexers/errors.py +80 -0
- bookwright/indexers/rdflib_indexer.py +89 -0
- bookwright/integrations/__init__.py +155 -0
- bookwright/integrations/base.py +117 -0
- bookwright/integrations/claude/__init__.py +29 -0
- bookwright/integrations/constants.py +38 -0
- bookwright/integrations/descriptions.py +48 -0
- bookwright/integrations/errors.py +170 -0
- bookwright/integrations/generic/__init__.py +56 -0
- bookwright/integrations/lint.py +160 -0
- bookwright/integrations/materialize.py +202 -0
- bookwright/integrations/options.py +203 -0
- bookwright/io/__init__.py +1 -0
- bookwright/io/bible.py +500 -0
- bookwright/io/errors.py +98 -0
- bookwright/io/frontmatter.py +61 -0
- bookwright/io/fs.py +226 -0
- bookwright/io/manuscript.py +15 -0
- bookwright/io/project.py +21 -0
- bookwright/io/report.py +107 -0
- bookwright/io/research.py +427 -0
- bookwright/resources/__init__.py +1 -0
- bookwright/resources/commands/bookwright-analyze.md +66 -0
- bookwright/resources/commands/bookwright-bible.md +96 -0
- bookwright/resources/commands/bookwright-checklist.md +67 -0
- bookwright/resources/commands/bookwright-clarify.md +65 -0
- bookwright/resources/commands/bookwright-constitution.md +79 -0
- bookwright/resources/commands/bookwright-continuity.md +70 -0
- bookwright/resources/commands/bookwright-draft.md +74 -0
- bookwright/resources/commands/bookwright-outline.md +71 -0
- bookwright/resources/commands/bookwright-research.md +107 -0
- bookwright/resources/commands/bookwright-scenes.md +66 -0
- bookwright/resources/commands/bookwright-synopsis.md +67 -0
- bookwright/resources/commands/bookwright-verify.md +136 -0
- bookwright/resources/commands/references/golem-character.md +65 -0
- bookwright/resources/commands/references/golem-events-timeline.md +56 -0
- bookwright/resources/commands/references/golem-relationships.md +53 -0
- bookwright/resources/commands/references/greimas-actants.md +57 -0
- bookwright/resources/commands/references/pending-protocol.md +72 -0
- bookwright/resources/commands/references/propp-functions.md +54 -0
- bookwright/resources/commands/references/research-format.md +136 -0
- bookwright/resources/project/.bookwright/cache/.gitkeep +0 -0
- bookwright/resources/project/.bookwright/schema/.gitkeep +0 -0
- bookwright/resources/project/.bookwright/templates/.gitkeep +0 -0
- bookwright/resources/project/.gitignore +23 -0
- bookwright/resources/project/README.md.j2 +40 -0
- bookwright/resources/project/__init__.py +6 -0
- bookwright/resources/project/bible/characters/.gitkeep +0 -0
- bookwright/resources/project/bible/constitution.md.j2 +74 -0
- bookwright/resources/project/bible/glossary.md +36 -0
- bookwright/resources/project/bible/locations/.gitkeep +0 -0
- bookwright/resources/project/bible/pov-structure.md +43 -0
- bookwright/resources/project/bible/relationships.md +36 -0
- bookwright/resources/project/bible/research/_index.md +28 -0
- bookwright/resources/project/bible/research/sources.md +23 -0
- bookwright/resources/project/bible/settings/.gitkeep +0 -0
- bookwright/resources/project/bible/subplots.md +35 -0
- bookwright/resources/project/bible/themes.md +36 -0
- bookwright/resources/project/bible/timeline.md +38 -0
- bookwright/resources/project/manuscript/.gitkeep +0 -0
- bookwright/resources/project/outline/arcs.md +34 -0
- bookwright/resources/project/outline/scenes.md +31 -0
- bookwright/resources/project/outline/structure.md +35 -0
- bookwright/resources/project/outline/synopsis.md +25 -0
- bookwright/resources/schemas/__init__.py +19 -0
- bookwright/resources/schemas/golem-1.1/VERSION +1 -0
- bookwright/resources/schemas/golem-1.1/golem.ttl +1947 -0
- bookwright/resources/schemas/golem-1.1/version.json +8 -0
- bookwright/resources/templates/__init__.py +1 -0
- bookwright/resources/templates/bible/character.md.tmpl +63 -0
- bookwright/resources/templates/bible/location.md.tmpl +37 -0
- bookwright/resources/templates/bible/research/_index.md.tmpl +25 -0
- bookwright/resources/templates/bible/research/sources.md.tmpl +21 -0
- bookwright/resources/templates/bible/research/tema.md.tmpl +37 -0
- bookwright/resources/templates/bible/setting.md.tmpl +38 -0
- bookwright/resources/templates/manifest.template.toml +79 -0
- bookwright/resources/templates/manuscript/chapter.md.tmpl +36 -0
- bookwright/resources/templates/scenes/scene.md.tmpl +37 -0
- bookwright/resources/vocabularies/__init__.py +6 -0
- bookwright/resources/vocabularies/greimas.ttl +4 -0
- bookwright/resources/vocabularies/propp.ttl +4 -0
- bookwright/resources/vocabularies/sources.ttl +82 -0
- bookwright/validation/__init__.py +33 -0
- bookwright/validation/anchor_queries.py +223 -0
- bookwright/validation/base.py +233 -0
- bookwright/validation/queries.py +197 -0
- bookwright/validation/registry.py +185 -0
- bookwright/validation/report.py +106 -0
- bookwright/validation/runner.py +65 -0
- bookwright/validation/validators/__init__.py +9 -0
- bookwright/validation/validators/character_presence.py +202 -0
- bookwright/validation/validators/factual_anchor.py +291 -0
- bookwright/validation/validators/focalization.py +152 -0
- bookwright/validation/validators/setting_continuity.py +100 -0
- bookwright/validation/validators/temporal.py +277 -0
- bookwright_cli-0.2.0.dist-info/METADATA +218 -0
- bookwright_cli-0.2.0.dist-info/RECORD +149 -0
- bookwright_cli-0.2.0.dist-info/WHEEL +4 -0
- bookwright_cli-0.2.0.dist-info/entry_points.txt +2 -0
- bookwright_cli-0.2.0.dist-info/licenses/LICENSE +202 -0
- bookwright_cli-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Validator discovery and configuration resolution (contracts/validator-protocol.md).
|
|
2
|
+
|
|
3
|
+
Built-ins are auto-discovered by iterating the ``bookwright.validation.validators``
|
|
4
|
+
package (``pkgutil``); customs are loaded from sorted ``*.py`` under
|
|
5
|
+
``<root>/.bookwright/validators/`` (``importlib``). No hand-registration, no
|
|
6
|
+
``entry_points`` (research D2). Discovery is deterministic: modules sorted by name,
|
|
7
|
+
objects within a module sorted by validator name (D8).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import importlib
|
|
13
|
+
import importlib.util
|
|
14
|
+
import pkgutil
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from types import ModuleType
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
from bookwright.validation import validators as _validators_pkg
|
|
20
|
+
from bookwright.validation.base import UnknownValidatorError, Validator, ValidatorError
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from bookwright.core.manifest import ValidatorsBlock
|
|
24
|
+
|
|
25
|
+
__all__ = ["discover_validators", "resolve_active"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _looks_like_validator_class(value: object) -> bool:
|
|
29
|
+
"""Whether ``value`` is a concrete class shaped like a validator (not a Protocol)."""
|
|
30
|
+
if not isinstance(value, type) or getattr(value, "_is_protocol", False):
|
|
31
|
+
return False
|
|
32
|
+
return all(hasattr(value, attr) for attr in ("name", "severity_default", "validate"))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _as_validator(value: object) -> Validator | None:
    """Turn a module-level object into a validator instance, or ``None``.

    A validator-shaped class is instantiated with no arguments; the result is
    kept only if it satisfies ``Validator``. A constructor that raises makes
    the class count as "not a validator" rather than propagating the error.
    An object that is not such a class is returned unchanged when it is
    already a ``Validator`` instance (and not itself a class).
    """
    if not _looks_like_validator_class(value):
        # Not an instantiable validator class: accept only ready-made instances.
        if isinstance(value, Validator) and not isinstance(value, type):
            return value
        return None

    try:
        built = value()  # type: ignore[operator]
    except Exception:  # a broken ctor is "no conforming validator", not a crash
        return None
    if isinstance(built, Validator):
        return built
    return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _collect_from_module(module: ModuleType) -> list[Validator]:
    """Return the module's public conforming validators, ordered by ``name``.

    Attributes whose name starts with an underscore are ignored; every other
    module-level object is passed through ``_as_validator`` and kept when that
    yields an instance.
    """
    normalized = (
        _as_validator(obj)
        for attr_name, obj in vars(module).items()
        if not attr_name.startswith("_")
    )
    validators = [candidate for candidate in normalized if candidate is not None]
    validators.sort(key=lambda v: v.name)
    return validators
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _discover_builtins() -> tuple[dict[str, Validator], list[ValidatorError]]:
    """Import every module of the built-in validators package and collect from it.

    Modules are visited in name order. The first validator seen under a given
    name wins; any later one with the same name is reported as a
    ``ValidatorError`` in the ``"load"`` phase instead of replacing it.

    Returns ``(builtins_by_name, load_errors)``.
    """
    registry: dict[str, Validator] = {}
    problems: list[ValidatorError] = []
    module_names = sorted(info.name for info in pkgutil.iter_modules(_validators_pkg.__path__))
    for module_name in module_names:
        module = importlib.import_module(f"{_validators_pkg.__name__}.{module_name}")
        for validator in _collect_from_module(module):
            key = validator.name
            if key not in registry:
                registry[key] = validator
                continue
            problems.append(
                ValidatorError(key, f"duplicate built-in validator name '{key}'", "load")
            )
    return registry, problems
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _load_custom_module(path: Path, mod_name: str) -> ModuleType:
|
|
86
|
+
spec = importlib.util.spec_from_file_location(mod_name, path)
|
|
87
|
+
if spec is None or spec.loader is None: # pragma: no cover — defensive
|
|
88
|
+
raise ImportError(f"cannot load module spec for {path}")
|
|
89
|
+
module = importlib.util.module_from_spec(spec)
|
|
90
|
+
spec.loader.exec_module(module)
|
|
91
|
+
return module
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _discover_customs(
    custom_dir: Path, builtins: dict[str, Validator]
) -> tuple[dict[str, Validator], list[ValidatorError]]:
    """Load project validators from the ``*.py`` files directly under ``custom_dir``.

    Files are processed in sorted order. Each problem is recorded as a
    ``ValidatorError`` in the ``"load"`` phase, attributed to the file's path
    relative to ``custom_dir``'s grandparent, and never raised:

    * the file fails to import,
    * the file declares no conforming validator,
    * a validator's name equals a built-in's name,
    * a validator's name was already claimed by an earlier custom.

    Returns ``(customs_by_name, load_errors)``; both are empty when
    ``custom_dir`` is not a directory.
    """
    customs: dict[str, Validator] = {}
    errors: list[ValidatorError] = []
    if not custom_dir.is_dir():
        return customs, errors

    def record(where: str, message: str) -> None:
        errors.append(ValidatorError(where, message, "load"))

    root = custom_dir.parent.parent  # <root>/.bookwright/validators
    for index, path in enumerate(sorted(custom_dir.glob("*.py"))):
        try:
            relpath = path.relative_to(root).as_posix()
        except ValueError:  # pragma: no cover — custom_dir always under root
            relpath = path.as_posix()

        try:
            module = _load_custom_module(path, f"_bookwright_custom_{index}_{path.stem}")
        except Exception as exc:  # any import failure is a skip (FR-005), never a crash
            record(relpath, f"{type(exc).__name__}: {exc}")
            continue

        found = _collect_from_module(module)
        if not found:
            record(relpath, "no conforming validator found")
            continue

        for validator in found:
            name = validator.name
            if name in builtins:
                record(relpath, f"custom validator name '{name}' collides with a built-in; rename it")
            elif name in customs:
                record(relpath, f"duplicate custom validator name '{name}'")
            else:
                customs[name] = validator
    return customs, errors
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def discover_validators(
    custom_dir: Path,
) -> tuple[dict[str, Validator], dict[str, Validator], list[ValidatorError]]:
    """Discover built-in validators, then custom ones under ``custom_dir``.

    Returns ``(builtins, customs, load_errors)``. Customs are discovered
    against the built-in set, so a custom whose name matches a built-in is
    dropped and reported rather than shadowing it; the two dicts therefore
    share no names. ``load_errors`` lists the built-in load errors first,
    followed by the custom ones.
    """
    builtins, builtin_errors = _discover_builtins()
    customs, custom_errors = _discover_customs(custom_dir, builtins)
    load_errors = [*builtin_errors, *custom_errors]
    return builtins, customs, load_errors
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def resolve_active(
    builtins: dict[str, Validator],
    customs: dict[str, Validator],
    cfg: ValidatorsBlock,
) -> list[Validator]:
    """Apply the ``[validators]`` config to the discovered set (research D7).

    1. Any ``enabled`` / ``disabled`` / ``custom`` name absent from the
       discovered ``builtins + customs`` raises :class:`UnknownValidatorError`
       carrying the sorted offending names.
    2. A non-empty ``custom`` allow-lists the discovered customs to those
       names. A name listed there that is a built-in rather than a custom is
       known (so step 1 accepts it) but selects nothing here; it used to
       raise a bare ``KeyError`` from the ``customs`` lookup.
    3. ``candidates = builtins + selected customs`` minus ``disabled``.
    4. A non-empty ``enabled`` intersects the candidates with those names.

    Args:
        builtins: Built-in validators keyed by name.
        customs: Custom validators keyed by name.
        cfg: Object exposing the ``enabled``, ``disabled`` and ``custom``
            name sequences.

    Returns:
        The active validators ordered by name.

    Raises:
        UnknownValidatorError: If the config names a validator that was not
            discovered.
    """
    discovered = {**builtins, **customs}

    unknown = tuple(
        sorted(
            name for name in (*cfg.enabled, *cfg.disabled, *cfg.custom) if name not in discovered
        )
    )
    if unknown:
        raise UnknownValidatorError(unknown)

    if cfg.custom:
        # Only names that really are customs can be allow-listed; a built-in
        # name here must not turn into a KeyError.
        selected_customs = {name: customs[name] for name in cfg.custom if name in customs}
    else:
        selected_customs = dict(customs)

    candidates = {**builtins, **selected_customs}
    disabled = set(cfg.disabled)
    active = {name: v for name, v in candidates.items() if name not in disabled}
    if cfg.enabled:
        enabled = set(cfg.enabled)
        active = {name: v for name, v in active.items() if name in enabled}
    return [active[name] for name in sorted(active)]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""``ValidationReport`` — aggregation, the CI gate, filtering, and rendering.
|
|
2
|
+
|
|
3
|
+
The gate (:attr:`failed`) is computed from **all** violations before any filter, so a
|
|
4
|
+
display ``--scope`` / ``--severity`` can never hide an error from CI (FR-013). The
|
|
5
|
+
emitted order is fixed by the runner's total-order sort; ``reported`` only removes
|
|
6
|
+
entries, never reorders — so the human report and the JSON ``violations[]`` are
|
|
7
|
+
byte-identical across runs (SC-003).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
from bookwright.validation.base import Severity, ValidatorError, Violation, split_source
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from rich.console import Console
|
|
19
|
+
|
|
20
|
+
__all__ = ["ScopeFilter", "ValidationReport"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True)
class ScopeFilter:
    """Restricts reported findings to one file or directory of the project."""

    rel: str  # project-relative posix path of the scope
    is_dir: bool

    def matches(self, source: str | None) -> bool:
        """Tell whether ``source`` lies inside this scope.

        ``None`` never matches. Otherwise the first element returned by
        ``split_source`` is compared (falling back to ``source`` itself when
        that element is empty): a file scope needs an exact match, a directory
        scope also accepts anything below ``rel/``.
        """
        if source is None:
            return False
        head = split_source(source)[0]
        path = head if head else source
        if path == self.rel:
            return True
        if not self.is_dir:
            return False
        return path.startswith(f"{self.rel}/")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class ValidationReport:
    """One complete run: every violation (before filtering), errors, and run names."""

    violations: tuple[Violation, ...]
    errors: tuple[ValidatorError, ...]
    ran: tuple[str, ...]

    @property
    def failed(self) -> bool:
        """The gate: true when any stored violation has ``error`` severity.

        Computed over ``self.violations`` directly, so display filters cannot
        influence it.
        """
        for violation in self.violations:
            if violation.severity == Severity.error:
                return True
        return False

    def reported(self, *, scope: ScopeFilter | None, severity: Severity | None) -> list[Violation]:
        """Return the violations passing ``scope`` and the ``severity`` threshold.

        A ``None`` filter is not applied. Entries are only dropped, never
        reordered.
        """
        return [
            violation
            for violation in self.violations
            if (scope is None or scope.matches(violation.source))
            and (severity is None or violation.severity.at_least(severity))
        ]

    def to_json(self, *, scope: ScopeFilter | None, severity: Severity | None) -> dict[str, Any]:
        """Build the JSON envelope for this run.

        ``status``, ``violations`` and ``summary.reported`` reflect the filtered
        set; ``failed``, ``summary.total`` and ``summary.by_severity`` reflect
        all violations.
        """
        shown = self.reported(scope=scope, severity=severity)
        summary = {
            "ran": list(self.ran),
            "total": len(self.violations),
            "reported": len(shown),
            "by_severity": self._by_severity(),
        }
        return {
            "status": "violations" if shown else "ok",
            "failed": self.failed,
            "violations": [v.to_json() for v in shown],
            "errors": [e.to_json() for e in self.errors],
            "summary": summary,
        }

    def _by_severity(self) -> dict[str, int]:
        """Count all violations per severity value; every level is present, ``0`` if unused."""
        counts = dict.fromkeys((level.value for level in Severity), 0)
        for violation in self.violations:
            counts[violation.severity.value] += 1
        return counts

    def render(
        self, console: Console, *, scope: ScopeFilter | None, severity: Severity | None
    ) -> None:
        """Print the human-readable report to ``console``.

        Filtered violations are printed under one heading per validator
        (headings in sorted order, entries in their stored order), followed by
        a section for validator errors when there are any. With nothing to show
        at all, a single "no violations found" line is printed instead.
        """
        shown = self.reported(scope=scope, severity=severity)
        if not (shown or self.errors):
            console.print("no violations found", markup=False)
            return

        # Bucket once, keeping each validator's entries in their stored order.
        by_validator: dict[str, list[Violation]] = {}
        for violation in shown:
            by_validator.setdefault(violation.validator, []).append(violation)

        for validator in sorted(by_validator):
            console.print(f"{validator}:", markup=False)
            for violation in by_validator[validator]:
                location = violation.source or "(no specific location)"
                console.print(
                    f" {violation.severity.value}: {violation.message} — {location}",
                    markup=False,
                )

        if self.errors:
            console.print("validator errors:", markup=False)
            for error in self.errors:
                console.print(f" {error.phase}: {error.validator}: {error.message}", markup=False)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Run the active validators with per-validator isolation (FR-014, D8/D9).
|
|
2
|
+
|
|
3
|
+
A validator that raises is caught and recorded as a ``ValidatorError(phase="run")``
|
|
4
|
+
without aborting the others (FR-014). Identical findings are deduped and the
|
|
5
|
+
combined set is sorted by an explicit total-order key so the emitted list is
|
|
6
|
+
byte-identical across runs and platforms (SC-003), not merely "stably sorted".
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from bookwright.indexers import Indexer
|
|
12
|
+
from bookwright.validation.base import (
|
|
13
|
+
_RANK,
|
|
14
|
+
ValidationContext,
|
|
15
|
+
Validator,
|
|
16
|
+
ValidatorError,
|
|
17
|
+
Violation,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = ["RunResult", "run_validators", "sort_key"]
|
|
21
|
+
|
|
22
|
+
RunResult = tuple[list[Violation], list[ValidatorError], list[str]]
|
|
23
|
+
"""``(violations, errors, ran)`` — deduped/sorted findings, run errors, run names."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def sort_key(violation: Violation) -> tuple[str, int, str, str, tuple[tuple[str, str, str], ...]]:
    """Build the total-order sort key for one violation (D8).

    Components, in comparison order: validator name, negated severity rank
    (so higher ranks sort first), source (``""`` when falsy), message, triples.
    """
    validator = violation.validator
    severity_desc = -_RANK[violation.severity]
    source = violation.source or ""
    return (validator, severity_desc, source, violation.message, violation.triples)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def run_validators(
    active: list[Validator], project: ValidationContext, indexer: Indexer
) -> RunResult:
    """Run every validator in ``active``, isolating failures (FR-014).

    Each validator's result is fully materialized *inside* the guarded call.
    A validator that raises, returns something non-iterable (e.g. ``None``),
    or returns a lazy iterable that raises while being consumed therefore
    contributes a ``ValidatorError(phase="run")`` and no findings, and the
    remaining validators still run. Previously only the call itself was
    guarded, so such a result escaped the isolation and aborted the whole run.

    Findings are deduplicated across the whole run (first occurrence kept) and
    returned sorted by :func:`sort_key`.

    Args:
        active: Validators to invoke, in invocation order.
        project: Context handed to each validator.
        indexer: Indexer handed to each validator.

    Returns:
        ``(violations, errors, ran)`` where ``ran`` lists the name of every
        invoked validator, sorted -- including those that failed.
    """
    seen: set[Violation] = set()
    violations: list[Violation] = []
    errors: list[ValidatorError] = []
    ran: list[str] = []

    for validator in active:
        ran.append(validator.name)
        try:
            # list(...) forces lazy results now, while failures are still attributable.
            found = list(validator.validate(project, indexer))
        except Exception as exc:  # per-validator isolation (FR-014) — never abort the run
            errors.append(ValidatorError(validator.name, f"{type(exc).__name__}: {exc}", "run"))
            continue
        for violation in found:
            if violation not in seen:
                seen.add(violation)
                violations.append(violation)

    violations.sort(key=sort_key)
    return violations, errors, sorted(ran)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Built-in validator modules.
|
|
2
|
+
|
|
3
|
+
This package is the auto-discovery root: :func:`bookwright.validation.registry.
|
|
4
|
+
discover_validators` iterates its modules with ``pkgutil.iter_modules`` and
|
|
5
|
+
collects every module-level object satisfying the ``Validator`` protocol. Adding
|
|
6
|
+
a built-in is dropping a new module here — no hand-registration (FR-004).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""``character_presence`` — bible roster vs. manuscript mentions (FR-016, research D3).
|
|
2
|
+
|
|
3
|
+
Two directions, split by severity so a heuristic false positive can never fail CI:
|
|
4
|
+
|
|
5
|
+
* a bible character **never** mentioned in the manuscript → orphan finding at
|
|
6
|
+
**error** (deterministic — the name and the prose are both authored),
|
|
7
|
+
* a proper-noun candidate in the prose with **no** bible entry → unknown-mention at
|
|
8
|
+
**warning** (a pinned, conservative heuristic — no NER).
|
|
9
|
+
|
|
10
|
+
Unknown mentions are collapsed per distinct name (one finding citing the first
|
|
11
|
+
occurrence), never multiplied per mention (edge case).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
from typing import ClassVar
|
|
18
|
+
|
|
19
|
+
from bookwright.golem.slug import make_slug
|
|
20
|
+
from bookwright.indexers import Indexer
|
|
21
|
+
from bookwright.validation.base import Severity, ValidationContext, Violation
|
|
22
|
+
|
|
23
|
+
# Pinned proper-noun candidate: a capitalized word of ≥3 letters (D3). Accent-aware
|
|
24
|
+
# for Spanish prose; matches single tokens (multi-word names are caught token-wise).
|
|
25
|
+
_CANDIDATE = re.compile(r"[A-ZÁÉÍÓÚÑÜ][a-záéíóúñü]{2,}")
|
|
26
|
+
# Sentence-ending punctuation: a capital right after one of these (or at line start)
|
|
27
|
+
# is grammatical, not necessarily a proper noun — excluded (conservative, D3).
|
|
28
|
+
_SENTENCE_END = frozenset(".!?¿¡")
|
|
29
|
+
_MIN_TOKEN_LEN = 3 # shortest name token worth matching as a standalone word.
|
|
30
|
+
# Common capitalized non-names we never treat as a character mention (pinned stop-set).
|
|
31
|
+
_STOP_WORDS = frozenset(
|
|
32
|
+
{
|
|
33
|
+
# Spanish weekdays / months / frequent sentence openers.
|
|
34
|
+
"lunes",
|
|
35
|
+
"martes",
|
|
36
|
+
"miercoles",
|
|
37
|
+
"jueves",
|
|
38
|
+
"viernes",
|
|
39
|
+
"sabado",
|
|
40
|
+
"domingo",
|
|
41
|
+
"enero",
|
|
42
|
+
"febrero",
|
|
43
|
+
"marzo",
|
|
44
|
+
"abril",
|
|
45
|
+
"mayo",
|
|
46
|
+
"junio",
|
|
47
|
+
"julio",
|
|
48
|
+
"agosto",
|
|
49
|
+
"septiembre",
|
|
50
|
+
"octubre",
|
|
51
|
+
"noviembre",
|
|
52
|
+
"diciembre",
|
|
53
|
+
"entonces",
|
|
54
|
+
"cuando",
|
|
55
|
+
"aunque",
|
|
56
|
+
"pero",
|
|
57
|
+
"porque",
|
|
58
|
+
"tambien",
|
|
59
|
+
"despues",
|
|
60
|
+
"antes",
|
|
61
|
+
"ahora",
|
|
62
|
+
"nunca",
|
|
63
|
+
"siempre",
|
|
64
|
+
"quiza",
|
|
65
|
+
"quizas",
|
|
66
|
+
"acaso",
|
|
67
|
+
# English weekdays / months / openers.
|
|
68
|
+
"monday",
|
|
69
|
+
"tuesday",
|
|
70
|
+
"wednesday",
|
|
71
|
+
"thursday",
|
|
72
|
+
"friday",
|
|
73
|
+
"saturday",
|
|
74
|
+
"sunday",
|
|
75
|
+
"january",
|
|
76
|
+
"february",
|
|
77
|
+
"march",
|
|
78
|
+
"april",
|
|
79
|
+
"may",
|
|
80
|
+
"june",
|
|
81
|
+
"july",
|
|
82
|
+
"august",
|
|
83
|
+
"september",
|
|
84
|
+
"october",
|
|
85
|
+
"november",
|
|
86
|
+
"december",
|
|
87
|
+
"then",
|
|
88
|
+
"when",
|
|
89
|
+
"although",
|
|
90
|
+
"because",
|
|
91
|
+
"after",
|
|
92
|
+
"before",
|
|
93
|
+
"however",
|
|
94
|
+
"perhaps",
|
|
95
|
+
}
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class CharacterPresence:
    """Compares the bible's character roster with proper nouns in the manuscript."""

    name: ClassVar[str] = "character_presence"
    severity_default: ClassVar[Severity] = Severity.error

    def validate(self, project: ValidationContext, indexer: Indexer) -> list[Violation]:
        """Return orphan findings followed by unknown-mention findings.

        ``indexer`` is accepted for the validator interface but not used here.
        """
        roster = project.character_names()
        files = project.manuscript_files()
        roster_slugs = _roster_slugs(roster)
        return [
            *self._orphans(roster, files),
            *self._unknown_mentions(files, roster_slugs),
        ]

    def _orphans(
        self,
        roster: tuple[tuple[str, str], ...],
        files: tuple[tuple[str, str], ...],
    ) -> list[Violation]:
        """One ``error`` finding per roster character never mentioned in ``files``.

        Each finding cites the second element of the character's roster entry.
        """
        return [
            Violation(
                validator=self.name,
                severity=Severity.error,
                message=(
                    f"character '{name}' is defined in the bible but never "
                    "mentioned in the manuscript"
                ),
                source=relpath,
                triples=(),
            )
            for name, relpath in roster
            if not _is_mentioned(name, files)
        ]

    def _unknown_mentions(
        self,
        files: tuple[tuple[str, str], ...],
        roster_slugs: frozenset[str],
    ) -> list[Violation]:
        """One ``warning`` finding per distinct candidate that has no roster slug.

        A candidate is skipped when its slug is in the roster, was already
        recorded, is a stop word, or when it opens a sentence. Findings cite
        the first ``relpath:line`` where the name was seen and are emitted in
        slug order.
        """
        # slug → (display name, first "relpath:line"); first occurrence wins.
        first_seen: dict[str, tuple[str, str]] = {}
        for relpath, text in files:
            for lineno, line in enumerate(text.splitlines(), start=1):
                for match in _CANDIDATE.finditer(line):
                    token = match.group(0)
                    slug = make_slug(token)
                    if slug in roster_slugs or slug in first_seen or slug in _STOP_WORDS:
                        continue
                    if _is_sentence_initial(line, match.start()):
                        continue
                    first_seen[slug] = (token, f"{relpath}:{lineno}")

        findings: list[Violation] = []
        for slug in sorted(first_seen):
            token, source = first_seen[slug]
            findings.append(
                Violation(
                    validator=self.name,
                    severity=Severity.warning,
                    message=(
                        f"proper noun '{token}' appears in the manuscript but has no "
                        "bible entry (heuristic — may be a place or organization)"
                    ),
                    source=source,
                    triples=(),
                )
            )
        return findings
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _roster_slugs(roster: tuple[tuple[str, str], ...]) -> frozenset[str]:
    """Collect the slug of each roster name plus the slug of each of its words.

    Including the per-word slugs lets a single name token (a surname, say)
    match the roster. Word slugs that come out empty are left out.
    """
    slugs: set[str] = set()
    for full_name, _relpath in roster:
        slugs.add(make_slug(full_name))
        slugs.update(part for part in map(make_slug, full_name.split()) if part)
    return frozenset(slugs)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _is_mentioned(name: str, files: tuple[tuple[str, str], ...]) -> bool:
    """Tell whether ``name`` occurs as a word in any file's text.

    The full name is tried first, then each whitespace-separated token of at
    least ``_MIN_TOKEN_LEN`` characters. Matching is case-insensitive and
    bounded by ``\\b`` on both sides.
    """
    needles = [name, *(token for token in name.split() if len(token) >= _MIN_TOKEN_LEN)]
    for needle in needles:
        pattern = re.compile(rf"\b{re.escape(needle)}\b", re.IGNORECASE)
        for _relpath, text in files:
            if pattern.search(text):
                return True
    return False
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _is_sentence_initial(line: str, start: int) -> bool:
    """Whether the match at ``start`` opens a sentence (capitalization is grammatical).

    The text before the match is inspected after dropping trailing whitespace
    and any punctuation that merely *opens* what follows: quotation marks,
    dialogue dashes, opening brackets, and Markdown heading / list / quote /
    emphasis markers. The match is sentence-initial when nothing remains, or
    when what remains ends in a ``_SENTENCE_END`` character.

    Without that skipping, a capital following only such a mark -- a dialogue
    line opened by a dash or quote, a Markdown heading -- was not recognized
    as sentence-initial and was reported as an unknown proper noun.

    Args:
        line: The full line of text containing the match.
        start: Index in ``line`` where the match begins.
    """
    wrappers = "\"'“”‘’«»—–-([{#*_>"
    prefix = line[:start].rstrip()
    while prefix and prefix[-1] in wrappers:
        prefix = prefix[:-1].rstrip()
    if not prefix:
        return True
    return prefix[-1] in _SENTENCE_END
|