bookwright-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. bookwright/__init__.py +3 -0
  2. bookwright/__main__.py +6 -0
  3. bookwright/cli.py +19 -0
  4. bookwright/commands/__init__.py +0 -0
  5. bookwright/commands/_envelope.py +36 -0
  6. bookwright/commands/check.py +75 -0
  7. bookwright/commands/graph/__init__.py +23 -0
  8. bookwright/commands/graph/build.py +157 -0
  9. bookwright/commands/graph/envelope.py +26 -0
  10. bookwright/commands/graph/query.py +98 -0
  11. bookwright/commands/init/__init__.py +5 -0
  12. bookwright/commands/init/conflict.py +107 -0
  13. bookwright/commands/init/envelope.py +322 -0
  14. bookwright/commands/init/git.py +96 -0
  15. bookwright/commands/init/main.py +263 -0
  16. bookwright/commands/init/resolve.py +193 -0
  17. bookwright/commands/init/scaffold.py +242 -0
  18. bookwright/commands/init/validate.py +172 -0
  19. bookwright/commands/integration/__init__.py +22 -0
  20. bookwright/commands/integration/use.py +120 -0
  21. bookwright/commands/validate.py +160 -0
  22. bookwright/commands/version.py +35 -0
  23. bookwright/core/__init__.py +35 -0
  24. bookwright/core/_blocks.py +239 -0
  25. bookwright/core/_build.py +154 -0
  26. bookwright/core/_research_block.py +56 -0
  27. bookwright/core/_translate.py +90 -0
  28. bookwright/core/errors.py +127 -0
  29. bookwright/core/iso639_1.py +200 -0
  30. bookwright/core/manifest.py +343 -0
  31. bookwright/errors.py +47 -0
  32. bookwright/golem/__init__.py +71 -0
  33. bookwright/golem/base.py +200 -0
  34. bookwright/golem/errors.py +29 -0
  35. bookwright/golem/modules/__init__.py +1 -0
  36. bookwright/golem/modules/character.py +109 -0
  37. bookwright/golem/modules/event.py +91 -0
  38. bookwright/golem/modules/feature.py +161 -0
  39. bookwright/golem/modules/inference.py +41 -0
  40. bookwright/golem/modules/narrative.py +55 -0
  41. bookwright/golem/modules/provenance.py +197 -0
  42. bookwright/golem/modules/relationship.py +38 -0
  43. bookwright/golem/modules/setting.py +30 -0
  44. bookwright/golem/namespaces.py +332 -0
  45. bookwright/golem/serialize.py +25 -0
  46. bookwright/golem/slug.py +22 -0
  47. bookwright/indexers/__init__.py +47 -0
  48. bookwright/indexers/base.py +55 -0
  49. bookwright/indexers/errors.py +80 -0
  50. bookwright/indexers/rdflib_indexer.py +89 -0
  51. bookwright/integrations/__init__.py +155 -0
  52. bookwright/integrations/base.py +117 -0
  53. bookwright/integrations/claude/__init__.py +29 -0
  54. bookwright/integrations/constants.py +38 -0
  55. bookwright/integrations/descriptions.py +48 -0
  56. bookwright/integrations/errors.py +170 -0
  57. bookwright/integrations/generic/__init__.py +56 -0
  58. bookwright/integrations/lint.py +160 -0
  59. bookwright/integrations/materialize.py +202 -0
  60. bookwright/integrations/options.py +203 -0
  61. bookwright/io/__init__.py +1 -0
  62. bookwright/io/bible.py +500 -0
  63. bookwright/io/errors.py +98 -0
  64. bookwright/io/frontmatter.py +61 -0
  65. bookwright/io/fs.py +226 -0
  66. bookwright/io/manuscript.py +15 -0
  67. bookwright/io/project.py +21 -0
  68. bookwright/io/report.py +107 -0
  69. bookwright/io/research.py +427 -0
  70. bookwright/resources/__init__.py +1 -0
  71. bookwright/resources/commands/bookwright-analyze.md +66 -0
  72. bookwright/resources/commands/bookwright-bible.md +96 -0
  73. bookwright/resources/commands/bookwright-checklist.md +67 -0
  74. bookwright/resources/commands/bookwright-clarify.md +65 -0
  75. bookwright/resources/commands/bookwright-constitution.md +79 -0
  76. bookwright/resources/commands/bookwright-continuity.md +70 -0
  77. bookwright/resources/commands/bookwright-draft.md +74 -0
  78. bookwright/resources/commands/bookwright-outline.md +71 -0
  79. bookwright/resources/commands/bookwright-research.md +107 -0
  80. bookwright/resources/commands/bookwright-scenes.md +66 -0
  81. bookwright/resources/commands/bookwright-synopsis.md +67 -0
  82. bookwright/resources/commands/bookwright-verify.md +136 -0
  83. bookwright/resources/commands/references/golem-character.md +65 -0
  84. bookwright/resources/commands/references/golem-events-timeline.md +56 -0
  85. bookwright/resources/commands/references/golem-relationships.md +53 -0
  86. bookwright/resources/commands/references/greimas-actants.md +57 -0
  87. bookwright/resources/commands/references/pending-protocol.md +72 -0
  88. bookwright/resources/commands/references/propp-functions.md +54 -0
  89. bookwright/resources/commands/references/research-format.md +136 -0
  90. bookwright/resources/project/.bookwright/cache/.gitkeep +0 -0
  91. bookwright/resources/project/.bookwright/schema/.gitkeep +0 -0
  92. bookwright/resources/project/.bookwright/templates/.gitkeep +0 -0
  93. bookwright/resources/project/.gitignore +23 -0
  94. bookwright/resources/project/README.md.j2 +40 -0
  95. bookwright/resources/project/__init__.py +6 -0
  96. bookwright/resources/project/bible/characters/.gitkeep +0 -0
  97. bookwright/resources/project/bible/constitution.md.j2 +74 -0
  98. bookwright/resources/project/bible/glossary.md +36 -0
  99. bookwright/resources/project/bible/locations/.gitkeep +0 -0
  100. bookwright/resources/project/bible/pov-structure.md +43 -0
  101. bookwright/resources/project/bible/relationships.md +36 -0
  102. bookwright/resources/project/bible/research/_index.md +28 -0
  103. bookwright/resources/project/bible/research/sources.md +23 -0
  104. bookwright/resources/project/bible/settings/.gitkeep +0 -0
  105. bookwright/resources/project/bible/subplots.md +35 -0
  106. bookwright/resources/project/bible/themes.md +36 -0
  107. bookwright/resources/project/bible/timeline.md +38 -0
  108. bookwright/resources/project/manuscript/.gitkeep +0 -0
  109. bookwright/resources/project/outline/arcs.md +34 -0
  110. bookwright/resources/project/outline/scenes.md +31 -0
  111. bookwright/resources/project/outline/structure.md +35 -0
  112. bookwright/resources/project/outline/synopsis.md +25 -0
  113. bookwright/resources/schemas/__init__.py +19 -0
  114. bookwright/resources/schemas/golem-1.1/VERSION +1 -0
  115. bookwright/resources/schemas/golem-1.1/golem.ttl +1947 -0
  116. bookwright/resources/schemas/golem-1.1/version.json +8 -0
  117. bookwright/resources/templates/__init__.py +1 -0
  118. bookwright/resources/templates/bible/character.md.tmpl +63 -0
  119. bookwright/resources/templates/bible/location.md.tmpl +37 -0
  120. bookwright/resources/templates/bible/research/_index.md.tmpl +25 -0
  121. bookwright/resources/templates/bible/research/sources.md.tmpl +21 -0
  122. bookwright/resources/templates/bible/research/tema.md.tmpl +37 -0
  123. bookwright/resources/templates/bible/setting.md.tmpl +38 -0
  124. bookwright/resources/templates/manifest.template.toml +79 -0
  125. bookwright/resources/templates/manuscript/chapter.md.tmpl +36 -0
  126. bookwright/resources/templates/scenes/scene.md.tmpl +37 -0
  127. bookwright/resources/vocabularies/__init__.py +6 -0
  128. bookwright/resources/vocabularies/greimas.ttl +4 -0
  129. bookwright/resources/vocabularies/propp.ttl +4 -0
  130. bookwright/resources/vocabularies/sources.ttl +82 -0
  131. bookwright/validation/__init__.py +33 -0
  132. bookwright/validation/anchor_queries.py +223 -0
  133. bookwright/validation/base.py +233 -0
  134. bookwright/validation/queries.py +197 -0
  135. bookwright/validation/registry.py +185 -0
  136. bookwright/validation/report.py +106 -0
  137. bookwright/validation/runner.py +65 -0
  138. bookwright/validation/validators/__init__.py +9 -0
  139. bookwright/validation/validators/character_presence.py +202 -0
  140. bookwright/validation/validators/factual_anchor.py +291 -0
  141. bookwright/validation/validators/focalization.py +152 -0
  142. bookwright/validation/validators/setting_continuity.py +100 -0
  143. bookwright/validation/validators/temporal.py +277 -0
  144. bookwright_cli-0.2.0.dist-info/METADATA +218 -0
  145. bookwright_cli-0.2.0.dist-info/RECORD +149 -0
  146. bookwright_cli-0.2.0.dist-info/WHEEL +4 -0
  147. bookwright_cli-0.2.0.dist-info/entry_points.txt +2 -0
  148. bookwright_cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  149. bookwright_cli-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,185 @@
1
+ """Validator discovery and configuration resolution (contracts/validator-protocol.md).
2
+
3
+ Built-ins are auto-discovered by iterating the ``bookwright.validation.validators``
4
+ package (``pkgutil``); customs are loaded from sorted ``*.py`` under
5
+ ``<root>/.bookwright/validators/`` (``importlib``). No hand-registration, no
6
+ ``entry_points`` (research D2). Discovery is deterministic: modules sorted by name,
7
+ objects within a module sorted by validator name (D8).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import importlib
13
+ import importlib.util
14
+ import pkgutil
15
+ from pathlib import Path
16
+ from types import ModuleType
17
+ from typing import TYPE_CHECKING
18
+
19
+ from bookwright.validation import validators as _validators_pkg
20
+ from bookwright.validation.base import UnknownValidatorError, Validator, ValidatorError
21
+
22
+ if TYPE_CHECKING:
23
+ from bookwright.core.manifest import ValidatorsBlock
24
+
25
+ __all__ = ["discover_validators", "resolve_active"]
26
+
27
+
28
def _looks_like_validator_class(value: object) -> bool:
    """Report whether ``value`` is a concrete class shaped like a validator.

    Non-classes and classes flagged with a truthy ``_is_protocol`` attribute
    are rejected. Any other class qualifies only when it exposes all of
    ``name``, ``severity_default`` and ``validate``.
    """
    if not isinstance(value, type):
        return False
    if getattr(value, "_is_protocol", False):
        return False
    for required in ("name", "severity_default", "validate"):
        if not hasattr(value, required):
            return False
    return True
33
+
34
+
35
def _as_validator(value: object) -> Validator | None:
    """Turn a module-level object into a validator instance, or ``None``.

    * A validator-shaped class is instantiated with no arguments; the result is
      returned only when it satisfies ``Validator``.
    * A constructor that raises makes the class count as "not a validator"
      rather than crashing discovery.
    * A non-class object that already satisfies ``Validator`` is returned as-is.
    * Everything else yields ``None``.
    """
    if not _looks_like_validator_class(value):
        # Not an instantiable validator class: accept only a ready-made instance.
        already_built = isinstance(value, Validator) and not isinstance(value, type)
        return value if already_built else None

    try:
        built = value()  # type: ignore[operator]
    except Exception:  # a broken ctor is "no conforming validator", not a crash
        return None
    if isinstance(built, Validator):
        return built
    return None
51
+
52
+
53
def _collect_from_module(module: ModuleType) -> list[Validator]:
    """Return the module's public conforming validators, ordered by ``name``.

    Attributes whose name starts with ``_`` are ignored; every other
    module-level object is passed through ``_as_validator`` and kept when that
    returns an instance.
    """
    normalized = (
        _as_validator(obj)
        for attr_name, obj in vars(module).items()
        if not attr_name.startswith("_")
    )
    validators = [candidate for candidate in normalized if candidate is not None]
    validators.sort(key=lambda v: v.name)
    return validators
63
+
64
+
65
def _discover_builtins() -> tuple[dict[str, Validator], list[ValidatorError]]:
    """Import every module of the built-in validators package and index its validators.

    Modules are visited in name order. The first validator seen under a given
    ``name`` wins; any later one with the same name is reported as a
    ``ValidatorError`` in the ``"load"`` phase instead of replacing it.

    Returns ``(validators_by_name, load_errors)``.
    """
    registry: dict[str, Validator] = {}
    problems: list[ValidatorError] = []
    package_name = _validators_pkg.__name__
    module_names = sorted(info.name for info in pkgutil.iter_modules(_validators_pkg.__path__))
    for module_name in module_names:
        module = importlib.import_module(f"{package_name}.{module_name}")
        for validator in _collect_from_module(module):
            if validator.name not in registry:
                registry[validator.name] = validator
                continue
            problems.append(
                ValidatorError(
                    validator.name,
                    f"duplicate built-in validator name '{validator.name}'",
                    "load",
                )
            )
    return registry, problems
83
+
84
+
85
def _load_custom_module(path: Path, mod_name: str) -> ModuleType:
    """Import the source file at ``path`` as a module named ``mod_name``.

    The module is registered in ``sys.modules`` *before* its code runs, as the
    importlib "importing a source file directly" recipe prescribes. Code that
    looks its own module up by name while it is being defined (dataclasses with
    postponed annotations, ``typing.get_type_hints``, pickling) depends on that
    entry being present.

    If execution fails, the ``sys.modules`` entry is rolled back to whatever was
    there before, so a broken file leaves no half-initialized module behind.

    Raises:
        ImportError: no import spec or loader can be built for ``path``.
        Exception: whatever the module's own top-level code raises.
    """
    import sys  # local import: keeps this block independent of the file header

    spec = importlib.util.spec_from_file_location(mod_name, path)
    if spec is None or spec.loader is None:  # pragma: no cover — defensive
        raise ImportError(f"cannot load module spec for {path}")
    module = importlib.util.module_from_spec(spec)

    missing = object()
    previous = sys.modules.get(mod_name, missing)
    sys.modules[mod_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Undo the registration so callers never observe a partially run module.
        if previous is missing:
            sys.modules.pop(mod_name, None)
        else:
            sys.modules[mod_name] = previous
        raise
    return module
92
+
93
+
94
def _discover_customs(
    custom_dir: Path, builtins: dict[str, Validator]
) -> tuple[dict[str, Validator], list[ValidatorError]]:
    """Load project validators from the ``*.py`` files directly under ``custom_dir``.

    Files are processed in sorted order. Each problem is recorded as a
    ``ValidatorError`` in the ``"load"`` phase, attributed to the file's path
    relative to ``custom_dir``'s grandparent, and the scan continues:

    * the file fails to import;
    * the file declares no conforming validator;
    * a validator's name matches a built-in;
    * a validator's name was already taken by an earlier custom.

    A missing ``custom_dir`` yields two empty results.

    Returns ``(customs_by_name, load_errors)``.
    """
    accepted: dict[str, Validator] = {}
    problems: list[ValidatorError] = []
    if not custom_dir.is_dir():
        return accepted, problems

    def reject(where: str, why: str) -> None:
        problems.append(ValidatorError(where, why, "load"))

    project_root = custom_dir.parent.parent  # <root>/.bookwright/validators
    for position, source_file in enumerate(sorted(custom_dir.glob("*.py"))):
        try:
            shown_path = source_file.relative_to(project_root).as_posix()
        except ValueError:  # pragma: no cover — custom_dir always under root
            shown_path = source_file.as_posix()

        module_name = f"_bookwright_custom_{position}_{source_file.stem}"
        try:
            module = _load_custom_module(source_file, module_name)
        except Exception as exc:  # any import failure is a skip (FR-005), never a crash
            reject(shown_path, f"{type(exc).__name__}: {exc}")
            continue

        validators = _collect_from_module(module)
        if not validators:
            reject(shown_path, "no conforming validator found")
            continue

        for validator in validators:
            name = validator.name
            if name in builtins:
                reject(
                    shown_path,
                    f"custom validator name '{name}' collides with a built-in; rename it",
                )
            elif name in accepted:
                reject(shown_path, f"duplicate custom validator name '{name}'")
            else:
                accepted[name] = validator
    return accepted, problems
134
+
135
+
136
def discover_validators(
    custom_dir: Path,
) -> tuple[dict[str, Validator], dict[str, Validator], list[ValidatorError]]:
    """Discover built-in and custom validators (FR-004/005).

    Built-ins are discovered first and passed to the custom scan, which drops
    any custom whose name matches a built-in and records a
    ``ValidatorError(phase="load")`` for it. The two returned dicts therefore
    never share a name, and project code cannot silently shadow a built-in
    check (FR-019, D2). Malformed custom files are skipped the same way; the
    run continues.

    Returns ``(builtins, customs, load_errors)``, with built-in load errors
    listed ahead of custom ones.
    """
    builtins, builtin_problems = _discover_builtins()
    customs, custom_problems = _discover_customs(custom_dir, builtins)
    return builtins, customs, [*builtin_problems, *custom_problems]
151
+
152
+
153
def resolve_active(
    builtins: dict[str, Validator],
    customs: dict[str, Validator],
    cfg: ValidatorsBlock,
) -> list[Validator]:
    """Apply the ``[validators]`` config to the discovered set (research D7).

    1. Any ``enabled`` / ``disabled`` / ``custom`` name absent from the
       discovered ``builtins + customs`` -> :class:`UnknownValidatorError`
       (FR-007). This check runs first.
    2. A non-empty ``custom`` allow-lists the discovered customs to those names.
       A name in ``custom`` that belongs to a built-in selects no custom; the
       built-in stays a candidate as usual. (Indexing ``customs`` with such a
       name used to raise ``KeyError``.)
    3. ``candidates = builtins + selected customs`` minus ``disabled``.
    4. A non-empty ``enabled`` intersects ``candidates`` with those names.

    Returns the active validators sorted by ``name`` (FR-019, D8).

    Raises:
        UnknownValidatorError: a configured name matches no discovered validator.
    """
    discovered = {**builtins, **customs}

    unknown = tuple(
        sorted(
            name for name in (*cfg.enabled, *cfg.disabled, *cfg.custom) if name not in discovered
        )
    )
    if unknown:
        raise UnknownValidatorError(unknown)

    if cfg.custom:
        # Every name here is known (checked above) but may be a built-in's name;
        # only real customs can be selected from ``customs``.
        selected_customs = {name: customs[name] for name in cfg.custom if name in customs}
    else:
        selected_customs = dict(customs)

    candidates = {**builtins, **selected_customs}
    disabled = set(cfg.disabled)
    active = {name: v for name, v in candidates.items() if name not in disabled}
    if cfg.enabled:
        enabled = set(cfg.enabled)
        active = {name: v for name, v in active.items() if name in enabled}
    return [active[name] for name in sorted(active)]
@@ -0,0 +1,106 @@
1
+ """``ValidationReport`` — aggregation, the CI gate, filtering, and rendering.
2
+
3
+ The gate (:attr:`failed`) is computed from **all** violations before any filter, so a
4
+ display ``--scope`` / ``--severity`` can never hide an error from CI (FR-013). The
5
+ emitted order is fixed by the runner's total-order sort; ``reported`` only removes
6
+ entries, never reorders — so the human report and the JSON ``violations[]`` are
7
+ byte-identical across runs (SC-003).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ from bookwright.validation.base import Severity, ValidatorError, Violation, split_source
16
+
17
+ if TYPE_CHECKING:
18
+ from rich.console import Console
19
+
20
+ __all__ = ["ScopeFilter", "ValidationReport"]
21
+
22
+
23
@dataclass(frozen=True)
class ScopeFilter:
    """Restricts reported findings to one file or directory of the project."""

    rel: str  # project-relative posix path of the scope
    is_dir: bool

    def matches(self, source: str | None) -> bool:
        """Tell whether ``source`` lies inside this scope.

        A ``None`` source never matches (FR-009). Otherwise the path is the
        first element returned by ``split_source``, falling back to the whole
        ``source`` when that element is empty. It matches when it equals
        ``rel`` or, for a directory scope, when it starts with ``rel`` followed
        by ``/``.
        """
        if source is None:
            return False
        file_part = split_source(source)[0]
        path = file_part if file_part else source
        if path == self.rel:
            return True
        if self.is_dir:
            return path.startswith(f"{self.rel}/")
        return False
38
+
39
+
40
@dataclass
class ValidationReport:
    """One complete run: every pre-filter violation, the run/load errors, the run names."""

    violations: tuple[Violation, ...]
    errors: tuple[ValidatorError, ...]
    ran: tuple[str, ...]

    @property
    def failed(self) -> bool:
        """The CI gate: true when any violation has ``error`` severity.

        Computed over all violations, never over a filtered view (FR-013).
        """
        for violation in self.violations:
            if violation.severity == Severity.error:
                return True
        return False

    def reported(self, *, scope: ScopeFilter | None, severity: Severity | None) -> list[Violation]:
        """Return the violations passing ``scope`` and the ``severity`` threshold.

        The scope is tested first, then the threshold; a ``None`` filter lets
        everything through. Entries are only removed, never reordered (D8).
        """

        def keep(violation: Violation) -> bool:
            if scope is not None and not scope.matches(violation.source):
                return False
            return severity is None or bool(violation.severity.at_least(severity))

        return [violation for violation in self.violations if keep(violation)]

    def to_json(self, *, scope: ScopeFilter | None, severity: Severity | None) -> dict[str, Any]:
        """Build the Principle-IX envelope (data-model / contracts/cli-validate.md).

        ``status``, ``violations`` and ``summary.reported`` reflect the filtered
        view. ``failed``, ``summary.total`` and ``summary.by_severity`` are
        computed over all violations.
        """
        shown = self.reported(scope=scope, severity=severity)
        envelope: dict[str, Any] = {"status": "violations" if shown else "ok"}
        envelope["failed"] = self.failed
        envelope["violations"] = [violation.to_json() for violation in shown]
        envelope["errors"] = [error.to_json() for error in self.errors]
        envelope["summary"] = {
            "ran": list(self.ran),
            "total": len(self.violations),
            "reported": len(shown),
            "by_severity": self._by_severity(),
        }
        return envelope

    def _by_severity(self) -> dict[str, int]:
        """Count all violations per severity; every level is present, ``0`` if unused."""
        tally = dict.fromkeys((level.value for level in Severity), 0)
        for violation in self.violations:
            tally[violation.severity.value] += 1
        return tally

    def render(
        self, console: Console, *, scope: ScopeFilter | None, severity: Severity | None
    ) -> None:
        """Print the human report, grouped by validator, to ``console`` (FR-012).

        Validators appear in sorted order and their findings keep the reported
        order. Run/load errors follow in their own section. When there is
        nothing to show at all, a single "no violations found" line is printed.
        """
        shown = self.reported(scope=scope, severity=severity)
        if not (shown or self.errors):
            console.print("no violations found", markup=False)
            return

        grouped: dict[str, list[Violation]] = {}
        for violation in shown:
            grouped.setdefault(violation.validator, []).append(violation)

        for validator in sorted(grouped):
            console.print(f"{validator}:", markup=False)
            for violation in grouped[validator]:
                location = violation.source or "(no specific location)"
                console.print(
                    f" {violation.severity.value}: {violation.message} — {location}",
                    markup=False,
                )

        if self.errors:
            console.print("validator errors:", markup=False)
            for error in self.errors:
                console.print(f" {error.phase}: {error.validator}: {error.message}", markup=False)
@@ -0,0 +1,65 @@
1
+ """Run the active validators with per-validator isolation (FR-014, D8/D9).
2
+
3
+ A validator that raises is caught and recorded as a ``ValidatorError(phase="run")``
4
+ without aborting the others (FR-014). Identical findings are deduped and the
5
+ combined set is sorted by an explicit total-order key so the emitted list is
6
+ byte-identical across runs and platforms (SC-003), not merely "stably sorted".
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from bookwright.indexers import Indexer
12
+ from bookwright.validation.base import (
13
+ _RANK,
14
+ ValidationContext,
15
+ Validator,
16
+ ValidatorError,
17
+ Violation,
18
+ )
19
+
20
+ __all__ = ["RunResult", "run_validators", "sort_key"]
21
+
22
+ RunResult = tuple[list[Violation], list[ValidatorError], list[str]]
23
+ """``(violations, errors, ran)`` — deduped/sorted findings, run errors, run names."""
24
+
25
+
26
def sort_key(violation: Violation) -> tuple[str, int, str, str, tuple[tuple[str, str, str], ...]]:
    """Build the explicit total-order key for one violation (D8).

    Components, in comparison order: validator name, negated severity rank
    from ``_RANK``, source (empty string when there is none), message, triples.
    """
    validator = violation.validator
    negated_rank = -_RANK[violation.severity]
    source = violation.source or ""
    return (validator, negated_rank, source, violation.message, violation.triples)
35
+
36
+
37
def run_validators(
    active: list[Validator], project: ValidationContext, indexer: Indexer
) -> RunResult:
    """Run every validator in ``active``, isolating failures (FR-014).

    Each validator's result is fully materialized *inside* the guarded call.
    A validator that raises, returns something non-iterable (for example
    ``None``), or returns a lazy iterable that fails part-way contributes a
    ``ValidatorError(phase="run")`` and no findings; the rest still run.

    Findings identical to one already collected are dropped (D8), and the
    combined list is sorted with :func:`sort_key`.

    Returns ``(violations, errors, ran)``, where ``ran`` holds the name of
    every invoked validator, sorted, including those that failed.
    """
    seen: set[Violation] = set()
    violations: list[Violation] = []
    errors: list[ValidatorError] = []
    ran: list[str] = []

    for validator in active:
        ran.append(validator.name)
        try:
            # list() forces generators and rejects non-iterables here, so those
            # failures are attributed to this validator instead of aborting the run.
            found = list(validator.validate(project, indexer))
        except Exception as exc:  # per-validator isolation (FR-014) — never abort the run
            errors.append(ValidatorError(validator.name, f"{type(exc).__name__}: {exc}", "run"))
            continue
        for violation in found:
            if violation not in seen:
                seen.add(violation)
                violations.append(violation)

    violations.sort(key=sort_key)
    return violations, errors, sorted(ran)
@@ -0,0 +1,9 @@
1
+ """Built-in validator modules.
2
+
3
+ This package is the auto-discovery root: :func:`bookwright.validation.registry.
4
+ discover_validators` iterates its modules with ``pkgutil.iter_modules`` and
5
+ collects every module-level object satisfying the ``Validator`` protocol. Adding
6
+ a built-in is dropping a new module here — no hand-registration (FR-004).
7
+ """
8
+
9
+ from __future__ import annotations
@@ -0,0 +1,202 @@
1
+ """``character_presence`` — bible roster vs. manuscript mentions (FR-016, research D3).
2
+
3
+ Two directions, split by severity so a heuristic false positive can never fail CI:
4
+
5
+ * a bible character **never** mentioned in the manuscript → orphan finding at
6
+ **error** (deterministic — the name and the prose are both authored),
7
+ * a proper-noun candidate in the prose with **no** bible entry → unknown-mention at
8
+ **warning** (a pinned, conservative heuristic — no NER).
9
+
10
+ Unknown mentions are collapsed per distinct name (one finding citing the first
11
+ occurrence), never multiplied per mention (edge case).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from typing import ClassVar
18
+
19
+ from bookwright.golem.slug import make_slug
20
+ from bookwright.indexers import Indexer
21
+ from bookwright.validation.base import Severity, ValidationContext, Violation
22
+
23
# Pinned proper-noun candidate (D3): one capital letter followed by at least two
# lowercase letters. The character classes include the Spanish accented letters;
# only single tokens match, so a multi-word name is seen one token at a time.
_CANDIDATE = re.compile(r"[A-ZÁÉÍÓÚÑÜ][a-záéíóúñü]{2,}")

# Sentence-ending punctuation: a capital right after one of these (or at line
# start) is grammatical, not necessarily a proper noun — excluded (conservative, D3).
_SENTENCE_END = frozenset(".!?¿¡")

# Shortest name token worth matching as a standalone word.
_MIN_TOKEN_LEN = 3

# Common capitalized non-names never treated as a character mention (pinned stop-set).
_STOP_WORDS = frozenset(
    (
        # Spanish weekdays.
        "lunes", "martes", "miercoles", "jueves", "viernes", "sabado", "domingo",
        # Spanish months.
        "enero", "febrero", "marzo", "abril", "mayo", "junio",
        "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre",
        # Spanish frequent sentence openers.
        "entonces", "cuando", "aunque", "pero", "porque", "tambien", "despues",
        "antes", "ahora", "nunca", "siempre", "quiza", "quizas", "acaso",
        # English weekdays.
        "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday",
        # English months.
        "january", "february", "march", "april", "may", "june",
        "july", "august", "september", "october", "november", "december",
        # English frequent sentence openers.
        "then", "when", "although", "because", "after", "before", "however", "perhaps",
    )
)
97
+
98
+
99
class CharacterPresence:
    """Cross-checks the bible character roster against manuscript proper nouns."""

    name: ClassVar[str] = "character_presence"
    severity_default: ClassVar[Severity] = Severity.error

    def validate(self, project: ValidationContext, indexer: Indexer) -> list[Violation]:
        """Return orphan-character errors followed by unknown-mention warnings.

        The roster comes from ``project.character_names()`` and the prose from
        ``project.manuscript_files()``; ``indexer`` is not used by this check.
        """
        roster = project.character_names()
        files = project.manuscript_files()
        roster_slugs = _roster_slugs(roster)
        return [
            *self._orphans(roster, files),
            *self._unknown_mentions(files, roster_slugs),
        ]

    def _orphans(
        self,
        roster: tuple[tuple[str, str], ...],
        files: tuple[tuple[str, str], ...],
    ) -> list[Violation]:
        """One ``error`` per roster character that no manuscript file mentions.

        Each finding cites the roster entry's own path as its source.
        """
        return [
            Violation(
                validator=self.name,
                severity=Severity.error,
                message=(
                    f"character '{name}' is defined in the bible but never "
                    "mentioned in the manuscript"
                ),
                source=relpath,
                triples=(),
            )
            for name, relpath in roster
            if not _is_mentioned(name, files)
        ]

    def _unknown_mentions(
        self,
        files: tuple[tuple[str, str], ...],
        roster_slugs: frozenset[str],
    ) -> list[Violation]:
        """One ``warning`` per distinct proper-noun candidate absent from the roster.

        A candidate is skipped when its slug is in the roster, was already
        recorded, is a stop word, or the token opens a sentence. The first
        qualifying occurrence supplies the ``relpath:line`` source; findings are
        emitted in slug order.
        """
        # slug → (display name, first "relpath:line"); first occurrence wins.
        first_seen: dict[str, tuple[str, str]] = {}
        for relpath, text in files:
            for lineno, line in enumerate(text.splitlines(), start=1):
                for match in _CANDIDATE.finditer(line):
                    token = match.group(0)
                    slug = make_slug(token)
                    if slug in roster_slugs or slug in first_seen or slug in _STOP_WORDS:
                        continue
                    if _is_sentence_initial(line, match.start()):
                        continue
                    first_seen[slug] = (token, f"{relpath}:{lineno}")

        findings: list[Violation] = []
        for slug in sorted(first_seen):
            token, source = first_seen[slug]
            findings.append(
                Violation(
                    validator=self.name,
                    severity=Severity.warning,
                    message=(
                        f"proper noun '{token}' appears in the manuscript but has no "
                        "bible entry (heuristic — may be a place or organization)"
                    ),
                    source=source,
                    triples=(),
                )
            )
        return findings
172
+
173
+
174
def _roster_slugs(roster: tuple[tuple[str, str], ...]) -> frozenset[str]:
    """Collect the slug of each roster name plus the slug of each of its tokens.

    Token slugs let a lone first name or surname in the prose count as known.
    Tokens whose slug is empty are left out.
    """
    collected: set[str] = set()
    for full_name, _relpath in roster:
        collected.add(make_slug(full_name))
        collected.update(slug for slug in map(make_slug, full_name.split()) if slug)
    return frozenset(collected)
184
+
185
+
186
def _is_mentioned(name: str, files: tuple[tuple[str, str], ...]) -> bool:
    """Tell whether ``name`` occurs as a whole word in any manuscript text.

    The full phrase is tried first, then each whitespace-separated token of at
    least ``_MIN_TOKEN_LEN`` characters. Matching is case-insensitive and
    anchored on word boundaries.
    """
    needles = [name]
    needles.extend(token for token in name.split() if len(token) >= _MIN_TOKEN_LEN)
    patterns = [re.compile(rf"\b{re.escape(needle)}\b", re.IGNORECASE) for needle in needles]
    for pattern in patterns:
        for _relpath, text in files:
            if pattern.search(text):
                return True
    return False
195
+
196
+
197
def _is_sentence_initial(line: str, start: int) -> bool:
    """Tell whether the token at ``start`` opens a sentence on ``line``.

    True when only whitespace precedes the token, or when the last
    non-whitespace character before it is in ``_SENTENCE_END``. In both cases
    the capital letter is grammatical rather than evidence of a proper noun.
    """
    before = line[:start].rstrip()
    return not before or before[-1] in _SENTENCE_END