bookwright-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. bookwright/__init__.py +3 -0
  2. bookwright/__main__.py +6 -0
  3. bookwright/cli.py +19 -0
  4. bookwright/commands/__init__.py +0 -0
  5. bookwright/commands/_envelope.py +36 -0
  6. bookwright/commands/check.py +75 -0
  7. bookwright/commands/graph/__init__.py +23 -0
  8. bookwright/commands/graph/build.py +157 -0
  9. bookwright/commands/graph/envelope.py +26 -0
  10. bookwright/commands/graph/query.py +98 -0
  11. bookwright/commands/init/__init__.py +5 -0
  12. bookwright/commands/init/conflict.py +107 -0
  13. bookwright/commands/init/envelope.py +322 -0
  14. bookwright/commands/init/git.py +96 -0
  15. bookwright/commands/init/main.py +263 -0
  16. bookwright/commands/init/resolve.py +193 -0
  17. bookwright/commands/init/scaffold.py +242 -0
  18. bookwright/commands/init/validate.py +172 -0
  19. bookwright/commands/integration/__init__.py +22 -0
  20. bookwright/commands/integration/use.py +120 -0
  21. bookwright/commands/validate.py +160 -0
  22. bookwright/commands/version.py +35 -0
  23. bookwright/core/__init__.py +35 -0
  24. bookwright/core/_blocks.py +239 -0
  25. bookwright/core/_build.py +154 -0
  26. bookwright/core/_research_block.py +56 -0
  27. bookwright/core/_translate.py +90 -0
  28. bookwright/core/errors.py +127 -0
  29. bookwright/core/iso639_1.py +200 -0
  30. bookwright/core/manifest.py +343 -0
  31. bookwright/errors.py +47 -0
  32. bookwright/golem/__init__.py +71 -0
  33. bookwright/golem/base.py +200 -0
  34. bookwright/golem/errors.py +29 -0
  35. bookwright/golem/modules/__init__.py +1 -0
  36. bookwright/golem/modules/character.py +109 -0
  37. bookwright/golem/modules/event.py +91 -0
  38. bookwright/golem/modules/feature.py +161 -0
  39. bookwright/golem/modules/inference.py +41 -0
  40. bookwright/golem/modules/narrative.py +55 -0
  41. bookwright/golem/modules/provenance.py +197 -0
  42. bookwright/golem/modules/relationship.py +38 -0
  43. bookwright/golem/modules/setting.py +30 -0
  44. bookwright/golem/namespaces.py +332 -0
  45. bookwright/golem/serialize.py +25 -0
  46. bookwright/golem/slug.py +22 -0
  47. bookwright/indexers/__init__.py +47 -0
  48. bookwright/indexers/base.py +55 -0
  49. bookwright/indexers/errors.py +80 -0
  50. bookwright/indexers/rdflib_indexer.py +89 -0
  51. bookwright/integrations/__init__.py +155 -0
  52. bookwright/integrations/base.py +117 -0
  53. bookwright/integrations/claude/__init__.py +29 -0
  54. bookwright/integrations/constants.py +38 -0
  55. bookwright/integrations/descriptions.py +48 -0
  56. bookwright/integrations/errors.py +170 -0
  57. bookwright/integrations/generic/__init__.py +56 -0
  58. bookwright/integrations/lint.py +160 -0
  59. bookwright/integrations/materialize.py +202 -0
  60. bookwright/integrations/options.py +203 -0
  61. bookwright/io/__init__.py +1 -0
  62. bookwright/io/bible.py +500 -0
  63. bookwright/io/errors.py +98 -0
  64. bookwright/io/frontmatter.py +61 -0
  65. bookwright/io/fs.py +226 -0
  66. bookwright/io/manuscript.py +15 -0
  67. bookwright/io/project.py +21 -0
  68. bookwright/io/report.py +107 -0
  69. bookwright/io/research.py +427 -0
  70. bookwright/resources/__init__.py +1 -0
  71. bookwright/resources/commands/bookwright-analyze.md +66 -0
  72. bookwright/resources/commands/bookwright-bible.md +96 -0
  73. bookwright/resources/commands/bookwright-checklist.md +67 -0
  74. bookwright/resources/commands/bookwright-clarify.md +65 -0
  75. bookwright/resources/commands/bookwright-constitution.md +79 -0
  76. bookwright/resources/commands/bookwright-continuity.md +70 -0
  77. bookwright/resources/commands/bookwright-draft.md +74 -0
  78. bookwright/resources/commands/bookwright-outline.md +71 -0
  79. bookwright/resources/commands/bookwright-research.md +107 -0
  80. bookwright/resources/commands/bookwright-scenes.md +66 -0
  81. bookwright/resources/commands/bookwright-synopsis.md +67 -0
  82. bookwright/resources/commands/bookwright-verify.md +136 -0
  83. bookwright/resources/commands/references/golem-character.md +65 -0
  84. bookwright/resources/commands/references/golem-events-timeline.md +56 -0
  85. bookwright/resources/commands/references/golem-relationships.md +53 -0
  86. bookwright/resources/commands/references/greimas-actants.md +57 -0
  87. bookwright/resources/commands/references/pending-protocol.md +72 -0
  88. bookwright/resources/commands/references/propp-functions.md +54 -0
  89. bookwright/resources/commands/references/research-format.md +136 -0
  90. bookwright/resources/project/.bookwright/cache/.gitkeep +0 -0
  91. bookwright/resources/project/.bookwright/schema/.gitkeep +0 -0
  92. bookwright/resources/project/.bookwright/templates/.gitkeep +0 -0
  93. bookwright/resources/project/.gitignore +23 -0
  94. bookwright/resources/project/README.md.j2 +40 -0
  95. bookwright/resources/project/__init__.py +6 -0
  96. bookwright/resources/project/bible/characters/.gitkeep +0 -0
  97. bookwright/resources/project/bible/constitution.md.j2 +74 -0
  98. bookwright/resources/project/bible/glossary.md +36 -0
  99. bookwright/resources/project/bible/locations/.gitkeep +0 -0
  100. bookwright/resources/project/bible/pov-structure.md +43 -0
  101. bookwright/resources/project/bible/relationships.md +36 -0
  102. bookwright/resources/project/bible/research/_index.md +28 -0
  103. bookwright/resources/project/bible/research/sources.md +23 -0
  104. bookwright/resources/project/bible/settings/.gitkeep +0 -0
  105. bookwright/resources/project/bible/subplots.md +35 -0
  106. bookwright/resources/project/bible/themes.md +36 -0
  107. bookwright/resources/project/bible/timeline.md +38 -0
  108. bookwright/resources/project/manuscript/.gitkeep +0 -0
  109. bookwright/resources/project/outline/arcs.md +34 -0
  110. bookwright/resources/project/outline/scenes.md +31 -0
  111. bookwright/resources/project/outline/structure.md +35 -0
  112. bookwright/resources/project/outline/synopsis.md +25 -0
  113. bookwright/resources/schemas/__init__.py +19 -0
  114. bookwright/resources/schemas/golem-1.1/VERSION +1 -0
  115. bookwright/resources/schemas/golem-1.1/golem.ttl +1947 -0
  116. bookwright/resources/schemas/golem-1.1/version.json +8 -0
  117. bookwright/resources/templates/__init__.py +1 -0
  118. bookwright/resources/templates/bible/character.md.tmpl +63 -0
  119. bookwright/resources/templates/bible/location.md.tmpl +37 -0
  120. bookwright/resources/templates/bible/research/_index.md.tmpl +25 -0
  121. bookwright/resources/templates/bible/research/sources.md.tmpl +21 -0
  122. bookwright/resources/templates/bible/research/tema.md.tmpl +37 -0
  123. bookwright/resources/templates/bible/setting.md.tmpl +38 -0
  124. bookwright/resources/templates/manifest.template.toml +79 -0
  125. bookwright/resources/templates/manuscript/chapter.md.tmpl +36 -0
  126. bookwright/resources/templates/scenes/scene.md.tmpl +37 -0
  127. bookwright/resources/vocabularies/__init__.py +6 -0
  128. bookwright/resources/vocabularies/greimas.ttl +4 -0
  129. bookwright/resources/vocabularies/propp.ttl +4 -0
  130. bookwright/resources/vocabularies/sources.ttl +82 -0
  131. bookwright/validation/__init__.py +33 -0
  132. bookwright/validation/anchor_queries.py +223 -0
  133. bookwright/validation/base.py +233 -0
  134. bookwright/validation/queries.py +197 -0
  135. bookwright/validation/registry.py +185 -0
  136. bookwright/validation/report.py +106 -0
  137. bookwright/validation/runner.py +65 -0
  138. bookwright/validation/validators/__init__.py +9 -0
  139. bookwright/validation/validators/character_presence.py +202 -0
  140. bookwright/validation/validators/factual_anchor.py +291 -0
  141. bookwright/validation/validators/focalization.py +152 -0
  142. bookwright/validation/validators/setting_continuity.py +100 -0
  143. bookwright/validation/validators/temporal.py +277 -0
  144. bookwright_cli-0.2.0.dist-info/METADATA +218 -0
  145. bookwright_cli-0.2.0.dist-info/RECORD +149 -0
  146. bookwright_cli-0.2.0.dist-info/WHEEL +4 -0
  147. bookwright_cli-0.2.0.dist-info/entry_points.txt +2 -0
  148. bookwright_cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  149. bookwright_cli-0.2.0.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,223 @@
1
+ """Read-only graph projections for the ``factual_anchor`` validator (research D9).
2
+
3
+ Turns the research-anchor sub-graph iterations 012/013 emit into the plain
4
+ in-memory shapes the validator reasons over, so ``factual_anchor`` never touches
5
+ rdflib directly — exactly how ``queries`` serves ``temporal``. Every traversal is
6
+ run through the :class:`~bookwright.indexers.Indexer` seam; the predicate IRIs come
7
+ from :mod:`bookwright.golem.namespaces`, never hardcoded.
8
+
9
+ An *anchor* is the subject of a ``bw:promotes`` triple (the one predicate that
10
+ distinguishes an anchor's ``crm:E13_Attribute_Assignment`` node from a finding's).
11
+ The interval model and the ``gYear`` parser are reused from :mod:`.queries` so the
12
+ anchor span and an event boundary coerce identically (research D2).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+
19
+ from rdflib.term import URIRef
20
+
21
+ from bookwright.golem.namespaces import (
22
+ BEGIN_OF_BEGIN,
23
+ BW_ACCESS_DATE,
24
+ BW_AUTHOR,
25
+ BW_CONSTRAINS,
26
+ BW_ORIGINAL_LANGUAGE,
27
+ BW_ORIGINAL_QUOTE,
28
+ BW_PROMOTES,
29
+ BW_REFERENCE,
30
+ BW_RELIABILITY,
31
+ BW_RELIABILITY_JUSTIFICATION,
32
+ BW_SUPPORTED_BY,
33
+ BW_TRANSLATION,
34
+ END_OF_END,
35
+ HAS_TIME_SPAN,
36
+ HAS_TYPE,
37
+ RELIABILITY_IRI,
38
+ timeline_uri,
39
+ )
40
+ from bookwright.indexers import Indexer
41
+ from bookwright.validation.queries import EventInterval, parse_gyear
42
+
43
+ __all__ = [
44
+ "FACETS",
45
+ "AnchorRecord",
46
+ "Facet",
47
+ "SourceRecord",
48
+ "entity_present",
49
+ "load_anchors",
50
+ "load_sources_by_anchor",
51
+ ]
52
+
53
+
54
@dataclass(frozen=True)
class AnchorRecord:
    """A single research anchor as projected out of the graph.

    This is the unit the ``factual_anchor`` validator works with. The record is
    frozen, so instances are immutable and hashable.
    """

    # IRI of the anchor node itself.
    uri: str
    # Object of the anchor's ``bw:promotes`` triple (the promoted finding).
    promotes: str
    # Object of ``bw:constrains``; ``None`` when the anchor carries no such
    # triple (the reader dropped an unresolved link).
    constrains: str | None
    # The anchor's time-span; ``EventInterval(uri, None, None)`` when the
    # anchor declares none.
    span: EventInterval
67
+
68
+
69
def load_anchors(indexer: Indexer) -> list[AnchorRecord]:
    """Return one :class:`AnchorRecord` per anchor node, in sorted-URI order.

    An anchor is any subject of a ``bw:promotes`` triple. The optional
    ``bw:constrains`` target and the optional time-span bounds (coerced to years
    via :func:`~bookwright.validation.queries.parse_gyear`) are read in the same
    projection; an absent optional is simply unbound. SPARQL only — no reasoning
    happens here.

    Args:
        indexer: The graph seam; only ``indexer.query`` is used.

    Returns:
        A list with exactly one record per distinct anchor URI, ordered by URI.
        When the projection yields several rows for one anchor, the row that
        sorts first on ``(anchor, finding, constrains, begin, end)`` wins, so
        the result does not depend on the engine's row order.
    """
    rows = indexer.query(
        f"""
        SELECT ?anchor ?finding ?constrains ?begin ?end WHERE {{
          ?anchor <{BW_PROMOTES}> ?finding .
          OPTIONAL {{ ?anchor <{BW_CONSTRAINS}> ?constrains . }}
          OPTIONAL {{
            ?anchor <{HAS_TIME_SPAN}> ?ts .
            OPTIONAL {{ ?ts <{BEGIN_OF_BEGIN}> ?begin . }}
            OPTIONAL {{ ?ts <{END_OF_END}> ?end . }}
          }}
        }}
        """
    )

    columns = ("anchor", "finding", "constrains", "begin", "end")

    def row_key(row: dict[str, str]) -> tuple[str, ...]:
        # Unbound optionals sort as the empty string, i.e. before any value.
        return tuple(row.get(column) or "" for column in columns)

    records: dict[str, AnchorRecord] = {}
    # The query carries no ORDER BY, so impose a total order here; otherwise
    # "first wins" would pick an engine-dependent row.
    for row in sorted(rows, key=row_key):
        anchor = row["anchor"]
        if anchor in records:  # defensive: one anchor → one record (first wins)
            continue
        # ``.get`` covers both an absent key and an explicit ``None`` binding.
        raw_begin = row.get("begin")
        raw_end = row.get("end")
        begin = parse_gyear(raw_begin) if raw_begin is not None else None
        end = parse_gyear(raw_end) if raw_end is not None else None
        records[anchor] = AnchorRecord(
            uri=anchor,
            promotes=row["finding"],
            constrains=row.get("constrains"),
            span=EventInterval(uri=anchor, begin=begin, end=end),
        )
    return [records[uri] for uri in sorted(records)]
104
+
105
+
106
+ # --- Source provenance / reliability projections (R2/R3, research D5/D6) -----
107
+
108
+
109
@dataclass(frozen=True)
class Facet:
    """A mandatory provenance facet that a source is expected to record (research D5).

    Frozen, hence immutable and hashable.
    """

    # Author-facing name used in violation messages.
    label: str
    # Source predicate whose presence in the graph proves the facet is recorded.
    predicate: URIRef
    # True for ``translation``: mandatory only when the source's original
    # language differs from the book language (the reader's D6 rule).
    foreign_only: bool = False
122
+
123
+
124
# Mandatory facets, listed in serialization order. The predicate SET equals the
# one a fully-populated ``provenance.Source.to_triples()`` emits (D5) and is
# pinned by a drift-guard test. It is deliberately NOT
# ``io/research._SOURCE_FACETS``: that tuple holds Pydantic field NAMES, so it
# contains ``name`` (which has no predicate) and lacks ``translation``. All IRIs
# are taken from the ``golem.namespaces`` constants.
FACETS: tuple[Facet, ...] = (
    Facet(label="type", predicate=HAS_TYPE),
    Facet(label="reliability", predicate=BW_RELIABILITY),
    Facet(label="reliability justification", predicate=BW_RELIABILITY_JUSTIFICATION),
    Facet(label="reference", predicate=BW_REFERENCE),
    Facet(label="author", predicate=BW_AUTHOR),
    Facet(label="original language", predicate=BW_ORIGINAL_LANGUAGE),
    Facet(label="access date", predicate=BW_ACCESS_DATE),
    Facet(label="original quote", predicate=BW_ORIGINAL_QUOTE),
    Facet(label="translation", predicate=BW_TRANSLATION, foreign_only=True),
)

# Reverse lookup: E55 individual IRI (as a string) → reliability rank name. It is
# derived by inverting ``RELIABILITY_IRI``, the single vocabulary source, so the
# scale is never spelled out a second time (D6).
_RELIABILITY_NAME: dict[str, str] = {
    str(rank_iri): rank_name for rank_name, rank_iri in RELIABILITY_IRI.items()
}
144
+
145
+
146
@dataclass(frozen=True)
class SourceRecord:
    """A source backing an anchor's promoted finding (the R2/R3 working unit).

    Frozen, hence immutable and hashable.
    """

    # IRI of the source node.
    uri: str
    # Facet-predicate IRI strings the source actually carries; R2 reads this set
    # to find the gaps.
    present_predicates: frozenset[str]
    # Drives the conditional ``translation`` requirement.
    original_language: str | None
    # Rating *name* (``alta``/``media``/``baja``), or ``None`` when unrated.
    reliability: str | None
160
+
161
+
162
@dataclass
class _SourceAccum:
    """Mutable scratch state used while folding one source's ``?p ?o`` rows."""

    # Every predicate IRI string seen for the source so far.
    predicates: set[str]
    # Object of the original-language triple, once one has been seen.
    language: str | None = None
    # Object of the reliability triple (an IRI string), once one has been seen.
    reliability_iri: str | None = None
169
+
170
+
171
def load_sources_by_anchor(indexer: Indexer) -> dict[str, list[SourceRecord]]:
    """Map each anchor URI to its supporting sources (``anchor→finding→source``, D5).

    A source without any describing triple (a dangling ``bw:supportedBy``) is
    still reported, with an empty facet set, so R2 can flag every missing facet.
    Within each anchor the sources come back in sorted-URI order, keeping the
    output byte-stable.
    """
    language_predicate = str(BW_ORIGINAL_LANGUAGE)
    reliability_predicate = str(BW_RELIABILITY)

    rows = indexer.query(
        f"""
        SELECT ?anchor ?source ?p ?o WHERE {{
          ?anchor <{BW_PROMOTES}> ?finding .
          ?finding <{BW_SUPPORTED_BY}> ?source .
          OPTIONAL {{ ?source ?p ?o . }}
        }}
        """
    )

    # anchor URI → source URI → accumulator; one row arrives per (source, ?p, ?o).
    folded: dict[str, dict[str, _SourceAccum]] = {}
    for row in rows:
        per_anchor = folded.setdefault(row["anchor"], {})
        accum = per_anchor.setdefault(row["source"], _SourceAccum(predicates=set()))
        seen = row.get("p")
        if seen is not None:
            accum.predicates.add(seen)
            if seen == language_predicate:
                accum.language = row.get("o")
            elif seen == reliability_predicate:
                accum.reliability_iri = row.get("o")

    result: dict[str, list[SourceRecord]] = {}
    for anchor_uri, per_anchor in folded.items():
        source_records: list[SourceRecord] = []
        for source_uri in sorted(per_anchor):
            accum = per_anchor[source_uri]
            source_records.append(
                SourceRecord(
                    uri=source_uri,
                    present_predicates=frozenset(accum.predicates),
                    original_language=accum.language,
                    # An unrated source looks up "" and therefore maps to None.
                    reliability=_RELIABILITY_NAME.get(accum.reliability_iri or ""),
                )
            )
        result[anchor_uri] = source_records
    return result
211
+
212
+
213
def entity_present(indexer: Indexer, uri: str, uri_base: str) -> bool:
    """Report whether ``uri`` names an entity present in the graph (R4, D4).

    The answer is ``True`` if ``uri`` is the subject of at least one triple. It
    is also ``True`` for the well-known (untyped) timeline IRI, which is a
    legitimate ``bw:constrains`` target even though no triple describes it.
    """
    timeline_iri = str(timeline_uri(uri_base))
    if uri != timeline_iri:
        # One matching triple is enough, hence LIMIT 1.
        probe = f"SELECT ?p WHERE {{ <{uri}> ?p ?o . }} LIMIT 1"
        matches = list(indexer.query(probe))
        return len(matches) > 0
    return True
@@ -0,0 +1,233 @@
1
+ """Core finding types and the validator seam (data-model, contracts/validator-protocol.md).
2
+
3
+ In-memory only; the subsystem persists nothing (FR-020). Every type here is frozen
4
+ where it can be, so findings are hashable and dedupe is trivial (D8).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from enum import StrEnum
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Any, Literal, Protocol, cast, runtime_checkable
13
+
14
+ from bookwright.errors import BookwrightError
15
+ from bookwright.indexers import Indexer
16
+
17
+ if TYPE_CHECKING:
18
+ from bookwright.core.manifest import Manifest
19
+ from bookwright.golem.base import SluggedEntity
20
+ from bookwright.io.bible import MapResult
21
+
22
+ __all__ = [
23
+ "Severity",
24
+ "UnknownValidatorError",
25
+ "ValidationContext",
26
+ "Validator",
27
+ "ValidatorError",
28
+ "Violation",
29
+ "split_source",
30
+ ]
31
+
32
+
33
+ class Severity(StrEnum):
34
+ """A finding's level. String-valued (JSON-friendly, design § 13.1)."""
35
+
36
+ error = "error"
37
+ warning = "warning"
38
+ info = "info"
39
+
40
+ def at_least(self, threshold: Severity) -> bool:
41
+ """Whether this severity meets ``threshold`` under ``error > warning > info``."""
42
+ return _RANK[self] >= _RANK[threshold]
43
+
44
+
45
+ _RANK: dict[Severity, int] = {Severity.error: 2, Severity.warning: 1, Severity.info: 0}
46
+ """Ordinal for the ``--severity`` threshold, the gate, and the total-order sort."""
47
+
48
+
49
def split_source(source: str | None) -> tuple[str | None, int | None]:
    """Split a ``relpath[:line]`` provenance string into ``(path, line)``.

    The ``:line`` suffix is recognized only when a non-empty path precedes a tail
    made solely of ASCII digits; otherwise the whole string is the path and the
    line is ``None``. This is the single place the ``source`` grammar is parsed —
    every consumer routes through it so the parsing never forks.

    Args:
        source: The provenance string, or ``None`` for a location-less finding.

    Returns:
        ``(None, None)`` for ``None``; ``(path, line)`` when a line suffix is
        present; ``(source, None)`` otherwise.
    """
    if source is None:
        return None, None
    head, _, tail = source.rpartition(":")
    # ``str.isdigit()`` alone also accepts characters such as "²" that ``int()``
    # rejects with ValueError, so restrict the tail to ASCII first. A non-empty
    # ``head`` already implies the separator was found.
    if head and tail.isascii() and tail.isdigit():
        return head, int(tail)
    return source, None
64
+
65
+
66
@dataclass(frozen=True)
class Violation:
    """A single finding emitted by a validator (FR-002/003).

    Being frozen with tuple-typed fields keeps instances hashable, which lets the
    runner collapse identical findings into one (D8).
    """

    # Name of the validator that produced the finding.
    validator: str
    severity: Severity
    message: str
    # Project-relative posix path, optionally followed by ``:line``; ``None``
    # when the finding has no specific location.
    source: str | None = None
    # Supporting ``(subject, predicate, object)`` triples.
    triples: tuple[tuple[str, str, str], ...] = ()

    def source_file(self) -> str | None:
        """Return ``source`` without any ``:line`` suffix, or ``None``."""
        path, _line = split_source(self.source)
        return path

    def source_line(self) -> int | None:
        """Return the 1-based line carried by ``source``, or ``None`` if absent."""
        _path, line = split_source(self.source)
        return line

    def to_json(self) -> dict[str, Any]:
        """Render the contract shape (FR-002, SC-004), with each triple as a list."""
        payload: dict[str, Any] = {}
        payload["validator"] = self.validator
        payload["severity"] = self.severity.value
        payload["message"] = self.message
        payload["source"] = self.source
        payload["triples"] = list(map(list, self.triples))
        return payload
98
+
99
+
100
@dataclass(frozen=True)
class ValidatorError:
    """Records a validator that failed to load or raised while running (FR-014).

    These surface in the report's ``errors[]`` and never affect the gate.
    """

    # Validator name, or the offending file path for ``phase="load"`` failures.
    validator: str
    message: str
    phase: Literal["load", "run"]

    def to_json(self) -> dict[str, Any]:
        """Return the JSON-ready mapping with ``validator``, ``phase`` and ``message``."""
        payload: dict[str, Any] = {}
        payload["validator"] = self.validator
        payload["phase"] = self.phase
        payload["message"] = self.message
        return payload
114
+
115
+
116
@runtime_checkable
class Validator(Protocol):
    """Structural contract between the runner and every validator (design § 13.1).

    ``validate`` receives the project (a ``ValidationContext``) and the
    already-built graph (``indexer``, which may be empty) and returns the
    ``Violation`` list it found; an empty list means "no problems" (FR-001).
    Implementations MUST be deterministic (FR-019) and MUST NOT write to disk or
    mutate the graph (FR-020). They MAY raise — the runner isolates them (FR-014).
    """

    # Identifier of the validator.
    name: str
    # Severity declared as this validator's default.
    severity_default: Severity

    def validate(self, project: ValidationContext, indexer: Indexer) -> list[Violation]:
        """Examine ``project`` and ``indexer`` and return the findings."""
        ...
131
+
132
+
133
class UnknownValidatorError(BookwrightError):
    """Raised when a configured ``[validators]`` name is not in the discovered set (FR-007)."""

    code = "unknown_validator"

    def __init__(self, names: tuple[str, ...]) -> None:
        # Keep the offending names on the instance for callers that inspect them.
        self.names = names
        message = "unknown validator(s): " + ", ".join(names)
        super().__init__(message, {"names": [*names]})
142
+
143
+
144
# Marks a cache slot as "not computed yet"; needed because ``None`` is itself a
# legitimate cached result.
_UNSET = object()


@dataclass
class ValidationContext:
    """The ``project`` argument handed to every validator (data-model).

    Holds the project root and manifest and offers memoizing accessors: each one
    computes its value on the first call and returns the cached value afterwards,
    so a source file is read once per run and shared across validators.
    """

    root: Path
    manifest: Manifest

    # Cache slots, one per accessor; excluded from ``repr`` and equality.
    _bible: Any = field(default=_UNSET, repr=False, compare=False)
    _character_names: Any = field(default=_UNSET, repr=False, compare=False)
    _setting_names: Any = field(default=_UNSET, repr=False, compare=False)
    _manuscript_files: Any = field(default=_UNSET, repr=False, compare=False)
    _constitution_text: Any = field(default=_UNSET, repr=False, compare=False)

    @property
    def uri_base(self) -> str:
        """The ``uri_base`` configured in the manifest's ``bookwright`` section."""
        return self.manifest.bookwright.uri_base

    def bible(self) -> MapResult:
        """Map the project's bible to GOLEM entities (computed once per run)."""
        if self._bible is not _UNSET:
            return cast("MapResult", self._bible)

        from bookwright.io.bible import map_bible  # noqa: PLC0415

        bible_root = self.root / self.manifest.paths.bible
        self._bible = map_bible(self.root, bible_root, self.uri_base)
        return cast("MapResult", self._bible)

    def _names_of(self, concept_cls: type[SluggedEntity]) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(name, bible_relpath)`` pairs for one bible concept class."""
        pairs: list[tuple[str, str]] = []
        for mapped in self.bible().mapped:
            entity = mapped.entity
            if isinstance(entity, concept_cls):
                pairs.append((entity.name, mapped.relpath))
        pairs.sort()
        return tuple(pairs)

    def character_names(self) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(name, bible_relpath)`` for every bible Character."""
        if self._character_names is not _UNSET:
            return cast("tuple[tuple[str, str], ...]", self._character_names)

        from bookwright.golem import Character  # noqa: PLC0415

        self._character_names = self._names_of(Character)
        return cast("tuple[tuple[str, str], ...]", self._character_names)

    def setting_names(self) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(name, bible_relpath)`` for every bible Setting."""
        if self._setting_names is not _UNSET:
            return cast("tuple[tuple[str, str], ...]", self._setting_names)

        from bookwright.golem import Setting  # noqa: PLC0415

        self._setting_names = self._names_of(Setting)
        return cast("tuple[tuple[str, str], ...]", self._setting_names)

    def manuscript_files(self) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(relpath, text)`` for every ``**/*.md`` under the manuscript dir.

        Files that cannot be read or decoded as UTF-8 are skipped, so one bad
        file never aborts a validator. The result is sorted by relpath for
        determinism (D8); it is empty when the manuscript directory is missing.
        """
        if self._manuscript_files is not _UNSET:
            return cast("tuple[tuple[str, str], ...]", self._manuscript_files)

        base = self.root / self.manifest.paths.manuscript
        candidates = sorted(base.rglob("*.md")) if base.is_dir() else []
        found: list[tuple[str, str]] = []
        for candidate in candidates:
            if not candidate.is_file():
                continue
            try:
                body = candidate.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                continue
            else:
                found.append((candidate.relative_to(self.root).as_posix(), body))
        found.sort()
        self._manuscript_files = tuple(found)
        return cast("tuple[tuple[str, str], ...]", self._manuscript_files)

    def constitution_text(self) -> str | None:
        """Return the constitution file's text, or ``None`` when absent/unreadable."""
        if self._constitution_text is not _UNSET:
            return cast("str | None", self._constitution_text)

        constitution_path = self.root / self.manifest.paths.constitution
        try:
            loaded: str | None = constitution_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            loaded = None
        self._constitution_text = loaded
        return cast("str | None", self._constitution_text)
@@ -0,0 +1,197 @@
1
+ """Read-only graph projections for the ``temporal`` validator (data-model, D11/D12).
2
+
3
+ These helpers turn the interval graph the timeline indexer emits into plain
4
+ in-memory shapes (``EventInterval`` + relation edge sets) the validator reasons
5
+ over, so ``temporal`` never touches rdflib directly. SPARQL is run through the
6
+ ``Indexer`` seam (``indexer.query``); every traversal is insensitive to whether a
7
+ year sits on a boundary directly or on its ``Dimension`` sub-node.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+
14
+ from rdflib.namespace import RDF, RDFS, XSD
15
+
16
+ from bookwright.golem.namespaces import (
17
+ ASSIGNED_ATTRIBUTE_TO,
18
+ CRM,
19
+ CSM,
20
+ GOLEM,
21
+ TEMPORAL_RELATIONS,
22
+ TR,
23
+ USED_SPECIFIC_OBJECT,
24
+ )
25
+ from bookwright.indexers import Indexer
26
+ from bookwright.validation.base import split_source
27
+
28
+ __all__ = [
29
+ "EventInterval",
30
+ "intervals_disjoint",
31
+ "load_intervals",
32
+ "load_relations",
33
+ "parse_gyear",
34
+ "resolve_source",
35
+ "timeline_bounds",
36
+ ]
37
+
38
# SPARQL ``PREFIX`` header prepended to every query issued through ``_q``.
_PREFIXES = "\n".join(
    [
        f"PREFIX {label}: <{namespace}>"
        for label, namespace in {
            "golem": str(GOLEM),
            "crm": str(CRM),
            "tr": str(TR),
            "csm": str(CSM),
            "rdf": str(RDF),
            "rdfs": str(RDFS),
            "xsd": str(XSD),
        }.items()
    ]
)
50
+
51
+
52
@dataclass(frozen=True)
class EventInterval:
    """Begin and end year of one event; a ``None`` bound means that side is open."""

    # IRI (or label) of the thing the interval belongs to.
    uri: str
    # First year of the interval, or ``None`` when open at the start.
    begin: int | None
    # Last year of the interval, or ``None`` when open at the end.
    end: int | None
59
+
60
+
61
def _q(indexer: Indexer, body: str) -> list[dict[str, str]]:
    """Run ``body`` with the shared ``PREFIX`` header and return the rows as a list."""
    query_text = f"{_PREFIXES}\n{body}"
    return list(indexer.query(query_text))
63
+
64
+
65
def parse_gyear(raw: str) -> int | None:
    """Coerce an ``xsd:gYear`` lexical (``"1885"``, ``"0800"``, ``"-0044"``) to int.

    The single ``gYear`` parser for the ``temporal`` and ``factual_anchor``
    validators, so event boundary years and anchor time-span years coerce
    identically.

    Args:
        raw: The literal's lexical form; surrounding whitespace is ignored.

    Returns:
        The signed year, or ``None`` when the text is not an optional leading
        ``-`` followed by one or more ASCII digits.
    """
    text = raw.strip()
    negative = text.startswith("-")
    digits = text[1:] if negative else text
    # ``str.isdigit()`` alone also accepts characters such as "²" that ``int()``
    # rejects with ValueError; requiring ASCII keeps malformed input a clean None.
    if not (digits.isascii() and digits.isdigit()):
        return None
    value = int(digits)
    return -value if negative else value
80
+
81
+
82
def intervals_disjoint(a: EventInterval, b: EventInterval) -> bool:
    """Return True when two closed year ranges provably do not overlap (FR-011, research D1).

    This is the **single source of truth** for "two intervals contradict": both
    the ``temporal`` validator (overlap-disjoint rule) and ``factual_anchor``
    (anachronism rule) decide disjointness here. A ``None`` bound is unbounded on
    that side, so it can never force disjointness: an open-ended interval cannot
    be *proven* disjoint from anything.
    """

    def ends_before(earlier_end: int | None, later_begin: int | None) -> bool:
        # Only two known bounds can prove one range finishes before the other starts.
        if earlier_end is None or later_begin is None:
            return False
        return earlier_end < later_begin

    return ends_before(a.end, b.begin) or ends_before(b.end, a.begin)
94
+
95
+
96
def load_intervals(indexer: Indexer) -> dict[str, EventInterval]:
    """Build one :class:`EventInterval` per ``G5_Narrative_Event`` in the graph.

    A year is reached via
    ``(csm:duration|tr:temporal-location)/tr:temporal-location`` to a boundary
    node, whose ``crm:P2_has_type`` value ending in ``/begin`` or ``/end`` selects
    the bound, and then via
    ``(crm:P90_has_value | crm:P43_has_dimension/crm:P90_has_value)`` — which
    makes the lookup insensitive to the carrier-node shape (D12). Events without
    an interval are still returned, with both bounds ``None``.
    """
    # Seed every event with an open [begin, end] pair so bound-less events survive.
    event_rows = _q(indexer, "SELECT ?event WHERE { ?event a golem:G5_Narrative_Event . }")
    bounds_by_event: dict[str, list[int | None]] = {}
    for event_row in event_rows:
        bounds_by_event[event_row["event"]] = [None, None]

    boundary_rows = _q(
        indexer,
        """
        SELECT ?event ?btype ?year WHERE {
          ?event a golem:G5_Narrative_Event .
          ?event (csm:duration|tr:temporal-location)/tr:temporal-location ?boundary .
          ?boundary crm:P2_has_type ?btype .
          ?boundary (crm:P90_has_value | crm:P43_has_dimension/crm:P90_has_value) ?year .
        }
        """,
    )
    for boundary_row in boundary_rows:
        event_uri = boundary_row["event"]
        boundary_type = boundary_row["btype"]
        year = parse_gyear(boundary_row["year"])
        # Skip unparseable years and events missing from the seed query.
        if year is None or event_uri not in bounds_by_event:
            continue
        if boundary_type.endswith("/begin"):
            bounds_by_event[event_uri][0] = year
        elif boundary_type.endswith("/end"):
            bounds_by_event[event_uri][1] = year

    result: dict[str, EventInterval] = {}
    for event_uri, (begin_year, end_year) in bounds_by_event.items():
        result[event_uri] = EventInterval(uri=event_uri, begin=begin_year, end=end_year)
    return result
132
+
133
+
134
def timeline_bounds(intervals: dict[str, EventInterval]) -> EventInterval:
    """Reduce the given events to the timeline's overall ``(min begin, max end)`` (D3).

    This is a **pure** reduction over an already-loaded :func:`load_intervals`
    result and adds **no** interval reasoning of its own. ``factual_anchor`` uses
    it when an anchor constrains the timeline as a whole. A bound is ``None``
    when no event supplies a year for that side. The returned ``uri`` is the
    sentinel label ``"timeline"`` (the timeline has no single typed node,
    research D10). Taking the loaded dict rather than the indexer lets the caller
    reuse one :func:`load_intervals` pass.
    """
    known_begins = (iv.begin for iv in intervals.values() if iv.begin is not None)
    known_ends = (iv.end for iv in intervals.values() if iv.end is not None)
    earliest = min(known_begins, default=None)
    latest = max(known_ends, default=None)
    return EventInterval(uri="timeline", begin=earliest, end=latest)
151
+
152
+
153
def load_relations(indexer: Indexer) -> dict[str, set[tuple[str, str]]]:
    """Return the ``TR:*`` edge sets keyed by canonical relation name (D11).

    Keys are the :data:`TEMPORAL_RELATIONS` names and each value is a set of
    ``(subject, object)`` event-URI pairs. The query requires both endpoints to
    be narrative events, so a stray edge never leaks into the reasoning.
    """

    def edges_for(predicate: object) -> set[tuple[str, str]]:
        # One query per relation predicate, restricted to event→event edges.
        matches = _q(
            indexer,
            f"""
            SELECT ?a ?b WHERE {{
              ?a a golem:G5_Narrative_Event .
              ?b a golem:G5_Narrative_Event .
              ?a <{predicate}> ?b .
            }}
            """,
        )
        return {(match["a"], match["b"]) for match in matches}

    return {relation.name: edges_for(relation.predicate) for relation in TEMPORAL_RELATIONS}
174
+
175
+
176
def resolve_source(indexer: Indexer, uri: str) -> str | None:
    """Look up the ``relpath[:line]`` provenance string of a graph entity (D6).

    Follows the CIDOC provenance edge: an assertion node whose
    ``P140_assigned_attribute_to`` is ``uri`` carries the source on
    ``P16_used_specific_object``. With several candidates the result stays
    deterministic: a source with a ``:line`` suffix is preferred, and ties go to
    the lexicographically smallest. Returns ``None`` when no source is recorded.
    """
    rows = _q(
        indexer,
        f"""
        SELECT ?source WHERE {{
          ?assertion <{ASSIGNED_ATTRIBUTE_TO}> <{uri}> .
          ?assertion <{USED_SPECIFIC_OBJECT}> ?source .
        }}
        """,
    )
    candidates = sorted({row["source"] for row in rows})
    if not candidates:
        return None
    # First candidate (in sorted order) that carries a line number, if any.
    with_line = next(
        (candidate for candidate in candidates if split_source(candidate)[1] is not None),
        None,
    )
    return with_line if with_line is not None else candidates[0]