bookwright-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bookwright/__init__.py +3 -0
- bookwright/__main__.py +6 -0
- bookwright/cli.py +19 -0
- bookwright/commands/__init__.py +0 -0
- bookwright/commands/_envelope.py +36 -0
- bookwright/commands/check.py +75 -0
- bookwright/commands/graph/__init__.py +23 -0
- bookwright/commands/graph/build.py +157 -0
- bookwright/commands/graph/envelope.py +26 -0
- bookwright/commands/graph/query.py +98 -0
- bookwright/commands/init/__init__.py +5 -0
- bookwright/commands/init/conflict.py +107 -0
- bookwright/commands/init/envelope.py +322 -0
- bookwright/commands/init/git.py +96 -0
- bookwright/commands/init/main.py +263 -0
- bookwright/commands/init/resolve.py +193 -0
- bookwright/commands/init/scaffold.py +242 -0
- bookwright/commands/init/validate.py +172 -0
- bookwright/commands/integration/__init__.py +22 -0
- bookwright/commands/integration/use.py +120 -0
- bookwright/commands/validate.py +160 -0
- bookwright/commands/version.py +35 -0
- bookwright/core/__init__.py +35 -0
- bookwright/core/_blocks.py +239 -0
- bookwright/core/_build.py +154 -0
- bookwright/core/_research_block.py +56 -0
- bookwright/core/_translate.py +90 -0
- bookwright/core/errors.py +127 -0
- bookwright/core/iso639_1.py +200 -0
- bookwright/core/manifest.py +343 -0
- bookwright/errors.py +47 -0
- bookwright/golem/__init__.py +71 -0
- bookwright/golem/base.py +200 -0
- bookwright/golem/errors.py +29 -0
- bookwright/golem/modules/__init__.py +1 -0
- bookwright/golem/modules/character.py +109 -0
- bookwright/golem/modules/event.py +91 -0
- bookwright/golem/modules/feature.py +161 -0
- bookwright/golem/modules/inference.py +41 -0
- bookwright/golem/modules/narrative.py +55 -0
- bookwright/golem/modules/provenance.py +197 -0
- bookwright/golem/modules/relationship.py +38 -0
- bookwright/golem/modules/setting.py +30 -0
- bookwright/golem/namespaces.py +332 -0
- bookwright/golem/serialize.py +25 -0
- bookwright/golem/slug.py +22 -0
- bookwright/indexers/__init__.py +47 -0
- bookwright/indexers/base.py +55 -0
- bookwright/indexers/errors.py +80 -0
- bookwright/indexers/rdflib_indexer.py +89 -0
- bookwright/integrations/__init__.py +155 -0
- bookwright/integrations/base.py +117 -0
- bookwright/integrations/claude/__init__.py +29 -0
- bookwright/integrations/constants.py +38 -0
- bookwright/integrations/descriptions.py +48 -0
- bookwright/integrations/errors.py +170 -0
- bookwright/integrations/generic/__init__.py +56 -0
- bookwright/integrations/lint.py +160 -0
- bookwright/integrations/materialize.py +202 -0
- bookwright/integrations/options.py +203 -0
- bookwright/io/__init__.py +1 -0
- bookwright/io/bible.py +500 -0
- bookwright/io/errors.py +98 -0
- bookwright/io/frontmatter.py +61 -0
- bookwright/io/fs.py +226 -0
- bookwright/io/manuscript.py +15 -0
- bookwright/io/project.py +21 -0
- bookwright/io/report.py +107 -0
- bookwright/io/research.py +427 -0
- bookwright/resources/__init__.py +1 -0
- bookwright/resources/commands/bookwright-analyze.md +66 -0
- bookwright/resources/commands/bookwright-bible.md +96 -0
- bookwright/resources/commands/bookwright-checklist.md +67 -0
- bookwright/resources/commands/bookwright-clarify.md +65 -0
- bookwright/resources/commands/bookwright-constitution.md +79 -0
- bookwright/resources/commands/bookwright-continuity.md +70 -0
- bookwright/resources/commands/bookwright-draft.md +74 -0
- bookwright/resources/commands/bookwright-outline.md +71 -0
- bookwright/resources/commands/bookwright-research.md +107 -0
- bookwright/resources/commands/bookwright-scenes.md +66 -0
- bookwright/resources/commands/bookwright-synopsis.md +67 -0
- bookwright/resources/commands/bookwright-verify.md +136 -0
- bookwright/resources/commands/references/golem-character.md +65 -0
- bookwright/resources/commands/references/golem-events-timeline.md +56 -0
- bookwright/resources/commands/references/golem-relationships.md +53 -0
- bookwright/resources/commands/references/greimas-actants.md +57 -0
- bookwright/resources/commands/references/pending-protocol.md +72 -0
- bookwright/resources/commands/references/propp-functions.md +54 -0
- bookwright/resources/commands/references/research-format.md +136 -0
- bookwright/resources/project/.bookwright/cache/.gitkeep +0 -0
- bookwright/resources/project/.bookwright/schema/.gitkeep +0 -0
- bookwright/resources/project/.bookwright/templates/.gitkeep +0 -0
- bookwright/resources/project/.gitignore +23 -0
- bookwright/resources/project/README.md.j2 +40 -0
- bookwright/resources/project/__init__.py +6 -0
- bookwright/resources/project/bible/characters/.gitkeep +0 -0
- bookwright/resources/project/bible/constitution.md.j2 +74 -0
- bookwright/resources/project/bible/glossary.md +36 -0
- bookwright/resources/project/bible/locations/.gitkeep +0 -0
- bookwright/resources/project/bible/pov-structure.md +43 -0
- bookwright/resources/project/bible/relationships.md +36 -0
- bookwright/resources/project/bible/research/_index.md +28 -0
- bookwright/resources/project/bible/research/sources.md +23 -0
- bookwright/resources/project/bible/settings/.gitkeep +0 -0
- bookwright/resources/project/bible/subplots.md +35 -0
- bookwright/resources/project/bible/themes.md +36 -0
- bookwright/resources/project/bible/timeline.md +38 -0
- bookwright/resources/project/manuscript/.gitkeep +0 -0
- bookwright/resources/project/outline/arcs.md +34 -0
- bookwright/resources/project/outline/scenes.md +31 -0
- bookwright/resources/project/outline/structure.md +35 -0
- bookwright/resources/project/outline/synopsis.md +25 -0
- bookwright/resources/schemas/__init__.py +19 -0
- bookwright/resources/schemas/golem-1.1/VERSION +1 -0
- bookwright/resources/schemas/golem-1.1/golem.ttl +1947 -0
- bookwright/resources/schemas/golem-1.1/version.json +8 -0
- bookwright/resources/templates/__init__.py +1 -0
- bookwright/resources/templates/bible/character.md.tmpl +63 -0
- bookwright/resources/templates/bible/location.md.tmpl +37 -0
- bookwright/resources/templates/bible/research/_index.md.tmpl +25 -0
- bookwright/resources/templates/bible/research/sources.md.tmpl +21 -0
- bookwright/resources/templates/bible/research/tema.md.tmpl +37 -0
- bookwright/resources/templates/bible/setting.md.tmpl +38 -0
- bookwright/resources/templates/manifest.template.toml +79 -0
- bookwright/resources/templates/manuscript/chapter.md.tmpl +36 -0
- bookwright/resources/templates/scenes/scene.md.tmpl +37 -0
- bookwright/resources/vocabularies/__init__.py +6 -0
- bookwright/resources/vocabularies/greimas.ttl +4 -0
- bookwright/resources/vocabularies/propp.ttl +4 -0
- bookwright/resources/vocabularies/sources.ttl +82 -0
- bookwright/validation/__init__.py +33 -0
- bookwright/validation/anchor_queries.py +223 -0
- bookwright/validation/base.py +233 -0
- bookwright/validation/queries.py +197 -0
- bookwright/validation/registry.py +185 -0
- bookwright/validation/report.py +106 -0
- bookwright/validation/runner.py +65 -0
- bookwright/validation/validators/__init__.py +9 -0
- bookwright/validation/validators/character_presence.py +202 -0
- bookwright/validation/validators/factual_anchor.py +291 -0
- bookwright/validation/validators/focalization.py +152 -0
- bookwright/validation/validators/setting_continuity.py +100 -0
- bookwright/validation/validators/temporal.py +277 -0
- bookwright_cli-0.2.0.dist-info/METADATA +218 -0
- bookwright_cli-0.2.0.dist-info/RECORD +149 -0
- bookwright_cli-0.2.0.dist-info/WHEEL +4 -0
- bookwright_cli-0.2.0.dist-info/entry_points.txt +2 -0
- bookwright_cli-0.2.0.dist-info/licenses/LICENSE +202 -0
- bookwright_cli-0.2.0.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Read-only graph projections for the ``factual_anchor`` validator (research D9).
|
|
2
|
+
|
|
3
|
+
Turns the research-anchor sub-graph iterations 012/013 emit into the plain
|
|
4
|
+
in-memory shapes the validator reasons over, so ``factual_anchor`` never touches
|
|
5
|
+
rdflib directly — exactly how ``queries`` serves ``temporal``. Every traversal is
|
|
6
|
+
run through the :class:`~bookwright.indexers.Indexer` seam; the predicate IRIs come
|
|
7
|
+
from :mod:`bookwright.golem.namespaces`, never hardcoded.
|
|
8
|
+
|
|
9
|
+
An *anchor* is the subject of a ``bw:promotes`` triple (the one predicate that
|
|
10
|
+
distinguishes an anchor's ``crm:E13_Attribute_Assignment`` node from a finding's).
|
|
11
|
+
The interval model and the ``gYear`` parser are reused from :mod:`.queries` so the
|
|
12
|
+
anchor span and an event boundary coerce identically (research D2).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
from rdflib.term import URIRef
|
|
20
|
+
|
|
21
|
+
from bookwright.golem.namespaces import (
|
|
22
|
+
BEGIN_OF_BEGIN,
|
|
23
|
+
BW_ACCESS_DATE,
|
|
24
|
+
BW_AUTHOR,
|
|
25
|
+
BW_CONSTRAINS,
|
|
26
|
+
BW_ORIGINAL_LANGUAGE,
|
|
27
|
+
BW_ORIGINAL_QUOTE,
|
|
28
|
+
BW_PROMOTES,
|
|
29
|
+
BW_REFERENCE,
|
|
30
|
+
BW_RELIABILITY,
|
|
31
|
+
BW_RELIABILITY_JUSTIFICATION,
|
|
32
|
+
BW_SUPPORTED_BY,
|
|
33
|
+
BW_TRANSLATION,
|
|
34
|
+
END_OF_END,
|
|
35
|
+
HAS_TIME_SPAN,
|
|
36
|
+
HAS_TYPE,
|
|
37
|
+
RELIABILITY_IRI,
|
|
38
|
+
timeline_uri,
|
|
39
|
+
)
|
|
40
|
+
from bookwright.indexers import Indexer
|
|
41
|
+
from bookwright.validation.queries import EventInterval, parse_gyear
|
|
42
|
+
|
|
43
|
+
__all__ = [
|
|
44
|
+
"FACETS",
|
|
45
|
+
"AnchorRecord",
|
|
46
|
+
"Facet",
|
|
47
|
+
"SourceRecord",
|
|
48
|
+
"entity_present",
|
|
49
|
+
"load_anchors",
|
|
50
|
+
"load_sources_by_anchor",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class AnchorRecord:
    """Immutable projection of a single research anchor read from the graph.

    This is the unit the ``factual_anchor`` validator works on. An anchor
    that carries no ``bw:constrains`` triple has ``constrains`` set to
    ``None``; an anchor that declares no time-span has a ``span`` whose
    ``begin`` and ``end`` are both ``None``.
    """

    # IRI of the anchor node itself.
    uri: str
    # IRI of the finding this anchor promotes (object of ``bw:promotes``).
    promotes: str
    # IRI of the constrained entity, or ``None`` when no link is recorded.
    constrains: str | None
    # Year interval of the anchor; open on any side that is not declared.
    span: EventInterval
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_anchors(indexer: Indexer) -> list[AnchorRecord]:
    """Return one :class:`AnchorRecord` per anchor node, in sorted-URI order.

    An anchor is any subject of a ``bw:promotes`` triple. The optional
    ``bw:constrains`` target and the optional time-span bounds (turned into
    years by :func:`~bookwright.validation.queries.parse_gyear`) are read in
    the same projection. Only SPARQL runs here; no reasoning happens.

    Args:
        indexer: The graph seam; only ``indexer.query`` is used.

    Returns:
        The projected anchors, sorted by anchor URI.
    """
    rows = indexer.query(
        f"""
        SELECT ?anchor ?finding ?constrains ?begin ?end WHERE {{
            ?anchor <{BW_PROMOTES}> ?finding .
            OPTIONAL {{ ?anchor <{BW_CONSTRAINS}> ?constrains . }}
            OPTIONAL {{
                ?anchor <{HAS_TIME_SPAN}> ?ts .
                OPTIONAL {{ ?ts <{BEGIN_OF_BEGIN}> ?begin . }}
                OPTIONAL {{ ?ts <{END_OF_END}> ?end . }}
            }}
        }}
        """
    )
    records: dict[str, AnchorRecord] = {}
    for row in rows:
        anchor = row["anchor"]
        # Defensive: one anchor -> one record, the first row seen wins.
        # NOTE(review): the query carries no ORDER BY, so "first" is whatever
        # order the indexer yields -- confirm that order is stable.
        if anchor in records:
            continue
        # An unmatched OPTIONAL may surface as a missing key or as a key
        # bound to ``None`` depending on the indexer; ``.get`` tolerates both
        # (the same way ``load_sources_by_anchor`` reads ``?p``), so
        # ``parse_gyear`` is never handed ``None``.
        raw_begin = row.get("begin")
        raw_end = row.get("end")
        begin = parse_gyear(raw_begin) if raw_begin is not None else None
        end = parse_gyear(raw_end) if raw_end is not None else None
        records[anchor] = AnchorRecord(
            uri=anchor,
            promotes=row["finding"],
            constrains=row.get("constrains"),
            span=EventInterval(uri=anchor, begin=begin, end=end),
        )
    return [records[uri] for uri in sorted(records)]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# --- Source provenance / reliability projections (R2/R3, research D5/D6) -----
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True)
class Facet:
    """A single mandatory provenance facet of a source (research D5).

    A facet counts as recorded when its ``predicate`` is present on the
    source in the graph. ``foreign_only`` is set for ``translation``, which
    is mandatory only when the source's original language differs from the
    book language (the reader's D6 rule).
    """

    # Author-facing facet name used in violation messages.
    label: str
    # Source predicate whose presence proves the facet is recorded.
    predicate: URIRef
    # True when the facet applies only to foreign-language sources.
    foreign_only: bool = False
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Mandatory provenance facets, listed in serialization order. The set of
# predicates here is the membership a fully-populated
# ``provenance.Source.to_triples()`` emits (D5) and is pinned by a drift-guard
# test. It is deliberately NOT ``io/research._SOURCE_FACETS``: that one lists
# Pydantic field NAMES, so it includes ``name`` (which has no predicate) and
# omits ``translation``. Every IRI comes from the ``golem.namespaces`` constants.
FACETS: tuple[Facet, ...] = (
    Facet(label="type", predicate=HAS_TYPE),
    Facet(label="reliability", predicate=BW_RELIABILITY),
    Facet(label="reliability justification", predicate=BW_RELIABILITY_JUSTIFICATION),
    Facet(label="reference", predicate=BW_REFERENCE),
    Facet(label="author", predicate=BW_AUTHOR),
    Facet(label="original language", predicate=BW_ORIGINAL_LANGUAGE),
    Facet(label="access date", predicate=BW_ACCESS_DATE),
    Facet(label="original quote", predicate=BW_ORIGINAL_QUOTE),
    Facet(label="translation", predicate=BW_TRANSLATION, foreign_only=True),
)
|
|
140
|
+
|
|
141
|
+
# Reverse lookup: E55 individual IRI (as a string) -> reliability rank name.
# Derived by inverting ``RELIABILITY_IRI``, the single vocabulary source, so the
# scale is never spelled out a second time (D6).
_RELIABILITY_NAME: dict[str, str] = {
    str(individual): rank for rank, individual in RELIABILITY_IRI.items()
}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass(frozen=True)
class SourceRecord:
    """Immutable view of one source backing an anchor's promoted finding.

    This is the working unit for the R2/R3 rules. R2 reads
    ``present_predicates`` to find facet gaps, ``original_language`` drives
    the translation conditionality, and ``reliability`` is the rating *name*
    (``alta``/``media``/``baja``) or ``None`` when the source is unrated.
    """

    # IRI of the source node.
    uri: str
    # Predicate IRI strings the source actually carries in the graph.
    present_predicates: frozenset[str]
    # Object of the original-language predicate, or ``None`` if absent.
    original_language: str | None
    # Reliability rank name, or ``None`` when no known rating is attached.
    reliability: str | None
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass
class _SourceAccum:
    """Scratch state used while folding one source's ``?p ?o`` rows together."""

    # Every predicate IRI string seen on the source so far.
    predicates: set[str]
    # Object of the original-language predicate, once one has been seen.
    language: str | None = None
    # Object of the reliability predicate (an IRI string), once seen.
    reliability_iri: str | None = None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def load_sources_by_anchor(indexer: Indexer) -> dict[str, list[SourceRecord]]:
    """Map each anchor to its supporting sources (``anchor→finding→source``, D5).

    A source reached through a dangling ``bw:supportedBy`` (no describing
    triple of its own) is still returned, with an empty facet set, so R2 can
    flag every missing facet. Within each anchor the sources are sorted by
    URI for byte-stable output.
    """
    rows = indexer.query(
        f"""
        SELECT ?anchor ?source ?p ?o WHERE {{
            ?anchor <{BW_PROMOTES}> ?finding .
            ?finding <{BW_SUPPORTED_BY}> ?source .
            OPTIONAL {{ ?source ?p ?o . }}
        }}
        """
    )
    language_predicate = str(BW_ORIGINAL_LANGUAGE)
    reliability_predicate = str(BW_RELIABILITY)

    # anchor IRI -> source IRI -> folded facts about that source
    folded: dict[str, dict[str, _SourceAccum]] = {}
    for row in rows:
        per_anchor = folded.setdefault(row["anchor"], {})
        accum = per_anchor.setdefault(row["source"], _SourceAccum(predicates=set()))
        seen = row.get("p")
        if seen is None:
            # Dangling source: registered above, but no triple to fold in.
            continue
        accum.predicates.add(seen)
        if seen == language_predicate:
            accum.language = row.get("o")
        elif seen == reliability_predicate:
            accum.reliability_iri = row.get("o")

    result: dict[str, list[SourceRecord]] = {}
    for anchor, per_anchor in folded.items():
        result[anchor] = [
            SourceRecord(
                uri=source,
                present_predicates=frozenset(per_anchor[source].predicates),
                original_language=per_anchor[source].language,
                # An unrated source (or an unknown IRI) maps to ``None``.
                reliability=_RELIABILITY_NAME.get(per_anchor[source].reliability_iri or ""),
            )
            for source in sorted(per_anchor)
        ]
    return result
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def entity_present(indexer: Indexer, uri: str, uri_base: str) -> bool:
    """Report whether ``uri`` names an entity present in the graph (R4, D4).

    The well-known (untyped) timeline IRI always counts as present: it is a
    legitimate ``bw:constrains`` target that carries no describing triple of
    its own. Any other URI is present when it is the subject of at least one
    triple.
    """
    is_timeline = uri == str(timeline_uri(uri_base))
    if is_timeline:
        return True
    # NOTE(review): ``uri`` is interpolated verbatim into the query text; this
    # presumably relies on callers passing well-formed IRIs -- confirm.
    probe = f"SELECT ?p WHERE {{ <{uri}> ?p ?o . }} LIMIT 1"
    matches = list(indexer.query(probe))
    return len(matches) > 0
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Core finding types and the validator seam (data-model, contracts/validator-protocol.md).
|
|
2
|
+
|
|
3
|
+
In-memory only; the subsystem persists nothing (FR-020). Every type here is frozen
|
|
4
|
+
where it can be, so findings are hashable and dedupe is trivial (D8).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from enum import StrEnum
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Literal, Protocol, cast, runtime_checkable
|
|
13
|
+
|
|
14
|
+
from bookwright.errors import BookwrightError
|
|
15
|
+
from bookwright.indexers import Indexer
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from bookwright.core.manifest import Manifest
|
|
19
|
+
from bookwright.golem.base import SluggedEntity
|
|
20
|
+
from bookwright.io.bible import MapResult
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"Severity",
|
|
24
|
+
"UnknownValidatorError",
|
|
25
|
+
"ValidationContext",
|
|
26
|
+
"Validator",
|
|
27
|
+
"ValidatorError",
|
|
28
|
+
"Violation",
|
|
29
|
+
"split_source",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Severity(StrEnum):
|
|
34
|
+
"""A finding's level. String-valued (JSON-friendly, design § 13.1)."""
|
|
35
|
+
|
|
36
|
+
error = "error"
|
|
37
|
+
warning = "warning"
|
|
38
|
+
info = "info"
|
|
39
|
+
|
|
40
|
+
def at_least(self, threshold: Severity) -> bool:
|
|
41
|
+
"""Whether this severity meets ``threshold`` under ``error > warning > info``."""
|
|
42
|
+
return _RANK[self] >= _RANK[threshold]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
_RANK: dict[Severity, int] = {Severity.error: 2, Severity.warning: 1, Severity.info: 0}
|
|
46
|
+
"""Ordinal for the ``--severity`` threshold, the gate, and the total-order sort."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def split_source(source: str | None) -> tuple[str | None, int | None]:
    """Split a ``relpath[:line]`` provenance string into ``(path, line)``.

    The ``:line`` suffix is recognized only when a non-empty path precedes a
    tail made solely of decimal digits; otherwise the whole string is the
    path and the line is ``None``. This is the single place the ``source``
    grammar is parsed, so every consumer should route through it.

    Args:
        source: The provenance string, or ``None`` for a location-less finding.

    Returns:
        ``(path, line)``; ``(None, None)`` when ``source`` is ``None``.
    """
    if source is None:
        return None, None
    head, sep, tail = source.rpartition(":")
    # ``isdecimal`` rather than ``isdigit``: ``isdigit`` is also true for
    # characters such as superscript digits ("²") that ``int`` rejects, which
    # used to raise ``ValueError`` here instead of treating the tail as path.
    if head and sep and tail.isdecimal():
        return head, int(tail)
    return source, None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class Violation:
    """A single finding reported by a validator (FR-002/003).

    The class is frozen and ``triples`` is a tuple of tuples, so instances
    are hashable and identical findings can collapse to one in the runner
    (D8). ``source`` is a project-relative posix path with an optional
    ``:line`` suffix, or ``None`` when no specific location applies.
    """

    validator: str
    severity: Severity
    message: str
    source: str | None = None
    triples: tuple[tuple[str, str, str], ...] = ()

    def source_file(self) -> str | None:
        """Return ``source`` without any ``:line`` suffix, or ``None``."""
        path, _line = split_source(self.source)
        return path

    def source_line(self) -> int | None:
        """Return the line number parsed from ``source``, or ``None`` if absent."""
        _path, line = split_source(self.source)
        return line

    def to_json(self) -> dict[str, Any]:
        """Serialize to the contract shape (FR-002, SC-004).

        ``severity`` is emitted as its string value and each triple as a list.
        """
        payload: dict[str, Any] = {}
        payload["validator"] = self.validator
        payload["severity"] = self.severity.value
        payload["message"] = self.message
        payload["source"] = self.source
        payload["triples"] = list(map(list, self.triples))
        return payload
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass(frozen=True)
class ValidatorError:
    """Record of a validator that failed to load or raised while running (FR-014).

    These end up in the report's ``errors[]`` and never affect the gate. For
    ``phase="load"`` failures ``validator`` holds the offending file path
    instead of a validator name.
    """

    validator: str
    message: str
    phase: Literal["load", "run"]

    def to_json(self) -> dict[str, Any]:
        """Serialize as a dict with keys ``validator``, ``phase``, ``message``."""
        payload: dict[str, Any] = {"validator": self.validator}
        payload["phase"] = self.phase
        payload["message"] = self.message
        return payload
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@runtime_checkable
class Validator(Protocol):
    """Structural contract between the runner and any validator (design § 13.1).

    An implementation inspects the project (a ``ValidationContext``) and the
    already-built graph (``indexer``, which may be empty) and reports its
    findings as ``Violation`` objects; an empty list means "no problems"
    (FR-001). Implementations MUST be deterministic (FR-019) and MUST NOT
    write to disk or mutate the graph (FR-020). They MAY raise, because the
    runner isolates each validator (FR-014).
    """

    # Name the validator is known by.
    name: str
    # Severity attached to this validator by default.
    severity_default: Severity

    def validate(self, project: ValidationContext, indexer: Indexer) -> list[Violation]:
        """Examine ``project`` and ``indexer`` and return the findings."""
        ...
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class UnknownValidatorError(BookwrightError):
    """Raised when a configured ``[validators]`` name was not discovered (FR-007)."""

    code = "unknown_validator"

    def __init__(self, names: tuple[str, ...]) -> None:
        """Store the offending ``names`` and build the message from them.

        The base class receives the message plus ``{"names": [...]}`` as
        its second argument.
        """
        self.names = names
        listing = ", ".join(names)
        details = {"names": list(names)}
        super().__init__(f"unknown validator(s): {listing}", details)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Marker for "nothing cached yet"; needed because ``None`` is itself a valid
# cached result (for example an absent constitution file).
_UNSET = object()


@dataclass
class ValidationContext:
    """The ``project`` argument handed to every validator (data-model).

    Holds the project root and manifest, and exposes accessors that compute
    their result on first call and return the cached value afterwards, so
    each source file is read once per run and shared across validators.
    """

    root: Path
    manifest: Manifest

    # Per-accessor caches; ``_UNSET`` until the matching accessor first runs.
    _bible: Any = field(default=_UNSET, repr=False, compare=False)
    _character_names: Any = field(default=_UNSET, repr=False, compare=False)
    _setting_names: Any = field(default=_UNSET, repr=False, compare=False)
    _manuscript_files: Any = field(default=_UNSET, repr=False, compare=False)
    _constitution_text: Any = field(default=_UNSET, repr=False, compare=False)

    @property
    def uri_base(self) -> str:
        """The URI base configured in the manifest's ``bookwright`` section."""
        return self.manifest.bookwright.uri_base

    def bible(self) -> MapResult:
        """Return the project's bible mapped to GOLEM entities (mapped once per run)."""
        if self._bible is not _UNSET:
            return cast("MapResult", self._bible)
        # Imported lazily, at first use.
        from bookwright.io.bible import map_bible  # noqa: PLC0415

        bible_dir = self.root / self.manifest.paths.bible
        self._bible = map_bible(self.root, bible_dir, self.uri_base)
        return cast("MapResult", self._bible)

    def _names_of(self, concept_cls: type[SluggedEntity]) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(name, bible_relpath)`` pairs for one bible concept class."""
        pairs: list[tuple[str, str]] = []
        for mapped in self.bible().mapped:
            entity = mapped.entity
            if isinstance(entity, concept_cls):
                pairs.append((entity.name, mapped.relpath))
        pairs.sort()
        return tuple(pairs)

    def character_names(self) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(name, bible_relpath)`` for every bible Character."""
        if self._character_names is not _UNSET:
            return cast("tuple[tuple[str, str], ...]", self._character_names)
        from bookwright.golem import Character  # noqa: PLC0415

        self._character_names = self._names_of(Character)
        return cast("tuple[tuple[str, str], ...]", self._character_names)

    def setting_names(self) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(name, bible_relpath)`` for every bible Setting."""
        if self._setting_names is not _UNSET:
            return cast("tuple[tuple[str, str], ...]", self._setting_names)
        from bookwright.golem import Setting  # noqa: PLC0415

        self._setting_names = self._names_of(Setting)
        return cast("tuple[tuple[str, str], ...]", self._setting_names)

    def manuscript_files(self) -> tuple[tuple[str, str], ...]:
        """Return sorted ``(relpath, text)`` for every ``**/*.md`` under the manuscript dir.

        A file that cannot be read or decoded as UTF-8 is skipped, so one bad
        file never aborts a validator. A missing manuscript directory yields
        an empty tuple. Entries are sorted by relpath for determinism (D8).
        """
        if self._manuscript_files is _UNSET:
            found: list[tuple[str, str]] = []
            manuscript_dir = self.root / self.manifest.paths.manuscript
            candidates = sorted(manuscript_dir.rglob("*.md")) if manuscript_dir.is_dir() else []
            for candidate in candidates:
                if not candidate.is_file():
                    continue
                try:
                    body = candidate.read_text(encoding="utf-8")
                except (OSError, UnicodeDecodeError):
                    continue
                found.append((candidate.relative_to(self.root).as_posix(), body))
            self._manuscript_files = tuple(sorted(found))
        return cast("tuple[tuple[str, str], ...]", self._manuscript_files)

    def constitution_text(self) -> str | None:
        """Return the constitution file's text, or ``None`` when absent/unreadable."""
        if self._constitution_text is _UNSET:
            target = self.root / self.manifest.paths.constitution
            loaded: str | None
            try:
                loaded = target.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                loaded = None
            # ``None`` is cached too, so a failed read is not retried.
            self._constitution_text = loaded
        return cast("str | None", self._constitution_text)
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Read-only graph projections for the ``temporal`` validator (data-model, D11/D12).
|
|
2
|
+
|
|
3
|
+
These helpers turn the interval graph the timeline indexer emits into plain
|
|
4
|
+
in-memory shapes (``EventInterval`` + relation edge sets) the validator reasons
|
|
5
|
+
over, so ``temporal`` never touches rdflib directly. SPARQL is run through the
|
|
6
|
+
``Indexer`` seam (``indexer.query``); every traversal is insensitive to whether a
|
|
7
|
+
year sits on a boundary directly or on its ``Dimension`` sub-node.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
from rdflib.namespace import RDF, RDFS, XSD
|
|
15
|
+
|
|
16
|
+
from bookwright.golem.namespaces import (
|
|
17
|
+
ASSIGNED_ATTRIBUTE_TO,
|
|
18
|
+
CRM,
|
|
19
|
+
CSM,
|
|
20
|
+
GOLEM,
|
|
21
|
+
TEMPORAL_RELATIONS,
|
|
22
|
+
TR,
|
|
23
|
+
USED_SPECIFIC_OBJECT,
|
|
24
|
+
)
|
|
25
|
+
from bookwright.indexers import Indexer
|
|
26
|
+
from bookwright.validation.base import split_source
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"EventInterval",
|
|
30
|
+
"intervals_disjoint",
|
|
31
|
+
"load_intervals",
|
|
32
|
+
"load_relations",
|
|
33
|
+
"parse_gyear",
|
|
34
|
+
"resolve_source",
|
|
35
|
+
"timeline_bounds",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
# SPARQL ``PREFIX`` header, one declaration per line, prepended to every
# query issued through ``_q``.
_PREFIXES = "\n".join(
    f"PREFIX {label}: <{namespace}>"
    for label, namespace in {
        "golem": str(GOLEM),
        "crm": str(CRM),
        "tr": str(TR),
        "csm": str(CSM),
        "rdf": str(RDF),
        "rdfs": str(RDFS),
        "xsd": str(XSD),
    }.items()
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class EventInterval:
    """Begin and end years of one event; a ``None`` bound leaves that side open."""

    # IRI of the node the interval belongs to.
    uri: str
    # First year of the interval, or ``None`` when unknown/open.
    begin: int | None
    # Last year of the interval, or ``None`` when unknown/open.
    end: int | None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _q(indexer: Indexer, body: str) -> list[dict[str, str]]:
    """Run ``body`` with the shared ``PREFIX`` header prepended; return the rows as a list."""
    query_text = f"{_PREFIXES}\n{body}"
    return list(indexer.query(query_text))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def parse_gyear(raw: str) -> int | None:
    """Coerce an ``xsd:gYear`` lexical (``"1885"``, ``"0800"``, ``"-0044"``) to int.

    This is the single ``gYear`` parser shared by the ``temporal`` and
    ``factual_anchor`` validators, so event boundary years and anchor
    time-span years coerce identically.

    Args:
        raw: The lexical form; surrounding whitespace is ignored.

    Returns:
        The signed year, or ``None`` when ``raw`` is not an optional leading
        ``-`` followed only by decimal digits.
    """
    text = raw.strip()
    negative = text.startswith("-")
    digits = text[1:] if negative else text
    # ``isdecimal`` rather than ``isdigit``: ``isdigit`` is also true for
    # characters such as superscript digits ("²") that ``int`` rejects, which
    # used to raise ``ValueError`` here instead of yielding ``None``.
    if not digits.isdecimal():
        return None
    value = int(digits)
    return -value if negative else value
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def intervals_disjoint(a: EventInterval, b: EventInterval) -> bool:
    """Return True when two closed year ranges provably do not overlap (FR-011, research D1).

    This is the **single source of truth** for "two intervals contradict":
    the ``temporal`` overlap-disjoint rule and the ``factual_anchor``
    anachronism rule both decide disjointness here. A ``None`` bound is
    unbounded on that side, so it can never force disjointness; an
    open-ended interval cannot be *proven* disjoint from anything.
    """

    def ends_before(first: EventInterval, second: EventInterval) -> bool:
        # Provable only when both bounds being compared are known.
        return (
            first.end is not None
            and second.begin is not None
            and first.end < second.begin
        )

    return ends_before(a, b) or ends_before(b, a)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def load_intervals(indexer: Indexer) -> dict[str, EventInterval]:
    """Build one :class:`EventInterval` for every ``G5_Narrative_Event`` in the graph.

    The year is reached via ``(csm:duration|tr:temporal-location)/tr:temporal-location``
    to a boundary whose ``crm:P2_has_type`` ends in ``/begin`` or ``/end``, then
    ``(crm:P90_has_value | crm:P43_has_dimension/crm:P90_has_value)`` — so the
    lookup is insensitive to the carrier-node shape (D12).

    Every event returned by the first query is present in the result; an event
    with no usable boundary year keeps ``None`` for that bound. Rows whose year
    does not parse through :func:`parse_gyear` are ignored.
    """
    event_rows = _q(indexer, "SELECT ?event WHERE { ?event a golem:G5_Narrative_Event . }")
    # Two parallel maps keyed by event URI; both start fully open.
    begin_by_event: dict[str, int | None] = {row["event"]: None for row in event_rows}
    end_by_event: dict[str, int | None] = {uri: None for uri in begin_by_event}

    boundary_rows = _q(
        indexer,
        """
        SELECT ?event ?btype ?year WHERE {
        ?event a golem:G5_Narrative_Event .
        ?event (csm:duration|tr:temporal-location)/tr:temporal-location ?boundary .
        ?boundary crm:P2_has_type ?btype .
        ?boundary (crm:P90_has_value | crm:P43_has_dimension/crm:P90_has_value) ?year .
        }
        """,
    )
    for row in boundary_rows:
        uri, boundary_type, raw_year = row["event"], row["btype"], row["year"]
        year = parse_gyear(raw_year)
        if year is None or uri not in begin_by_event:
            continue
        # Boundary types with any other suffix are left untouched.
        if boundary_type.endswith("/begin"):
            begin_by_event[uri] = year
        elif boundary_type.endswith("/end"):
            end_by_event[uri] = year

    return {
        uri: EventInterval(uri=uri, begin=begin_by_event[uri], end=end_by_event[uri])
        for uri in begin_by_event
    }
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def timeline_bounds(intervals: dict[str, EventInterval]) -> EventInterval:
    """Collapse the given events into the timeline's ``(min begin, max end)`` (D3).

    A **pure** reduction over an already-loaded :func:`load_intervals` result: it
    adds **no** new interval reasoning. ``factual_anchor`` uses it when an anchor
    constrains the timeline as a whole. It takes the loaded dict (not the
    indexer) so the caller reuses one :func:`load_intervals` pass rather than
    querying the graph a second time.

    A bound is ``None`` when no event carries a year on that side. The ``uri``
    is a sentinel label (the timeline has no single typed node, research D10).
    """
    known_begins = (iv.begin for iv in intervals.values() if iv.begin is not None)
    known_ends = (iv.end for iv in intervals.values() if iv.end is not None)
    # ``default=None`` covers the case where no event has that bound.
    earliest = min(known_begins, default=None)
    latest = max(known_ends, default=None)
    return EventInterval(uri="timeline", begin=earliest, end=latest)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def load_relations(indexer: Indexer) -> dict[str, set[tuple[str, str]]]:
    """Collect the ``TR:*`` edge sets, keyed by canonical relation name (D11).

    Keys are the ``name`` of each entry in :data:`TEMPORAL_RELATIONS`; each value
    is a set of ``(subject, object)`` event-URI pairs linked by that entry's
    ``predicate``. The query requires both ends to be ``G5_Narrative_Event``
    nodes, so a stray edge never leaks into the reasoning.
    """

    def edges_for(predicate: str) -> set[tuple[str, str]]:
        # One graph query per predicate; duplicate rows collapse in the set.
        matches = _q(
            indexer,
            f"""
            SELECT ?a ?b WHERE {{
            ?a a golem:G5_Narrative_Event .
            ?b a golem:G5_Narrative_Event .
            ?a <{predicate}> ?b .
            }}
            """,
        )
        return {(match["a"], match["b"]) for match in matches}

    return {relation.name: edges_for(relation.predicate) for relation in TEMPORAL_RELATIONS}
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def resolve_source(indexer: Indexer, uri: str) -> str | None:
    """Look up the ``relpath[:line]`` provenance string for a graph entity (D6).

    Follows the CIDOC provenance edge: an ``E13_Attribute_Assignment`` whose
    ``P140_assigned_attribute_to`` is ``uri`` carries the source on
    ``P16_used_specific_object``.

    Returns ``None`` when no such assertion exists. When several sources are
    found, one for which ``split_source`` reports a line component wins, and
    ties are broken by the lexicographically smallest string, so the result is
    deterministic.
    """
    query = f"""
        SELECT ?source WHERE {{
        ?assertion <{ASSIGNED_ATTRIBUTE_TO}> <{uri}> .
        ?assertion <{USED_SPECIFIC_OBJECT}> ?source .
        }}
        """
    candidates = {row["source"] for row in _q(indexer, query)}
    if not candidates:
        return None

    def preference(source: str) -> tuple[bool, str]:
        # False sorts before True, so sources with a line come first; the
        # string itself then breaks ties lexicographically.
        return (split_source(source)[1] is None, source)

    return min(candidates, key=preference)
|