bookwright-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bookwright/__init__.py +3 -0
- bookwright/__main__.py +6 -0
- bookwright/cli.py +19 -0
- bookwright/commands/__init__.py +0 -0
- bookwright/commands/_envelope.py +36 -0
- bookwright/commands/check.py +75 -0
- bookwright/commands/graph/__init__.py +23 -0
- bookwright/commands/graph/build.py +157 -0
- bookwright/commands/graph/envelope.py +26 -0
- bookwright/commands/graph/query.py +98 -0
- bookwright/commands/init/__init__.py +5 -0
- bookwright/commands/init/conflict.py +107 -0
- bookwright/commands/init/envelope.py +322 -0
- bookwright/commands/init/git.py +96 -0
- bookwright/commands/init/main.py +263 -0
- bookwright/commands/init/resolve.py +193 -0
- bookwright/commands/init/scaffold.py +242 -0
- bookwright/commands/init/validate.py +172 -0
- bookwright/commands/integration/__init__.py +22 -0
- bookwright/commands/integration/use.py +120 -0
- bookwright/commands/validate.py +160 -0
- bookwright/commands/version.py +35 -0
- bookwright/core/__init__.py +35 -0
- bookwright/core/_blocks.py +239 -0
- bookwright/core/_build.py +154 -0
- bookwright/core/_research_block.py +56 -0
- bookwright/core/_translate.py +90 -0
- bookwright/core/errors.py +127 -0
- bookwright/core/iso639_1.py +200 -0
- bookwright/core/manifest.py +343 -0
- bookwright/errors.py +47 -0
- bookwright/golem/__init__.py +71 -0
- bookwright/golem/base.py +200 -0
- bookwright/golem/errors.py +29 -0
- bookwright/golem/modules/__init__.py +1 -0
- bookwright/golem/modules/character.py +109 -0
- bookwright/golem/modules/event.py +91 -0
- bookwright/golem/modules/feature.py +161 -0
- bookwright/golem/modules/inference.py +41 -0
- bookwright/golem/modules/narrative.py +55 -0
- bookwright/golem/modules/provenance.py +197 -0
- bookwright/golem/modules/relationship.py +38 -0
- bookwright/golem/modules/setting.py +30 -0
- bookwright/golem/namespaces.py +332 -0
- bookwright/golem/serialize.py +25 -0
- bookwright/golem/slug.py +22 -0
- bookwright/indexers/__init__.py +47 -0
- bookwright/indexers/base.py +55 -0
- bookwright/indexers/errors.py +80 -0
- bookwright/indexers/rdflib_indexer.py +89 -0
- bookwright/integrations/__init__.py +155 -0
- bookwright/integrations/base.py +117 -0
- bookwright/integrations/claude/__init__.py +29 -0
- bookwright/integrations/constants.py +38 -0
- bookwright/integrations/descriptions.py +48 -0
- bookwright/integrations/errors.py +170 -0
- bookwright/integrations/generic/__init__.py +56 -0
- bookwright/integrations/lint.py +160 -0
- bookwright/integrations/materialize.py +202 -0
- bookwright/integrations/options.py +203 -0
- bookwright/io/__init__.py +1 -0
- bookwright/io/bible.py +500 -0
- bookwright/io/errors.py +98 -0
- bookwright/io/frontmatter.py +61 -0
- bookwright/io/fs.py +226 -0
- bookwright/io/manuscript.py +15 -0
- bookwright/io/project.py +21 -0
- bookwright/io/report.py +107 -0
- bookwright/io/research.py +427 -0
- bookwright/resources/__init__.py +1 -0
- bookwright/resources/commands/bookwright-analyze.md +66 -0
- bookwright/resources/commands/bookwright-bible.md +96 -0
- bookwright/resources/commands/bookwright-checklist.md +67 -0
- bookwright/resources/commands/bookwright-clarify.md +65 -0
- bookwright/resources/commands/bookwright-constitution.md +79 -0
- bookwright/resources/commands/bookwright-continuity.md +70 -0
- bookwright/resources/commands/bookwright-draft.md +74 -0
- bookwright/resources/commands/bookwright-outline.md +71 -0
- bookwright/resources/commands/bookwright-research.md +107 -0
- bookwright/resources/commands/bookwright-scenes.md +66 -0
- bookwright/resources/commands/bookwright-synopsis.md +67 -0
- bookwright/resources/commands/bookwright-verify.md +136 -0
- bookwright/resources/commands/references/golem-character.md +65 -0
- bookwright/resources/commands/references/golem-events-timeline.md +56 -0
- bookwright/resources/commands/references/golem-relationships.md +53 -0
- bookwright/resources/commands/references/greimas-actants.md +57 -0
- bookwright/resources/commands/references/pending-protocol.md +72 -0
- bookwright/resources/commands/references/propp-functions.md +54 -0
- bookwright/resources/commands/references/research-format.md +136 -0
- bookwright/resources/project/.bookwright/cache/.gitkeep +0 -0
- bookwright/resources/project/.bookwright/schema/.gitkeep +0 -0
- bookwright/resources/project/.bookwright/templates/.gitkeep +0 -0
- bookwright/resources/project/.gitignore +23 -0
- bookwright/resources/project/README.md.j2 +40 -0
- bookwright/resources/project/__init__.py +6 -0
- bookwright/resources/project/bible/characters/.gitkeep +0 -0
- bookwright/resources/project/bible/constitution.md.j2 +74 -0
- bookwright/resources/project/bible/glossary.md +36 -0
- bookwright/resources/project/bible/locations/.gitkeep +0 -0
- bookwright/resources/project/bible/pov-structure.md +43 -0
- bookwright/resources/project/bible/relationships.md +36 -0
- bookwright/resources/project/bible/research/_index.md +28 -0
- bookwright/resources/project/bible/research/sources.md +23 -0
- bookwright/resources/project/bible/settings/.gitkeep +0 -0
- bookwright/resources/project/bible/subplots.md +35 -0
- bookwright/resources/project/bible/themes.md +36 -0
- bookwright/resources/project/bible/timeline.md +38 -0
- bookwright/resources/project/manuscript/.gitkeep +0 -0
- bookwright/resources/project/outline/arcs.md +34 -0
- bookwright/resources/project/outline/scenes.md +31 -0
- bookwright/resources/project/outline/structure.md +35 -0
- bookwright/resources/project/outline/synopsis.md +25 -0
- bookwright/resources/schemas/__init__.py +19 -0
- bookwright/resources/schemas/golem-1.1/VERSION +1 -0
- bookwright/resources/schemas/golem-1.1/golem.ttl +1947 -0
- bookwright/resources/schemas/golem-1.1/version.json +8 -0
- bookwright/resources/templates/__init__.py +1 -0
- bookwright/resources/templates/bible/character.md.tmpl +63 -0
- bookwright/resources/templates/bible/location.md.tmpl +37 -0
- bookwright/resources/templates/bible/research/_index.md.tmpl +25 -0
- bookwright/resources/templates/bible/research/sources.md.tmpl +21 -0
- bookwright/resources/templates/bible/research/tema.md.tmpl +37 -0
- bookwright/resources/templates/bible/setting.md.tmpl +38 -0
- bookwright/resources/templates/manifest.template.toml +79 -0
- bookwright/resources/templates/manuscript/chapter.md.tmpl +36 -0
- bookwright/resources/templates/scenes/scene.md.tmpl +37 -0
- bookwright/resources/vocabularies/__init__.py +6 -0
- bookwright/resources/vocabularies/greimas.ttl +4 -0
- bookwright/resources/vocabularies/propp.ttl +4 -0
- bookwright/resources/vocabularies/sources.ttl +82 -0
- bookwright/validation/__init__.py +33 -0
- bookwright/validation/anchor_queries.py +223 -0
- bookwright/validation/base.py +233 -0
- bookwright/validation/queries.py +197 -0
- bookwright/validation/registry.py +185 -0
- bookwright/validation/report.py +106 -0
- bookwright/validation/runner.py +65 -0
- bookwright/validation/validators/__init__.py +9 -0
- bookwright/validation/validators/character_presence.py +202 -0
- bookwright/validation/validators/factual_anchor.py +291 -0
- bookwright/validation/validators/focalization.py +152 -0
- bookwright/validation/validators/setting_continuity.py +100 -0
- bookwright/validation/validators/temporal.py +277 -0
- bookwright_cli-0.2.0.dist-info/METADATA +218 -0
- bookwright_cli-0.2.0.dist-info/RECORD +149 -0
- bookwright_cli-0.2.0.dist-info/WHEEL +4 -0
- bookwright_cli-0.2.0.dist-info/entry_points.txt +2 -0
- bookwright_cli-0.2.0.dist-info/licenses/LICENSE +202 -0
- bookwright_cli-0.2.0.dist-info/licenses/NOTICE +14 -0
bookwright/io/bible.py
ADDED
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
"""Discover bible source files and map their frontmatter to GOLEM entities.
|
|
2
|
+
|
|
3
|
+
Type is determined by **location** (R2 / bible-format.md). The mapper passes
|
|
4
|
+
frontmatter values straight to the iteration-5 constructors — it never builds
|
|
5
|
+
feature/role/dimension nodes itself (data-model § 0/§ 3). It collects soft
|
|
6
|
+
warnings (``unknown_keys``, ``unresolved_participants``), skips files whose
|
|
7
|
+
frontmatter is unusable (FR-013), and raises on a slug collision (FR-014).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable, Iterable
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import yaml
|
|
18
|
+
from rdflib.term import URIRef
|
|
19
|
+
|
|
20
|
+
from bookwright.golem import (
|
|
21
|
+
AttributeAssignment,
|
|
22
|
+
Character,
|
|
23
|
+
EmptySlugError,
|
|
24
|
+
NarrativeEvent,
|
|
25
|
+
Setting,
|
|
26
|
+
SocialRelationship,
|
|
27
|
+
)
|
|
28
|
+
from bookwright.golem.base import GolemEntity
|
|
29
|
+
from bookwright.golem.namespaces import TEMPORAL_RELATIONS
|
|
30
|
+
from bookwright.golem.slug import make_slug
|
|
31
|
+
|
|
32
|
+
from .errors import InvalidFrontmatterError, SlugCollisionError
|
|
33
|
+
from .frontmatter import Frontmatter, parse_frontmatter
|
|
34
|
+
from .report import SkippedFile, UnknownKey, UnresolvedParticipant
|
|
35
|
+
|
|
36
|
+
CHARACTER_KEYS = frozenset({"name", "born", "died", "features", "narrative_roles"})
|
|
37
|
+
SETTING_KEYS = frozenset({"name"})
|
|
38
|
+
ITEM_KEYS = frozenset({"name", "participants"})
|
|
39
|
+
# The five qualitative temporal relations an event may declare (each a list of
|
|
40
|
+
# event names resolved against the timeline's own event index — research D11).
|
|
41
|
+
# Derived from the single source of truth so the keys never drift from the model.
|
|
42
|
+
RELATION_KEYS: tuple[str, ...] = tuple(rel.name for rel in TEMPORAL_RELATIONS)
|
|
43
|
+
# Events additionally accept an interval (``begin`` / ``end`` years, or the
|
|
44
|
+
# ``date`` single-year shorthand) plus the relation keys.
|
|
45
|
+
EVENT_ITEM_KEYS = frozenset({"name", "participants", "begin", "end", "date", *RELATION_KEYS})
|
|
46
|
+
TIMELINE_TOP_KEYS = frozenset({"events"})
|
|
47
|
+
RELATIONSHIPS_TOP_KEYS = frozenset({"relationships"})
|
|
48
|
+
|
|
49
|
+
# A directory builder maps ``(frontmatter, relpath) → entity``; a collection
|
|
50
|
+
# builder maps an ``_ItemContext`` (name, resolved participants, the raw item, and
|
|
51
|
+
# the collection's own name→URI index) → entity. Typed so ``mypy --strict`` checks
|
|
52
|
+
# every call site (rather than the previous ``Any`` escape hatch).
|
|
53
|
+
_Builder = Callable[[dict[str, Any], str], GolemEntity]
|
|
54
|
+
_ItemBuilder = Callable[["_ItemContext"], GolemEntity]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class MappedEntity:
    """A built entity together with the source details used for provenance (R6).

    Immutable: instances are created once per mapped file or collection item.
    """

    # The constructed GOLEM entity.
    entity: GolemEntity
    # Path of the originating source file, as a string.
    relpath: str
    # Frontmatter key -> line number, used to build ``relpath:line`` sources.
    key_lines: dict[str, int]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class MapResult:
    """Accumulated result of mapping a project's bible to GOLEM entities."""

    # Every successfully built entity, paired with its provenance source.
    mapped: list[MappedEntity] = field(default_factory=list)
    # Number of source files that were visited (whether or not they were usable).
    files_processed: int = 0
    # Files or collection items that were skipped, each with a reason.
    skipped: list[SkippedFile] = field(default_factory=list)
    # Soft warnings: keys that are not part of the allowed set for their location.
    unknown_keys: list[UnknownKey] = field(default_factory=list)
    # Soft warnings: referenced names that could not be resolved to a URI.
    unresolved_participants: list[UnresolvedParticipant] = field(default_factory=list)
    # ``make_slug(name) → URI`` for every character, setting and event — the research
    # ``bears_on``/``constrains`` targets (D11), distinct from participant ``slug_index``.
    entity_index: dict[str, URIRef] = field(default_factory=dict)

    @property
    def entities(self) -> list[GolemEntity]:
        """Return just the entities of ``mapped``, in the same order."""
        return [pair.entity for pair in self.mapped]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class _Collisions:
|
|
85
|
+
"""Tracks ``(concept, slug) → relpath`` to detect identifier collisions (FR-014)."""
|
|
86
|
+
|
|
87
|
+
def __init__(self) -> None:
|
|
88
|
+
self._seen: dict[tuple[str, str], str] = {}
|
|
89
|
+
|
|
90
|
+
def record(self, concept: str, slug: str, relpath: str) -> None:
|
|
91
|
+
prior = self._seen.get((concept, slug))
|
|
92
|
+
if prior is not None and prior != relpath:
|
|
93
|
+
raise SlugCollisionError(slug, prior, relpath)
|
|
94
|
+
self._seen[(concept, slug)] = relpath
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
|
|
98
|
+
class _MapContext:
|
|
99
|
+
"""The mutable state every mapping helper shares (R3).
|
|
100
|
+
|
|
101
|
+
Bundling ``project_root``, the accumulating ``result``, the collision
|
|
102
|
+
tracker, and the ``slug → URI`` index into one object keeps each helper's
|
|
103
|
+
signature small — the four used to be threaded positionally through every
|
|
104
|
+
function.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
project_root: Path
|
|
108
|
+
result: MapResult
|
|
109
|
+
collisions: _Collisions
|
|
110
|
+
slug_index: dict[str, URIRef]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass(frozen=True)
|
|
114
|
+
class _DirSpec:
|
|
115
|
+
"""Per-concept config for a one-entity-per-file directory (characters/settings)."""
|
|
116
|
+
|
|
117
|
+
directory: Path
|
|
118
|
+
concept: str
|
|
119
|
+
builder: _Builder
|
|
120
|
+
allowed_keys: frozenset[str]
|
|
121
|
+
index: bool # whether built entities feed the participant-resolution index
|
|
122
|
+
# Whether built entities feed the research ``entity_index`` (D11) — separate from
|
|
123
|
+
# ``index`` so a setting joins it without changing participant resolution.
|
|
124
|
+
into_entity_index: bool = False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass(frozen=True)
|
|
128
|
+
class _CollectionSpec:
|
|
129
|
+
"""Per-concept config for a single collection file (timeline/relationships)."""
|
|
130
|
+
|
|
131
|
+
path: Path
|
|
132
|
+
concept: str
|
|
133
|
+
top_keys: frozenset[str]
|
|
134
|
+
container: str
|
|
135
|
+
item_keys: frozenset[str]
|
|
136
|
+
builder: _ItemBuilder
|
|
137
|
+
# When set, the collection indexes its own items by slug so an item may
|
|
138
|
+
# reference a sibling by name (events → temporal relations). ``None`` means a
|
|
139
|
+
# collection whose items never cross-reference each other (relationships).
|
|
140
|
+
item_uri: Callable[[str], URIRef] | None = None
|
|
141
|
+
# Whether built items feed the research ``entity_index`` (events yes; rel. no — D11).
|
|
142
|
+
into_entity_index: bool = False
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@dataclass(frozen=True)
|
|
146
|
+
class _ItemContext:
|
|
147
|
+
"""Everything a collection builder needs for one item (R3)."""
|
|
148
|
+
|
|
149
|
+
ctx: _MapContext
|
|
150
|
+
item: dict[str, Any]
|
|
151
|
+
name: str
|
|
152
|
+
participants: tuple[URIRef, ...]
|
|
153
|
+
relpath: str
|
|
154
|
+
item_index: dict[str, URIRef]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def map_bible(project_root: Path, bible_dir: Path, uri_base: str) -> MapResult:
    """Map the recognised bible sources under ``bible_dir`` to GOLEM entities.

    ``characters/`` and ``settings/`` hold one entity per file, while
    ``timeline.md`` and ``relationships.md`` each hold a whole collection.
    Characters are mapped first so that the ``participants`` of events and
    relationships can be resolved against the ``slug → URI`` index in one pass.

    Returns the accumulated ``MapResult``.
    """

    def character_from(meta: dict[str, Any], _rp: str) -> GolemEntity:
        return _build_character(uri_base, meta)

    def setting_from(meta: dict[str, Any], _rp: str) -> GolemEntity:
        return Setting(uri_base=uri_base, name=_require_name(meta))

    def event_from(ic: _ItemContext) -> GolemEntity:
        return _build_event(uri_base, ic)

    def event_uri(name: str) -> URIRef:
        return URIRef(f"{uri_base}event/{make_slug(name)}")

    def relationship_from(ic: _ItemContext) -> GolemEntity:
        return SocialRelationship(uri_base=uri_base, name=ic.name, participants=ic.participants)

    characters = _DirSpec(
        directory=bible_dir / "characters",
        concept="Character",
        builder=character_from,
        allowed_keys=CHARACTER_KEYS,
        index=True,
        into_entity_index=True,
    )
    # Settings join the research entity index but not participant resolution.
    settings = _DirSpec(
        directory=bible_dir / "settings",
        concept="Setting",
        builder=setting_from,
        allowed_keys=SETTING_KEYS,
        index=False,
        into_entity_index=True,
    )
    timeline = _CollectionSpec(
        path=bible_dir / "timeline.md",
        concept="NarrativeEvent",
        top_keys=TIMELINE_TOP_KEYS,
        container="events",
        item_keys=EVENT_ITEM_KEYS,
        builder=event_from,
        item_uri=event_uri,
        into_entity_index=True,
    )
    relationships = _CollectionSpec(
        path=bible_dir / "relationships.md",
        concept="SocialRelationship",
        top_keys=RELATIONSHIPS_TOP_KEYS,
        container="relationships",
        item_keys=ITEM_KEYS,
        builder=relationship_from,
    )

    ctx = _MapContext(
        project_root=project_root,
        result=MapResult(),
        collisions=_Collisions(),
        slug_index={},
    )
    # Order matters: characters populate ``ctx.slug_index`` before the
    # collections that reference them are mapped.
    _map_single_dir(ctx, characters)
    _map_single_dir(ctx, settings)
    _map_collection(ctx, timeline)
    _map_collection(ctx, relationships)
    return ctx.result
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def build_provenance(mapped: MappedEntity, uri_base: str) -> Iterable[AttributeAssignment]:
    """Yield one ``crm:E13_Attribute_Assignment`` per derived assertion (R6, FR-011).

    For every assertion reported by ``mapped.entity.derived_assertions()`` the
    source is ``relpath:line`` when the assertion names a ``source_field`` whose
    line is present in ``mapped.key_lines``; otherwise it is the bare relpath
    (file-level provenance).
    """
    origin = mapped.relpath
    for claim in mapped.entity.derived_assertions():
        line_no = None
        if claim.source_field:
            line_no = mapped.key_lines.get(claim.source_field)
        where = origin if line_no is None else f"{origin}:{line_no}"
        yield AttributeAssignment(
            uri_base=uri_base,
            target=claim.target,
            attribute=claim.attribute,
            source=where,
        )
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# --- internals --------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _relpath(path: Path, project_root: Path) -> str:
|
|
244
|
+
return path.relative_to(project_root).as_posix()
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _require_name(metadata: dict[str, Any]) -> str:
|
|
248
|
+
name = metadata.get("name")
|
|
249
|
+
if not isinstance(name, str) or not name.strip():
|
|
250
|
+
raise InvalidFrontmatterError("", "missing or empty `name`")
|
|
251
|
+
return name
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _record_unknown_keys(
|
|
255
|
+
ctx: _MapContext, metadata: dict[str, Any], allowed: frozenset[str], relpath: str
|
|
256
|
+
) -> None:
|
|
257
|
+
for key in metadata:
|
|
258
|
+
if key not in allowed:
|
|
259
|
+
ctx.result.unknown_keys.append(UnknownKey(path=relpath, key=key))
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _build_character(uri_base: str, metadata: dict[str, Any]) -> Character:
    """Construct a ``Character`` from a character file's frontmatter.

    The values are validated in the order name, born, died, features,
    narrative_roles; the first invalid one raises ``InvalidFrontmatterError``.
    """
    # Keyword arguments are evaluated left to right, which keeps the
    # validation order described above.
    return Character(
        uri_base=uri_base,
        name=_require_name(metadata),
        born=_coerce_year(metadata.get("born"), "born"),
        died=_coerce_year(metadata.get("died"), "died"),
        features=_coerce_str_list(metadata.get("features"), "features"),
        narrative_roles=_coerce_str_list(metadata.get("narrative_roles"), "narrative_roles"),
    )
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _coerce_year(value: Any, field_name: str) -> int | None:
|
|
279
|
+
if value is None:
|
|
280
|
+
return None
|
|
281
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
282
|
+
raise InvalidFrontmatterError("", f"`{field_name}` must be an integer year")
|
|
283
|
+
return value
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _coerce_str_list(value: Any, field_name: str) -> tuple[str, ...]:
|
|
287
|
+
if value is None:
|
|
288
|
+
return ()
|
|
289
|
+
if not isinstance(value, list) or not all(isinstance(item, str) for item in value):
|
|
290
|
+
raise InvalidFrontmatterError("", f"`{field_name}` must be a list of strings")
|
|
291
|
+
return tuple(value)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _map_single_dir(ctx: _MapContext, spec: _DirSpec) -> None:
    """Map every ``*.md`` file of ``spec.directory`` to one entity each.

    Files are visited in sorted order. A file whose frontmatter cannot be
    parsed, or whose builder raises ``InvalidFrontmatterError`` /
    ``EmptySlugError``, is listed under ``ctx.result.skipped`` and mapping
    continues. A slug collision raised by the collision tracker propagates.
    Does nothing when the directory does not exist.
    """
    if not spec.directory.is_dir():
        return
    result = ctx.result
    for source in sorted(spec.directory.glob("*.md")):
        relpath = _relpath(source, ctx.project_root)
        result.files_processed += 1
        parsed = _safe_parse(ctx, source, relpath)
        if parsed is None:
            continue
        try:
            entity = spec.builder(parsed.metadata, relpath)
            ctx.collisions.record(spec.concept, _slug_of(entity), relpath)
        except InvalidFrontmatterError as exc:
            result.skipped.append(SkippedFile(path=relpath, reason=exc.reason))
        except EmptySlugError as exc:
            result.skipped.append(SkippedFile(path=relpath, reason=exc.message))
        else:
            # Soft warnings are recorded only for files that produced an entity,
            # so a skipped file appears under `skipped` and nowhere else.
            _record_unknown_keys(ctx, parsed.metadata, spec.allowed_keys, relpath)
            slug = _slug_of(entity)
            if spec.index:
                ctx.slug_index[slug] = entity.uri
            if spec.into_entity_index:
                result.entity_index[slug] = entity.uri
            result.mapped.append(
                MappedEntity(entity=entity, relpath=relpath, key_lines=parsed.key_lines)
            )
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _map_collection(ctx: _MapContext, spec: _CollectionSpec) -> None:
    """Map the items listed in a single collection file.

    Does nothing when ``spec.path`` is not a file. Unknown top-level keys are
    recorded as soft warnings. When the container value is not a list the
    whole file is skipped; an item that is not a mapping is skipped on its own
    and the remaining items are still mapped.
    """
    if not spec.path.is_file():
        return
    relpath = _relpath(spec.path, ctx.project_root)
    ctx.result.files_processed += 1
    parsed = _safe_parse(ctx, spec.path, relpath)
    if parsed is None:
        return
    _record_unknown_keys(ctx, parsed.metadata, spec.top_keys, relpath)
    entries = parsed.metadata.get(spec.container, [])
    if not isinstance(entries, list):
        ctx.result.skipped.append(
            SkippedFile(path=relpath, reason=f"`{spec.container}` must be a list")
        )
        return
    # Built up front so items can refer to siblings declared later in the file.
    sibling_index = _build_item_index(spec, entries)
    for entry in entries:
        if isinstance(entry, dict):
            _map_collection_item(ctx, spec, entry, parsed, sibling_index)
        else:
            ctx.result.skipped.append(
                SkippedFile(path=relpath, reason=f"each `{spec.container}` item must be a mapping")
            )
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _build_item_index(spec: _CollectionSpec, items: list[Any]) -> dict[str, URIRef]:
|
|
351
|
+
"""For a self-indexing collection, map each well-named item's slug → its URI.
|
|
352
|
+
|
|
353
|
+
Lets an item reference a sibling by name (events → temporal relations) without
|
|
354
|
+
depending on declaration order. Empty for collections that don't self-reference.
|
|
355
|
+
"""
|
|
356
|
+
if spec.item_uri is None:
|
|
357
|
+
return {}
|
|
358
|
+
index: dict[str, URIRef] = {}
|
|
359
|
+
for item in items:
|
|
360
|
+
if not isinstance(item, dict):
|
|
361
|
+
continue
|
|
362
|
+
name = item.get("name")
|
|
363
|
+
if isinstance(name, str) and name.strip():
|
|
364
|
+
try:
|
|
365
|
+
index[make_slug(name)] = spec.item_uri(name)
|
|
366
|
+
except EmptySlugError:
|
|
367
|
+
continue
|
|
368
|
+
return index
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _map_collection_item(
    ctx: _MapContext,
    spec: _CollectionSpec,
    item: dict[str, Any],
    frontmatter: Frontmatter,
    item_index: dict[str, URIRef],
) -> None:
    """Build one entity from a collection item and record it on ``ctx.result``.

    An item without a non-blank string ``name``, or whose construction raises
    ``EmptySlugError`` / ``InvalidFrontmatterError``, is listed under
    ``skipped`` and produces no entity. A slug collision raised by the
    collision tracker propagates to the caller.

    Soft warnings emitted while resolving references or building the entity
    are kept only when the item actually produces an entity; for a skipped
    item they are rolled back so the item shows up under ``skipped`` only.

    Args:
        ctx: Shared mapping state.
        spec: Configuration of the collection being mapped.
        item: The raw item mapping.
        frontmatter: Parsed frontmatter of the collection file (source of ``key_lines``).
        item_index: slug → URI index of the collection's own items.
    """
    relpath = _relpath(spec.path, ctx.project_root)
    result = ctx.result
    name = item.get("name")
    if not isinstance(name, str) or not name.strip():
        result.skipped.append(
            SkippedFile(path=relpath, reason=f"a `{spec.container}` item is missing `name`")
        )
        return

    # Reference resolution and the builder may append soft warnings before the
    # item is known to be usable; remember where this item's warnings start.
    unknown_mark = len(result.unknown_keys)
    unresolved_mark = len(result.unresolved_participants)

    def skip(reason: str) -> None:
        # Drop the warnings this item contributed, then record the skip.
        del result.unknown_keys[unknown_mark:]
        del result.unresolved_participants[unresolved_mark:]
        result.skipped.append(SkippedFile(path=relpath, reason=reason))

    try:
        # Resolved inside the ``try`` so a failure here skips the item instead
        # of escaping and aborting the whole build.
        participants = _resolve_refs(ctx, item.get("participants"), ctx.slug_index, name, relpath)
        entity = spec.builder(
            _ItemContext(
                ctx=ctx,
                item=item,
                name=name,
                participants=participants,
                relpath=relpath,
                item_index=item_index,
            )
        )
        slug = make_slug(name)
        ctx.collisions.record(spec.concept, slug, relpath)
    except EmptySlugError as exc:
        skip(exc.message)
        return
    except InvalidFrontmatterError as exc:
        skip(exc.reason)
        return
    # Record soft warnings only after the item produced an entity (see _map_single_dir).
    _record_unknown_keys(ctx, item, spec.item_keys, relpath)
    if spec.into_entity_index:
        result.entity_index[slug] = entity.uri
    result.mapped.append(
        MappedEntity(entity=entity, relpath=relpath, key_lines=frontmatter.key_lines)
    )
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _build_event(uri_base: str, ic: _ItemContext) -> NarrativeEvent:
    """Build a ``NarrativeEvent`` for a timeline item from its interval and relations.

    Each key in ``RELATION_KEYS`` is resolved against the timeline's own item
    index and passed to the constructor as a keyword argument of the same name.
    """
    begin, end = _resolve_interval(ic)
    relations: dict[str, tuple[URIRef, ...]] = {}
    for key in RELATION_KEYS:
        relations[key] = _resolve_refs(
            ic.ctx, ic.item.get(key), ic.item_index, ic.name, ic.relpath
        )
    return NarrativeEvent(
        uri_base=uri_base,
        name=ic.name,
        participants=ic.participants,
        begin=begin,
        end=end,
        **relations,
    )
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _resolve_interval(ic: _ItemContext) -> tuple[int | None, int | None]:
    """Return the ``(begin, end)`` years of an item, honouring the ``date`` shorthand.

    ``date`` on its own stands for a single year and yields ``(date, date)``.
    When ``date`` appears together with ``begin`` or ``end`` it is ignored and
    reported as a soft warning (an ``UnknownKey`` for ``date``) — never an
    abort. All three values are validated as integer years first, so a
    malformed one raises ``InvalidFrontmatterError`` before any warning is added.
    """
    raw = ic.item
    begin = _coerce_year(raw.get("begin"), "begin")
    end = _coerce_year(raw.get("end"), "end")
    date = _coerce_year(raw.get("date"), "date")
    if date is None:
        return begin, end
    if begin is None and end is None:
        return date, date
    # Mutually exclusive: keep begin/end, drop date, flag it softly.
    ic.ctx.result.unknown_keys.append(UnknownKey(path=ic.relpath, key="date"))
    return begin, end
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _resolve_refs(
|
|
449
|
+
ctx: _MapContext,
|
|
450
|
+
raw: Any,
|
|
451
|
+
index: dict[str, URIRef],
|
|
452
|
+
entity_name: str,
|
|
453
|
+
relpath: str,
|
|
454
|
+
) -> tuple[URIRef, ...]:
|
|
455
|
+
"""Resolve a list of names against ``index`` (characters or sibling events).
|
|
456
|
+
|
|
457
|
+
A non-list value, or a name absent from the index, is surfaced as an
|
|
458
|
+
``UnresolvedParticipant`` soft warning (no abort); the owning entity is built.
|
|
459
|
+
"""
|
|
460
|
+
if raw is None:
|
|
461
|
+
return ()
|
|
462
|
+
if not isinstance(raw, list):
|
|
463
|
+
ctx.result.unresolved_participants.append(
|
|
464
|
+
UnresolvedParticipant(path=relpath, entity=entity_name, name=str(raw))
|
|
465
|
+
)
|
|
466
|
+
return ()
|
|
467
|
+
resolved: list[URIRef] = []
|
|
468
|
+
for ref in raw:
|
|
469
|
+
if not isinstance(ref, str):
|
|
470
|
+
continue
|
|
471
|
+
uri = index.get(make_slug(ref))
|
|
472
|
+
if uri is None:
|
|
473
|
+
ctx.result.unresolved_participants.append(
|
|
474
|
+
UnresolvedParticipant(path=relpath, entity=entity_name, name=ref)
|
|
475
|
+
)
|
|
476
|
+
continue
|
|
477
|
+
resolved.append(uri)
|
|
478
|
+
return tuple(resolved)
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def _safe_parse(ctx: _MapContext, path: Path, relpath: str) -> Frontmatter | None:
    """Read ``path`` as UTF-8 and parse its frontmatter, or return ``None`` on failure.

    An unreadable or non-UTF-8 file and a YAML error are both treated as
    unusable frontmatter (FR-013): the file is added to ``ctx.result.skipped``
    with a reason and the build carries on.
    """
    skipped = ctx.result.skipped
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        skipped.append(SkippedFile(path=relpath, reason=f"unreadable file: {exc}"))
        return None
    try:
        parsed = parse_frontmatter(text)
    except yaml.YAMLError as exc:
        skipped.append(SkippedFile(path=relpath, reason=f"malformed YAML frontmatter: {exc}"))
        return None
    return parsed
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _slug_of(entity: GolemEntity) -> str:
|
|
499
|
+
slug = getattr(entity, "slug", None)
|
|
500
|
+
return slug if isinstance(slug, str) else str(entity.uri)
|
bookwright/io/errors.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Exception hierarchy for plain-text → model parsing (the ``io`` package).
|
|
2
|
+
|
|
3
|
+
Every concrete error inherits the canonical ``--json`` envelope from the shared
|
|
4
|
+
``BookwrightError`` base (Principle IX, data-model § 6); this module declares only
|
|
5
|
+
each error's ``code`` and ``details``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from bookwright.errors import BookwrightError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class IOError_(BookwrightError):
    """Common base class for the errors raised by the ``bookwright.io`` package.

    The trailing underscore keeps the name from shadowing the builtin
    ``IOError``. The class is abstract: it defines no ``code`` of its own and
    is never serialized directly.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ProjectNotFoundError(IOError_):
    """Raised when neither the cwd nor any ancestor contains ``manifest.toml`` (R8)."""

    code = "not_a_project"

    def __init__(self, start: str) -> None:
        """Store the directory the search began from and build the message."""
        self.start = start
        message = f"no manifest.toml in {start} or any parent directory"
        details = {"start": start}
        super().__init__(message, details)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class MissingDirectoryError(IOError_):
    """Raised when a required content directory (``bible/`` or ``manuscript/``) is missing (FR-012)."""

    code = "missing_directory"

    def __init__(self, name: str, path: str) -> None:
        """Store the directory's name and expected path and build the message."""
        self.name = name
        self.path = path
        message = f"required directory {name!r} is missing at {path}"
        details = {"name": name, "path": path}
        super().__init__(message, details)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class InvalidFrontmatterError(IOError_):
    """Raised when the frontmatter of one source file cannot be used (FR-013).

    This error is per-file and collected rather than fatal: the build skips
    the offending file, records ``(path, reason)`` and keeps going.
    """

    code = "invalid_frontmatter"

    def __init__(self, path: str, reason: str) -> None:
        """Store the file path and the reason and build the message."""
        self.path = path
        self.reason = reason
        message = f"invalid frontmatter in {path}: {reason}"
        details = {"path": path, "reason": reason}
        super().__init__(message, details)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class ResearchError(IOError_):
    """Signal a structurally invalid ``bible/research/`` file — fatal, no graph (D7).

    Research files are validated strictly, in contrast to the bible mapper,
    which soft-skips a file it cannot use. Any of the following aborts the
    build, naming the offending file and value (FR-016):

    * a ``type`` or ``reliability`` outside the allowed vocabulary;
    * a missing required Source facet;
    * a non-open finding without ``claim``/``sources``;
    * an ``anchors[].promotes`` that names an unknown finding;
    * a translation-rule violation;
    * malformed YAML.

    Attributes:
        relpath: Path of the offending research file.
        value: The offending key or value, or ``None`` when the fault is
            structural rather than tied to one value.
    """

    code = "invalid_research"

    def __init__(self, relpath: str, message: str, value: str | None = None) -> None:
        """Record ``relpath``/``value`` and forward ``message`` with the details."""
        self.relpath = relpath
        self.value = value
        details = {"relpath": relpath, "value": value}
        super().__init__(message, details)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class SlugCollisionError(IOError_):
    """Signal that two entities of one concept share an identifier (FR-014).

    The collision is fatal: no graph is produced.

    Attributes:
        identifier: The identifier claimed twice.
        first_path: One of the two files claiming it.
        second_path: The other file claiming it.
    """

    code = "slug_collision"

    def __init__(self, identifier: str, first_path: str, second_path: str) -> None:
        """Record the colliding identifier and both paths for the base error."""
        self.identifier = identifier
        self.first_path = first_path
        self.second_path = second_path
        message = (
            f"identifier {identifier!r} is claimed by both {first_path} and {second_path}"
        )
        # Both claimants are reported together as an ordered two-item list.
        details = {"identifier": identifier, "sources": [first_path, second_path]}
        super().__init__(message, details)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Read a Markdown file's leading YAML frontmatter fence (data-model § 2, R3).
|
|
2
|
+
|
|
3
|
+
A thin split-then-``yaml.safe_load`` reader. It records each top-level key's
|
|
4
|
+
1-based source line so the bible mapper can resolve a ``file:line`` provenance
|
|
5
|
+
locator (R6). Malformed YAML surfaces as ``yaml.YAMLError`` for the caller to
|
|
6
|
+
wrap in :class:`~bookwright.io.errors.InvalidFrontmatterError`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import yaml
|
|
16
|
+
|
|
17
|
+
# Marker line that opens and closes a frontmatter fence; lines are compared
# against it after stripping surrounding whitespace.
_FENCE = "---"
# Matches an unindented ``key:`` at the very start of a line and captures the key
# name: a letter or underscore followed by word characters or hyphens, with
# optional whitespace before the colon.
_TOP_LEVEL_KEY = re.compile(r"^([A-Za-z_][\w-]*)\s*:")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class Frontmatter:
    """The parsed result of one Markdown file's frontmatter fence.

    Instances are frozen, so fields cannot be reassigned after construction
    (the contained dicts themselves remain ordinary mutable dicts). Every field
    has an empty default.
    """

    # Mapping loaded from the YAML block; empty when there is no usable fence.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Text that follows the closing fence, or the whole input when no fence exists.
    body: str = ""
    # Top-level key name -> 1-based line number of that key in the original file.
    key_lines: dict[str, int] = field(default_factory=dict)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def parse_frontmatter(text: str) -> Frontmatter:
    """Separate a leading ``---`` … ``---`` fence from ``text`` and load its YAML.

    Args:
        text: Full contents of a Markdown file.

    Returns:
        A :class:`Frontmatter`. When the first line is not a fence, or no
        closing fence follows it, ``metadata`` and ``key_lines`` are empty and
        ``body`` is ``text`` unchanged. Otherwise ``metadata`` is the loaded
        YAML mapping (``{}`` if the block does not load as a dict), ``body`` is
        the lines after the closing fence re-joined with ``"\\n"``, and
        ``key_lines`` maps each top-level key to its 1-based line in the file.

    Raises:
        yaml.YAMLError: Propagated from ``yaml.safe_load`` on malformed YAML.
    """
    rows = text.splitlines()

    # Locate the closing fence only when the very first line opens one.
    end: int | None = None
    if rows and rows[0].strip() == _FENCE:
        end = next(
            (i for i in range(1, len(rows)) if rows[i].strip() == _FENCE),
            None,
        )
    if end is None:
        return Frontmatter(metadata={}, body=text, key_lines={})

    yaml_rows = rows[1:end]
    parsed = yaml.safe_load("\n".join(yaml_rows))
    metadata: dict[str, Any] = parsed if isinstance(parsed, dict) else {}

    key_lines: dict[str, int] = {}
    # The fence occupies file line 1, so the first YAML row is file line 2.
    for file_line, row in enumerate(yaml_rows, start=2):
        found = _TOP_LEVEL_KEY.match(row)
        if found is None:
            continue
        name = found.group(1)
        # Keep the first occurrence when a key name appears more than once.
        if name not in key_lines:
            key_lines[name] = file_line

    remainder = "\n".join(rows[end + 1 :])
    return Frontmatter(metadata=metadata, body=remainder, key_lines=key_lines)
|