@glw907/cairn-cms 0.57.1 → 0.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/dist/components/CairnMediaLibrary.svelte +2070 -26
  3. package/dist/components/CairnMediaLibrary.svelte.d.ts +10 -2
  4. package/dist/components/admin-icons.d.ts +5 -0
  5. package/dist/components/admin-icons.js +5 -0
  6. package/dist/components/cairn-admin.css +402 -3
  7. package/dist/content/media-rewrite.d.ts +65 -0
  8. package/dist/content/media-rewrite.js +442 -0
  9. package/dist/log/events.d.ts +1 -1
  10. package/dist/media/bulk-delete-plan.d.ts +24 -0
  11. package/dist/media/bulk-delete-plan.js +25 -0
  12. package/dist/media/orphan-scan.d.ts +37 -0
  13. package/dist/media/orphan-scan.js +42 -0
  14. package/dist/media/reconcile.d.ts +3 -0
  15. package/dist/media/reconcile.js +3 -2
  16. package/dist/media/rewrite-plan.d.ts +65 -0
  17. package/dist/media/rewrite-plan.js +61 -0
  18. package/dist/sveltekit/cairn-admin.d.ts +8 -0
  19. package/dist/sveltekit/cairn-admin.js +15 -0
  20. package/dist/sveltekit/content-routes.d.ts +118 -4
  21. package/dist/sveltekit/content-routes.js +572 -1
  22. package/dist/sveltekit/index.d.ts +1 -1
  23. package/package.json +1 -1
  24. package/src/lib/components/CairnMediaLibrary.svelte +2070 -26
  25. package/src/lib/components/admin-icons.ts +5 -0
  26. package/src/lib/content/media-rewrite.ts +555 -0
  27. package/src/lib/log/events.ts +6 -1
  28. package/src/lib/media/bulk-delete-plan.ts +54 -0
  29. package/src/lib/media/orphan-scan.ts +74 -0
  30. package/src/lib/media/reconcile.ts +3 -2
  31. package/src/lib/media/rewrite-plan.ts +122 -0
  32. package/src/lib/sveltekit/cairn-admin.ts +15 -0
  33. package/src/lib/sveltekit/content-routes.ts +722 -5
  34. package/src/lib/sveltekit/index.ts +3 -0
@@ -0,0 +1,65 @@
/**
 * A single repointed reference: the surface it was found on, the token it carried before the
 * rewrite, and the token it carries afterwards.
 */
export interface RepointPlacement {
    /** Surface the reference lived on: a body image, a figure-wrapped image, or a frontmatter hero. */
    kind: 'body' | 'figure' | 'hero';
    /** The previous `media:` token, verbatim as it appeared in the source. */
    before: string;
    /** Canonical `media:` token of the new asset; every placement carries the same value. */
    after: string;
}
/**
 * Outcome of a repoint: the rewritten markdown together with one diff entry per changed
 * reference, listed in document order (hero first, then body).
 */
export interface RepointResult {
    /** The entry's markdown after the token substitutions. */
    markdown: string;
    /** One entry per repointed reference. */
    placements: Array<RepointPlacement>;
}
/**
 * Repoint every reference to `oldHash` in one entry's raw markdown at `newToken`.
 *
 * Frontmatter: only the `src:` line of an image field is rewritten. The image-like keys are read
 * via gray-matter and each key's `src:` line is then located structurally inside that key's own
 * block, so a `media:` token that merely shows up in a plain-text value (a `title:` or
 * `description:`) stays as it is, in agreement with extractMediaRefs.
 *
 * Body: plain and figure-wrapped images are matched by mdast offset over the body slice.
 *
 * Apart from the replaced token substrings the output is byte-for-byte the input, so alt text,
 * captions, `:::figure` fences and every other frontmatter key survive exactly. A hash that
 * matches nothing yields the markdown unchanged with an empty placement list, and a malformed
 * `media:` reference is left untouched. Pure and node-safe.
 *
 * @param markdown The entry's raw markdown, frontmatter included.
 * @param oldHash  Content hash whose references are rewritten.
 * @param newToken Replacement `media:` token written at every placement.
 * @returns The rewritten markdown plus the per-placement diff.
 */
export declare function repointMediaRef(
    markdown: string,
    oldHash: string,
    newToken: string,
): RepointResult;
/**
 * The alt bucket a placement falls into:
 * - `will-fill`: the alt is empty and is always filled;
 * - `customized`: the alt is non-empty, is reported, and is overwritten only on opt-in;
 * - `decorative-skipped`: a decorative hero, never touched.
 */
export type AltBucket =
    | 'will-fill'
    | 'customized'
    | 'decorative-skipped';
/**
 * One placement of the target hash and the effect the alt-fill has on it: the surface it lives
 * on, its bucket, the alt it had, and the alt it has after the transform (the same value for a
 * customized alt that is kept and for a decorative hero).
 */
export interface AltPlacement {
    /** Surface the image lives on. */
    kind: 'body' | 'figure' | 'hero';
    /** How the alt-fill treats this placement. */
    bucket: AltBucket;
    /** Alt text before the transform; the empty string when there was none. */
    before: string;
    /** Alt text after the transform; identical to `before` when nothing changed. */
    after: string;
}
/**
 * Outcome of an alt-fill: the rewritten markdown together with one diff entry per placement,
 * listed in document order (hero first, then body).
 */
export interface AltFillResult {
    /** The entry's markdown after the alt edits. */
    markdown: string;
    /** One entry per placement of the hash, changed or not. */
    placements: Array<AltPlacement>;
}
/**
 * Set the alt text at every placement of `hash` in one entry's raw markdown.
 *
 * Bucket rules:
 * - an empty alt is filled with `defaultAlt` (`will-fill`);
 * - a non-empty alt is replaced by `defaultAlt` only when `opts.overwrite` is true
 *   (`customized`); otherwise it is left alone but still reported, so a preview can show it and
 *   offer the opt-in;
 * - a frontmatter hero carrying `decorative: true` is `decorative-skipped` and never changed;
 * - a body or figure image has no decorative slot, so its empty alt is always `will-fill`.
 *
 * Apart from the alt text that is actually changed, the output is byte-for-byte the input. The
 * hero alt is edited by string splice inside the frontmatter block (a gray-matter serialize
 * round trip would reformat the YAML); gray-matter is used only to classify buckets and to read
 * the hero alt and decorative flag. A body alt is written escaped, the way insertImage escapes
 * it, so a `]` in the alt cannot break the image. A hero alt is written as a JSON-quoted YAML
 * scalar so a colon, a quote, or an empty value is robust.
 *
 * Placements are listed in document order (hero first, then body in source order). A hash that
 * matches nothing yields the markdown unchanged with an empty placement list. Pure and node-safe.
 *
 * @param markdown   The entry's raw markdown, frontmatter included.
 * @param hash       Content hash whose placements receive the alt.
 * @param defaultAlt Alt text to write.
 * @param opts       `overwrite`: whether an existing non-empty alt is replaced too.
 * @returns The rewritten markdown plus the per-placement diff.
 */
export declare function fillAltForHash(
    markdown: string,
    hash: string,
    defaultAlt: string,
    opts: {
        overwrite: boolean;
    },
): AltFillResult;
@@ -0,0 +1,442 @@
1
+ // cairn-cms: the replace-in-place rewrite transform. Given one entry's raw markdown and an old
2
+ // content-hash, it rewrites every reference to that hash (a body image, a figure-wrapped image, or
3
+ // the frontmatter hero image.src) to a new asset's canonical `media:` token, and returns a per
4
+ // placement diff. This is the heart of the media-library "replace" action: the same bytes pointed
5
+ // at a new asset, with the surrounding entry left exact.
6
+ //
7
+ // The output is byte-for-byte identical to the input except for the `media:` token substrings that
8
+ // are replaced. The transform never round-trips through gray-matter or a markdown serializer (those
9
+ // reformat YAML and are not byte stable); it splices strings by source offset. The match keys on the
10
+ // parsed hash, the immutable truth, never the cosmetic slug, so a bare `media:<hash>` and a
11
+ // `media:<slug>.<hash>` for the same bytes both repoint. A malformed or non-matching token is left
12
+ // untouched.
13
+ //
14
+ // The body arm parses with the same figure-aware pipeline the render and Edit-block transforms use
15
+ // (remark-parse + gfm + directive), so a `media:` token inside a code span or fence is not an image
16
+ // node and is correctly never matched, matching extractMediaRefs. It also lets the arm classify an
17
+ // image inside a `:::figure` as a 'figure' placement.
18
+ import matter from 'gray-matter';
19
+ import { unified } from 'unified';
20
+ import remarkParse from 'remark-parse';
21
+ import remarkGfm from 'remark-gfm';
22
+ import remarkDirective from 'remark-directive';
23
+ import { visit } from 'unist-util-visit';
24
+ import { parseMediaToken } from '../media/reference.js';
25
+ import { escapeLinkText } from './links.js';
/**
 * Last-line safety net for the splice lists: walking the edits in the order given, keep an edit
 * only if it overlaps none of the edits kept before it. Two half-open spans overlap when each
 * starts before the other ends, so a pure insert (`start === end`) clashes with a span only when
 * it lies strictly inside it; inserts and edits that merely touch are both kept.
 *
 * @param edits Candidate edits, each with numeric `start` and `end` offsets.
 * @returns A new array holding the non-clashing edits in their original order.
 */
function dropOverlappingEdits(edits) {
    const accepted = [];
    for (const candidate of edits) {
        const isClear = accepted.every((prior) => !(candidate.start < prior.end && prior.start < candidate.end));
        if (isClear) {
            accepted.push(candidate);
        }
    }
    return accepted;
}
/**
 * Locator for candidate `media:` token substrings. It is intentionally wider than the token
 * grammar (uppercase and other out-of-grammar characters are accepted) so that a malformed token
 * is still found and can then be rejected by parseMediaToken instead of being silently skipped.
 * A candidate runs over letters, digits, `_`, `.` and `-` only, so whitespace, a quote, or any
 * other YAML or markdown delimiter ends it.
 *
 * The pattern carries the `g` flag because it is consumed through `String.prototype.matchAll`.
 */
const MEDIA_TOKEN_SCAN = /media:[\w.-]+/g;
/**
 * Separate a leading frontmatter block from the rest of the markdown.
 *
 * `fmBlock` is the `---` fenced block with both fences and the trailing newline, or the empty
 * string when the document does not open with one. `body` is everything after it. Because the
 * block leads the document, a frontmatter offset is already absolute, while a body offset needs
 * `fmBlock.length` added. Every arm calls this helper so they all agree on the boundary.
 *
 * @param markdown The entry's raw markdown.
 * @returns `{ fmBlock, body }`; concatenated they reproduce `markdown`.
 */
function splitFrontmatter(markdown) {
    const fenced = /^---\r?\n[\s\S]*?\r?\n---\r?\n?/.exec(markdown);
    const fmBlock = fenced === null ? '' : fenced[0];
    const body = markdown.slice(fmBlock.length);
    return { fmBlock, body };
}
/**
 * Parse a document with the figure-aware pipeline (remark-parse, then GFM, then directives), so
 * the body arm sees the same tree remarkFigure renders from, including the enclosing `:::figure`
 * container. Counterpart of parseFigureDoc in markdown-format.ts.
 *
 * @param doc Markdown source to parse.
 * @returns The parsed syntax tree.
 */
function parseFigureDoc(doc) {
    const processor = unified()
        .use(remarkParse)
        .use(remarkGfm)
        .use(remarkDirective);
    return processor.parse(doc);
}
/**
 * Report whether `target` lies inside a container directive named `figure`. The tree is walked
 * afresh to find the enclosing directive, since unist-util-visit's per-call ancestors are not
 * retained across the traversal. Boolean-only counterpart of enclosingFigure in
 * markdown-format.ts.
 *
 * @param tree   Root of the parsed document.
 * @param target The image node to look for (compared by identity).
 * @returns True when some `figure` directive contains `target`.
 */
function inFigure(tree, target) {
    let enclosed = false;
    visit(tree, 'containerDirective', (directive) => {
        if (directive.name === 'figure') {
            visit(directive, 'image', (image) => {
                if (image === target) {
                    enclosed = true;
                }
            });
        }
    });
    return enclosed;
}
/**
 * Split `fmBlock` into line spans once, so the locator helpers walk one shared structure instead
 * of re-scanning the block on every call.
 *
 * Each span is `{ start, end }` with `end` exclusive and pointing at the line's terminator. For a
 * CRLF-terminated line `end` stops before the `\r`, so the carriage return counts as part of the
 * line break rather than the line's text. Without that, a closing fence read as `---\r` is not
 * recognised as `---`, and a value replaced "to end of line" swallows the `\r`. A block that ends
 * with a newline yields a final empty span at `fmBlock.length`.
 *
 * @param fmBlock The frontmatter block (may be empty).
 * @returns The line spans in source order; always at least one.
 */
function fmLines(fmBlock) {
    const lines = [];
    let pos = 0;
    for (;;) {
        const nl = fmBlock.indexOf('\n', pos);
        if (nl === -1) {
            // Last line: no terminator, so it runs to the end of the block.
            lines.push({ start: pos, end: fmBlock.length });
            return lines;
        }
        // Keep the `\r` of a CRLF pair out of the line's text span.
        const end = nl > pos && fmBlock[nl - 1] === '\r' ? nl - 1 : nl;
        lines.push({ start: pos, end });
        pos = nl + 1;
    }
}
/**
 * Compute the inclusive line-index range `[lo, hi]` of the block a top-level key opens: from the
 * line starting with `<key>:` at indent 0 through the last line before the next top-level key,
 * the closing `---` fence, or the end of the scanned lines. A flow-style value written on the key
 * line itself (`key: { ... }`) therefore yields a single-line range.
 *
 * Line 0 (the opening fence) and the final line of the block are never scanned. Confining each
 * key's search to its own range is what lets two image fields that share a hash, or an image
 * field whose hash also appears in a sibling text value, resolve to distinct, correct spans.
 *
 * @param lines   Line spans of `fmBlock`, as produced by fmLines.
 * @param fmBlock The frontmatter block.
 * @param key     Top-level key name, matched literally.
 * @returns `[lo, hi]`, or null when no top-level line opens with the key.
 */
function frontmatterKeyRange(lines, fmBlock, key) {
    const textOf = (row) => fmBlock.slice(lines[row].start, lines[row].end);
    const keyLine = new RegExp(`^${escapeForRegExp(key)}:`);
    const anyTopLevelKey = /^[^\s#][^:]*:/;
    const lastScanned = lines.length - 2;
    // Find the line that opens the key.
    let first = 1;
    while (first <= lastScanned && !keyLine.test(textOf(first))) {
        first += 1;
    }
    if (first > lastScanned) {
        return null;
    }
    // Extend the range until the next top-level key or the closing fence.
    let last = first;
    while (last + 1 <= lastScanned) {
        const next = textOf(last + 1);
        if (anyTopLevelKey.test(next) || next === '---') {
            break;
        }
        last += 1;
    }
    return [first, last];
}
/**
 * Within the line range `[lo, hi]`, find the first block-style `src:` line whose value holds a
 * token that parses to `hash`. Candidates are picked up by the broad MEDIA_TOKEN_SCAN and then
 * validated through parseMediaToken, matching on the parsed hash, so a malformed token is found
 * and rejected rather than skipped by the locator. A flow-style value has no `src:` line of its
 * own, so it yields null and stays unanchorable instead of getting a guessed span.
 *
 * @param lines   Line spans of `fmBlock`.
 * @param fmBlock The frontmatter block.
 * @param range   Inclusive `[lo, hi]` line-index range to search.
 * @param hash    Content hash the token must parse to.
 * @returns The line's span, its leading indent, and the token's span and text; or null.
 */
function findSrcLineInRange(lines, fmBlock, range, hash) {
    const srcKey = /^(\s*)src:[ \t]?/;
    const [first, last] = range;
    for (let row = first; row <= last; row += 1) {
        const { start: lineStart, end: lineEnd } = lines[row];
        const opener = srcKey.exec(fmBlock.slice(lineStart, lineEnd));
        if (opener === null) {
            continue;
        }
        // The value begins right after `src:` and its optional single space or tab.
        const valueStart = lineStart + opener[0].length;
        const candidates = fmBlock.slice(valueStart, lineEnd).matchAll(MEDIA_TOKEN_SCAN);
        for (const candidate of candidates) {
            const [token] = candidate;
            const parsed = parseMediaToken(token);
            if (parsed && parsed.hash === hash) {
                const tokenStart = valueStart + candidate.index;
                return {
                    lineStart,
                    lineEnd,
                    indent: opener[1],
                    tokenStart,
                    tokenEnd: tokenStart + token.length,
                    token,
                };
            }
        }
    }
    return null;
}
/**
 * List the image-like top-level frontmatter keys whose `src` parses to `hash`, in the order the
 * parsed data enumerates them. A key is image-like when its value is a non-array object with a
 * string `src`; this is the shape extractMediaRefs reads, so a token inside a plain-text value (a
 * `title:` or `note:`) never counts as a reference. The values come from the gray-matter parse,
 * which copes with every quoting form; the caller locates the byte edit structurally, keyed back
 * to the key name returned here.
 *
 * @param data Parsed frontmatter data.
 * @param hash Content hash to match.
 * @returns `{ key, obj }` pairs for the matching image fields.
 */
function imageFieldKeys(data, hash) {
    const isImageLike = (value) => Boolean(value) &&
        typeof value === 'object' &&
        !Array.isArray(value) &&
        typeof value.src === 'string';
    const pointsAtHash = (value) => {
        const parsed = parseMediaToken(value.src);
        return Boolean(parsed) && parsed.hash === hash;
    };
    return Object.entries(data)
        .filter(([, value]) => isImageLike(value) && pointsAtHash(value))
        .map(([key, obj]) => ({ key, obj }));
}
/**
 * Gather the hero src-token edits inside the frontmatter block. Only the `src:` line of an image
 * field is rewritten: gray-matter supplies the image-like keys, and each key's `src:` line is
 * then located structurally inside that key's own block. A `media:` token in a plain-text value
 * (a `title:` or `description:`) sits on no `src:` line and is left alone, which keeps the
 * byte-exact contract and agrees with extractMediaRefs. A flow-style hero has no `src:` line and
 * is skipped.
 *
 * @param markdown The whole entry, handed to gray-matter.
 * @param fmBlock  The frontmatter block split off `markdown`.
 * @param oldHash  Content hash whose hero references are collected.
 * @returns Edits `{ start, end, before, kind: 'hero' }` with absolute offsets.
 */
function frontmatterEdits(markdown, fmBlock, oldHash) {
    if (fmBlock === '') {
        return [];
    }
    const { data } = matter(markdown);
    const lines = fmLines(fmBlock);
    const heroEdits = [];
    for (const field of imageFieldKeys(data, oldHash)) {
        const range = frontmatterKeyRange(lines, fmBlock, field.key);
        const src = range === null ? null : findSrcLineInRange(lines, fmBlock, range, oldHash);
        if (src === null) {
            // No top-level key line, or no block-style `src:` line: nothing safe to splice.
            continue;
        }
        heroEdits.push({
            start: src.tokenStart,
            end: src.tokenEnd,
            before: src.token,
            kind: 'hero',
        });
    }
    return heroEdits;
}
/**
 * Pin down the exact `media:` token substring inside one image node's source span. The search
 * begins just past the first `](`, which is where the destination starts after the alt text, so a
 * `media:`-like string inside the alt cannot produce a false match. When the span contains no
 * `](` the whole span is searched.
 *
 * @param span Source text of the image node.
 * @param url  The url to find, as the parser reports it.
 * @returns `{ start, end }` relative to `span`, or null when the url is not found, in which case
 *          the image is left untouched rather than spliced at a guessed range.
 */
function locateImageToken(span, url) {
    const altClose = span.indexOf('](');
    let searchFrom = 0;
    if (altClose !== -1) {
        searchFrom = altClose + 2;
    }
    const start = span.indexOf(url, searchFrom);
    if (start === -1) {
        return null;
    }
    return { start, end: start + url.length };
}
/**
 * Collect every body image whose url parses to `hash`, in traversal order, with absolute offsets.
 * The body is parsed with the figure-aware pipeline, so a `media:` token inside a code span or
 * fence is not an image node and is skipped, in agreement with extractMediaRefs. An image whose
 * node carries no start or end offset is skipped as well.
 *
 * @param body        Markdown after the frontmatter block.
 * @param blockLength Length of the frontmatter block, added to turn body offsets absolute.
 * @param hash        Content hash to match.
 * @returns Hits `{ node, nodeFrom, nodeTo, kind }`, where `kind` is 'figure' for an image inside
 *          a `:::figure` and 'body' otherwise.
 */
function matchedBodyImages(body, blockLength, hash) {
    const tree = parseFigureDoc(body);
    const matches = [];
    visit(tree, 'image', (image) => {
        const parsed = parseMediaToken(image.url);
        const hashMatches = Boolean(parsed) && parsed.hash === hash;
        const startOffset = image.position?.start?.offset;
        const endOffset = image.position?.end?.offset;
        if (!hashMatches || startOffset == null || endOffset == null) {
            return;
        }
        matches.push({
            node: image,
            nodeFrom: blockLength + startOffset,
            nodeTo: blockLength + endOffset,
            kind: inFigure(tree, image) ? 'figure' : 'body',
        });
    });
    return matches;
}
/**
 * Gather the body token edits over the body slice. Each matching image has its token located
 * inside its own source span and is recorded with absolute offsets; an image whose token cannot
 * be located contributes nothing. The kind is 'figure' for an image inside a `:::figure`,
 * otherwise 'body'.
 *
 * @param body        Markdown after the frontmatter block.
 * @param blockLength Length of the frontmatter block.
 * @param oldHash     Content hash whose body references are collected.
 * @returns Edits `{ start, end, before, kind }` in source order.
 */
function bodyEdits(body, blockLength, oldHash) {
    return matchedBodyImages(body, blockLength, oldHash).flatMap((hit) => {
        const source = body.slice(hit.nodeFrom - blockLength, hit.nodeTo - blockLength);
        const located = locateImageToken(source, hit.node.url);
        if (!located) {
            return [];
        }
        return [{
                start: hit.nodeFrom + located.start,
                end: hit.nodeFrom + located.end,
                before: hit.node.url,
                kind: hit.kind,
            }];
    });
}
/**
 * Repoint every reference to `oldHash` in one entry's raw markdown at `newToken`, returning the
 * rewritten markdown and a per-placement diff.
 *
 * In the frontmatter only an image field's `src:` line is rewritten: gray-matter supplies the
 * image-like keys and each key's `src:` line is located structurally inside its own block, so a
 * `media:` token in a plain-text value (a `title:` or `description:`) is left alone, in
 * agreement with extractMediaRefs. Body and figure images are matched by mdast offset over the
 * body slice.
 *
 * Apart from the replaced token substrings the output is byte-for-byte the input, so alt text,
 * captions, `:::figure` fences and every other frontmatter key are preserved exactly. A hash
 * that matches nothing returns the markdown unchanged with an empty placement list; a malformed
 * `media:` reference is left untouched. Pure and node-safe.
 *
 * @param markdown The entry's raw markdown.
 * @param oldHash  Content hash whose references are rewritten.
 * @param newToken Replacement token written at every placement.
 * @returns `{ markdown, placements }`.
 */
export function repointMediaRef(markdown, oldHash, newToken) {
    const { fmBlock, body } = splitFrontmatter(markdown);
    // Frontmatter edits first, then body edits in source order: that is document order.
    const located = [
        ...frontmatterEdits(markdown, fmBlock, oldHash),
        ...bodyEdits(body, fmBlock.length, oldHash),
    ];
    const edits = dropOverlappingEdits(located);
    if (edits.length === 0) {
        return { markdown, placements: [] };
    }
    // One diff entry per changed reference, in the order the arms emitted them.
    const placements = edits.map(({ kind, before }) => ({ kind, before, after: newToken }));
    // Splice from the highest offset down so the offsets still to be applied stay valid.
    const rewritten = [...edits]
        .sort((a, b) => b.start - a.start)
        .reduce((text, edit) => text.slice(0, edit.start) + newToken + text.slice(edit.end), markdown);
    return { markdown: rewritten, placements };
}
/**
 * Sort an existing alt into its non-decorative bucket: an empty or whitespace-only alt is one to
 * fill, anything else is a custom alt the caller may opt in to overwrite. Same empty-alt test as
 * findMediaImagesNeedingAlt.
 *
 * @param existing The current alt text.
 * @returns 'will-fill' or 'customized'.
 */
function classifyAlt(existing) {
    const isBlank = existing.trim().length === 0;
    return isBlank ? 'will-fill' : 'customized';
}
/**
 * Decide whether the alt text is actually rewritten for a bucket and an overwrite choice: a
 * will-fill always writes, a customized alt writes only on opt-in, and anything else (a
 * decorative hero) never writes.
 *
 * @param bucket    The placement's alt bucket.
 * @param overwrite Whether custom alts are to be overwritten.
 * @returns True when the alt is to be written.
 */
function altIsEdited(bucket, overwrite) {
    switch (bucket) {
        case 'will-fill':
            return true;
        case 'customized':
            return overwrite;
        default:
            return false;
    }
}
/**
 * Gather the body and figure alt edits over the body slice. The alt source sits between `![` and
 * the `](` that opens the destination, so the new alt is spliced there, escaped the way
 * insertImage escapes it so that a `]` cannot break the syntax. The existing alt is the parser's
 * already unescaped `node.alt`. A body image has no decorative slot, so an empty alt is always
 * will-fill.
 *
 * An image that is not written (a kept custom alt) still yields an entry with `apply: false` and
 * a zero-width span, so it appears in the diff without touching any bytes.
 *
 * @param body        Markdown after the frontmatter block.
 * @param blockLength Length of the frontmatter block.
 * @param hash        Content hash whose images receive the alt.
 * @param defaultAlt  Alt text to write.
 * @param overwrite   Whether a non-empty alt is replaced too.
 * @returns Entries `{ apply, start, end, text, placement }` in source order.
 */
function bodyAltEdits(body, blockLength, hash, defaultAlt, overwrite) {
    const collected = [];
    for (const hit of matchedBodyImages(body, blockLength, hash)) {
        const source = body.slice(hit.nodeFrom - blockLength, hit.nodeTo - blockLength);
        if (!source.startsWith('![')) {
            continue;
        }
        // Anchor the destination's `](` backwards from the url's known position. A forward
        // `indexOf('](')` would land inside an alt that itself contains `](` or a nested
        // `![x](y)` and truncate the image on overwrite.
        const token = locateImageToken(source, hit.node.url);
        if (!token) {
            continue;
        }
        const destOpen = source.lastIndexOf('](', token.start);
        if (destOpen === -1) {
            continue;
        }
        const before = hit.node.alt ?? '';
        const bucket = classifyAlt(before);
        if (altIsEdited(bucket, overwrite)) {
            // Replace everything between `![` and the destination's `](`.
            collected.push({
                apply: true,
                start: hit.nodeFrom + 2,
                end: hit.nodeFrom + destOpen,
                text: escapeLinkText(defaultAlt),
                placement: { kind: hit.kind, bucket, before, after: defaultAlt },
            });
        }
        else {
            collected.push({
                apply: false,
                start: hit.nodeFrom,
                end: hit.nodeFrom,
                text: '',
                placement: { kind: hit.kind, bucket, before, after: before },
            });
        }
    }
    return collected;
}
/**
 * Escape a literal string so it can be interpolated into a RegExp source. A key name or an indent
 * is meant to match literally, so none of its characters may act as a metacharacter; each one is
 * prefixed with a backslash.
 *
 * @param literal Text to be matched literally.
 * @returns The text with every RegExp metacharacter backslash-escaped.
 */
function escapeForRegExp(literal) {
    const metacharacters = /[.*+?^${}()|[\]\\]/g;
    return literal.replace(metacharacters, (ch) => `\\${ch}`);
}
/**
 * Look for a sibling key line (`alt:` or `decorative:`) at exactly `indent` inside the inclusive
 * line-index range `[lo, hi]` of one mapping. Because the range is the mapping's whole block
 * rather than a contiguous same-indent run, a blank line or a deeper-nested child between `src:`
 * and `alt:` does not hide the existing key (hiding it would lead to a duplicate key being
 * inserted and break the YAML).
 *
 * @param lines   Line spans of `fmBlock`.
 * @param fmBlock The frontmatter block.
 * @param range   Inclusive `[lo, hi]` line-index range of the mapping.
 * @param indent  Exact leading whitespace the key line must carry.
 * @param key     Key name, matched literally.
 * @returns The value span (after the key and its optional space or tab, up to the line's end),
 *          or null when the mapping has no such key at that indent.
 */
function findSiblingKeyValue(lines, fmBlock, range, indent, key) {
    const pattern = new RegExp(`^${escapeForRegExp(indent)}${escapeForRegExp(key)}:[ \\t]?`);
    const [first, last] = range;
    for (let row = first; row <= last; row += 1) {
        const { start, end } = lines[row];
        const hit = pattern.exec(fmBlock.slice(start, end));
        if (hit !== null) {
            return { start: start + hit[0].length, end };
        }
    }
    return null;
}
/**
 * Gather the hero alt edits inside the frontmatter block.
 *
 * The image-field objects, with their `decorative` and `alt` values, are read via gray-matter so
 * the bucket is classified robustly across quoting forms. The byte edit is then located
 * structurally, scoped to each field's own mapping block and keyed back by the top-level field
 * name. A decorative hero is reported and never edited; an empty alt is filled; a custom alt is
 * overwritten only on opt-in.
 *
 * An `alt:` key present anywhere in the mapping has its value replaced; an absent one is inserted
 * on its own line right after the `src:` line at the same indent. The value is written as a
 * JSON-quoted scalar, which is valid YAML for a colon, a quote, or an empty string.
 *
 * Line endings are preserved: a `\r` closing a CRLF line is never part of the replaced value, and
 * an inserted `alt:` line reuses the `src:` line's own terminator.
 *
 * A hero that should be written but cannot be anchored (flow-style `image: { ... }`, with no
 * `src:` line of its own) is reported from the gray-matter read but never spliced. Its `after`
 * is reported equal to `before`, because no bytes change.
 *
 * @param markdown   The whole entry, handed to gray-matter.
 * @param fmBlock    The frontmatter block split off `markdown`.
 * @param hash       Content hash whose hero placements receive the alt.
 * @param defaultAlt Alt text to write.
 * @param overwrite  Whether a non-empty alt is replaced too.
 * @returns Entries `{ apply, start, end, text, placement }`, one per matching image field.
 */
function heroAltEdits(markdown, fmBlock, hash, defaultAlt, overwrite) {
    if (fmBlock === '')
        return [];
    const data = matter(markdown).data;
    const lines = fmLines(fmBlock);
    const edits = [];
    const quoted = JSON.stringify(defaultAlt);
    for (const { key, obj } of imageFieldKeys(data, hash)) {
        const decorative = obj.decorative === true;
        const before = typeof obj.alt === 'string' ? obj.alt : '';
        const bucket = decorative ? 'decorative-skipped' : classifyAlt(before);
        const write = altIsEdited(bucket, overwrite);
        const range = write ? frontmatterKeyRange(lines, fmBlock, key) : null;
        const src = range ? findSrcLineInRange(lines, fmBlock, range, hash) : null;
        if (!write || !range || !src) {
            // Reported but not edited: a kept custom alt, a decorative hero, or an unanchorable
            // flow-style hero. No splice happens, so `after` must equal `before`.
            edits.push({
                apply: false,
                start: 0,
                end: 0,
                text: '',
                placement: { kind: 'hero', bucket, before, after: before },
            });
            continue;
        }
        const placement = { kind: 'hero', bucket, before, after: defaultAlt };
        const altSpan = findSiblingKeyValue(lines, fmBlock, range, src.indent, 'alt');
        if (altSpan) {
            // Leave a trailing `\r` (CRLF line) outside the replaced value.
            let valueEnd = altSpan.end;
            if (valueEnd > altSpan.start && fmBlock[valueEnd - 1] === '\r')
                valueEnd -= 1;
            edits.push({ apply: true, start: altSpan.start, end: valueEnd, text: quoted, placement });
        }
        else {
            // No alt key: add one on its own line right after the src line, at the sibling
            // indent, inserting before the src line's terminator and reusing that terminator.
            let insertAt = src.lineEnd;
            if (insertAt > src.lineStart && fmBlock[insertAt - 1] === '\r')
                insertAt -= 1;
            const eol = fmBlock[insertAt] === '\r' ? '\r\n' : '\n';
            edits.push({
                apply: true,
                start: insertAt,
                end: insertAt,
                text: `${eol}${src.indent}alt: ${quoted}`,
                placement,
            });
        }
    }
    return edits;
}
/**
 * Set the alt at every placement of `hash` in one entry's raw markdown, returning the rewritten
 * markdown and a per-placement diff.
 *
 * An empty alt is filled with `defaultAlt` (bucket will-fill). A non-empty alt is replaced by
 * `defaultAlt` only when `opts.overwrite` is true (bucket customized); otherwise it is left
 * unchanged but still reported, so a preview can show it and offer the opt-in. A frontmatter
 * hero with `decorative: true` is bucket decorative-skipped and never changed. A body or figure
 * image has no decorative slot, so its empty alt is always will-fill.
 *
 * Apart from the alt text actually changed, the output is byte-for-byte the input. The hero alt
 * is edited by string splice inside the frontmatter block (a gray-matter serialize round trip
 * would reformat the YAML); gray-matter is used only to classify buckets and to read the hero
 * alt and decorative flag. A body alt is written escaped, the way insertImage escapes it, so a
 * `]` in the alt cannot break the image; a hero alt is written as a JSON-quoted YAML scalar so a
 * colon, a quote, or an empty value is robust.
 *
 * Placements read in document order (hero first, then body in source order). A hash that
 * matches nothing returns the markdown unchanged with an empty placement list. Pure and
 * node-safe.
 *
 * @param markdown   The entry's raw markdown.
 * @param hash       Content hash whose placements receive the alt.
 * @param defaultAlt Alt text to write.
 * @param opts       `overwrite`: whether a non-empty alt is replaced too.
 * @returns `{ markdown, placements }`.
 */
export function fillAltForHash(markdown, hash, defaultAlt, opts) {
    const { fmBlock, body } = splitFrontmatter(markdown);
    const edits = [
        ...heroAltEdits(markdown, fmBlock, hash, defaultAlt, opts.overwrite),
        ...bodyAltEdits(body, fmBlock.length, hash, defaultAlt, opts.overwrite),
    ];
    if (edits.length === 0) {
        return { markdown, placements: [] };
    }
    const placements = edits.map(({ placement }) => placement);
    // Only entries flagged `apply` change bytes; a reported-but-unchanged placement carries no
    // span. The overlap guard runs over the writes in source order as a final safety net, then
    // the splices go from the highest offset down so the offsets still to be applied stay valid.
    const writes = dropOverlappingEdits(edits.filter(({ apply }) => apply));
    const rewritten = [...writes]
        .sort((a, b) => b.start - a.start)
        .reduce((text, edit) => text.slice(0, edit.start) + edit.text + text.slice(edit.end), markdown);
    return { markdown: rewritten, placements };
}
@@ -1 +1 @@
1
- export type CairnLogEvent = 'auth.link.requested' | 'auth.link.send_failed' | 'auth.token.minted' | 'auth.token.confirmed' | 'auth.session.created' | 'auth.session.destroyed' | 'commit.succeeded' | 'commit.failed' | 'config.invalid' | 'entry.published' | 'entry.discarded' | 'publish.failed' | 'github.unreachable' | 'guard.rejected' | 'media.uploaded' | 'media.upload_failed' | 'media.delivery_failed' | 'media.orphan_reconcile' | 'media.resolve_missing' | 'media.deleted' | 'media.delete_blocked';
1
+ export type CairnLogEvent = 'auth.link.requested' | 'auth.link.send_failed' | 'auth.token.minted' | 'auth.token.confirmed' | 'auth.session.created' | 'auth.session.destroyed' | 'commit.succeeded' | 'commit.failed' | 'config.invalid' | 'entry.published' | 'entry.discarded' | 'publish.failed' | 'github.unreachable' | 'guard.rejected' | 'media.uploaded' | 'media.upload_failed' | 'media.delivery_failed' | 'media.orphan_reconcile' | 'media.resolve_missing' | 'media.deleted' | 'media.delete_blocked' | 'media.bulk_deleted' | 'media.orphans_purged' | 'media.replaced' | 'media.replace_blocked' | 'media.alt_propagated';
@@ -0,0 +1,24 @@
1
+ import type { UsageEntry, UsageIndex } from './usage.js';
2
+ import type { MediaManifest } from './manifest.js';
3
+ /** One selected hash that is not deleted, with why and (for the where-used) its usage rows. The rows
4
+ * are present only for 'still-referenced'; an 'uncommitted' skip carries an empty list. */
5
+ export interface BulkDeleteSkip {
6
+ hash: string;
7
+ reason: 'still-referenced' | 'uncommitted';
8
+ usage: UsageEntry[];
9
+ }
10
+ /** The partitioned selection: the hashes safe to purge and the hashes held back. Both arrays keep the
11
+ * input order of `selected` so the screen reports them in the order the user picked. */
12
+ export interface BulkDeletePlan {
13
+ deletable: string[];
14
+ skipped: BulkDeleteSkip[];
15
+ }
16
+ /**
17
+ * Partition `selected` against a strict usage index and the media manifest.
18
+ *
19
+ * A hash with one or more usage rows is skipped 'still-referenced', carrying those rows for the
20
+ * where-used. A hash with no usage row and no committed manifest row is skipped 'uncommitted', since
21
+ * there is nothing committed to delete. A hash with no usage row and a committed manifest row is
22
+ * deletable. The input order of `selected` is preserved in both output arrays.
23
+ */
24
+ export declare function planBulkDelete(selected: string[], index: UsageIndex, manifest: MediaManifest): BulkDeletePlan;
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Partition `selected` against a strict usage index and the media manifest.
3
+ *
4
+ * A hash with one or more usage rows is skipped 'still-referenced', carrying those rows for the
5
+ * where-used. A hash with no usage row and no committed manifest row is skipped 'uncommitted', since
6
+ * there is nothing committed to delete. A hash with no usage row and a committed manifest row is
7
+ * deletable. The input order of `selected` is preserved in both output arrays.
8
+ */
9
+ export function planBulkDelete(selected, index, manifest) {
10
+ const deletable = [];
11
+ const skipped = [];
12
+ for (const hash of selected) {
13
+ const usage = index.get(hash);
14
+ if (usage && usage.length > 0) {
15
+ skipped.push({ hash, reason: 'still-referenced', usage });
16
+ }
17
+ else if (manifest[hash]) {
18
+ deletable.push(hash);
19
+ }
20
+ else {
21
+ skipped.push({ hash, reason: 'uncommitted', usage: [] });
22
+ }
23
+ }
24
+ return { deletable, skipped };
25
+ }
@@ -0,0 +1,37 @@
1
+ import { type ReconcileResult } from './reconcile.js';
2
+ import type { MediaManifest } from './manifest.js';
3
+ import type { UsageEntry, UsageIndex } from './usage.js';
4
+ /** A purgeable orphan: a stored R2 key with no manifest row, plus the 16-hex hash parsed from it. */
5
+ export interface OrphanByteRow {
6
+ /** The full R2 object key, e.g. "media/ff/ffffffffffffffff.webp". */
7
+ key: string;
8
+ /** The 16-hex content hash parsed from the key. */
9
+ hash: string;
10
+ }
11
+ /** A broken reference: a manifest row whose bytes are gone. Read-only, since purging it would drop a
12
+ * still-referenced asset's record; the screen shows where it is used so an operator can re-ingest. */
13
+ export interface BrokenRefRow {
14
+ /** The 16-hex content hash of the manifest row whose bytes are missing. */
15
+ hash: string;
16
+ /** The manifest row's display slug, or '' when the row is somehow absent. */
17
+ slug: string;
18
+ /** Where the asset is referenced, from the usage index. Empty when no reference was found. */
19
+ usage: UsageEntry[];
20
+ }
21
+ /** The scan surface model: the two row sets the Library renders. */
22
+ export interface OrphanScan {
23
+ orphanedBytes: OrphanByteRow[];
24
+ brokenRefs: BrokenRefRow[];
25
+ }
26
+ /**
27
+ * Project a reconcile read plus the usage index into the scan surface model.
28
+ *
29
+ * `orphanedBytes` come from `reconcile.orphanedObjects`: each key is parsed to its hash via the
30
+ * shared media-key grammar, and a key that does not match (so it is not a content-addressed media
31
+ * object) is skipped. A key whose hash the usage index references is also skipped: it is referenced
32
+ * on main or some open branch, so its bytes are in use, not orphaned. `brokenRefs` come from
33
+ * `reconcile.missingObjects`: each hash carries its
34
+ * manifest slug (falling back to '' when the row is absent) and its where-used rows from the index
35
+ * (an empty list when no reference was found). Both directions keep their input order.
36
+ */
37
+ export declare function buildOrphanScan(reconcile: ReconcileResult, manifest: MediaManifest, index: UsageIndex): OrphanScan;
@@ -0,0 +1,42 @@
1
+ // cairn-cms: the orphan-scan projection, the pure model behind the admin Media Library's scan
2
+ // surface. It folds reconcileMedia's two directions together with the usage index into the two rows
3
+ // the screen renders: the purgeable byte-rows and the read-only broken-reference rows (manifest rows
4
+ // whose bytes are gone). It only projects; no path here reads R2, the manifest, or git. The module
5
+ // is engine-internal and on no public subpath.
6
+ //
7
+ // An orphaned byte is a stored R2 object whose hash has NO manifest row AND appears in NO usage row,
8
+ // so it is referenced nowhere across main and every open branch. Reconcile only checks main's
9
+ // manifest, so a branch-only upload (bytes in R2, manifest row only on the open cairn/* branch) gets
10
+ // flagged as an orphaned object even though a colleague's in-progress draft references it. The byte
11
+ // purge is irreversible, so we intersect reconcile's verdict with the strict cross-branch usage
12
+ // index here: any hash the index references is in use and is dropped from orphanedBytes, which keeps
13
+ // a live draft's bytes from ever reaching the purge surface.
14
+ import { MEDIA_KEY_RE } from './reconcile.js';
15
/**
 * Build the scan surface model from a reconcile read, the manifest, and the usage index.
 *
 * `orphanedBytes`: one row per key in `reconcile.orphanedObjects` whose hash parses out of the key
 * via `MEDIA_KEY_RE` and is NOT present in the usage index. A key that does not match the grammar,
 * or whose hash the index holds, is left out.
 *
 * `brokenRefs`: one row per hash in `reconcile.missingObjects`, carrying the manifest row's slug
 * ('' when the row is absent) and the index's usage rows for that hash ([] when there are none).
 *
 * Both output arrays follow the order of their input.
 */
export function buildOrphanScan(reconcile, manifest, index) {
    const orphanedBytes = [];
    for (const key of reconcile.orphanedObjects) {
        const parsed = MEDIA_KEY_RE.exec(key);
        const hash = parsed?.[1];
        // Keep the key only when it parsed to a hash and the usage index does not hold that hash.
        if (hash !== undefined && !index.has(hash)) {
            orphanedBytes.push({ key, hash });
        }
    }
    const brokenRefs = reconcile.missingObjects.map((hash) => {
        const row = manifest[hash];
        const whereUsed = index.get(hash);
        return {
            hash,
            slug: row?.slug ?? '',
            usage: whereUsed ?? [],
        };
    });
    return { orphanedBytes, brokenRefs };
}
@@ -1,4 +1,7 @@
1
1
  import type { MediaManifest } from './manifest.js';
2
+ /** A stored media object key parses to its short hash via `media/<aa>/<shortHash>.<ext>`. Exported so
3
+ * the orphan-scan projection derives the same hash from an orphaned key without a second grammar. */
4
+ export declare const MEDIA_KEY_RE: RegExp;
2
5
  /** What a reconcile read found in either direction. `orphanedObjects` are stored R2 keys whose hash
3
6
  * has no manifest row; `missingObjects` are manifest hashes with no stored object. */
4
7
  export interface ReconcileResult {