@glw907/cairn-cms 0.57.1 → 0.59.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/dist/components/CairnMediaLibrary.svelte +2070 -26
  3. package/dist/components/CairnMediaLibrary.svelte.d.ts +10 -2
  4. package/dist/components/admin-icons.d.ts +5 -0
  5. package/dist/components/admin-icons.js +5 -0
  6. package/dist/components/cairn-admin.css +402 -3
  7. package/dist/content/media-rewrite.d.ts +65 -0
  8. package/dist/content/media-rewrite.js +442 -0
  9. package/dist/log/events.d.ts +1 -1
  10. package/dist/media/bulk-delete-plan.d.ts +24 -0
  11. package/dist/media/bulk-delete-plan.js +25 -0
  12. package/dist/media/orphan-scan.d.ts +37 -0
  13. package/dist/media/orphan-scan.js +42 -0
  14. package/dist/media/reconcile.d.ts +3 -0
  15. package/dist/media/reconcile.js +3 -2
  16. package/dist/media/rewrite-plan.d.ts +65 -0
  17. package/dist/media/rewrite-plan.js +61 -0
  18. package/dist/sveltekit/cairn-admin.d.ts +8 -0
  19. package/dist/sveltekit/cairn-admin.js +15 -0
  20. package/dist/sveltekit/content-routes.d.ts +118 -4
  21. package/dist/sveltekit/content-routes.js +572 -1
  22. package/dist/sveltekit/index.d.ts +1 -1
  23. package/package.json +1 -1
  24. package/src/lib/components/CairnMediaLibrary.svelte +2070 -26
  25. package/src/lib/components/admin-icons.ts +5 -0
  26. package/src/lib/content/media-rewrite.ts +555 -0
  27. package/src/lib/log/events.ts +6 -1
  28. package/src/lib/media/bulk-delete-plan.ts +54 -0
  29. package/src/lib/media/orphan-scan.ts +74 -0
  30. package/src/lib/media/reconcile.ts +3 -2
  31. package/src/lib/media/rewrite-plan.ts +122 -0
  32. package/src/lib/sveltekit/cairn-admin.ts +15 -0
  33. package/src/lib/sveltekit/content-routes.ts +722 -5
  34. package/src/lib/sveltekit/index.ts +3 -0
@@ -25,3 +25,8 @@ export { default as CopyIcon } from '@lucide/svelte/icons/copy';
25
25
  export { default as FileTextIcon } from '@lucide/svelte/icons/file-text';
26
26
  export { default as ClockIcon } from '@lucide/svelte/icons/clock';
27
27
  export { default as Link2OffIcon } from '@lucide/svelte/icons/link-2-off';
28
+ export { default as RefreshCwIcon } from '@lucide/svelte/icons/refresh-cw';
29
+ export { default as GitBranchIcon } from '@lucide/svelte/icons/git-branch';
30
+ export { default as ArrowRightIcon } from '@lucide/svelte/icons/arrow-right';
31
+ export { default as MegaphoneIcon } from '@lucide/svelte/icons/megaphone';
32
+ export { default as DatabaseIcon } from '@lucide/svelte/icons/database';
@@ -0,0 +1,555 @@
1
+ // cairn-cms: the replace-in-place rewrite transform. Given one entry's raw markdown and an old
2
+ // content-hash, it rewrites every reference to that hash (a body image, a figure-wrapped image, or
3
+ // the frontmatter hero image.src) to a new asset's canonical `media:` token, and returns a per
4
+ // placement diff. This is the heart of the media-library "replace" action: the same bytes pointed
5
+ // at a new asset, with the surrounding entry left exact.
6
+ //
7
+ // The output is byte-for-byte identical to the input except for the `media:` token substrings that
8
+ // are replaced. The transform never round-trips through gray-matter or a markdown serializer (those
9
+ // reformat YAML and are not byte stable); it splices strings by source offset. The match keys on the
10
+ // parsed hash, the immutable truth, never the cosmetic slug, so a bare `media:<hash>` and a
11
+ // `media:<slug>.<hash>` for the same bytes both repoint. A malformed or non-matching token is left
12
+ // untouched.
13
+ //
14
+ // The body arm parses with the same figure-aware pipeline the render and Edit-block transforms use
15
+ // (remark-parse + gfm + directive), so a `media:` token inside a code span or fence is not an image
16
+ // node and is correctly never matched, matching extractMediaRefs. It also lets the arm classify an
17
+ // image inside a `:::figure` as a 'figure' placement.
18
+ import matter from 'gray-matter';
19
+ import { unified } from 'unified';
20
+ import remarkParse from 'remark-parse';
21
+ import remarkGfm from 'remark-gfm';
22
+ import remarkDirective from 'remark-directive';
23
+ import { visit } from 'unist-util-visit';
24
+ import type { Image, Root } from 'mdast';
25
+ import type { ContainerDirective } from 'mdast-util-directive';
26
+ import { parseMediaToken } from '../media/reference.js';
27
+ import { escapeLinkText } from './links.js';
28
+
29
/**
 * A single reference that was repointed to the new asset.
 */
export interface RepointPlacement {
  /** The surface the reference lived on: a body image, a figure-wrapped image, or the frontmatter hero. */
  kind: 'body' | 'figure' | 'hero';
  /** The previous `media:` token, verbatim from the source. */
  before: string;
  /** The canonical `media:` token of the new asset; identical for every placement of one rewrite. */
  after: string;
}
37
+
38
/**
 * Outcome of a repoint: the rewritten markdown and one diff row per changed reference.
 */
export interface RepointResult {
  /** The entry's markdown after the token splices. */
  markdown: string;
  /** The changed references in document order (hero first, then body). */
  placements: RepointPlacement[];
}
43
+
44
/**
 * A token substring that has been located and is ready to splice.
 */
interface Edit {
  /** Absolute source offset where the old token begins. */
  start: number;
  /** Absolute source offset just past the old token. */
  end: number;
  /** The old token text. */
  before: string;
  /** Which surface the token was found on. */
  kind: RepointPlacement['kind'];
}
51
+
52
/**
 * Filter a list of spans so that no two kept spans overlap, scanning in the given order.
 *
 * A span is kept only when it is disjoint from every span kept before it. This is the last line
 * of defence before splicing: two splices over the same or overlapping bytes would corrupt the
 * result. A pure insert (`start === end`) only counts as overlapping when it lies strictly inside
 * another span, so inserts and edits that merely touch are both kept.
 *
 * @param edits Candidate spans, in the order they should be considered.
 * @returns A new array with the surviving spans, in input order.
 */
function dropOverlappingEdits<T extends { start: number; end: number }>(edits: T[]): T[] {
  const accepted: T[] = [];
  edits.forEach((candidate) => {
    const isFree = accepted.every(
      (prior) => !(candidate.start < prior.end && prior.start < candidate.end),
    );
    if (isFree) accepted.push(candidate);
  });
  return accepted;
}
64
+
65
/**
 * Locator pattern for candidate `media:` token substrings.
 *
 * The pattern is intentionally permissive: it accepts uppercase letters and other characters the
 * token grammar may not allow, so that a malformed token is still located and can then be rejected
 * by parseMediaToken instead of being skipped silently. A candidate ends at the first character
 * outside letters, digits, `.`, `_` and `-`, which includes whitespace, quotes, and YAML or
 * markdown delimiters, so a frontmatter value or an image destination terminates it.
 */
const MEDIA_TOKEN_SCAN = /media:[A-Za-z0-9._-]+/g;
71
+
72
/**
 * Split a leading frontmatter block off the markdown.
 *
 * `fmBlock` is the `---` fenced block including both fences and the newline after the closing
 * fence when there is one (the empty string when the document has no leading block); `body` is
 * everything after it. Because the block leads the document, an offset inside `fmBlock` is already
 * absolute, and a body offset needs `fmBlock.length` added. Every arm calls this so they agree on
 * the boundary.
 *
 * An empty block (`---` immediately followed by the closing `---`) is recognised as frontmatter.
 * Without that, the lazy scan would run on to the next `---` line in the body (for example a
 * thematic break) and treat the body content in between as frontmatter.
 *
 * @param markdown The entry's raw markdown.
 * @returns The leading block and the remaining body; concatenated they equal `markdown`.
 */
function splitFrontmatter(markdown: string): { fmBlock: string; body: string } {
  // The content group is optional and lazy (`??`): the closing fence is first tried directly
  // after the opening line (empty block), then after the shortest run of content lines.
  const fenced = /^---\r?\n(?:[\s\S]*?\r?\n)??---\r?\n?/.exec(markdown);
  const fmBlock = fenced ? fenced[0] : '';
  return { fmBlock, body: markdown.slice(fmBlock.length) };
}
81
+
82
/**
 * Parse markdown into an mdast tree using remark-parse with the GFM and directive extensions.
 *
 * Using the directive-aware pipeline lets the body arm see an enclosing `:::figure` container.
 * Intended to mirror parseFigureDoc in markdown-format.ts (not visible from here).
 *
 * @param doc Markdown source to parse.
 * @returns The parsed root node.
 */
function parseFigureDoc(doc: string): Root {
  const processor = unified().use(remarkParse).use(remarkGfm).use(remarkDirective);
  return processor.parse(doc) as Root;
}
87
+
88
/**
 * Report whether an image node lies inside a container directive named `figure`.
 *
 * The tree is walked afresh for each query: every `figure` container is searched for the exact
 * `target` node (identity comparison). Intended to mirror enclosingFigure in markdown-format.ts,
 * reduced to a boolean.
 *
 * @param tree The parsed document root.
 * @param target The image node to look for.
 * @returns `true` when some `figure` container has `target` among its descendants.
 */
function inFigure(tree: Root, target: Image): boolean {
  let enclosed = false;
  visit(tree, 'containerDirective', (directive: ContainerDirective) => {
    if (directive.name === 'figure') {
      visit(directive, 'image', (candidate: Image) => {
        if (candidate === target) enclosed = true;
      });
    }
  });
  return enclosed;
}
101
+
102
/**
 * One line of the frontmatter block, described by offsets relative to the block. Since the block
 * leads the document, these offsets are also absolute document offsets.
 */
interface FmLine {
  /** Offset of the line's first character. */
  start: number;
  /** Offset of the line's trailing newline, or the block length for the final line. */
  end: number;
}
109
+
110
/**
 * Index the frontmatter block by line, once, so the locator helpers can share the result rather
 * than re-scanning the block on every call.
 *
 * Lines are delimited by `\n`. A block that ends with a newline therefore yields a final empty
 * line whose start and end both equal the block length.
 *
 * @param fmBlock The frontmatter block text.
 * @returns One span per line, in order; always at least one entry.
 */
function fmLines(fmBlock: string): FmLine[] {
  const spans: FmLine[] = [];
  let cursor = 0;
  for (;;) {
    const newlineAt = fmBlock.indexOf('\n', cursor);
    if (newlineAt === -1) {
      // Last line: runs to the end of the block.
      spans.push({ start: cursor, end: fmBlock.length });
      return spans;
    }
    spans.push({ start: cursor, end: newlineAt });
    cursor = newlineAt + 1;
  }
}
124
+
125
/**
 * Find the inclusive line-index range `[lo, hi]` covered by a top-level key's block.
 *
 * `lo` is the first line that starts with `<key>:` at indent 0; `hi` is the last line before the
 * next top-level key or the closing `---` fence. A flow-style value written on the key's own line
 * gives a single-line range. Scoping later searches to this range is what keeps two image fields
 * that share a hash, or an image field whose hash also appears in a sibling text value, resolving
 * to distinct spans.
 *
 * @param lines Line spans of `fmBlock`.
 * @param fmBlock The frontmatter block text.
 * @param key The top-level key name, matched literally.
 * @returns The range, or `null` when no line opens the key at indent 0.
 */
function frontmatterKeyRange(lines: FmLine[], fmBlock: string, key: string): [number, number] | null {
  const textOf = (i: number): string => fmBlock.slice(lines[i].start, lines[i].end);
  const opensKey = new RegExp(`^${escapeForRegExp(key)}:`);
  const startsTopLevelKey = /^[^\s#][^:]*:/;
  // Line 0 (the opening fence) and the final line of the block are never scanned.
  const lastScanned = lines.length - 2;

  let first = -1;
  for (let i = 1; i <= lastScanned && first === -1; i += 1) {
    if (opensKey.test(textOf(i))) first = i;
  }
  if (first === -1) return null;

  let last = first;
  while (last + 1 <= lastScanned) {
    const next = textOf(last + 1);
    // A new top-level key or the closing fence ends the current key's block.
    if (startsTopLevelKey.test(next) || next === '---') break;
    last += 1;
  }
  return [first, last];
}
155
+
156
/**
 * A `src:` line found inside a block-style mapping, together with the `media:` token on it.
 * All offsets are relative to the frontmatter block.
 */
interface SrcLineHit {
  /** Offset where the `src:` line begins. */
  lineStart: number;
  /** Offset where the `src:` line ends (as recorded in the line index). */
  lineEnd: number;
  /** The whitespace that precedes `src:` on the line. */
  indent: string;
  /** Offset of the token's first character. */
  tokenStart: number;
  /** Offset just past the token. */
  tokenEnd: number;
  /** The token text as written. */
  token: string;
}
166
+
167
/**
 * Search the lines `[lo, hi]` for a block-style `src:` line whose value holds a token that parses
 * to `hash`.
 *
 * Candidate tokens in the value are found with the broad MEDIA_TOKEN_SCAN pattern and then
 * validated with parseMediaToken, so a malformed token is located and rejected rather than skipped
 * by the locator. The first matching token wins.
 *
 * @param lines Line spans of `fmBlock`.
 * @param fmBlock The frontmatter block text.
 * @param range Inclusive line-index range to search.
 * @param hash The content hash the token must parse to.
 * @returns The located line and token, or `null` when no line in range qualifies (which is the
 *   case for a flow-style value, since it has no `src:` line of its own).
 */
function findSrcLineInRange(
  lines: FmLine[],
  fmBlock: string,
  range: [number, number],
  hash: string,
): SrcLineHit | null {
  const [lo, hi] = range;
  const srcPrefix = /^(\s*)src:[ \t]?/;
  for (let lineNo = lo; lineNo <= hi; lineNo += 1) {
    const { start: lineStart, end: lineEnd } = lines[lineNo];
    const prefix = srcPrefix.exec(fmBlock.slice(lineStart, lineEnd));
    if (prefix === null) continue;

    // The value begins right after `src:` and its optional single separator.
    const valueStart = lineStart + prefix[0].length;
    const candidates = fmBlock.slice(valueStart, lineEnd).matchAll(MEDIA_TOKEN_SCAN);
    for (const candidate of candidates) {
      const token = candidate[0];
      const parsed = parseMediaToken(token);
      if (parsed && parsed.hash === hash) {
        const tokenStart = valueStart + candidate.index;
        return {
          lineStart,
          lineEnd,
          indent: prefix[1],
          tokenStart,
          tokenEnd: tokenStart + token.length,
          token,
        };
      }
    }
  }
  return null;
}
201
+
202
/**
 * List the top-level frontmatter keys that look like image fields pointing at `hash`.
 *
 * A key qualifies when its value is a non-array object with a string `src` that parseMediaToken
 * parses to `hash`. A token sitting in a plain string value (such as a title) therefore never
 * counts as a reference. The data is expected to come from gray-matter; the caller locates the
 * byte edit separately, keyed back by the returned key name.
 *
 * @param data The parsed frontmatter mapping.
 * @param hash The content hash to match.
 * @returns The matching keys with their value objects, in the mapping's enumeration order.
 */
function imageFieldKeys(data: Record<string, unknown>, hash: string): { key: string; obj: Record<string, unknown> }[] {
  const matches: { key: string; obj: Record<string, unknown> }[] = [];
  Object.entries(data).forEach(([key, value]) => {
    const isMapping = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    if (!isMapping) return;
    const obj = value as Record<string, unknown>;
    const src = obj.src;
    if (typeof src !== 'string') return;
    const parsed = parseMediaToken(src);
    if (parsed && parsed.hash === hash) matches.push({ key, obj });
  });
  return matches;
}
219
+
220
/**
 * Gather the hero `src` token edits inside the frontmatter block.
 *
 * The image-like keys are read through gray-matter; each key's `src:` line is then located within
 * that key's own block and its token recorded as a `'hero'` edit. A `media:` token in a plain-text
 * value sits on no `src:` line and is left alone. A key whose block or `src:` line cannot be
 * located (for example a flow-style value) contributes no edit.
 *
 * @param markdown The entry's raw markdown (parsed by gray-matter).
 * @param fmBlock The leading frontmatter block, or `''` when there is none.
 * @param oldHash The content hash whose references are being replaced.
 * @returns The located edits, in the order imageFieldKeys yields the keys.
 */
function frontmatterEdits(markdown: string, fmBlock: string, oldHash: string): Edit[] {
  if (fmBlock === '') return [];
  const data = matter(markdown).data as Record<string, unknown>;
  const lines = fmLines(fmBlock);
  const located: Edit[] = [];
  imageFieldKeys(data, oldHash).forEach(({ key }) => {
    const range = frontmatterKeyRange(lines, fmBlock, key);
    const hit = range ? findSrcLineInRange(lines, fmBlock, range, oldHash) : null;
    if (hit) {
      located.push({ start: hit.tokenStart, end: hit.tokenEnd, before: hit.token, kind: 'hero' });
    }
  });
  return located;
}
239
+
240
/**
 * Find where an image's url text sits inside the image node's own source span.
 *
 * The search begins just after the first `](` in the span, so a `media:`-like string in the alt
 * text ahead of it cannot produce a false match. When the span has no `](`, the search starts at
 * the beginning.
 *
 * @param span The source text of one image node.
 * @param url The url text to locate.
 * @returns Span-relative `[start, end)` offsets of the url, or `null` when it is not found, in
 *   which case the caller leaves the image untouched rather than splicing a guessed range.
 */
function locateImageToken(span: string, url: string): { start: number; end: number } | null {
  const bracketParen = span.indexOf('](');
  const searchFrom = bracketParen < 0 ? 0 : bracketParen + 2;
  const start = span.indexOf(url, searchFrom);
  return start < 0 ? null : { start, end: start + url.length };
}
251
+
252
/**
 * A body image whose url parses to the target hash. Both the token-rewrite arm and the alt-fill
 * arm consume this shape, so they agree on what counts as an image and on how a figure is named.
 */
interface MatchedBodyImage {
  /** The mdast image node. */
  node: Image;
  /** Absolute offset in the whole markdown where the image node starts. */
  nodeFrom: number;
  /** Absolute offset in the whole markdown where the image node ends. */
  nodeTo: number;
  /** `'figure'` when the image sits inside a `:::figure` container, otherwise `'body'`. */
  kind: 'body' | 'figure';
}
263
+
264
/**
 * Collect every image node in the body whose url parses to `hash`.
 *
 * The body is parsed with the figure-aware pipeline and only `image` nodes are visited, so a
 * `media:` token that is not an image node (for instance inside a code span or fence) is skipped.
 * Nodes that carry no position offsets are skipped as well.
 *
 * @param body The markdown after the frontmatter block.
 * @param blockLength Length of the frontmatter block, added to make offsets absolute.
 * @param hash The content hash to match.
 * @returns The matches in traversal order, with absolute offsets and the figure/body kind.
 */
function matchedBodyImages(body: string, blockLength: number, hash: string): MatchedBodyImage[] {
  const tree = parseFigureDoc(body);
  const found: MatchedBodyImage[] = [];
  visit(tree, 'image', (image: Image) => {
    const parsed = parseMediaToken(image.url);
    if (!parsed || parsed.hash !== hash) return;

    const startOffset = image.position?.start?.offset;
    const endOffset = image.position?.end?.offset;
    if (startOffset == null || endOffset == null) return;

    const kind = inFigure(tree, image) ? 'figure' : 'body';
    found.push({
      node: image,
      nodeFrom: blockLength + startOffset,
      nodeTo: blockLength + endOffset,
      kind,
    });
  });
  return found;
}
285
+
286
/**
 * Gather the token edits for body and figure images.
 *
 * For each matching image, the url is located inside that image's own source span and recorded
 * with absolute offsets. An image whose url cannot be located in its span contributes no edit.
 *
 * @param body The markdown after the frontmatter block.
 * @param blockLength Length of the frontmatter block.
 * @param oldHash The content hash whose references are being replaced.
 * @returns The located edits in source order, each tagged `'figure'` or `'body'`.
 */
function bodyEdits(body: string, blockLength: number, oldHash: string): Edit[] {
  const located: Edit[] = [];
  matchedBodyImages(body, blockLength, oldHash).forEach(({ node, nodeFrom, nodeTo, kind }) => {
    // Convert the absolute node offsets back to body-relative ones to cut out the node's source.
    const nodeSource = body.slice(nodeFrom - blockLength, nodeTo - blockLength);
    const token = locateImageToken(nodeSource, node.url);
    if (token === null) return;
    located.push({
      start: nodeFrom + token.start,
      end: nodeFrom + token.end,
      before: node.url,
      kind,
    });
  });
  return located;
}
301
+
302
/**
 * Repoint every reference to `oldHash` in one entry's raw markdown at `newToken`.
 *
 * Frontmatter: only the `src:` line of an image-like key is rewritten. The keys are read through
 * gray-matter and each `src:` line is located inside its own block, so a `media:` token that only
 * appears in a plain-text value is not touched. Body: image and figure-wrapped image nodes are
 * matched by their mdast offsets over the body slice.
 *
 * The rewrite is a sequence of string splices over the original text, so everything outside the
 * replaced token substrings is preserved exactly.
 *
 * @param markdown The entry's raw markdown.
 * @param oldHash The content hash whose references are replaced.
 * @param newToken The replacement `media:` token, written verbatim at every placement.
 * @returns The rewritten markdown and one placement per replaced reference (frontmatter first,
 *   then body). When nothing matches, the input markdown and an empty placement list.
 */
export function repointMediaRef(markdown: string, oldHash: string, newToken: string): RepointResult {
  const { fmBlock, body } = splitFrontmatter(markdown);

  const candidates = [
    ...frontmatterEdits(markdown, fmBlock, oldHash),
    ...bodyEdits(body, fmBlock.length, oldHash),
  ];
  const edits = dropOverlappingEdits(candidates);
  if (edits.length === 0) return { markdown, placements: [] };

  // The arms already emit in document order, so the diff simply follows the edit list.
  const placements = edits.map(
    ({ kind, before }): RepointPlacement => ({ kind, before, after: newToken }),
  );

  // Splice back to front so earlier offsets are not shifted by later replacements.
  const rewritten = [...edits]
    .sort((a, b) => b.start - a.start)
    .reduce((text, { start, end }) => text.slice(0, start) + newToken + text.slice(end), markdown);

  return { markdown: rewritten, placements };
}
339
+
340
/**
 * How the alt-fill treats a placement:
 * - `'will-fill'`: the alt is empty and is always filled;
 * - `'customized'`: the alt is non-empty; it is reported and overwritten only on opt-in;
 * - `'decorative-skipped'`: a decorative hero, which is never changed.
 */
export type AltBucket = 'will-fill' | 'customized' | 'decorative-skipped';
343
+
344
/**
 * One placement of the target hash and the effect of the alt-fill on it.
 */
export interface AltPlacement {
  /** The surface the placement lives on. */
  kind: 'body' | 'figure' | 'hero';
  /** The treatment the placement falls under. */
  bucket: AltBucket;
  /** The alt before the transform; the empty string when there was none. */
  before: string;
  /** The alt after the transform; the same as `before` when the placement was left unchanged. */
  after: string;
}
355
+
356
/**
 * Outcome of an alt-fill: the rewritten markdown and one diff row per placement.
 */
export interface AltFillResult {
  /** The entry's markdown after the alt splices. */
  markdown: string;
  /** Every placement found, in document order (hero first, then body), changed or not. */
  placements: AltPlacement[];
}
361
+
362
/**
 * A placement paired with its optional byte edit. Carrying both together keeps the diff rows and
 * the splices in step.
 */
interface AltEdit {
  /** `false` for a placement that is reported but not rewritten (a kept custom alt, a decorative hero). */
  apply: boolean;
  /** When `apply` is true: absolute offset where the replaced span begins. */
  start: number;
  /** When `apply` is true: absolute offset just past the replaced span; equal to `start` for a pure insert. */
  end: number;
  /** The text written over `[start, end)`. */
  text: string;
  /** The diff row for this placement. */
  placement: AltPlacement;
}
373
+
374
/**
 * Sort an existing alt into one of the two non-decorative buckets.
 *
 * @param existing The current alt text.
 * @returns `'will-fill'` when the alt is empty or whitespace only, otherwise `'customized'`.
 */
function classifyAlt(existing: string): 'will-fill' | 'customized' {
  const isBlank = existing.trim().length === 0;
  if (isBlank) return 'will-fill';
  return 'customized';
}
380
+
381
/**
 * Decide whether the alt text of a placement is actually rewritten.
 *
 * @param bucket The placement's bucket.
 * @param overwrite Whether the caller opted in to overwriting custom alts.
 * @returns `true` for `'will-fill'`, `overwrite` for `'customized'`, and `false` otherwise
 *   (a decorative hero is never written).
 */
function altIsEdited(bucket: AltBucket, overwrite: boolean): boolean {
  switch (bucket) {
    case 'will-fill':
      return true;
    case 'customized':
      return overwrite;
    default:
      return false;
  }
}
388
+
389
/**
 * Gather the alt edits for body and figure images.
 *
 * The alt source of an image runs from just after `![` to the `](` that opens the destination.
 * That `](` is found by searching backwards from the url's known position. A forward scan would
 * stop inside an alt that itself contains `](` and truncate the image on overwrite. The existing
 * alt is read from the parser's `node.alt`; the new alt is written through escapeLinkText. A body
 * image has no decorative slot, so its bucket is only ever `'will-fill'` or `'customized'`.
 *
 * @param body The markdown after the frontmatter block.
 * @param blockLength Length of the frontmatter block.
 * @param hash The content hash whose placements are processed.
 * @param defaultAlt The alt text to write.
 * @param overwrite Whether a non-empty alt is overwritten.
 * @returns One entry per located image in source order; `apply` is false for a kept custom alt.
 */
function bodyAltEdits(body: string, blockLength: number, hash: string, defaultAlt: string, overwrite: boolean): AltEdit[] {
  const collected: AltEdit[] = [];
  for (const { node, nodeFrom, nodeTo, kind } of matchedBodyImages(body, blockLength, hash)) {
    const source = body.slice(nodeFrom - blockLength, nodeTo - blockLength);
    if (!source.startsWith('![')) continue;

    const token = locateImageToken(source, node.url);
    if (!token) continue;
    // Last `](` at or before the url: the one that closes the alt and opens the destination.
    const altClose = source.lastIndexOf('](', token.start);
    if (altClose === -1) continue;

    const before = node.alt ?? '';
    const bucket = classifyAlt(before);
    const write = altIsEdited(bucket, overwrite);
    const placement: AltPlacement = { kind, bucket, before, after: write ? defaultAlt : before };

    if (write) {
      // Replace what lies between `![` (two characters) and the closing `](`.
      collected.push({
        apply: true,
        start: nodeFrom + 2,
        end: nodeFrom + altClose,
        text: escapeLinkText(defaultAlt),
        placement,
      });
    } else {
      // Reported only: carries a diff row but no splice.
      collected.push({ apply: false, start: nodeFrom, end: nodeFrom, text: '', placement });
    }
  }
  return collected;
}
428
+
429
/**
 * Make a literal string safe to embed in a RegExp source by backslash-escaping every regex
 * metacharacter in it, so that a key name or an indent is matched character for character.
 *
 * @param literal The text to match literally.
 * @returns The escaped text.
 */
function escapeForRegExp(literal: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return literal.replace(metacharacters, '\\$&');
}
434
+
435
/**
 * Find a sibling key line (`alt:` or `decorative:`) at exactly `indent` within the inclusive
 * line-index range `[lo, hi]` of one mapping, and return the span of its value.
 *
 * The range is the mapping's own block, so the whole mapping is searched rather than a
 * same-indent contiguous run: a blank line or a deeper-nested child between `src:` and `alt:`
 * does not hide an existing key (which would otherwise lead to a duplicate key being inserted).
 *
 * The value span starts after the key, its colon, and at most one separating space or tab. It
 * ends at the end of the value's last line:
 * - Lines after the key line that are indented deeper than `indent` belong to the value (a block
 *   scalar, a wrapped scalar, a nested child) and are included, along with blank lines between
 *   them. Replacing only the key line would leave those lines behind and break the YAML. Trailing
 *   blank lines are not included.
 * - A `\r` that ends the last line (CRLF input) is excluded, so a replacement keeps the line's
 *   terminator intact.
 *
 * @param lines Line spans of `fmBlock`.
 * @param fmBlock The frontmatter block text.
 * @param range Inclusive line-index range of the mapping.
 * @param indent The exact leading whitespace the key line must have.
 * @param key The key name, matched literally.
 * @returns The value span (block-relative offsets), or `null` when no such key line exists.
 */
function findSiblingKeyValue(
  lines: FmLine[],
  fmBlock: string,
  range: [number, number],
  indent: string,
  key: string,
): { start: number; end: number } | null {
  const [lo, hi] = range;
  const opener = `${indent}${key}:`;
  const textOf = (i: number): string => fmBlock.slice(lines[i].start, lines[i].end);

  for (let i = lo; i <= hi; i += 1) {
    if (!textOf(i).startsWith(opener)) continue;

    // Skip at most one space or tab after the colon, staying inside the line.
    let valueStart = lines[i].start + opener.length;
    if (valueStart < lines[i].end && (fmBlock[valueStart] === ' ' || fmBlock[valueStart] === '\t')) {
      valueStart += 1;
    }

    // Extend over continuation lines: anything indented deeper than the key, up to the next line
    // at the key's indent or shallower. Blank lines only count when a deeper line follows them.
    let last = i;
    for (let j = i + 1; j <= hi; j += 1) {
      const text = textOf(j);
      if (text.trim() === '') continue;
      const lead = text.length - text.replace(/^[ \t]+/, '').length;
      if (lead <= indent.length) break;
      last = j;
    }

    // Leave a CRLF terminator's `\r` outside the span.
    let valueEnd = lines[last].end;
    if (valueEnd > lines[last].start && fmBlock[valueEnd - 1] === '\r') valueEnd -= 1;

    return { start: valueStart, end: Math.max(valueStart, valueEnd) };
  }
  return null;
}
456
+
457
/**
 * Collect the hero alt edits inside the frontmatter block.
 *
 * The image-field objects (with their `decorative` and `alt` values) are read through gray-matter
 * to classify each field's bucket; the byte edit is then located structurally, scoped to the
 * field's own mapping block and keyed back by the top-level field name. A decorative hero is
 * reported and never edited; an empty alt is filled; a custom alt is overwritten only on opt-in.
 *
 * When the mapping already has an `alt` key at the `src:` line's indent, its value is replaced.
 * Otherwise a new `alt:` line is inserted directly after the `src:` line at the same indent,
 * using the same line terminator as the `src:` line (`\r\n` when that line ends in CRLF,
 * otherwise `\n`), so a CRLF block does not end up with mixed line endings. The new value is
 * written as `JSON.stringify(defaultAlt)`, i.e. a double-quoted scalar.
 *
 * A field whose block or `src:` line cannot be located (for example a flow-style
 * `image: { ... }`) is reported from the gray-matter read but never spliced.
 *
 * @param markdown The entry's raw markdown (parsed by gray-matter).
 * @param fmBlock The leading frontmatter block, or `''` when there is none.
 * @param hash The content hash whose hero placements are processed.
 * @param defaultAlt The alt text to write.
 * @param overwrite Whether a non-empty alt is overwritten.
 * @returns One entry per matching image field; `apply` is false for reported-only placements.
 */
function heroAltEdits(
  markdown: string,
  fmBlock: string,
  hash: string,
  defaultAlt: string,
  overwrite: boolean,
): AltEdit[] {
  if (fmBlock === '') return [];
  const data = matter(markdown).data as Record<string, unknown>;
  const lines = fmLines(fmBlock);
  const edits: AltEdit[] = [];
  const quoted = JSON.stringify(defaultAlt);
  for (const { key, obj } of imageFieldKeys(data, hash)) {
    const decorative = obj.decorative === true;
    const before = typeof obj.alt === 'string' ? obj.alt : '';
    const bucket: AltBucket = decorative ? 'decorative-skipped' : classifyAlt(before);
    const write = altIsEdited(bucket, overwrite);
    const after = write ? defaultAlt : before;
    const placement: AltPlacement = { kind: 'hero', bucket, before, after };

    // Only look for an anchor when something will be written.
    const range = write ? frontmatterKeyRange(lines, fmBlock, key) : null;
    const src = range ? findSrcLineInRange(lines, fmBlock, range, hash) : null;
    if (!write || !range || !src) {
      // Reported but not edited: a kept custom alt, a decorative hero, or a hero with no
      // locatable `src:` line. It carries a diff entry but no splice, so the bytes stay exact.
      edits.push({ apply: false, start: 0, end: 0, text: '', placement });
      continue;
    }

    const altSpan = findSiblingKeyValue(lines, fmBlock, range, src.indent, 'alt');
    if (altSpan) {
      edits.push({ apply: true, start: altSpan.start, end: altSpan.end, text: quoted, placement });
      continue;
    }

    // No alt key: insert one on its own line right after the src line, at the sibling indent.
    // `src.lineEnd` is the index of the line's `\n`; on a CRLF line the `\r` sits just before it,
    // so the insert goes ahead of that `\r` and brings its own `\r\n`.
    const crlf = src.lineEnd > src.lineStart && fmBlock[src.lineEnd - 1] === '\r';
    const insertAt = crlf ? src.lineEnd - 1 : src.lineEnd;
    const eol = crlf ? '\r\n' : '\n';
    edits.push({
      apply: true,
      start: insertAt,
      end: insertAt,
      text: `${eol}${src.indent}alt: ${quoted}`,
      placement,
    });
  }
  return edits;
}
511
+
512
/**
 * Set the alt text at every placement of `hash` in one entry's raw markdown.
 *
 * Buckets:
 * - an empty alt is filled with `defaultAlt` (`'will-fill'`);
 * - a non-empty alt is overwritten only when `opts.overwrite` is true (`'customized'`); when left
 *   alone it is still reported so a preview can show it and offer the opt-in;
 * - a frontmatter hero with `decorative: true` is `'decorative-skipped'` and never changed.
 * A body or figure image has no decorative slot, so its empty alt is always `'will-fill'`.
 *
 * The hero alt is edited by string splice inside the frontmatter block; gray-matter is used only
 * to read structure, never to serialize. A body alt is written through escapeLinkText; a hero alt
 * is written as a JSON-quoted scalar. Text outside the spliced spans is left as it was.
 *
 * @param markdown The entry's raw markdown.
 * @param hash The content hash whose placements are processed.
 * @param defaultAlt The alt text to write.
 * @param opts `overwrite`: whether non-empty alts are replaced too.
 * @returns The rewritten markdown plus every placement (hero first, then body in source order).
 *   When nothing matches, the input markdown and an empty placement list.
 */
export function fillAltForHash(
  markdown: string,
  hash: string,
  defaultAlt: string,
  opts: { overwrite: boolean },
): AltFillResult {
  const { fmBlock, body } = splitFrontmatter(markdown);
  const all: AltEdit[] = [
    ...heroAltEdits(markdown, fmBlock, hash, defaultAlt, opts.overwrite),
    ...bodyAltEdits(body, fmBlock.length, hash, defaultAlt, opts.overwrite),
  ];
  if (all.length === 0) return { markdown, placements: [] };

  const placements = all.map(({ placement }) => placement);

  // Only entries flagged `apply` change bytes; reported-only placements carry no real span.
  // The overlap guard runs over the writes in source order as a final safety net, then the
  // survivors are spliced back to front so earlier offsets stay valid.
  const writes = dropOverlappingEdits(all.filter(({ apply }) => apply));
  const rewritten = [...writes]
    .sort((a, b) => b.start - a.start)
    .reduce(
      (current, { start, end, text }) => current.slice(0, start) + text + current.slice(end),
      markdown,
    );

  return { markdown: rewritten, placements };
}
@@ -22,4 +22,9 @@ export type CairnLogEvent =
22
22
  | 'media.orphan_reconcile'
23
23
  | 'media.resolve_missing'
24
24
  | 'media.deleted'
25
- | 'media.delete_blocked';
25
+ | 'media.delete_blocked'
26
+ | 'media.bulk_deleted'
27
+ | 'media.orphans_purged'
28
+ | 'media.replaced'
29
+ | 'media.replace_blocked'
30
+ | 'media.alt_propagated';
@@ -0,0 +1,54 @@
1
+ // cairn-cms: the pure core of the bulk-delete safety floor. Given a STRICT usage index, the selected
2
+ // hashes, and the media manifest, it partitions the selection into what is safe to delete and what is
3
+ // skipped, with the reason. The gate is membership in the passed strict index, never a display count:
4
+ // the caller builds the index with strict:true (see usage.ts) so a transient branch-read failure
5
+ // fails the whole build rather than making a still-referenced asset look orphaned. This function
6
+ // stays pure so the same verdict is testable without a repo round trip and so the destructive action
7
+ // that consumes it can be reviewed against a fixed input.
8
+ import type { UsageEntry, UsageIndex } from './usage.js';
9
+ import type { MediaManifest } from './manifest.js';
10
+
11
/**
 * A selected hash that is held back from deletion, together with the reason.
 */
export interface BulkDeleteSkip {
  /** The selected hash. */
  hash: string;
  /** Why it is not deleted. */
  reason: 'still-referenced' | 'uncommitted';
  /** The usage rows for a `'still-referenced'` skip (for the where-used); empty for `'uncommitted'`. */
  usage: UsageEntry[];
}
18
+
19
/**
 * A selection split into what may be purged and what is held back. Both lists follow the order
 * of the original selection, so a screen can report them in the order the user picked.
 */
export interface BulkDeletePlan {
  /** Hashes that are safe to delete. */
  deletable: string[];
  /** Hashes that are not deleted, each with its reason. */
  skipped: BulkDeleteSkip[];
}
25
+
26
/**
 * Partition `selected` against a strict usage index and the media manifest.
 *
 * - A hash with one or more usage rows is skipped as `'still-referenced'`, carrying those rows
 *   for the where-used.
 * - A hash with no usage row and a committed manifest row is deletable.
 * - A hash with no usage row and no committed manifest row is skipped as `'uncommitted'`, since
 *   there is nothing committed to delete.
 *
 * A manifest row only counts when it is an own property of `manifest`. A plain truthiness lookup
 * would also see inherited members such as `toString` or `__proto__`, and a selected string with
 * such a name would be wrongly classified as deletable.
 *
 * @param selected The hashes the user picked; their order is preserved in both output lists.
 * @param index The strict usage index, looked up by hash.
 * @param manifest The media manifest, keyed by hash.
 * @returns The deletable hashes and the skipped hashes with their reasons.
 */
export function planBulkDelete(
  selected: string[],
  index: UsageIndex,
  manifest: MediaManifest,
): BulkDeletePlan {
  const deletable: string[] = [];
  const skipped: BulkDeleteSkip[] = [];

  // Own-property gate: never let a prototype member pass for a manifest row.
  const isCommitted = (hash: string): boolean =>
    Object.prototype.hasOwnProperty.call(manifest, hash) && Boolean(manifest[hash]);

  for (const hash of selected) {
    const usage = index.get(hash);
    if (usage && usage.length > 0) {
      skipped.push({ hash, reason: 'still-referenced', usage });
    } else if (isCommitted(hash)) {
      deletable.push(hash);
    } else {
      skipped.push({ hash, reason: 'uncommitted', usage: [] });
    }
  }

  return { deletable, skipped };
}