@glw907/cairn-cms 0.56.2 → 0.57.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/dist/components/AdminLayout.svelte +3 -0
- package/dist/components/CairnAdmin.svelte +8 -1
- package/dist/components/CairnAdmin.svelte.d.ts +2 -0
- package/dist/components/CairnMediaLibrary.svelte +929 -0
- package/dist/components/CairnMediaLibrary.svelte.d.ts +37 -0
- package/dist/components/EditPage.svelte +347 -7
- package/dist/components/EditPage.svelte.d.ts +2 -0
- package/dist/components/MarkdownEditor.svelte +283 -1
- package/dist/components/MarkdownEditor.svelte.d.ts +37 -1
- package/dist/components/MediaCaptureCard.svelte +135 -0
- package/dist/components/MediaCaptureCard.svelte.d.ts +40 -0
- package/dist/components/MediaFigureControl.svelte +247 -0
- package/dist/components/MediaFigureControl.svelte.d.ts +40 -0
- package/dist/components/MediaHeroField.svelte +569 -0
- package/dist/components/MediaHeroField.svelte.d.ts +67 -0
- package/dist/components/MediaInsertPopover.svelte +449 -0
- package/dist/components/MediaInsertPopover.svelte.d.ts +58 -0
- package/dist/components/MediaPicker.svelte +257 -0
- package/dist/components/MediaPicker.svelte.d.ts +41 -0
- package/dist/components/admin-icons.d.ts +12 -0
- package/dist/components/admin-icons.js +12 -0
- package/dist/components/cairn-admin.css +901 -9
- package/dist/components/client-ingest.d.ts +142 -0
- package/dist/components/client-ingest.js +297 -0
- package/dist/components/editor-media.d.ts +11 -0
- package/dist/components/editor-media.js +206 -0
- package/dist/components/editor-placeholder.d.ts +26 -0
- package/dist/components/editor-placeholder.js +166 -0
- package/dist/components/index.d.ts +1 -0
- package/dist/components/index.js +1 -0
- package/dist/components/markdown-directives.d.ts +12 -0
- package/dist/components/markdown-directives.js +42 -0
- package/dist/components/markdown-format.d.ts +89 -0
- package/dist/components/markdown-format.js +255 -0
- package/dist/components/media-upload-outcome.d.ts +52 -0
- package/dist/components/media-upload-outcome.js +48 -0
- package/dist/content/compose.js +3 -0
- package/dist/content/frontmatter.js +17 -0
- package/dist/content/manifest.d.ts +4 -0
- package/dist/content/manifest.js +41 -1
- package/dist/content/media-refs.d.ts +7 -0
- package/dist/content/media-refs.js +52 -0
- package/dist/content/schema.d.ts +5 -2
- package/dist/content/schema.js +17 -0
- package/dist/content/types.d.ts +62 -11
- package/dist/content/validate.js +27 -0
- package/dist/delivery/public-routes.d.ts +16 -0
- package/dist/delivery/public-routes.js +46 -3
- package/dist/delivery/seo-fields.js +7 -1
- package/dist/delivery/seo.d.ts +2 -0
- package/dist/delivery/seo.js +3 -0
- package/dist/doctor/checks-local.d.ts +1 -0
- package/dist/doctor/checks-local.js +21 -0
- package/dist/doctor/index.d.ts +3 -1
- package/dist/doctor/index.js +11 -2
- package/dist/doctor/types.d.ts +3 -0
- package/dist/doctor/wrangler-config.d.ts +3 -0
- package/dist/doctor/wrangler-config.js +20 -0
- package/dist/env.d.ts +19 -0
- package/dist/env.js +26 -0
- package/dist/index.d.ts +1 -1
- package/dist/log/events.d.ts +1 -1
- package/dist/media/config.d.ts +24 -0
- package/dist/media/config.js +69 -0
- package/dist/media/delivery-bucket.d.ts +34 -0
- package/dist/media/delivery-bucket.js +10 -0
- package/dist/media/index.d.ts +6 -0
- package/dist/media/index.js +13 -0
- package/dist/media/library-entry.d.ts +30 -0
- package/dist/media/library-entry.js +17 -0
- package/dist/media/manifest.d.ts +44 -0
- package/dist/media/manifest.js +105 -0
- package/dist/media/naming.d.ts +18 -0
- package/dist/media/naming.js +112 -0
- package/dist/media/reconcile.d.ts +36 -0
- package/dist/media/reconcile.js +45 -0
- package/dist/media/reference.d.ts +12 -0
- package/dist/media/reference.js +33 -0
- package/dist/media/sniff.d.ts +18 -0
- package/dist/media/sniff.js +106 -0
- package/dist/media/store.d.ts +25 -0
- package/dist/media/store.js +16 -0
- package/dist/media/transform-url.d.ts +26 -0
- package/dist/media/transform-url.js +38 -0
- package/dist/media/usage.d.ts +48 -0
- package/dist/media/usage.js +90 -0
- package/dist/render/pipeline.d.ts +2 -0
- package/dist/render/pipeline.js +13 -2
- package/dist/render/registry.js +3 -0
- package/dist/render/remark-figure.d.ts +4 -0
- package/dist/render/remark-figure.js +103 -0
- package/dist/render/resolve-media.d.ts +34 -0
- package/dist/render/resolve-media.js +78 -0
- package/dist/render/sanitize-schema.d.ts +4 -2
- package/dist/render/sanitize-schema.js +5 -3
- package/dist/sveltekit/admin-dispatch.d.ts +2 -0
- package/dist/sveltekit/admin-dispatch.js +5 -0
- package/dist/sveltekit/cairn-admin.d.ts +8 -1
- package/dist/sveltekit/cairn-admin.js +10 -2
- package/dist/sveltekit/content-routes.d.ts +68 -2
- package/dist/sveltekit/content-routes.js +461 -10
- package/dist/sveltekit/csrf.d.ts +16 -0
- package/dist/sveltekit/csrf.js +18 -0
- package/dist/sveltekit/guard.js +10 -3
- package/dist/sveltekit/index.d.ts +2 -1
- package/dist/sveltekit/index.js +1 -0
- package/dist/sveltekit/media-route.d.ts +12 -0
- package/dist/sveltekit/media-route.js +137 -0
- package/dist/vite/index.d.ts +3 -0
- package/dist/vite/index.js +7 -2
- package/package.json +7 -1
- package/src/lib/components/AdminLayout.svelte +3 -0
- package/src/lib/components/CairnAdmin.svelte +8 -1
- package/src/lib/components/CairnMediaLibrary.svelte +929 -0
- package/src/lib/components/EditPage.svelte +347 -7
- package/src/lib/components/MarkdownEditor.svelte +283 -1
- package/src/lib/components/MediaCaptureCard.svelte +135 -0
- package/src/lib/components/MediaFigureControl.svelte +247 -0
- package/src/lib/components/MediaHeroField.svelte +569 -0
- package/src/lib/components/MediaInsertPopover.svelte +449 -0
- package/src/lib/components/MediaPicker.svelte +257 -0
- package/src/lib/components/admin-icons.ts +12 -0
- package/src/lib/components/cairn-admin.css +37 -0
- package/src/lib/components/client-ingest.ts +380 -0
- package/src/lib/components/editor-media.ts +248 -0
- package/src/lib/components/editor-placeholder.ts +213 -0
- package/src/lib/components/index.ts +1 -0
- package/src/lib/components/markdown-directives.ts +46 -0
- package/src/lib/components/markdown-format.ts +307 -1
- package/src/lib/components/media-upload-outcome.ts +83 -0
- package/src/lib/content/compose.ts +3 -0
- package/src/lib/content/frontmatter.ts +16 -1
- package/src/lib/content/manifest.ts +44 -1
- package/src/lib/content/media-refs.ts +58 -0
- package/src/lib/content/schema.ts +31 -7
- package/src/lib/content/types.ts +78 -13
- package/src/lib/content/validate.ts +26 -1
- package/src/lib/delivery/public-routes.ts +52 -3
- package/src/lib/delivery/seo-fields.ts +6 -1
- package/src/lib/delivery/seo.ts +5 -0
- package/src/lib/doctor/checks-local.ts +22 -0
- package/src/lib/doctor/index.ts +21 -3
- package/src/lib/doctor/types.ts +3 -0
- package/src/lib/doctor/wrangler-config.ts +23 -0
- package/src/lib/env.ts +28 -0
- package/src/lib/index.ts +2 -0
- package/src/lib/log/events.ts +8 -1
- package/src/lib/media/config.ts +103 -0
- package/src/lib/media/delivery-bucket.ts +41 -0
- package/src/lib/media/index.ts +22 -0
- package/src/lib/media/library-entry.ts +58 -0
- package/src/lib/media/manifest.ts +122 -0
- package/src/lib/media/naming.ts +130 -0
- package/src/lib/media/reconcile.ts +79 -0
- package/src/lib/media/reference.ts +40 -0
- package/src/lib/media/sniff.ts +114 -0
- package/src/lib/media/store.ts +57 -0
- package/src/lib/media/transform-url.ts +58 -0
- package/src/lib/media/usage.ts +152 -0
- package/src/lib/render/pipeline.ts +17 -3
- package/src/lib/render/registry.ts +5 -0
- package/src/lib/render/remark-figure.ts +132 -0
- package/src/lib/render/resolve-media.ts +96 -0
- package/src/lib/render/sanitize-schema.ts +5 -3
- package/src/lib/sveltekit/admin-dispatch.ts +6 -1
- package/src/lib/sveltekit/cairn-admin.ts +13 -3
- package/src/lib/sveltekit/content-routes.ts +573 -12
- package/src/lib/sveltekit/csrf.ts +18 -0
- package/src/lib/sveltekit/guard.ts +12 -3
- package/src/lib/sveltekit/index.ts +6 -0
- package/src/lib/sveltekit/media-route.ts +158 -0
- package/src/lib/vite/index.ts +9 -2
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// cairn-cms: media naming. Media is content-addressed: the sha256 of the bytes names the object, so
|
|
2
|
+
// the same bytes always land at the same key no matter the original filename. This module owns the
|
|
3
|
+
// hash, the ingest slug transform, the R2 object key, and the public delivery path. The slug grammar
|
|
4
|
+
// here matches the one parseMediaToken validates in ./reference.ts, so an ingested filename round
|
|
5
|
+
// trips through the media: token unchanged.
|
|
6
|
+
|
|
7
|
+
// slugifyFilename output always satisfies parseMediaToken's grammar (lowercase alphanumerics joined
|
|
8
|
+
// by single internal hyphens, no leading or trailing hyphen), or is the literal `file`.
|
|
9
|
+
|
|
10
|
+
/** Matches the combining diacritical marks (U+0300 through U+036F) that an NFD decompose splits off
 * an accented letter; removing them leaves the plain base letter. The range is spelled with
 * escapes because the marks themselves do not render in source. */
const COMBINING_MARKS = /[\u0300-\u036f]/g;

/** Device names Windows reserves: the four fixed devices, then the numbered serial (`com1` to
 * `com9`) and parallel (`lpt1` to `lpt9`) ports. Entries are lowercase, so callers compare against
 * an already-lowercased slug. A slug equal to one of these names a device rather than a file on
 * that platform, so it is never used bare. */
const RESERVED = new Set<string>([
  'con',
  'prn',
  'aux',
  'nul',
  // com1..com9 followed by lpt1..lpt9.
  ...['com', 'lpt'].flatMap((port) =>
    Array.from({ length: 9 }, (_unused, index) => `${port}${index + 1}`),
  ),
]);

/** Upper bound on slug length. The cap is applied before the reserved-name and empty fallbacks. */
const MAX_SLUG = 80;

/** Exactly 16 lowercase hex characters: the shape of a bare content-hash reference. A slug with
 * this shape would be indistinguishable from `media:<hash>`, so slugifyFilename screens it. */
const HASH_RE = /^[0-9a-f]{16}$/;

/** A bare extension (no dot) of one to five lowercase alphanumerics, for example `webp`. This is the
 * only extension shape r2Key accepts. */
const R2_EXT_RE = /^[a-z0-9]{1,5}$/;
|
|
51
|
+
|
|
52
|
+
// Web Crypto's BufferSource no longer accepts a Uint8Array's generic buffer type under strict lib
// types, because the backing buffer might be a SharedArrayBuffer. Copy exactly the view's window
// (byteOffset through byteOffset + byteLength) out of the backing buffer and hand that to digest.
// Mirrors the buf helper in ../github/signing.ts.
function asArrayBuffer(bytes: Uint8Array): ArrayBuffer {
  const start = bytes.byteOffset;
  const end = start + bytes.byteLength;
  return bytes.buffer.slice(start, end) as ArrayBuffer;
}
|
|
58
|
+
|
|
59
|
+
/** Hash the bytes with SHA-256 through Web Crypto and return the digest as lowercase hex. Each
 * digest byte is written as two zero-padded hex digits, so the result is always 64 characters. */
export async function hashBytes(bytes: Uint8Array): Promise<string> {
  const digest = new Uint8Array(await crypto.subtle.digest('SHA-256', asArrayBuffer(bytes)));
  let hex = '';
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
|
|
66
|
+
|
|
67
|
+
/** Truncate a full hex digest to its leading 16 characters, the content-hash prefix that media
 * references commit to. An input shorter than 16 characters is returned whole. */
export function shortHash(full: string): string {
  const prefixLength = 16;
  return full.substring(0, prefixLength);
}
|
|
71
|
+
|
|
72
|
+
/** Turn a raw filename into a slug that satisfies the media: slug grammar, or the literal `file`.
 *
 * Steps, in order: drop everything from the last dot on (the extension), lowercase, NFD-decompose
 * and strip combining marks so accented letters fold to their base, replace each run of characters
 * outside `a-z0-9` with one hyphen, trim leading and trailing hyphens, and cap the result at
 * MAX_SLUG characters (re-trimming a hyphen the cut may have exposed).
 *
 * Fallbacks: an empty result becomes `file`; a Windows reserved name gets a `-file` suffix; a slug
 * shaped like a bare 16-hex hash gets an `-img` suffix so the slug and bare-hash reference forms
 * stay disjoint. */
export function slugifyFilename(name: string): string {
  const lastDot = name.lastIndexOf('.');
  const stem = lastDot < 0 ? name : name.slice(0, lastDot);

  const folded = stem.toLowerCase().normalize('NFD').replace(COMBINING_MARKS, '');
  const hyphenated = folded.replace(/[^a-z0-9]+/g, '-');
  const trimmed = hyphenated.replace(/^-+|-+$/g, '');

  // The cut can land right after a hyphen, so trim the tail again once capped.
  const slug =
    trimmed.length > MAX_SLUG ? trimmed.slice(0, MAX_SLUG).replace(/-+$/, '') : trimmed;

  if (slug === '') return 'file';
  if (RESERVED.has(slug)) return `${slug}-file`;
  // A 16-hex slug would collide with the `media:<hash>` reference form.
  if (HASH_RE.test(slug)) return `${slug}-img`;
  return slug;
}
|
|
99
|
+
|
|
100
|
+
/** Build the content-addressed R2 object key `media/<aa>/<shortHash>.<ext>`, where `<aa>` is the
 * first two hex characters of the short hash (the fan-out directory). The result has no leading
 * slash because it is an object key, not a URL.
 *
 * @param shortHash The 16-character lowercase hex content-hash prefix.
 * @param ext The bare extension (no dot), for example `webp`.
 * @throws Error when `shortHash` is not 16 lowercase hex characters, or `ext` is not one to five
 *   lowercase alphanumerics. */
export function r2Key(shortHash: string, ext: string): string {
  const hashIsValid = HASH_RE.test(shortHash);
  if (!hashIsValid) {
    throw new Error(`r2Key: hash must be 16 lowercase hex chars, got "${shortHash}"`);
  }

  const extIsValid = R2_EXT_RE.test(ext);
  if (!extIsValid) {
    throw new Error(`r2Key: ext must be 1 to 5 lowercase alphanumerics, got "${ext}"`);
  }

  const fanOut = shortHash.slice(0, 2);
  return `media/${fanOut}/${shortHash}.${ext}`;
}
|
|
112
|
+
|
|
113
|
+
/** Build the public delivery URL path under the delivery base (`publicBase`, default `/media`).
 *
 * - `slug` form: `<base>/<slug>.<shortHash>.<ext>`, or `<base>/<shortHash>.<ext>` when the slug is
 *   null. This is the human-readable form.
 * - `opaque` form: `<base>/<aa>/<shortHash>.<ext>`, mirroring the R2 fan-out on the first two hex
 *   characters of the short hash. The slug is ignored.
 *
 * Trailing slashes on `publicBase` are dropped before joining, so `/media/` behaves like `/media`
 * and a root base of `/` yields `/<file>` rather than `//<file>`, which a browser would resolve as
 * a protocol-relative URL pointing at another host.
 *
 * @param slug The display slug, or null for the bare-hash form.
 * @param shortHash The 16-character content-hash prefix.
 * @param ext The bare extension (no dot).
 * @param urlForm Which URL shape to build.
 * @param publicBase The delivery base path; defaults to `/media`. */
export function publicPath(
  slug: string | null,
  shortHash: string,
  ext: string,
  urlForm: 'slug' | 'opaque',
  publicBase = '/media',
): string {
  const base = publicBase.replace(/\/+$/, '');
  const file = `${shortHash}.${ext}`;

  if (urlForm === 'opaque') {
    return `${base}/${shortHash.slice(0, 2)}/${file}`;
  }
  return slug === null ? `${base}/${file}` : `${base}/${slug}.${file}`;
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
// cairn-cms: the media reconcile read. Storage is put-first and idempotent, so an upload whose entry
|
|
2
|
+
// is never saved leaves R2 bytes with no manifest row (an orphan), and a manifest row whose bytes
|
|
3
|
+
// were never stored (or were collected) points at nothing. This module reads both directions: the
|
|
4
|
+
// stored R2 keys under the media/ prefix versus the manifest's content-hash keys. It only reads and
|
|
5
|
+
// reports; no path here deletes (destructive collection is deferred to Phase 4). The module is
|
|
6
|
+
// engine-internal and on no public subpath, so the narrow bucket seam below is a local interface, not
|
|
7
|
+
// a re-export of any @cloudflare/workers-types name.
|
|
8
|
+
import type { MediaManifest } from './manifest.js';
|
|
9
|
+
import { log } from '../log/index.js';
|
|
10
|
+
|
|
11
|
+
/** Grammar of a stored media object key, `media/<aa>/<shortHash>.<ext>`: a two-hex fan-out
 * directory, a 16-hex short hash (capture group 1), and an extension of one to five lowercase
 * alphanumerics. */
const MEDIA_KEY_RE = /^media\/[0-9a-f]{2}\/([0-9a-f]{16})\.[a-z0-9]{1,5}$/;

/** The outcome of a reconcile read, covering both directions of mismatch between storage and the
 * manifest. */
export interface ReconcileResult {
  /** Full R2 keys of stored objects whose content hash has no manifest row. */
  orphanedObjects: string[];
  /** Manifest content-hash keys for which no stored object was found. */
  missingObjects: string[];
}
|
|
22
|
+
|
|
23
|
+
/** The pure comparison at the heart of the reconcile read. Parses the short hash out of every
 * stored key, then reports stored keys whose hash is not a manifest key (orphans) and manifest keys
 * that no stored key carried (missing). Stored keys that do not match the media-key grammar are
 * skipped entirely: they are not content-addressed media objects this reconcile owns.
 *
 * @param storedKeys Full R2 object keys found in storage.
 * @param manifest The media manifest; its own keys are treated as the known content hashes.
 * @returns Orphaned keys in `storedKeys` order and missing hashes in manifest key order. */
export function reconcileMedia(storedKeys: string[], manifest: MediaManifest): ReconcileResult {
  const knownHashes = new Set(Object.keys(manifest));

  // Keep only the keys that parse, paired with the hash they carry.
  const parsedKeys = storedKeys.flatMap((storedKey) => {
    const contentHash = storedKey.match(MEDIA_KEY_RE)?.[1];
    return contentHash === undefined ? [] : [{ storedKey, contentHash }];
  });
  const seenHashes = new Set(parsedKeys.map((entry) => entry.contentHash));

  const orphanedObjects = parsedKeys
    .filter((entry) => !knownHashes.has(entry.contentHash))
    .map((entry) => entry.storedKey);
  const missingObjects = [...knownHashes].filter((contentHash) => !seenHashes.has(contentHash));

  return { orphanedObjects, missingObjects };
}
|
|
42
|
+
|
|
43
|
+
/** A single page of list results, reduced to the fields the reconcile read consumes. */
interface ReconcileListPage {
  /** The objects on this page; only each object's key is read. */
  objects: Array<{ key: string }>;
  /** True when more pages follow this one. */
  truncated: boolean;
  /** The cursor to pass to the next list call when the page is truncated. */
  cursor?: string;
}

/** The only bucket capability the reconcile read needs: one prefixed, paginated list call. It is
 * declared as a local structural interface so that no @cloudflare/workers-types name is imported.
 * The module is internal and on no public subpath, but the narrow seam keeps the build
 * self-contained either way. */
export interface ReconcileBucket {
  list(opts?: { prefix?: string; cursor?: string }): Promise<ReconcileListPage>;
}
|
|
56
|
+
|
|
57
|
+
/** The glue around reconcileMedia: collect every stored key under the `media/` prefix, following
 * the list cursor for as long as a page reports itself truncated, then reconcile those keys against
 * the manifest, log a summary, and return the result.
 *
 * The `media.orphan_reconcile` log record carries the two counts only, never bytes or a key list.
 * The keys are content hashes and so carry no PII, but the counts are all an operator needs to size
 * the orphan state.
 *
 * @param bucket The bucket to list.
 * @param manifest The media manifest to reconcile against.
 * @returns The orphaned and missing sets from reconcileMedia. */
export async function runReconcile(
  bucket: ReconcileBucket,
  manifest: MediaManifest,
): Promise<ReconcileResult> {
  const storedKeys: string[] = [];
  let cursor: string | undefined = undefined;

  for (;;) {
    const page = await bucket.list({ prefix: 'media/', cursor });
    for (const { key } of page.objects) {
      storedKeys.push(key);
    }
    // Stop on the last page, or when a truncated page supplies no cursor to continue from.
    if (!page.truncated || page.cursor === undefined) break;
    cursor = page.cursor;
  }

  const result = reconcileMedia(storedKeys, manifest);
  const orphaned = result.orphanedObjects.length;
  const missing = result.missingObjects.length;
  log.info('media.orphan_reconcile', { orphaned, missing });
  return result;
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// cairn-cms: the media: reference token. A media reference is the logical handle that content
|
|
2
|
+
// commits to git, keyed to a content-hash prefix so the same bytes resolve no matter where they
|
|
3
|
+
// are stored or what they are named. The canonical form is `media:<slug>.<hash>`: the hash is a
|
|
4
|
+
// 16-character lowercase hex content-hash prefix that identifies the bytes, and the slug is a
|
|
5
|
+
// cosmetic display name. The bare `media:<hash>` form (no slug) is also valid. This module owns
|
|
6
|
+
// the grammar; it mirrors the cairn: link codec in ../content/links.ts.
|
|
7
|
+
|
|
8
|
+
/** A parsed media reference: the content-hash prefix that identifies the asset, plus the display
 * slug when the token carried one. */
export interface MediaRef {
  /** The display slug, or null for the bare-hash form. */
  slug: string | null;
  /** The content-hash prefix that identifies the bytes. */
  hash: string;
}

/** Exactly 16 lowercase hex characters, the content-hash prefix. */
const HASH_RE = /^[0-9a-f]{16}$/;

/** The slug grammar produced by the Task 2 slugify transform: one or more runs of lowercase
 * alphanumerics separated by single hyphens. No leading or trailing hyphen is allowed, and no dot,
 * because the dot separates the slug from the hash. */
const SLUG_RE = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
|
|
21
|
+
|
|
22
|
+
/** Parse a media reference href in either the canonical `media:<slug>.<hash>` form or the bare
 * `media:<hash>` form.
 *
 * The text after the scheme is split on its last dot. With no dot, the whole remainder must be a
 * valid hash. With a dot, the left side must satisfy the slug grammar and the right side the hash
 * grammar, so a slug that illegally contains a dot is rejected by the slug check.
 *
 * @param href The href to parse.
 * @returns The parsed reference, or null for any other href or a malformed token. */
export function parseMediaToken(href: string): MediaRef | null {
  const scheme = 'media:';
  if (!href.startsWith(scheme)) return null;

  const body = href.slice(scheme.length);
  const split = body.lastIndexOf('.');

  if (split < 0) {
    // Bare-hash form: no slug.
    return HASH_RE.test(body) ? { slug: null, hash: body } : null;
  }

  const slug = body.slice(0, split);
  const hash = body.slice(split + 1);
  return HASH_RE.test(hash) && SLUG_RE.test(slug) ? { slug, hash } : null;
}
|
|
35
|
+
|
|
36
|
+
/** Serialize a reference back to its canonical media: token, the inverse of parseMediaToken, so
 * that parsing a token and writing it again is stable.
 *
 * @param ref The reference to write.
 * @returns `media:<slug>.<hash>` when the slug is present, otherwise `media:<hash>`. */
export function mediaToken(ref: MediaRef): string {
  const { slug, hash } = ref;
  if (slug === null) {
    return `media:${hash}`;
  }
  return `media:${slug}.${hash}`;
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// cairn-cms: content-type sniffing and the engine-level upload deny-list. The upload action trusts no
|
|
2
|
+
// client-declared type: it sniffs the real type from the leading bytes and screens the payload against a
|
|
3
|
+
// deny-list a site cannot override. Both functions are pure and Worker-clean (a plain Uint8Array, no
|
|
4
|
+
// Node Buffer and no stream), so they run unchanged on Cloudflare Workers and under vitest.
|
|
5
|
+
//
|
|
6
|
+
// The sniff is necessary but not sufficient. A polyglot can carry a valid image magic and an HTML tail,
|
|
7
|
+
// and this byte check sees only the magic. The delivery route's response headers (X-Content-Type-Options:
|
|
8
|
+
// nosniff, Content-Disposition: inline, a restrictive Content-Security-Policy) are the real XSS control
|
|
9
|
+
// for the served bytes; sniffing here is the ingest gate, not the served-bytes defense.
|
|
10
|
+
|
|
11
|
+
/** The leading ASCII whitespace bytes skipped before the deny-list's first-byte-is-`<` check:
 * tab (0x09), newline (0x0A), carriage return (0x0D), and space (0x20). */
const WHITESPACE = new Set([0x09, 0x0a, 0x0d, 0x20]);

/** The single byte `<` (0x3C). A payload whose first non-whitespace byte is `<` is markup (SVG, HTML,
 * XML) and is denied regardless of its declared type or any site `allowedTypes`. */
const LT = 0x3c;

/** Declared content types denied at the engine level, independent of any site `allowedTypes`. SVG and
 * the markup types carry active content (script, foreignObject), so they never ingest as media.
 * `text/xml` is listed because it is the registered alias of `application/xml`, and
 * `application/xhtml+xml` because XHTML carries script just as HTML does. */
const DENIED_TYPES = new Set([
  'image/svg+xml',
  'image/svg',
  'text/html',
  'application/xhtml+xml',
  'application/xml',
  'text/xml',
]);

/** The ISO-BMFF major-brand codes (at bytes 8..11 of an `ftyp` box) that mean an AVIF image. */
const AVIF_BRANDS = new Set(['avif', 'avis']);

/** The ISO-BMFF major-brand codes that mean a HEIF/HEIC image. */
const HEIC_BRANDS = new Set(['heic', 'heix', 'heif', 'hevc', 'hevx', 'mif1', 'msf1']);
|
|
28
|
+
|
|
29
|
+
/** Report whether `bytes` contains the sequence `magic` starting at `offset`. Returns false when
 * `bytes` ends before the whole sequence could fit. */
function matches(bytes: Uint8Array, offset: number, magic: number[]): boolean {
  const end = offset + magic.length;
  if (end > bytes.length) return false;
  return magic.every((expected, index) => bytes[offset + index] === expected);
}
|
|
38
|
+
|
|
39
|
+
/** Read the four bytes at `offset..offset+3` as a four-character string, one character per byte.
 * Returns null when `bytes` is too short to hold all four. Used to read an ISO-BMFF brand code. */
function ascii4(bytes: Uint8Array, offset: number): string | null {
  const end = offset + 4;
  if (end > bytes.length) return null;

  let text = '';
  for (let at = offset; at < end; at++) {
    text += String.fromCharCode(bytes[at]);
  }
  return text;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Identify an image's MIME type from its leading magic bytes. This is the server's source of truth
 * for an upload's type; the client-declared type is advisory.
 *
 * Recognized signatures:
 * - JPEG: `FF D8 FF` at offset 0.
 * - PNG: `89 50 4E 47` at offset 0 (the first half of the 8-byte signature is enough).
 * - GIF: `GIF8` at offset 0 (shared by the 87a and 89a versions).
 * - WebP: `RIFF` at offset 0 with the form type `WEBP` at offset 8.
 * - AVIF / HEIC: an ISO-BMFF `ftyp` tag at offset 4, classified by the major brand at offset 8.
 *
 * @returns The recognized type, or null for an unrecognized magic, an unknown or truncated `ftyp`
 *   brand, or an input too short for a given check.
 */
export function sniffMediaType(bytes: Uint8Array): string | null {
  // Signatures that are a fixed byte run at the very start of the file.
  const leadingSignatures: Array<[string, number[]]> = [
    ['image/jpeg', [0xff, 0xd8, 0xff]],
    ['image/png', [0x89, 0x50, 0x4e, 0x47]],
    ['image/gif', [0x47, 0x49, 0x46, 0x38]],
  ];
  for (const [mediaType, magic] of leadingSignatures) {
    if (matches(bytes, 0, magic)) return mediaType;
  }

  // WebP needs both the RIFF container tag and the WEBP form type.
  const isRiff = matches(bytes, 0, [0x52, 0x49, 0x46, 0x46]);
  if (isRiff && matches(bytes, 8, [0x57, 0x45, 0x42, 0x50])) {
    return 'image/webp';
  }

  // Everything else recognized here is an ISO base media file with an 'ftyp' box.
  if (!matches(bytes, 4, [0x66, 0x74, 0x79, 0x70])) return null;

  const majorBrand = ascii4(bytes, 8);
  if (majorBrand === null) return null;
  if (AVIF_BRANDS.has(majorBrand)) return 'image/avif';
  return HEIC_BRANDS.has(majorBrand) ? 'image/heic' : null;
}
|
|
79
|
+
|
|
80
|
+
/** The bare file extension (no dot) for each sniffed media type the upload path stores. The ext is
 * derived from the server-sniffed type, never the client filename, so the stored key and the
 * delivery extension allow-list always agree. A Map rather than a plain object, so a lookup can
 * only ever hit one of these entries and never an inherited Object.prototype member. */
const EXT_BY_TYPE: ReadonlyMap<string, string> = new Map([
  ['image/jpeg', 'jpg'],
  ['image/png', 'png'],
  ['image/gif', 'gif'],
  ['image/webp', 'webp'],
  ['image/avif', 'avif'],
]);

/** The storage extension for a sniffed media type, or null for a type the upload path does not store
 * (HEIC, an unknown type). Driven by the sniffed type, so the key's ext is server-owned.
 *
 * Any string that is not an exact key of the table yields null, including names such as
 * `constructor`, `toString`, or `__proto__` that a plain-object lookup would have resolved to an
 * inherited, non-string value.
 *
 * @param type The MIME type to map, as returned by the sniff.
 * @returns The bare extension, or null when the type is not stored. */
export function extForMediaType(type: string): string | null {
  return EXT_BY_TYPE.get(type) ?? null;
}
|
|
96
|
+
|
|
97
|
+
/**
 * The engine-level upload deny predicate. Returns true (reject) when the upload is markup a site can
 * never override. Two independent checks apply:
 *
 * 1. The declared type, reduced to its bare media type, is in DENIED_TYPES. Parameters and
 *    surrounding whitespace are dropped and the case is folded first, so a value such as
 *    `Image/SVG+XML; charset=utf-8` is matched the same as `image/svg+xml`.
 * 2. The payload's first significant byte is `<` (0x3C). A leading UTF-8 byte-order mark
 *    (EF BB BF) and leading ASCII whitespace are skipped before looking, so markup saved with a BOM
 *    is caught as well.
 *
 * This runs ahead of and independent of any site `allowedTypes`, since SVG and HTML carry active
 * content. The byte check catches a markup payload sent under a permitted declared type.
 *
 * @param bytes The upload payload (only its leading bytes are examined).
 * @param declaredType The client-declared content type, if any.
 * @returns True when the upload must be rejected.
 */
export function isDeniedUpload(bytes: Uint8Array, declaredType?: string): boolean {
  if (declaredType !== undefined) {
    // Compare only the part before any `;` parameter, trimmed and lowercased.
    const semicolon = declaredType.indexOf(';');
    const bareType = (semicolon === -1 ? declaredType : declaredType.slice(0, semicolon))
      .trim()
      .toLowerCase();
    if (DENIED_TYPES.has(bareType)) return true;
  }

  // A UTF-8 BOM is invisible to a markup parser, so it must not hide the opening `<`.
  const hasUtf8Bom =
    bytes.length >= 3 && bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf;

  for (let i = hasUtf8Bom ? 3 : 0; i < bytes.length; i++) {
    if (WHITESPACE.has(bytes[i])) continue;
    return bytes[i] === LT;
  }

  // An empty or all-whitespace payload has no opening byte to deny here; the type and size gates own it.
  return false;
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// cairn-cms: the media object store, a thin wrapper over the per-site R2 bucket binding. The media
|
|
2
|
+
// pipeline codes against the narrow MediaStore seam rather than the full R2Bucket API, so the four
|
|
3
|
+
// operations it needs (store, probe, read, remove a content-addressed object) are typed and testable
|
|
4
|
+
// against an in-memory double, and a later storage-backend swap touches this one factory. The bytes
|
|
5
|
+
// are content-addressed, so a put under an existing key is a harmless rewrite of identical bytes.
|
|
6
|
+
import type {
|
|
7
|
+
R2Bucket,
|
|
8
|
+
R2Conditional,
|
|
9
|
+
R2HTTPMetadata,
|
|
10
|
+
R2Object,
|
|
11
|
+
R2ObjectBody,
|
|
12
|
+
R2Range,
|
|
13
|
+
} from '@cloudflare/workers-types';
|
|
14
|
+
|
|
15
|
+
/** The slice of R2 the media pipeline is allowed to touch. The engine codes against this interface
 * instead of R2Bucket, which keeps R2's multipart, list, and wider conditional-read surface out of
 * the media code. */
export interface MediaStore {
  /** Write bytes at a content-addressed key.
   *
   * `httpMetadata` carries the response HTTP metadata (the content type). `customMetadata` is
   * optional; the upload records the full sha256 there, so a short-hash collision is detectable on
   * a later dedup probe. */
  put(
    key: string,
    bytes: ArrayBuffer | Uint8Array,
    httpMetadata?: R2HTTPMetadata,
    customMetadata?: Record<string, string>,
  ): Promise<void>;

  /** Fetch an object's metadata without its body; null when nothing is stored at the key. This is
   * the dedup probe. */
  head(key: string): Promise<R2Object | null>;

  /** Fetch an object for streaming to a delivery response; null when the key is absent.
   *
   * The delivery route forwards `onlyIf` and `range` for conditional and partial reads. An `onlyIf`
   * etag match yields a body-less R2Object (the 304 shape), which is why the return type includes
   * `R2Object` alongside `R2ObjectBody`. */
  get(
    key: string,
    opts?: { range?: R2Range; onlyIf?: R2Conditional },
  ): Promise<R2ObjectBody | R2Object | null>;

  /** Delete the object at the key. Deleting an absent key is a no-op, per the R2 contract. */
  delete(key: string): Promise<void>;
}
|
|
40
|
+
|
|
41
|
+
/** Adapt an R2 bucket binding to the MediaStore interface. `head`, `get`, and `delete` forward
 * straight to the binding. `put` packs whichever of the HTTP and custom metadata were supplied into
 * R2's options object (passing no options when neither was) and discards the R2Object the binding
 * returns, since the pipeline does not read it.
 *
 * @param bucket The R2 bucket binding to wrap.
 * @returns A MediaStore backed by that bucket. */
export function r2Store(bucket: R2Bucket): MediaStore {
  return {
    async put(key, bytes, httpMetadata, customMetadata) {
      if (!httpMetadata && !customMetadata) {
        await bucket.put(key, bytes, undefined);
        return;
      }

      const options: {
        httpMetadata?: R2HTTPMetadata;
        customMetadata?: Record<string, string>;
      } = {};
      if (httpMetadata) options.httpMetadata = httpMetadata;
      if (customMetadata) options.customMetadata = customMetadata;
      await bucket.put(key, bytes, options);
    },
    head(key) {
      return bucket.head(key);
    },
    get(key, opts) {
      return bucket.get(key, opts);
    },
    delete(key) {
      return bucket.delete(key);
    },
  };
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// cairn-cms: the Cloudflare Images transform URL. A delivery path names the original bytes; an
|
|
2
|
+
// on-demand variant is that path prefixed with `/cdn-cgi/image/<options>/`, where the options are a
|
|
3
|
+
// comma-joined list of resize and format directives Cloudflare reads at the edge. This module owns
|
|
4
|
+
// the option encoding and the stable option order, so the same spec always builds the same URL and
|
|
5
|
+
// a CDN cache keys on it cleanly. The delivery path is appended as-is when it already carries its
// leading slash; a path passed without one gets a slash added so it stays its own segment.
|
|
7
|
+
|
|
8
|
+
/**
 * One image variant: the resize and format directives Cloudflare Images applies to the original
 * bytes. All fields are optional. `width`, `height`, `quality`, and `fit` are written into the URL
 * only when set, while `format` and `gravity` are always written and fall back to `auto`.
 */
export interface VariantSpec {
  /** Target width, in pixels. */
  width?: number;

  /** Target height, in pixels. */
  height?: number;

  /** Output quality, from 1 to 100. */
  quality?: number;

  /** How the image is fitted into the target box. */
  fit?: 'scale-down' | 'contain' | 'cover' | 'crop' | 'pad';

  /** Crop focus: `auto`, `face`, or a coordinate string. */
  gravity?: 'auto' | 'face' | string;

  /** Output format: `auto` lets Cloudflare negotiate; any other value forces a codec. */
  format?: 'auto' | 'webp' | 'avif' | string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Build the on-demand Cloudflare Images transform URL for a delivery path.
 *
 * The directives are comma-joined in a fixed order: width, height, quality, fit, format, gravity.
 * The first four appear only when the spec sets them; format and gravity always appear and default
 * to `auto`. The result is `/cdn-cgi/image/<options><publicPath>`, with a slash inserted before
 * publicPath when it does not already start with one.
 *
 * @param publicPath The delivery path of the original bytes.
 * @param spec The variant directives to encode.
 * @returns The transform URL path.
 */
export function variantUrl(publicPath: string, spec: VariantSpec): string {
  // The directives that are emitted only when set, listed in their stable output order.
  const whenSet: [string, number | string | undefined][] = [
    ['width', spec.width],
    ['height', spec.height],
    ['quality', spec.quality],
    ['fit', spec.fit],
  ];
  const directives = whenSet
    .filter(([, value]) => value !== undefined)
    .map(([name, value]) => `${name}=${value}`);
  directives.push(`format=${spec.format ?? 'auto'}`, `gravity=${spec.gravity ?? 'auto'}`);

  // The source has to be its own path segment after the options. Without the separating slash the
  // options and the source would fuse, which Cloudflare reads as a malformed options list.
  const separator = publicPath.startsWith('/') ? '' : '/';
  return `/cdn-cgi/image/${directives.join(',')}${separator}${publicPath}`;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build a variant URL from a named preset: look up `presetName` in `variants` and hand its spec to
 * variantUrl.
 *
 * Only the presets defined directly on `variants` count. A name that merely matches an inherited
 * Object.prototype member ("toString", "constructor", "__proto__") is treated as unknown, so it
 * cannot slip past the check and silently render an unsized image.
 *
 * @param publicPath The delivery path of the original bytes.
 * @param presetName The preset to look up.
 * @param variants The configured presets, keyed by name.
 * @returns The transform URL for the preset.
 * @throws Error with a `cairn:`-prefixed message naming the preset when it is not configured, so a
 *   typo in a preset name fails loudly.
 */
export function presetUrl(
  publicPath: string,
  presetName: string,
  variants: Record<string, VariantSpec>,
): string {
  // A bare index lookup walks the prototype chain; restrict it to the object's own keys.
  const spec = Object.prototype.hasOwnProperty.call(variants, presetName)
    ? variants[presetName]
    : undefined;
  if (spec === undefined) {
    throw new Error(`cairn: unknown image variant preset "${presetName}"`);
  }
  return variantUrl(publicPath, spec);
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
// cairn-cms: the cross-branch media usage index, the where-used core of the admin Media Library.
|
|
2
|
+
// It answers "which entries reference this asset" for every committed asset, keyed by the content
|
|
3
|
+
// hash (the immutable truth, so a renamed slug never splits a row). The map unions two sources:
|
|
4
|
+
// the published corpus on main and every open cairn/* edit branch, so an asset that is referenced
|
|
5
|
+
// only in an unpublished draft still shows as in use and is not mistaken for an orphan.
|
|
6
|
+
//
|
|
7
|
+
// The main arm reads the content manifest's per-entry mediaRefs (the field manifestEntryFromFile
|
|
8
|
+
// records) and builds the reverse map; it never crawls the files, since the manifest already
|
|
9
|
+
// carries the refs. The branch arm cannot use a manifest (the content manifest is never committed
|
|
10
|
+
// to a branch), so it reconstructs each edited entry's path from the branch name, reads that one
|
|
11
|
+
// file, and runs the extractor directly.
|
|
12
|
+
//
|
|
13
|
+
// A site's published use and an open-branch edit of the SAME entry are distinct origins (decision
|
|
14
|
+
// 4): both rows are kept, and the consumer groups by origin. Within one entry the extractor dedupes
|
|
15
|
+
// by hash, so an asset used twice in one entry is a single row.
|
|
16
|
+
//
|
|
17
|
+
// CAVEAT (carry it to the screen): a reference hidden inside a raw-HTML block (an <img> the markdown
|
|
18
|
+
// parser sees as opaque HTML, not an image node) is undetectable here. The Library's verdict wording
|
|
19
|
+
// is therefore "found in N entries" / "no references found", never a bare "unused": absence of a row
|
|
20
|
+
// means no reference was found, not a proof that none exists.
|
|
21
|
+
import type { ConceptDescriptor } from '../content/types.js';
|
|
22
|
+
import type { RepoRef } from '../github/types.js';
|
|
23
|
+
import type { Manifest } from '../content/manifest.js';
|
|
24
|
+
import { listBranches } from '../github/branches.js';
|
|
25
|
+
import { readRaw } from '../github/repo.js';
|
|
26
|
+
import { PENDING_PREFIX, parsePendingBranch } from '../content/pending.js';
|
|
27
|
+
import { findConcept } from '../content/concepts.js';
|
|
28
|
+
import { isValidId, filenameFromId } from '../content/ids.js';
|
|
29
|
+
import { parseMarkdown } from '../content/frontmatter.js';
|
|
30
|
+
import { extractMediaRefs } from '../content/media-refs.js';
|
|
31
|
+
|
|
32
|
+
/**
 * The place a reference was found: either the published corpus on main, or one named open edit
 * branch.
 */
export type UsageOrigin =
  | { kind: 'published' }
  | { kind: 'branch'; branch: string };
|
|
34
|
+
|
|
35
|
+
/** A single entry that references an asset, shaped so the screen can link to it and group it. */
export interface UsageEntry {
  /** The concept id, for example "posts". */
  concept: string;

  /** The entry id, which is its filename stem. */
  id: string;

  /** Display title, taken from the manifest (published) or the frontmatter (branch). */
  title: string;

  /** Public permalink, carried from the manifest; present for a published entry. */
  permalink?: string;

  /** Whether the reference is published or lives on a cairn/* edit branch (and which one). */
  origin: UsageOrigin;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Maps a content hash to the distinct entries that reference it. A hash without a row means "no
 * references found", never a proven orphan: a reference inside a raw-HTML block is not detected.
 */
export type UsageIndex = Map<string, Array<UsageEntry>>;
|
|
52
|
+
|
|
53
|
+
/**
 * Options for building the usage index.
 *
 * `branches` is for a caller that has already listed the open cairn/* branches (the load path lists
 * them once for the media-union), so the index does not list them again. `strict` switches the
 * per-branch read from degrade-and-skip to fail-closed: a delete gate must not mistake a transient
 * branch-read failure for an absent reference, so in strict mode the failure is rethrown.
 */
export interface BuildUsageOptions {
  /** Names of the open cairn/* branches, already listed. When given, the index does not list. */
  branches?: string[];

  /** When true, a branch read that throws rejects the whole build so the caller can fail closed. */
  strict?: boolean;
}
|
|
63
|
+
|
|
64
|
+
/** Add a row to the bucket for its hash, starting a new bucket when the hash is not yet present. */
function push(index: UsageIndex, hash: string, entry: UsageEntry): void {
  const bucket = index.get(hash);
  if (!bucket) {
    index.set(hash, [entry]);
    return;
  }
  bucket.push(entry);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Assemble the hash-keyed usage index from two sources: main (via each manifest entry's mediaRefs)
 * and every open cairn/* branch (via the markdown of the entry that branch edits).
 *
 * A branch whose read throws is skipped by default, so one bad branch degrades rather than sinking
 * the whole Library display. That tolerance is wrong for the delete gate, where a transient read
 * failure would make a still-referenced asset look orphaned; the delete path passes
 * `strict: true` so the failure is rethrown and the caller fails closed.
 *
 * @param repo The repository to read from; each branch read overrides its `branch`.
 * @param token The token passed to the branch listing and the file reads.
 * @param concepts The configured concepts; a branch naming any other concept is skipped.
 * @param manifest The content manifest supplying the published entries and their mediaRefs.
 * @param opts `branches` reuses a branch list the caller already holds instead of listing again;
 *   `strict` turns a branch read failure into a rejection.
 * @returns The index, with published rows first and branch rows after, in branch order.
 */
export async function buildUsageIndex(
  repo: RepoRef,
  token: string,
  concepts: ConceptDescriptor[],
  manifest: Manifest,
  opts: BuildUsageOptions = {},
): Promise<UsageIndex> {
  type BranchRow = { hash: string; entry: UsageEntry };

  const index: UsageIndex = new Map();

  // Published arm: the manifest already lists every entry's mediaRefs, so reversing that listing
  // needs no file reads.
  for (const published of manifest.entries) {
    const hashes = published.mediaRefs ?? [];
    for (const hash of hashes) {
      push(index, hash, {
        concept: published.concept,
        id: published.id,
        title: published.title,
        permalink: published.permalink,
        origin: { kind: 'published' },
      });
    }
  }

  // Branch arm, one branch: read the single entry file the branch edits and extract its refs.
  const rowsForBranch = async (name: string): Promise<BranchRow[]> => {
    // Same guard the branch tooling applies. A malformed name, an id that fails the slug rule
    // (the entry path is built from it, so this is the path confinement), or a concept the site
    // does not configure is skipped before any read is attempted.
    const ref = parsePendingBranch(name);
    if (!ref) return [];
    if (!isValidId(ref.id)) return [];
    const concept = findConcept(concepts, ref.concept);
    if (!concept) return [];

    // The path follows from the branch name, so no tree listing is needed.
    const path = `${concept.dir}/${filenameFromId(ref.id)}`;
    try {
      const raw = await readRaw({ ...repo, branch: name }, path, token);
      // No such file on the branch, so there is nothing to extract.
      if (raw === null) return [];

      const { frontmatter, body } = parseMarkdown(raw);
      const declared = frontmatter.title;
      // Fall back to the entry id when the frontmatter title is missing, non-string, or blank.
      const title = typeof declared === 'string' && declared.trim().length > 0 ? declared : ref.id;

      return Array.from(
        extractMediaRefs(frontmatter, body, concept.fields),
        (hash): BranchRow => ({
          hash,
          entry: { concept: concept.id, id: ref.id, title, origin: { kind: 'branch', branch: name } },
        }),
      );
    } catch (err) {
      // Non-strict: drop just this branch. Strict: let the failure reject the whole build so the
      // delete gate can fail closed.
      if (!opts.strict) return [];
      throw err;
    }
  };

  // Reuse the caller's branch list when it passed one; otherwise list the pending branches.
  const names = opts.branches ?? (await listBranches(repo, PENDING_PREFIX, token));

  // Read the branches in parallel, so the latency floor is one round trip rather than N. workerd
  // self-throttles to 6 simultaneous outbound connections, so this batch and the load path's
  // media-union batch each stay under the limit; do NOT merge the two into one wider Promise.all,
  // since the combined fan-out would queue behind that throttle.
  const perBranch = await Promise.all(names.map((name) => rowsForBranch(name)));

  // Merge in branch order so the index reads stably.
  perBranch.forEach((rows) => {
    rows.forEach(({ hash, entry }) => push(index, hash, entry));
  });

  return index;
}
|