@glw907/cairn-cms 0.58.0 → 0.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +84 -0
- package/dist/components/CairnAdmin.svelte +3 -0
- package/dist/components/CairnMediaLibrary.svelte +1101 -27
- package/dist/components/CairnMediaLibrary.svelte.d.ts +10 -2
- package/dist/components/CairnTidySettings.svelte +553 -0
- package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
- package/dist/components/EditPage.svelte +371 -2
- package/dist/components/MarkdownEditor.svelte +168 -1
- package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
- package/dist/components/TidyReview.svelte +463 -0
- package/dist/components/TidyReview.svelte.d.ts +47 -0
- package/dist/components/admin-icons.d.ts +1 -0
- package/dist/components/admin-icons.js +1 -0
- package/dist/components/cairn-admin.css +913 -2
- package/dist/components/editor-tidy.d.ts +31 -0
- package/dist/components/editor-tidy.js +199 -0
- package/dist/components/index.d.ts +1 -0
- package/dist/components/index.js +1 -0
- package/dist/components/markdown-directives.d.ts +16 -0
- package/dist/components/markdown-directives.js +34 -0
- package/dist/components/objective-errors.d.ts +30 -0
- package/dist/components/objective-errors.js +113 -0
- package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
- package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
- package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
- package/dist/components/spellcheck-worker.d.ts +80 -0
- package/dist/components/spellcheck-worker.js +161 -0
- package/dist/components/spellcheck.d.ts +146 -0
- package/dist/components/spellcheck.js +541 -0
- package/dist/components/tidy-categorize.d.ts +67 -0
- package/dist/components/tidy-categorize.js +392 -0
- package/dist/components/tidy-diff.d.ts +60 -0
- package/dist/components/tidy-diff.js +147 -0
- package/dist/components/tidy-validate.d.ts +37 -0
- package/dist/components/tidy-validate.js +174 -0
- package/dist/content/compose.d.ts +1 -1
- package/dist/content/compose.js +11 -0
- package/dist/content/site-dictionary.d.ts +31 -0
- package/dist/content/site-dictionary.js +82 -0
- package/dist/content/types.d.ts +25 -0
- package/dist/doctor/checks-local.d.ts +1 -0
- package/dist/doctor/checks-local.js +55 -6
- package/dist/doctor/index.js +2 -1
- package/dist/log/events.d.ts +1 -1
- package/dist/media/bulk-delete-plan.d.ts +24 -0
- package/dist/media/bulk-delete-plan.js +25 -0
- package/dist/media/orphan-scan.d.ts +37 -0
- package/dist/media/orphan-scan.js +42 -0
- package/dist/media/reconcile.d.ts +3 -0
- package/dist/media/reconcile.js +3 -2
- package/dist/nav/site-config.d.ts +98 -0
- package/dist/nav/site-config.js +132 -0
- package/dist/sveltekit/admin-dispatch.d.ts +2 -0
- package/dist/sveltekit/admin-dispatch.js +6 -2
- package/dist/sveltekit/cairn-admin.d.ts +16 -1
- package/dist/sveltekit/cairn-admin.js +28 -3
- package/dist/sveltekit/content-routes.d.ts +171 -4
- package/dist/sveltekit/content-routes.js +597 -3
- package/dist/sveltekit/index.d.ts +1 -1
- package/dist/sveltekit/tidy-prompt.d.ts +11 -0
- package/dist/sveltekit/tidy-prompt.js +118 -0
- package/package.json +10 -1
- package/src/lib/components/CairnAdmin.svelte +3 -0
- package/src/lib/components/CairnMediaLibrary.svelte +1101 -27
- package/src/lib/components/CairnTidySettings.svelte +553 -0
- package/src/lib/components/EditPage.svelte +371 -2
- package/src/lib/components/MarkdownEditor.svelte +168 -1
- package/src/lib/components/TidyReview.svelte +463 -0
- package/src/lib/components/admin-icons.ts +1 -0
- package/src/lib/components/cairn-admin.css +25 -0
- package/src/lib/components/editor-tidy.ts +241 -0
- package/src/lib/components/index.ts +1 -0
- package/src/lib/components/markdown-directives.ts +35 -0
- package/src/lib/components/objective-errors.ts +155 -0
- package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
- package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
- package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
- package/src/lib/components/spellcheck-worker.ts +279 -0
- package/src/lib/components/spellcheck.ts +679 -0
- package/src/lib/components/tidy-categorize.ts +460 -0
- package/src/lib/components/tidy-diff.ts +196 -0
- package/src/lib/components/tidy-validate.ts +202 -0
- package/src/lib/content/compose.ts +11 -1
- package/src/lib/content/site-dictionary.ts +84 -0
- package/src/lib/content/types.ts +25 -0
- package/src/lib/doctor/checks-local.ts +59 -5
- package/src/lib/doctor/index.ts +2 -0
- package/src/lib/log/events.ts +9 -1
- package/src/lib/media/bulk-delete-plan.ts +54 -0
- package/src/lib/media/orphan-scan.ts +74 -0
- package/src/lib/media/reconcile.ts +3 -2
- package/src/lib/nav/site-config.ts +197 -0
- package/src/lib/sveltekit/admin-dispatch.ts +7 -3
- package/src/lib/sveltekit/cairn-admin.ts +38 -4
- package/src/lib/sveltekit/content-routes.ts +795 -7
- package/src/lib/sveltekit/index.ts +1 -0
- package/src/lib/sveltekit/tidy-prompt.ts +153 -0
|
@@ -10,13 +10,22 @@ import { deriveExcerpt } from '../content/excerpt.js';
|
|
|
10
10
|
import { asString } from '../content/identity.js';
|
|
11
11
|
import { isValidId, slugify, filenameFromId, composeDatedId, slugFromId, renameId } from '../content/ids.js';
|
|
12
12
|
import { appCredentials } from '../github/credentials.js';
|
|
13
|
-
import { listMarkdown, readRaw, commitFiles } from '../github/repo.js';
|
|
13
|
+
import { listMarkdown, readRaw, commitFile, commitFiles } from '../github/repo.js';
|
|
14
14
|
import { branchHeadSha, createBranch, deleteBranch, listBranches } from '../github/branches.js';
|
|
15
15
|
import { PENDING_PREFIX, pendingBranch, parsePendingBranch } from '../content/pending.js';
|
|
16
16
|
import { cachedInstallationToken } from '../github/signing.js';
|
|
17
17
|
import { emptyManifest, manifestEntryFromFile, parseManifest, serializeManifest, upsertEntry, removeEntry, inboundLinks } from '../content/manifest.js';
|
|
18
18
|
import { isConflict } from '../github/types.js';
|
|
19
19
|
import { log } from '../log/index.js';
|
|
20
|
+
import { dictionaryFileForDialect, DEFAULT_TIDY_MODEL, resolveTidyConventions, parseSiteConfig, setTidy, validateTidyConventions, TidyConventionsError } from '../nav/site-config.js';
|
|
21
|
+
import { buildTidyPrompt } from './tidy-prompt.js';
|
|
22
|
+
// Server-only: the Anthropic SDK ships the API-key path and never reaches a browser bundle. It is
|
|
23
|
+
// imported only here (a Worker module no component imports statically), and the server-only-deps test
|
|
24
|
+
// guards that boundary. The default export is the Anthropic client class; the structural TidyClient
|
|
25
|
+
// type below keeps the action's surface small and the test seam injectable, so the SDK's deep types
|
|
26
|
+
// never leak into a public signature.
|
|
27
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
28
|
+
import { parseDictionary, mergeDictionaryWords, serializeDictionary, isValidDictionaryWord } from '../content/site-dictionary.js';
|
|
20
29
|
import { issueCsrfToken, validateCsrfHeader } from './csrf.js';
|
|
21
30
|
import { requireSession } from './guard.js';
|
|
22
31
|
import { sniffMediaType, isDeniedUpload, extForMediaType } from '../media/sniff.js';
|
|
@@ -26,8 +35,37 @@ import { r2Store } from '../media/store.js';
|
|
|
26
35
|
import { parseMediaEntries, parseMediaManifest, upsertMediaEntry, removeMediaEntry, serializeMediaManifest } from '../media/manifest.js';
|
|
27
36
|
import { mediaLibraryEntry } from '../media/library-entry.js';
|
|
28
37
|
import { buildUsageIndex } from '../media/usage.js';
|
|
38
|
+
import { runReconcile, MEDIA_KEY_RE } from '../media/reconcile.js';
|
|
39
|
+
import { buildOrphanScan } from '../media/orphan-scan.js';
|
|
29
40
|
import { repointMediaRef, fillAltForHash } from '../content/media-rewrite.js';
|
|
30
41
|
import { planMediaRewrite } from '../media/rewrite-plan.js';
|
|
42
|
+
import { planBulkDelete } from '../media/bulk-delete-plan.js';
|
|
43
|
+
/** The Worker-side request deadline for the tidy model call: 30 seconds. A tidy call to Sonnet on a
|
|
44
|
+
* full entry can run many seconds, so the action bounds it with an AbortSignal and maps the overrun to
|
|
45
|
+
* a retryable fail(502). This sits well under Cloudflare's per-request wall-clock ceiling (a Worker
|
|
46
|
+
* invocation can run far longer, but a single subrequest left open near that ceiling would surface as a
|
|
47
|
+
* platform timeout the action could not shape into a clean retry). 30s comfortably covers a proofread
|
|
48
|
+
* of the bounded input (see MAX_TIDY_CHARS) while leaving headroom under the platform limit. */
|
|
49
|
+
const DEFAULT_TIDY_TIMEOUT_MS = 30_000;
|
|
50
|
+
/** The fallback site-config path when no nav menu names one: the convention every scaffolded site
|
|
51
|
+
* uses. The settings save edits the same committed YAML the nav editor does, so it resolves the path
|
|
52
|
+
* from the configured nav menu first and falls back to this default. */
|
|
53
|
+
const DEFAULT_SITE_CONFIG_PATH = 'src/lib/site.config.yaml';
|
|
54
|
+
/** Plain-language labels for the known tidy models, so the read-only model fact reads as a name rather
 * than a bare id. An unknown id falls back to itself. */
const TIDY_MODEL_LABELS = {
    'claude-sonnet-4-6': 'Claude Sonnet',
    'claude-haiku-4-5': 'Claude Haiku',
};
/**
 * The display label for a tidy model id, falling back to the raw id for an unknown model.
 *
 * @param {string} model - The tidy model id to label.
 * @returns {string} The friendly label when the id is a known key of TIDY_MODEL_LABELS, otherwise
 *   `model` unchanged (including a nullish `model`, which is returned as-is).
 */
function tidyModelLabel(model) {
    // Own-property check, not a bare lookup: `TIDY_MODEL_LABELS[model]` would also resolve inherited
    // Object.prototype members, so an id such as 'constructor' or 'toString' would come back as a
    // function instead of falling through to the raw id.
    const known = Object.prototype.hasOwnProperty.call(TIDY_MODEL_LABELS, model);
    return known ? TIDY_MODEL_LABELS[model] : model;
}
|
|
64
|
+
/** The input cap for a single tidy request: 24000 characters (~6k input tokens). A proofread runs at
|
|
65
|
+
* roughly input length, so this stays comfortably inside the 30s deadline; a longer entry refuses with
|
|
66
|
+
* fail(413) and the author tidies a selection instead. The cap is enforced BEFORE the model call, so an
|
|
67
|
+
* over-long body never spends a token or risks the deadline. */
|
|
68
|
+
const MAX_TIDY_CHARS = 24_000;
|
|
31
69
|
/** Resolve the effective preview for one concept: its `byConcept` override wins per key, with
|
|
32
70
|
* nullish coalescing so an override key that is present but undefined keeps the top-level value.
|
|
33
71
|
* Stylesheets are always shared, and the `byConcept` map never reaches the client. */
|
|
@@ -50,6 +88,11 @@ function conceptOf(runtime, params) {
|
|
|
50
88
|
}
|
|
51
89
|
export function createContentRoutes(runtime, deps = {}) {
|
|
52
90
|
const mintToken = deps.mintToken ?? ((env) => cachedInstallationToken(appCredentials(runtime.backend, env)));
|
|
91
|
+
// The default Anthropic factory builds the real SDK client from the resolved key. Tests inject a fake
|
|
92
|
+
// (deps.anthropic) so messages.create is stubbed and no network call or real key is ever needed. The
|
|
93
|
+
// SDK client satisfies TidyClient structurally; the cast names that to the compiler.
|
|
94
|
+
const anthropicClient = deps.anthropic ?? ((opts) => new Anthropic({ apiKey: opts.apiKey }));
|
|
95
|
+
const tidyTimeoutMs = deps.tidyTimeoutMs ?? DEFAULT_TIDY_TIMEOUT_MS;
|
|
53
96
|
/** Main's manifest, parsed. A missing file starts empty (a fresh repo before the first commit).
|
|
54
97
|
* Always read from main: pending branches carry no manifest copy. */
|
|
55
98
|
async function readManifest(token) {
|
|
@@ -233,6 +276,10 @@ export function createContentRoutes(runtime, deps = {}) {
|
|
|
233
276
|
flash = 'replaced';
|
|
234
277
|
else if (event.url.searchParams.get('altPropagated') === '1')
|
|
235
278
|
flash = 'altPropagated';
|
|
279
|
+
else if (event.url.searchParams.get('bulkDeleted') === '1')
|
|
280
|
+
flash = 'bulkDeleted';
|
|
281
|
+
else if (event.url.searchParams.get('orphansPurged') === '1')
|
|
282
|
+
flash = 'orphansPurged';
|
|
236
283
|
const flashError = event.url.searchParams.get('error');
|
|
237
284
|
let token;
|
|
238
285
|
try {
|
|
@@ -361,13 +408,17 @@ export function createContentRoutes(runtime, deps = {}) {
|
|
|
361
408
|
// The media manifest joins the concurrent batch only when media is on, read from the default
|
|
362
409
|
// branch (pending branches carry no copy). A rejected media read degrades to null so the edit
|
|
363
410
|
// never throws on a missing or unreadable media.json; the projection below treats null as empty.
|
|
364
|
-
|
|
411
|
+
// The committed personal dictionary joins the concurrent batch, read from the default branch. A
|
|
412
|
+
// rejected read degrades to null so the edit never throws on a missing or unreadable dictionary;
|
|
413
|
+
// the projection below treats null as an empty word list (the editor falls back to dialect-only).
|
|
414
|
+
const [headSha, mainRaw, manifestRaw, mediaRaw, dictionaryRaw] = await Promise.all([
|
|
365
415
|
branchHeadSha(runtime.backend, branch, token),
|
|
366
416
|
readRaw(runtime.backend, path, token),
|
|
367
417
|
readRaw(runtime.backend, runtime.manifestPath, token),
|
|
368
418
|
runtime.resolvedAssets.enabled
|
|
369
419
|
? readRaw(runtime.backend, runtime.mediaManifestPath, token).catch(() => null)
|
|
370
420
|
: Promise.resolve(null),
|
|
421
|
+
readRaw(runtime.backend, dictionaryFilePath(), token).catch(() => null),
|
|
371
422
|
]);
|
|
372
423
|
const pending = headSha !== null;
|
|
373
424
|
const raw = pending ? await readRaw({ ...runtime.backend, branch }, path, token) : mainRaw;
|
|
@@ -421,8 +472,28 @@ export function createContentRoutes(runtime, deps = {}) {
|
|
|
421
472
|
publishedFlash: event.url.searchParams.get('published') === '1',
|
|
422
473
|
discardedFlash: event.url.searchParams.get('discarded') === '1',
|
|
423
474
|
preview: resolvePreview(runtime.preview, concept.id),
|
|
475
|
+
// composeRuntime always resolves this from the site config's dialect; default a hand-built
|
|
476
|
+
// runtime that omits it to the US English dictionary so the editor always has a real filename.
|
|
477
|
+
spellcheckDictionary: runtime.spellcheckDictionary ?? dictionaryFileForDialect(undefined),
|
|
478
|
+
// The committed personal-dictionary words, normalized to the canonical sorted, deduplicated set
|
|
479
|
+
// so the editor seeds the Worker's personal layer with a clean list. A missing or unreadable file
|
|
480
|
+
// is an empty list (the dialect-only fallback).
|
|
481
|
+
siteDictionary: mergeDictionaryWords(parseDictionary(dictionaryRaw), []),
|
|
482
|
+
// The editor-tier tidy facts: the master switch, the model (for the head pill), and the resolved
|
|
483
|
+
// conventions (the because-line and category inference read only these). The API key is never
|
|
484
|
+
// exposed here. A site with no tidy block reads disabled with the default conventions.
|
|
485
|
+
tidy: {
|
|
486
|
+
enabled: runtime.tidy?.enabled ?? false,
|
|
487
|
+
model: runtime.tidy?.model || DEFAULT_TIDY_MODEL,
|
|
488
|
+
conventions: resolveTidyConventions(runtime.tidy?.conventions),
|
|
489
|
+
},
|
|
424
490
|
};
|
|
425
491
|
}
|
|
492
|
+
/** Resolve the repo-relative path of the committed personal dictionary. The runtime's own
 * `dictionaryPath` wins when it is set; a runtime that leaves it nullish gets the conventional
 * `.cairn/` location instead. */
function dictionaryFilePath() {
    const { dictionaryPath } = runtime;
    return dictionaryPath ?? 'src/content/.cairn/dictionary.txt';
}
|
|
426
497
|
/** Log a failed commit: a conflict is the expected last-writer-wins outcome, so it warns with a
|
|
427
498
|
* reason; any other error is unexpected and logs at error with the stringified cause. Publish
|
|
428
499
|
* failures carry the same shape under their own event name. */
|
|
@@ -1073,6 +1144,245 @@ export function createContentRoutes(runtime, deps = {}) {
|
|
|
1073
1144
|
log.info('media.deleted', { editor: editor.email, hash });
|
|
1074
1145
|
throw redirect(303, '/admin/media?deleted=1');
|
|
1075
1146
|
}
|
|
1147
|
+
/** Bulk safe-delete a multi-select of committed media assets. This is mediaDeleteAction extended to
 * many items, with the same safety primitives and one rule that defines the batch: the gate is ONE
 * shared strict cross-branch usage index built per batch, never N per-item reads. The fail-closed
 * posture is for the WHOLE batch: if that single strict index cannot complete, the action refuses
 * everything and commits nothing, rather than risk deleting bytes a branch still references.
 *
 * Skip-and-report, never force: planBulkDelete partitions the selection against the strict index
 * into `deletable` and `skipped`. An in-use item is skipped and reported, never bulk-force-deleted;
 * forced in-use deletion stays the single-item typed-slug path.
 *
 * The order is load-bearing, mirroring single delete: ONE commit removes every deletable row FIRST,
 * then the R2 objects are deleted (commit-row-then-delete-R2). A failure after the commit leaves
 * bytes with no row (a benign orphan) rather than a row pointing at deleted bytes. Each R2 delete is
 * best-effort and batch-resilient: a per-object error is reported in `failed` and never aborts the
 * rest of the batch.
 *
 * @param event The SvelteKit action event; reads `platform.env` and the request's form data.
 * @returns `fail(503, { error })` when media is off, the bucket is unbound, or the usage index cannot
 *   be built; otherwise `{ deleted, skipped, failed }` (there is no success redirect). */
async function mediaBulkDelete(event) {
    const editor = requireSession(event);
    const token = await mintToken(event.platform?.env ?? {});
    // Resolve the R2 bucket before anything else is read or written, so a media-off site or a missing
    // binding refuses up front (the same order the orphan scan and the purge use) without spending a
    // manifest read on a request that can never proceed.
    const resolved = runtime.resolvedAssets;
    if (!resolved.enabled) {
        return fail(503, { error: 'Media is not enabled for this site.' });
    }
    const platformEnv = event.platform?.env ?? {};
    const rawBucket = platformEnv[resolved.bucketBinding];
    if (!rawBucket) {
        return fail(503, { error: 'The media bucket is not bound.' });
    }
    const store = r2Store(rawBucket);
    // Read the selected hashes from the form. Accept the repeated `hash` field, falling back to a JSON
    // `hashes` array. A value that fails MEDIA_HASH_RE is dropped silently rather than surfaced as a
    // skip (it was never a real selection).
    const form = await event.request.formData();
    let raw = form.getAll('hash').map(String);
    if (raw.length === 0) {
        const json = form.get('hashes');
        if (typeof json === 'string') {
            try {
                const parsed = JSON.parse(json);
                if (Array.isArray(parsed))
                    raw = parsed.map(String);
            }
            catch {
                // Unparseable JSON is treated as an empty selection, not an error.
                raw = [];
            }
        }
    }
    // Collapse repeats (first occurrence wins the position): a hash posted twice is still one asset,
    // so it must count once in the commit message and appear once in the result lists.
    const selected = [...new Set(raw.filter((h) => MEDIA_HASH_RE.test(h)))];
    // Read the fresh media manifest (the deletable rows come from here, by hash).
    const manifest = parseMediaManifest(parseMediaJson(await readRaw(runtime.backend, runtime.mediaManifestPath, token)));
    // THE fail-closed gate for the whole batch: one shared strict usage index. Any throw from the
    // build refuses the whole batch rather than mistaking a still-referenced asset for an orphan.
    // Build exactly one index, never one per item.
    let index;
    try {
        index = await buildUsageIndex(runtime.backend, token, runtime.concepts, await readManifest(token), { strict: true });
    }
    catch {
        return fail(503, { error: 'Could not verify where these assets are used. Try again.' });
    }
    // The pure partition: membership in the fresh strict index is the gate, never the display count.
    const plan = planBulkDelete(selected, index, manifest);
    // An all-skipped or empty batch is a no-op success: nothing committed, nothing deleted.
    if (plan.deletable.length === 0) {
        return { deleted: [], skipped: plan.skipped, failed: [] };
    }
    // ONE commit removing EVERY deletable row, folded over removeMediaEntry.
    let next = manifest;
    for (const hash of plan.deletable)
        next = removeMediaEntry(next, hash);
    const commitFields = { concept: 'media', id: 'bulk', editor: editor.email };
    try {
        await commitFiles(runtime.backend, [{ path: runtime.mediaManifestPath, content: serializeMediaManifest(next) }], { message: `Delete ${plan.deletable.length} media assets`, author: { name: editor.displayName, email: editor.email } }, token);
        log.info('commit.succeeded', commitFields);
    }
    catch (err) {
        // NOTE(review): this relies on commitFailure never returning normally (it is not visible
        // here); if it could return, the R2 deletes below would run after a failed commit. Confirm.
        commitFailure(commitFields, err, '/admin/media', 'The media manifest changed since you opened it. Reload and try again.');
    }
    // THEN delete each deletable hash's R2 object (the load-bearing order, see the docstring). Best
    // effort and batch-resilient: a thrown key derivation or a delete error is reported in `failed`
    // and the loop continues.
    const deleted = [];
    const failed = [];
    for (const hash of plan.deletable) {
        try {
            const row = manifest[hash];
            await store.delete(r2Key(row.hash, row.ext));
            deleted.push(hash);
        }
        catch (err) {
            failed.push({ hash, error: err instanceof Error ? err.message : String(err) });
        }
    }
    // Record the failed count too: a failed R2 delete leaves bytes with no manifest row, and without
    // this the only trace of it would be the response body the client received.
    log.info('media.bulk_deleted', { editor: editor.email, deleted: deleted.length, skipped: plan.skipped.length, failed: failed.length });
    return { deleted, skipped: plan.skipped, failed };
}
|
|
1248
|
+
/** On-demand orphan scan. Read-only: it reconciles the R2 bucket against the media manifest and joins
 * the result with one strict cross-branch usage index, then hands all three to buildOrphanScan.
 *
 * Refusals, in order: a site with media disabled and a missing bucket binding both return
 * fail(503) before anything is read. After the manifest is read, the reconcile and the strict usage
 * build share a single try/catch, so a throw from either one refuses the whole scan with fail(503)
 * instead of returning a partial result. A verdict computed from a partial read would feed the
 * irreversible purge, which is why the scan refuses rather than guesses.
 *
 * @param event The SvelteKit action event; only `platform.env` is read from it.
 * @returns A fail(503) refusal, or whatever buildOrphanScan projects from the three inputs. */
async function mediaOrphanScan(event) {
    requireSession(event);
    const token = await mintToken(event.platform?.env ?? {});
    // The reconcile is handed the raw binding itself (not an r2Store wrapper), so keep it raw.
    const { resolvedAssets: assets } = runtime;
    if (!assets.enabled) {
        return fail(503, { error: 'Media is not enabled for this site.' });
    }
    const env = event.platform?.env ?? {};
    const bucket = env[assets.bucketBinding];
    if (!bucket) {
        return fail(503, { error: 'The media bucket is not bound.' });
    }
    // The fresh media manifest is the reconcile's manifest side.
    const mediaJson = await readRaw(runtime.backend, runtime.mediaManifestPath, token);
    const mediaManifest = parseMediaManifest(parseMediaJson(mediaJson));
    // Both steps below are unsafe to use partially, so they share one fail-closed try/catch. The
    // final projection stays outside it on purpose: only these two reads may turn into a 503.
    let reconciled;
    let usage;
    try {
        reconciled = await runReconcile(bucket, mediaManifest);
        usage = await buildUsageIndex(runtime.backend, token, runtime.concepts, await readManifest(token), { strict: true });
    }
    catch {
        return fail(503, { error: 'Could not check where files are used, so the scan was not run. Try again.' });
    }
    return buildOrphanScan(reconciled, mediaManifest, usage);
}
|
|
1294
|
+
/** Purge orphaned R2 objects: the one IRREVERSIBLE media action. There is no commit here, the
 * objects are deleted from the store directly, so every gate below runs before any delete.
 *
 * Gates, in order:
 *  1. Media must be enabled and the bucket bound, else fail(503).
 *  2. The typed confirm must equal the number of posted keys, and that number must be non-zero,
 *     else fail(400) and nothing is deleted.
 *  3. One strict cross-branch usage index is built for the whole batch; a throw from the build
 *     refuses the batch with fail(503). It is built exactly once, never once per key.
 *
 * The posted keys are not trusted. Each key is re-checked against the freshly read media manifest
 * and the fresh usage index: a key that does not match MEDIA_KEY_RE is dropped silently; a key whose
 * hash has a manifest row or appears in the usage index goes to `skippedClaimed` and its object is
 * left alone; only a key that passes both checks is deleted. Deletes are best-effort: a per-object
 * error is recorded in `failed` and the loop moves on to the next key.
 *
 * @param event The SvelteKit action event; reads `platform.env` and the form's `key` / `confirm`.
 * @returns A fail(...) refusal, or `{ purged, skippedClaimed, failed }`. */
async function mediaPurgeOrphans(event) {
    const editor = requireSession(event);
    const token = await mintToken(event.platform?.env ?? {});
    // Same media-off / missing-binding refusals as the scan; deletes go through the r2Store wrapper.
    const { resolvedAssets: assets } = runtime;
    if (!assets.enabled) {
        return fail(503, { error: 'Media is not enabled for this site.' });
    }
    const env = event.platform?.env ?? {};
    const bucket = env[assets.bucketBinding];
    if (!bucket) {
        return fail(503, { error: 'The media bucket is not bound.' });
    }
    const store = r2Store(bucket);
    // The selection and the typed confirm both come from the posted form.
    const form = await event.request.formData();
    const keys = form.getAll('key').map(String);
    const confirm = String(form.get('confirm') ?? '');
    // The irreversible gate: a non-empty selection whose size was typed back exactly.
    const confirmed = keys.length > 0 && confirm === String(keys.length);
    if (!confirmed) {
        return fail(400, { error: 'Type the number of files to confirm the purge.' });
    }
    // Re-derive against the current manifest, so a key claimed since the scan is never purged.
    const mediaJson = await readRaw(runtime.backend, runtime.mediaManifestPath, token);
    const manifest = parseMediaManifest(parseMediaJson(mediaJson));
    // Fail-closed for the whole batch: if the strict index cannot be built, nothing is purged.
    let usage;
    try {
        usage = await buildUsageIndex(runtime.backend, token, runtime.concepts, await readManifest(token), { strict: true });
    }
    catch {
        return fail(503, { error: 'Could not verify where these files are used. Try again.' });
    }
    const purged = [];
    const skippedClaimed = [];
    const failed = [];
    for (const key of keys) {
        const match = MEDIA_KEY_RE.exec(key);
        const hash = match?.[1];
        // Not a key in the media grammar: never a real orphan key, so drop it without reporting.
        if (hash === undefined) {
            continue;
        }
        // Claimed since the scan, either by a manifest row or by a reference the usage index found:
        // the bytes are live now, so they survive.
        if (manifest[hash] || usage.has(hash)) {
            skippedClaimed.push(key);
            continue;
        }
        // Still orphaned: delete the object directly. No commit, there is no manifest row.
        try {
            await store.delete(key);
            purged.push(key);
        }
        catch (err) {
            const error = err instanceof Error ? err.message : String(err);
            failed.push({ key, error });
        }
    }
    log.info('media.orphans_purged', { editor: editor.email, purged: purged.length });
    return { purged, skippedClaimed, failed };
}
|
|
1076
1386
|
/** Edit a committed asset's metadata: its display name, slug, and default alt. A single media.json
|
|
1077
1387
|
* row commit, with NO reference rewrite: the resolver and the delivery route key on the hash, so a
|
|
1078
1388
|
* rename never breaks an existing `media:` reference. The default alt is the asset's value for the
|
|
@@ -1425,7 +1735,291 @@ export function createContentRoutes(runtime, deps = {}) {
|
|
|
1425
1735
|
}
|
|
1426
1736
|
throw redirect(303, '/admin/media?altPropagated=1');
|
|
1427
1737
|
}
|
|
1428
|
-
|
|
1738
|
+
/** The cap on a personal-dictionary word, matched by isValidDictionaryWord. A word is one line, so
|
|
1739
|
+
* this bounds an abusive input; the real authority is the per-character validation, which rejects
|
|
1740
|
+
* whitespace and control bytes so a body can never inject an extra line into the committed file.
* addDictionaryWord passes this value as the second argument to isValidDictionaryWord for every
* candidate word before anything is merged or committed. */
|
|
1741
|
+
const MAX_DICTIONARY_WORD = 64;
|
|
1742
|
+
/** The cap on the words a single add request carries: an editor adds a handful at save time, never
|
|
1743
|
+
* a flood. Past this the body is treated as abusive and the surplus is dropped: addDictionaryWord
* applies the cap with slice(0, MAX_DICTIONARY_BATCH) AFTER validating and de-duplicating the
* candidates, so the first MAX_DICTIONARY_BATCH distinct valid words are the ones kept. */
|
|
1744
|
+
const MAX_DICTIONARY_BATCH = 100;
|
|
1745
|
+
/**
 * Merge `additions` into the committed personal dictionary and commit the result when it grew.
 *
 * The committed file is first normalized through mergeDictionaryWords (with no additions) so the
 * no-op check compares like with like; the additions are then merged into that canonical list. When
 * the merged list is no longer than the canonical one, nothing new was added and no commit is made,
 * so an idempotent add never pushes a commit. The first attempt and the post-conflict retry both go
 * through this function, so each re-reads the file and re-merges the same additions.
 *
 * @param token - the token passed to readRaw and commitFiles.
 * @param additions - the validated words to add.
 * @param editor - the session editor; `displayName` and `email` become the commit author.
 * @returns the merged word list, whether or not a commit was made.
 * @throws whatever commitFiles throws; the caller checks it with isConflict and retries once.
 */
async function mergeAndCommitDictionary(token, additions, editor) {
    const filePath = dictionaryFilePath();
    const committedRaw = await readRaw(runtime.backend, filePath, token);
    // Canonicalize what is already committed so a merely re-ordered file is not mistaken for growth.
    const before = mergeDictionaryWords(parseDictionary(committedRaw), []);
    const after = mergeDictionaryWords(before, additions);
    const grew = after.length !== before.length;
    if (grew) {
        const change = { path: filePath, content: serializeDictionary(after) };
        const commitMeta = {
            message: `Add to dictionary: ${additions.join(', ')}`,
            author: { name: editor.displayName, email: editor.email },
        };
        await commitFiles(runtime.backend, [change], commitMeta, token);
    }
    // Returned in both cases so the client can reconcile its pending additions.
    return after;
}
|
|
1765
|
+
/**
 * Resolve the repo-relative site-config path that the settings save reads and commits.
 *
 * @returns `runtime.navMenu.configPath` when a nav menu is configured with one, otherwise
 *   DEFAULT_SITE_CONFIG_PATH.
 */
function siteConfigPath() {
    const configuredPath = runtime.navMenu?.configPath;
    return configuredPath ?? DEFAULT_SITE_CONFIG_PATH;
}
|
|
1771
|
+
/**
 * Report whether a non-empty `ANTHROPIC_API_KEY` string is present in the event's platform env.
 * Only a boolean leaves this function; the key value itself is never returned.
 *
 * @param event - the request event; `event.platform.env` may be absent.
 * @returns true only when the env value is a string with at least one character.
 */
function keyConfigured(event) {
    const { ANTHROPIC_API_KEY: secret } = event.platform?.env ?? {};
    if (typeof secret !== 'string') {
        return false;
    }
    return secret.length > 0;
}
|
|
1778
|
+
/**
 * Load the data for the tidy settings screen (spec 2.8, Task 15).
 *
 * Requires a session, then reports the developer-tier facts (`tidyEnabled`, `keyConfigured`,
 * `model`, `modelLabel`) alongside the resolved editor-tier `conventions`. The combined `enabled`
 * flag is true only when `tidy.enabled` is exactly `true` AND the API key is present. The key is
 * surfaced as a presence flag only, never as a value. `saved` and `error` echo the query string the
 * save action redirects back with.
 *
 * @param event - the request event (session, platform env, and URL are read).
 * @returns the settings payload described above.
 */
function settingsLoad(event) {
    requireSession(event);
    const config = runtime.tidy;
    const switchedOn = config?.enabled === true;
    const hasKey = keyConfigured(event);
    // An unset (or empty) model falls back to the default.
    const model = config?.model || DEFAULT_TIDY_MODEL;
    const modelLabel = tidyModelLabel(model);
    const conventions = resolveTidyConventions(config?.conventions);
    const query = event.url.searchParams;
    return {
        enabled: switchedOn && hasKey,
        tidyEnabled: switchedOn,
        keyConfigured: hasKey,
        model,
        modelLabel,
        conventions,
        saved: query.get('saved') === '1',
        error: query.get('error'),
    };
}
|
|
1801
|
+
/**
 * Save the editor-tier tidy conventions into the committed site config.
 *
 * Flow: require a session; refuse with a 404 unless `runtime.tidy.enabled` is exactly `true`; parse
 * and validate the posted `conventions` JSON (a failure redirects back with the error message in the
 * query string); read the site config and parse it once so a malformed file fails before any write;
 * then commit `setTidy(raw, conventions)` with the session editor as author. A commit error that
 * isConflict recognizes is bounced back as a reload prompt; any other commit error is logged and
 * rethrown. Every successful path ends in a 303 redirect, so this function never returns normally.
 *
 * @param event - the form POST event.
 * @throws a 303 redirect on success, validation failure, or conflict; a 404 error when tidy is off
 *   or the site config is missing; the original error for any other commit failure.
 */
async function settingsSave(event) {
    const editor = requireSession(event);
    // No editor tier exists while tidy is off, so there is nothing to save.
    if (runtime.tidy?.enabled !== true) {
        throw error(404, 'Tidy is not enabled for this site');
    }
    const form = await event.request.formData();
    const bounceWithError = (message) => redirect(303, `/admin/settings?error=${encodeURIComponent(message)}`);
    let conventions;
    try {
        const posted = form.get('conventions') ?? '{}';
        conventions = validateTidyConventions(JSON.parse(String(posted)));
    }
    catch (err) {
        // Only a TidyConventionsError message is shown as-is; anything else gets the generic text.
        throw bounceWithError(err instanceof TidyConventionsError ? err.message : 'Invalid tidy settings');
    }
    const path = siteConfigPath();
    const token = await mintToken(event.platform?.env ?? {});
    const raw = await readRaw(runtime.backend, path, token);
    if (raw === null) {
        throw error(404, 'Site config not found');
    }
    // Parse purely for its throw: a broken base file must fail before the commit is attempted.
    parseSiteConfig(raw);
    const commitFields = { concept: 'settings', id: 'tidy', editor: editor.email };
    try {
        const nextConfig = setTidy(raw, conventions);
        const commitMeta = {
            message: 'Update tidy settings',
            author: { name: editor.displayName, email: editor.email },
        };
        await commitFile(runtime.backend, path, nextConfig, commitMeta, token);
        log.info('commit.succeeded', commitFields);
    }
    catch (err) {
        if (!isConflict(err)) {
            log.error('commit.failed', { ...commitFields, error: String(err) });
            throw err;
        }
        log.warn('commit.failed', { ...commitFields, reason: 'conflict' });
        throw bounceWithError('The site config changed since you opened it. Reload and reapply your edits.');
    }
    throw redirect(303, '/admin/settings?saved=1');
}
|
|
1846
|
+
/**
 * Add a word (or batch) to the git-committed personal dictionary (spec 1.6).
 *
 * Transport: a raw-body POST whose body is JSON `{ word }` and/or `{ words }`, with the CSRF token
 * checked by validateCsrfHeader BEFORE the session is read or any backend call is made.
 *
 * Validation is load-bearing because request input ends up committed to the repo: only string
 * candidates are considered, each must pass isValidDictionaryWord (bounded by MAX_DICTIONARY_WORD),
 * duplicates are removed, and the batch is capped at MAX_DICTIONARY_BATCH. A body that parses to
 * `null` or to a non-object simply yields no candidates. A body with no valid word is refused with
 * a 400 and nothing is committed.
 *
 * Commit-and-retry: mergeAndCommitDictionary is attempted at most twice. An error that isConflict
 * recognizes on the first attempt triggers one retry, which re-reads and re-merges the same
 * additions. A second conflict gives up with a 409. Any other error is rethrown unchanged.
 *
 * @param event - the request event (cookies, url, request, platform env are read).
 * @returns `{ words }` (the merged list) on success, or a `fail` envelope: 403 on CSRF failure, 400
 *   on an unreadable body or no valid word, 409 after two conflicts.
 */
async function addDictionaryWord(event) {
    // CSRF first: a failed check refuses before the session read or any backend call.
    if (!event.cookies || !validateCsrfHeader({ url: event.url, request: event.request, cookies: event.cookies })) {
        return fail(403, { error: 'csrf' });
    }
    const editor = requireSession(event);
    let payload;
    try {
        payload = JSON.parse(await event.request.text());
    }
    catch {
        return fail(400, { error: 'Could not read the dictionary request.' });
    }
    // JSON.parse happily returns `null` for the body "null"; optional chaining keeps that case on the
    // 400 path below instead of throwing a TypeError on the property read.
    const single = payload?.word;
    const many = payload?.words;
    const candidates = [
        ...(typeof single === 'string' ? [single] : []),
        ...(Array.isArray(many) ? many.filter((w) => typeof w === 'string') : []),
    ];
    // Validate, then dedupe, then cap: the cap applies to distinct valid words only.
    const valid = candidates.filter((w) => isValidDictionaryWord(w, MAX_DICTIONARY_WORD));
    const additions = [...new Set(valid)].slice(0, MAX_DICTIONARY_BATCH);
    if (additions.length === 0) {
        return fail(400, { error: 'No valid word to add to the dictionary.' });
    }
    const token = await mintToken(event.platform?.env ?? {});
    // One initial attempt plus one retry after a conflict; never loop beyond that.
    const maxAttempts = 2;
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
        try {
            const words = await mergeAndCommitDictionary(token, additions, editor);
            const logFields = attempt === 1
                ? { editor: editor.email, words: additions }
                : { editor: editor.email, words: additions, retried: true };
            log.info('dictionary.added', logFields);
            return { words };
        }
        catch (err) {
            // Only a conflict is retryable; everything else propagates to the caller.
            if (!isConflict(err)) {
                throw err;
            }
        }
    }
    // Conflicted on every attempt: give up. The 409 tells the client the words were NOT committed.
    log.warn('dictionary.add_conflict', { editor: editor.email, words: additions });
    return fail(409, { error: 'The dictionary changed while saving. It will retry on the next save.' });
}
|
|
1914
|
+
/**
 * Tidy: a light LLM copy-edit of the author's markdown (spec 2.1).
 *
 * Transport: a raw-body POST carrying JSON `{ text, scope }` with the CSRF token validated by
 * validateCsrfHeader. Only `text` is read here. The action commits nothing; it returns the model's
 * corrected text and leaves diffing to the caller.
 *
 * Gate order (each refusal happens before the next step, so a refused request spends nothing):
 *   1. validateCsrfHeader -> fail(403).
 *   2. requireSession.
 *   3. Tidy disabled or `ANTHROPIC_API_KEY` missing/empty -> fail(503).
 *   4. Body: malformed JSON -> fail(400); a body that is `null`, a non-object, or has no string
 *      `text` -> fail(400); text longer than MAX_TIDY_CHARS -> fail(413).
 *   5. Build the system prompt from the resolved conventions and call the model under a deadline.
 *
 * After the call:
 *   - any thrown error (deadline abort, client abort, model error) -> fail(502);
 *   - `stop_reason === 'refusal'` -> fail(422);
 *   - `stop_reason === 'max_tokens'` -> fail(413): the output was cut off, so returning it would
 *     present a truncated document as the corrected one;
 *   - no text in the response -> fail(502).
 *
 * The untrusted text is sent only as the user message, never interpolated into the system prompt.
 * The API key is neither returned nor logged, and no log line carries the content.
 *
 * @param event - the request event (cookies, url, request, platform env are read).
 * @returns `{ corrected, model, usage }` on success, otherwise a `fail` envelope as listed above.
 */
async function tidyAction(event) {
    // CSRF first: a failed check refuses before the session read and before any model call.
    if (!event.cookies || !validateCsrfHeader({ url: event.url, request: event.request, cookies: event.cookies })) {
        return fail(403, { error: 'csrf' });
    }
    const editor = requireSession(event);
    // Fail fast when tidy is off or the key is missing; nothing below runs in either case.
    const tidy = runtime.tidy;
    if (!tidy?.enabled) {
        return fail(503, { error: 'Tidy is not enabled for this site.' });
    }
    const env = (event.platform?.env ?? {});
    const apiKey = typeof env.ANTHROPIC_API_KEY === 'string' ? env.ANTHROPIC_API_KEY : '';
    if (!apiKey) {
        return fail(503, { error: 'Tidy is not configured: the Anthropic API key is missing.' });
    }
    // Parse and bound the body before the call so an unusable input never spends a token.
    let payload;
    try {
        payload = JSON.parse(await event.request.text());
    }
    catch {
        return fail(400, { error: 'Could not read the tidy request.' });
    }
    // JSON.parse returns `null` for the body "null"; optional chaining routes that to the 400 below
    // instead of throwing a TypeError on the property read.
    const text = typeof payload?.text === 'string' ? payload.text : '';
    if (text.length === 0) {
        return fail(400, { error: 'No text to tidy.' });
    }
    if (text.length > MAX_TIDY_CHARS) {
        return fail(413, { error: 'This is too long to tidy at once. Select a passage and tidy that instead.' });
    }
    // The system prompt comes from config only, so the author's text cannot reshape the instructions.
    const system = buildTidyPrompt(resolveTidyConventions(tidy.conventions));
    const model = tidy.model || DEFAULT_TIDY_MODEL;
    // Output budget for the call; a proofread is expected to run at roughly the input's length.
    const maxTokens = 16_000;
    // Bound the call with our own deadline so a slow call becomes a retryable fail(502).
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), tidyTimeoutMs);
    let message;
    try {
        const client = anthropicClient({ apiKey });
        message = await client.messages.create({
            model,
            max_tokens: maxTokens,
            system,
            messages: [{ role: 'user', content: text }],
        },
        // The signal rides the request options, so the deadline timer above actually cancels the call.
        { signal: controller.signal });
    }
    catch (err) {
        // Deadline overrun, abort, and model errors all map to the same retryable 502. The error is
        // deliberately not surfaced or logged: only the editor, the model, and whether we aborted.
        log.warn('tidy.error', { editor: editor.email, model, aborted: controller.signal.aborted });
        return fail(502, { error: 'Tidy could not finish. Try again.' });
    }
    finally {
        clearTimeout(timer);
    }
    // A model refusal is a clean 422: the author's text is untouched.
    if (message.stop_reason === 'refusal') {
        log.warn('tidy.refused', { editor: editor.email, model });
        return fail(422, { error: 'Tidy declined to edit this text.' });
    }
    // A response cut off at the output budget is an incomplete document. Refuse it rather than let a
    // truncated result be reviewed as if the missing tail had been deliberately removed.
    if (message.stop_reason === 'max_tokens') {
        log.warn('tidy.truncated', { editor: editor.email, model });
        return fail(413, { error: 'This is too long to tidy at once. Select a passage and tidy that instead.' });
    }
    // Concatenate the text blocks (a normal response has one). The filter guarantees string text.
    const corrected = message.content
        .filter((block) => block.type === 'text' && typeof block.text === 'string')
        .map((block) => block.text)
        .join('');
    // An empty result is treated as a model error rather than returned as an empty document.
    if (corrected.length === 0) {
        log.warn('tidy.empty', { editor: editor.email, model });
        return fail(502, { error: 'Tidy returned nothing. Try again.' });
    }
    log.info('tidy.done', { editor: editor.email, model: message.model, usage: message.usage });
    return { corrected, model: message.model, usage: message.usage };
}
|
|
2022
|
+
return { layoutLoad, indexRedirect, listLoad, mediaLibraryLoad, settingsLoad, settingsSave, createAction, editLoad, saveAction, publishAction, publishAllAction, discardAction, deleteAction, listDeleteAction, renameAction, uploadAction, mediaDeleteAction, mediaBulkDelete, mediaOrphanScan, mediaPurgeOrphans, mediaUpdateAction, mediaReplacePreview, mediaReplaceApply, mediaAltPreview, mediaAltApply, addDictionaryWord, tidyAction, mintToken };
|
|
1429
2023
|
}
|
|
1430
2024
|
/** The cap, in characters, on the stored alt text. The human fields are display copy, not content,
|
|
1431
2025
|
* so a generous cap rejects only abuse-scale input. */
|