@glw907/cairn-cms 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/CHANGELOG.md +84 -0
  2. package/dist/components/CairnAdmin.svelte +3 -0
  3. package/dist/components/CairnMediaLibrary.svelte +1101 -27
  4. package/dist/components/CairnMediaLibrary.svelte.d.ts +10 -2
  5. package/dist/components/CairnTidySettings.svelte +553 -0
  6. package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
  7. package/dist/components/EditPage.svelte +371 -2
  8. package/dist/components/MarkdownEditor.svelte +168 -1
  9. package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
  10. package/dist/components/TidyReview.svelte +463 -0
  11. package/dist/components/TidyReview.svelte.d.ts +47 -0
  12. package/dist/components/admin-icons.d.ts +1 -0
  13. package/dist/components/admin-icons.js +1 -0
  14. package/dist/components/cairn-admin.css +913 -2
  15. package/dist/components/editor-tidy.d.ts +31 -0
  16. package/dist/components/editor-tidy.js +199 -0
  17. package/dist/components/index.d.ts +1 -0
  18. package/dist/components/index.js +1 -0
  19. package/dist/components/markdown-directives.d.ts +16 -0
  20. package/dist/components/markdown-directives.js +34 -0
  21. package/dist/components/objective-errors.d.ts +30 -0
  22. package/dist/components/objective-errors.js +113 -0
  23. package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  24. package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  25. package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  26. package/dist/components/spellcheck-worker.d.ts +80 -0
  27. package/dist/components/spellcheck-worker.js +161 -0
  28. package/dist/components/spellcheck.d.ts +146 -0
  29. package/dist/components/spellcheck.js +541 -0
  30. package/dist/components/tidy-categorize.d.ts +67 -0
  31. package/dist/components/tidy-categorize.js +392 -0
  32. package/dist/components/tidy-diff.d.ts +60 -0
  33. package/dist/components/tidy-diff.js +147 -0
  34. package/dist/components/tidy-validate.d.ts +37 -0
  35. package/dist/components/tidy-validate.js +174 -0
  36. package/dist/content/compose.d.ts +1 -1
  37. package/dist/content/compose.js +11 -0
  38. package/dist/content/site-dictionary.d.ts +31 -0
  39. package/dist/content/site-dictionary.js +82 -0
  40. package/dist/content/types.d.ts +25 -0
  41. package/dist/doctor/checks-local.d.ts +1 -0
  42. package/dist/doctor/checks-local.js +55 -6
  43. package/dist/doctor/index.js +2 -1
  44. package/dist/log/events.d.ts +1 -1
  45. package/dist/media/bulk-delete-plan.d.ts +24 -0
  46. package/dist/media/bulk-delete-plan.js +25 -0
  47. package/dist/media/orphan-scan.d.ts +37 -0
  48. package/dist/media/orphan-scan.js +42 -0
  49. package/dist/media/reconcile.d.ts +3 -0
  50. package/dist/media/reconcile.js +3 -2
  51. package/dist/nav/site-config.d.ts +98 -0
  52. package/dist/nav/site-config.js +132 -0
  53. package/dist/sveltekit/admin-dispatch.d.ts +2 -0
  54. package/dist/sveltekit/admin-dispatch.js +6 -2
  55. package/dist/sveltekit/cairn-admin.d.ts +16 -1
  56. package/dist/sveltekit/cairn-admin.js +28 -3
  57. package/dist/sveltekit/content-routes.d.ts +171 -4
  58. package/dist/sveltekit/content-routes.js +597 -3
  59. package/dist/sveltekit/index.d.ts +1 -1
  60. package/dist/sveltekit/tidy-prompt.d.ts +11 -0
  61. package/dist/sveltekit/tidy-prompt.js +118 -0
  62. package/package.json +10 -1
  63. package/src/lib/components/CairnAdmin.svelte +3 -0
  64. package/src/lib/components/CairnMediaLibrary.svelte +1101 -27
  65. package/src/lib/components/CairnTidySettings.svelte +553 -0
  66. package/src/lib/components/EditPage.svelte +371 -2
  67. package/src/lib/components/MarkdownEditor.svelte +168 -1
  68. package/src/lib/components/TidyReview.svelte +463 -0
  69. package/src/lib/components/admin-icons.ts +1 -0
  70. package/src/lib/components/cairn-admin.css +25 -0
  71. package/src/lib/components/editor-tidy.ts +241 -0
  72. package/src/lib/components/index.ts +1 -0
  73. package/src/lib/components/markdown-directives.ts +35 -0
  74. package/src/lib/components/objective-errors.ts +155 -0
  75. package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  76. package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  77. package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  78. package/src/lib/components/spellcheck-worker.ts +279 -0
  79. package/src/lib/components/spellcheck.ts +679 -0
  80. package/src/lib/components/tidy-categorize.ts +460 -0
  81. package/src/lib/components/tidy-diff.ts +196 -0
  82. package/src/lib/components/tidy-validate.ts +202 -0
  83. package/src/lib/content/compose.ts +11 -1
  84. package/src/lib/content/site-dictionary.ts +84 -0
  85. package/src/lib/content/types.ts +25 -0
  86. package/src/lib/doctor/checks-local.ts +59 -5
  87. package/src/lib/doctor/index.ts +2 -0
  88. package/src/lib/log/events.ts +9 -1
  89. package/src/lib/media/bulk-delete-plan.ts +54 -0
  90. package/src/lib/media/orphan-scan.ts +74 -0
  91. package/src/lib/media/reconcile.ts +3 -2
  92. package/src/lib/nav/site-config.ts +197 -0
  93. package/src/lib/sveltekit/admin-dispatch.ts +7 -3
  94. package/src/lib/sveltekit/cairn-admin.ts +38 -4
  95. package/src/lib/sveltekit/content-routes.ts +795 -7
  96. package/src/lib/sveltekit/index.ts +1 -0
  97. package/src/lib/sveltekit/tidy-prompt.ts +153 -0
@@ -10,13 +10,23 @@ import { deriveExcerpt } from '../content/excerpt.js';
10
10
  import { asString } from '../content/identity.js';
11
11
  import { isValidId, slugify, filenameFromId, composeDatedId, slugFromId, renameId } from '../content/ids.js';
12
12
  import { appCredentials, type GithubKeyEnv } from '../github/credentials.js';
13
- import { listMarkdown, readRaw, commitFiles, type FileChange } from '../github/repo.js';
13
+ import { listMarkdown, readRaw, commitFile, commitFiles, type FileChange } from '../github/repo.js';
14
14
  import { branchHeadSha, createBranch, deleteBranch, listBranches } from '../github/branches.js';
15
15
  import { PENDING_PREFIX, pendingBranch, parsePendingBranch } from '../content/pending.js';
16
16
  import { cachedInstallationToken } from '../github/signing.js';
17
17
  import { emptyManifest, manifestEntryFromFile, parseManifest, serializeManifest, upsertEntry, removeEntry, inboundLinks, type Manifest, type LinkTarget, type InboundLink } from '../content/manifest.js';
18
18
  import { isConflict } from '../github/types.js';
19
19
  import { log } from '../log/index.js';
20
+ import { dictionaryFileForDialect, DEFAULT_TIDY_MODEL, resolveTidyConventions, parseSiteConfig, setTidy, validateTidyConventions, TidyConventionsError } from '../nav/site-config.js';
21
+ import type { TidyConventions } from '../nav/site-config.js';
22
+ import { buildTidyPrompt } from './tidy-prompt.js';
23
+ // Server-only: the Anthropic SDK ships the API-key path and never reaches a browser bundle. It is
24
+ // imported only here (a Worker module no component imports statically), and the server-only-deps test
25
+ // guards that boundary. The default export is the Anthropic client class; the structural TidyClient
26
+ // type below keeps the action's surface small and the test seam injectable, so the SDK's deep types
27
+ // never leak into a public signature.
28
+ import Anthropic from '@anthropic-ai/sdk';
29
+ import { parseDictionary, mergeDictionaryWords, serializeDictionary, isValidDictionaryWord } from '../content/site-dictionary.js';
20
30
  import { issueCsrfToken, validateCsrfHeader } from './csrf.js';
21
31
  import { requireSession } from './guard.js';
22
32
  import { sniffMediaType, isDeniedUpload, extForMediaType } from '../media/sniff.js';
@@ -29,10 +39,14 @@ import { mediaLibraryEntry } from '../media/library-entry.js';
29
39
  import type { MediaLibrary, MediaLibraryEntry } from '../media/library-entry.js';
30
40
  import { buildUsageIndex } from '../media/usage.js';
31
41
  import type { UsageEntry } from '../media/usage.js';
42
+ import { runReconcile, MEDIA_KEY_RE, type ReconcileBucket } from '../media/reconcile.js';
43
+ import { buildOrphanScan, type OrphanScan } from '../media/orphan-scan.js';
32
44
  import { repointMediaRef, fillAltForHash } from '../content/media-rewrite.js';
33
45
  import type { RepointPlacement, AltPlacement } from '../content/media-rewrite.js';
34
46
  import { planMediaRewrite } from '../media/rewrite-plan.js';
35
47
  import type { BranchRef } from '../media/rewrite-plan.js';
48
+ import { planBulkDelete } from '../media/bulk-delete-plan.js';
49
+ import type { BulkDeleteSkip } from '../media/bulk-delete-plan.js';
36
50
  import type { CookieJar, EventBase } from './types.js';
37
51
  import type { CairnRuntime, ConceptDescriptor, FrontmatterField, PreviewConfig, ResolvedPreview } from '../content/types.js';
38
52
  import type { Editor, Role } from '../auth/types.js';
@@ -138,6 +152,22 @@ export interface EditData {
138
152
  * when one exists, applied over the top-level values); null when the site sets none, which
139
153
  * leaves the frame rendering unstyled markup behind a hint. */
140
154
  preview: ResolvedPreview | null;
155
+ /** The spellcheck dictionary file for the site's configured dialect (default US English), resolved
156
+ * once at compose. The editor resolves it to a real asset URL on the main thread and hands that URL
157
+ * to the spellcheck Worker's `init`, the same way `mediaLibrary` is threaded in. Just the filename,
158
+ * e.g. "dictionary-en-us.txt". */
159
+ spellcheckDictionary: string;
160
+ /** The committed personal-dictionary words for the site (spec 1.6): the durable, shared, reviewable
161
+ * layer the editor seeds the spellcheck Worker's personal set from, the way `mediaLibrary` is handed
162
+ * in. Read from the git-committed `dictionary.txt` at editor load; empty when the file is absent or
163
+ * unreadable (the editor degrades to dialect-only). The dialect dictionary and the session ignore
164
+ * list are the other two layers; only this one is committed. */
165
+ siteDictionary: string[];
166
+ /** The editor-tier tidy facts the review surface needs (spec 2.5): whether tidy is enabled, the model
167
+ * that runs (for the head pill), and the RESOLVED conventions (the only data source for a
168
+ * normalization's because-line and the local category inference). The API key never appears here, it
169
+ * is a Worker secret. `enabled` false hides the Tidy control. */
170
+ tidy: { enabled: boolean; model: string; conventions: TidyConventions };
141
171
  }
142
172
 
143
173
  /** One asset's where-used overlay, kept separate from MediaLibraryEntry so the picker's shared
@@ -161,14 +191,52 @@ export interface MediaLibraryData {
161
191
  * redirected commit conflict never overwrite each other. */
162
192
  error: string | null;
163
193
  /** The success flash a redirected action carries: `deleted` from `?deleted=1`, `updated` from
164
- * `?updated=1`, `replaced` from `?replaced=1`, `altPropagated` from `?altPropagated=1`, null
165
- * otherwise. The component renders a polite success strip for each. */
166
- flash: 'deleted' | 'updated' | 'replaced' | 'altPropagated' | null;
194
+ * `?updated=1`, `replaced` from `?replaced=1`, `altPropagated` from `?altPropagated=1`,
195
+ * `bulkDeleted` from `?bulkDeleted=1`, `orphansPurged` from `?orphansPurged=1`, null otherwise.
196
+ * The component renders a polite success strip for each. */
197
+ flash: 'deleted' | 'updated' | 'replaced' | 'altPropagated' | 'bulkDeleted' | 'orphansPurged' | null;
167
198
  /** A redirected action's conflict error read from `?error=` (a commit-conflict bounce). Kept in
168
199
  * its own slot rather than the degraded-load `error` above, so the two never collide. */
169
200
  flashError: string | null;
170
201
  }
171
202
 
203
+ /** The two-tier tidy settings load (spec 2.8, Task 15). The developer tier is read-only: `enabled`,
204
+ * `keyConfigured`, and `model`/`modelLabel` are deploy-time facts the editor sees but cannot change.
205
+ * The editor tier is the resolved `conventions` block, written back through the save. The visibility
206
+ * gate is truthful: `enabled` is true only when `tidy.enabled` is set AND the API key is present, so
207
+ * the screen renders the convention list only then and the honest gate note otherwise. The key is a
208
+ * Worker secret, so `keyConfigured` is the presence of `ANTHROPIC_API_KEY` in the load's env, never
209
+ * the key itself; nothing here returns or logs the secret. */
210
+ export interface SettingsData {
211
+ /** The truthful gate: tidy is enabled AND the API key is present. The screen renders the editor
212
+ * tier only when this is true, and the honest gate note (a labelled region, no disabled controls)
213
+ * otherwise. */
214
+ enabled: boolean;
215
+ /** Whether `tidy.enabled` is set in the site config, independent of the key. The gate note's
216
+ * checklist reads this to show which deploy-time step is still open. */
217
+ tidyEnabled: boolean;
218
+ /** Whether the API key secret is present in the Worker env. A presence flag, never the key. */
219
+ keyConfigured: boolean;
220
+ /** The model id (a developer-tier fact, read-only on the screen). */
221
+ model: string;
222
+ /** A plain-language label for the model id ("Claude Sonnet"), so the read-only fact is not a bare
223
+ * jargon token. Falls back to the raw id for an unknown model. */
224
+ modelLabel: string;
225
+ /** The resolved editor-tier conventions: every field concrete, the screen's initial control state.
226
+ * Present only when the gate is open; the gate state needs no conventions. */
227
+ conventions: TidyConventions;
228
+ /** The success flash a redirected save carries (`?saved=1`). */
229
+ saved: boolean;
230
+ /** A redirected save's validation or conflict error read from `?error=`. */
231
+ error: string | null;
232
+ }
233
+
234
+ /** A refused settings save: a conflict bounce or a malformed conventions payload. Just the one-line
235
+ * summary; the save commits nothing on a refusal. */
236
+ export interface SettingsSaveFailure {
237
+ error: string;
238
+ }
239
+
172
240
  /** The structural event the content routes read; a real SvelteKit RequestEvent satisfies it. */
173
241
  export interface ContentEvent extends EventBase<GithubKeyEnv> {
174
242
  params: Record<string, string>;
@@ -178,12 +246,96 @@ export interface ContentEvent extends EventBase<GithubKeyEnv> {
178
246
  }
179
247
 
180
248
  /** Injectable dependencies; tests stub the token mint to avoid signing a real key. */
249
+ /** The minimal Anthropic client surface the tidy action uses, typed structurally so the SDK's deep
250
+ * generics never reach a public signature and so the integration test can inject a fake whose
251
+ * `messages.create` it stubs. The real factory builds `new Anthropic({ apiKey })`, which satisfies
252
+ * this shape. The success path reads only the text blocks, the model, the stop reason, and the usage
253
+ * counts. */
254
+ export interface TidyClient {
255
+ messages: {
256
+ create(
257
+ body: {
258
+ model: string;
259
+ max_tokens: number;
260
+ system: string;
261
+ messages: { role: 'user'; content: string }[];
262
+ },
263
+ // The SDK signature is create(body, options). The abort signal belongs in the second argument
264
+ // (RequestOptions), not the body, so the request actually cancels when the deadline fires.
265
+ options?: { signal?: AbortSignal },
266
+ ): Promise<{
267
+ content: { type: string; text?: string }[];
268
+ model: string;
269
+ stop_reason: string | null;
270
+ usage: { input_tokens: number; output_tokens: number };
271
+ }>;
272
+ };
273
+ }
274
+
181
275
  export interface ContentRoutesDeps {
182
276
  /** Mint a GitHub App installation token from the Worker env. Defaults to the real signer.
183
277
  * A bare string works too; the routes await whatever comes back. */
184
278
  mintToken?: (env: GithubKeyEnv) => string | Promise<string>;
279
+ /** Build the Anthropic client for the tidy action from the resolved API key. Defaults to the real
280
+ * SDK client. Injected in tests so `messages.create` is stubbed and no network call (or real key)
281
+ * is ever needed. The factory runs only after the key is read from the env, so a disabled or
282
+ * unconfigured site never constructs a client. */
283
+ anthropic?: (opts: { apiKey: string }) => TidyClient;
284
+ /** The tidy action's own request deadline in milliseconds, set shorter than the platform limit so a
285
+ * slow model call becomes a clean retryable fail(502) rather than a platform timeout. Defaults to
286
+ * {@link DEFAULT_TIDY_TIMEOUT_MS}. Overridable in tests to assert the deadline path without waiting. */
287
+ tidyTimeoutMs?: number;
185
288
  }
186
289
 
290
+ /** The successful tidy outcome (spec 2.1): the corrected markdown, the model that produced it, and the
291
+ * token usage. The diff is computed on the client (Task 12), so the server returns the plain text and
292
+ * commits nothing. Admin-internal: consumed by the editor's review surface, not on the package's
293
+ * sveltekit subpath, so it carries no reference page. */
294
+ export interface TidyResult {
295
+ corrected: string;
296
+ model: string;
297
+ usage: { input_tokens: number; output_tokens: number };
298
+ }
299
+
300
+ /** A refused tidy: `fail(403)` on a failed CSRF check, `fail(503)` when tidy is disabled or the API
301
+ * key is missing, `fail(413)` for an over-long body, `fail(502)` for a deadline overrun, abort, or
302
+ * model error (all retryable), `fail(422)` for a model refusal, `fail(400)` for a malformed body. Just
303
+ * the one-line summary; the action commits nothing, so a refusal can never corrupt the entry. */
304
+ export interface TidyFailure {
305
+ error: string;
306
+ }
307
+
308
+ /** The Worker-side request deadline for the tidy model call: 30 seconds. A tidy call to Sonnet on a
309
+ * full entry can run many seconds, so the action bounds it with an AbortSignal and maps the overrun to
310
+ * a retryable fail(502). This sits well under Cloudflare's per-request wall-clock ceiling (a Worker
311
+ * invocation can run far longer, but a single subrequest left open near that ceiling would surface as a
312
+ * platform timeout the action could not shape into a clean retry). 30s comfortably covers a proofread
313
+ * of the bounded input (see MAX_TIDY_CHARS) while leaving headroom under the platform limit. */
314
+ const DEFAULT_TIDY_TIMEOUT_MS = 30_000;
315
+
316
+ /** The fallback site-config path when no nav menu names one: the convention every scaffolded site
317
+ * uses. The settings save edits the same committed YAML the nav editor does, so it resolves the path
318
+ * from the configured nav menu first and falls back to this default. */
319
+ const DEFAULT_SITE_CONFIG_PATH = 'src/lib/site.config.yaml';
320
+
321
+ /** Plain-language labels for the known tidy models, so the read-only model fact reads as a name rather
322
+ * than a bare id. An unknown id falls back to itself. */
323
+ const TIDY_MODEL_LABELS: Record<string, string> = {
324
+ 'claude-sonnet-4-6': 'Claude Sonnet',
325
+ 'claude-haiku-4-5': 'Claude Haiku',
326
+ };
327
+
328
+ /** The display label for a tidy model id, falling back to the raw id for an unknown model. */
329
+ function tidyModelLabel(model: string): string {
330
+ return TIDY_MODEL_LABELS[model] ?? model;
331
+ }
332
+
333
+ /** The input cap for a single tidy request: 24000 characters (~6k input tokens). A proofread runs at
334
+ * roughly input length, so this stays comfortably inside the 30s deadline; a longer entry refuses with
335
+ * fail(413) and the author tidies a selection instead. The cap is enforced BEFORE the model call, so an
336
+ * over-long body never spends a token or risks the deadline. */
337
+ const MAX_TIDY_CHARS = 24_000;
338
+
187
339
  /** A blocked save or publish: `fail(400)` when the body links to a target absent from main. */
188
340
  export interface SaveFailure {
189
341
  /** The one-line human summary every content action failure carries. */
@@ -248,6 +400,45 @@ export interface MediaAltPropagateFailure {
248
400
  error: string;
249
401
  }
250
402
 
403
+ /** The personal-dictionary add outcome (spec 1.6): the merged, canonical sorted word list after the
404
+ * add landed. The client reconciles its pending-additions set against this (a word now in the list is
405
+ * committed and dropped from pending). Admin-internal: exported for the editor host's reconcile, not
406
+ * on the package's sveltekit subpath, so it carries no reference page. */
407
+ export interface DictionaryAddResult {
408
+ words: string[];
409
+ }
410
+
411
+ /** A refused personal-dictionary add: `fail(403)` on a failed CSRF check, `fail(400)` on a body that
412
+ * carries no valid word. The client keeps its pending additions for the session and re-attempts on
413
+ * the next save, so the word is never silently dropped. Just the one-line summary. */
414
+ export interface DictionaryAddFailure {
415
+ error: string;
416
+ }
417
+
418
+ /** A refused media bulk delete or orphan purge: `fail(503)` for the fail-closed strict-usage refusal
419
+ * (the whole batch refuses) or media-off / a missing bucket binding. The per-item outcomes ride the
420
+ * returned summary, not a fail. */
421
+ export interface MediaBulkFailure {
422
+ error: string;
423
+ }
424
+
425
+ /** The bulk-delete outcome the component renders: the deleted hashes, the skipped rows from the
426
+ * partition (with their reason and where-used), and any per-object R2 delete failure. Admin-internal,
427
+ * not on the package subpath, so no reference page. */
428
+ export interface MediaBulkDeleteResult {
429
+ deleted: string[];
430
+ skipped: BulkDeleteSkip[];
431
+ failed: { hash: string; error: string }[];
432
+ }
433
+
434
+ /** The orphan-purge outcome: the purged R2 keys, the keys skipped because their hash was claimed by a
435
+ * manifest row since the scan, and any per-object delete failure. Admin-internal, no reference page. */
436
+ export interface MediaOrphanPurgeResult {
437
+ purged: string[];
438
+ skippedClaimed: string[];
439
+ failed: { key: string; error: string }[];
440
+ }
441
+
251
442
  /** One entry the replace preview will rewrite, enriched with its display title and permalink from the
252
443
  * content manifest (the planner's PlannedEntry carries neither). The screen lists these as the
253
444
  * confirm dialog's where-touched preview, and the apply re-derives its own plan rather than trusting
@@ -312,7 +503,7 @@ export interface MediaAltPreviewPlan {
312
503
  * `form` prop carries a `?/mediaDelete`, `?/mediaUpdate`, `?/mediaReplace`, or `?/mediaAltPropagate`
313
504
  * refusal without a second type. */
314
505
  export type ContentFormFailure = Partial<
315
- SaveFailure & DeleteRefusal & RenameFailure & MediaDeleteRefusal & MediaUpdateFailure & MediaReplaceFailure & MediaAltPropagateFailure
506
+ SaveFailure & DeleteRefusal & RenameFailure & MediaDeleteRefusal & MediaUpdateFailure & MediaReplaceFailure & MediaAltPropagateFailure & MediaBulkFailure & TidyFailure
316
507
  >;
317
508
 
318
509
  /** The successful upload's response (`uploadAction`). The server-owned `record` rides the editor's
@@ -350,6 +541,13 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
350
541
  const mintToken =
351
542
  deps.mintToken ?? ((env: GithubKeyEnv) => cachedInstallationToken(appCredentials(runtime.backend, env)));
352
543
 
544
+ // The default Anthropic factory builds the real SDK client from the resolved key. Tests inject a fake
545
+ // (deps.anthropic) so messages.create is stubbed and no network call or real key is ever needed. The
546
+ // SDK client satisfies TidyClient structurally; the cast names that to the compiler.
547
+ const anthropicClient =
548
+ deps.anthropic ?? ((opts: { apiKey: string }) => new Anthropic({ apiKey: opts.apiKey }) as unknown as TidyClient);
549
+ const tidyTimeoutMs = deps.tidyTimeoutMs ?? DEFAULT_TIDY_TIMEOUT_MS;
550
+
353
551
  /** Main's manifest, parsed. A missing file starts empty (a fresh repo before the first commit).
354
552
  * Always read from main: pending branches carry no manifest copy. */
355
553
  async function readManifest(token: string): Promise<Manifest> {
@@ -546,6 +744,8 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
546
744
  else if (event.url.searchParams.get('updated') === '1') flash = 'updated';
547
745
  else if (event.url.searchParams.get('replaced') === '1') flash = 'replaced';
548
746
  else if (event.url.searchParams.get('altPropagated') === '1') flash = 'altPropagated';
747
+ else if (event.url.searchParams.get('bulkDeleted') === '1') flash = 'bulkDeleted';
748
+ else if (event.url.searchParams.get('orphansPurged') === '1') flash = 'orphansPurged';
549
749
  const flashError = event.url.searchParams.get('error');
550
750
  let token: string;
551
751
  try {
@@ -674,13 +874,17 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
674
874
  // The media manifest joins the concurrent batch only when media is on, read from the default
675
875
  // branch (pending branches carry no copy). A rejected media read degrades to null so the edit
676
876
  // never throws on a missing or unreadable media.json; the projection below treats null as empty.
677
- const [headSha, mainRaw, manifestRaw, mediaRaw] = await Promise.all([
877
+ // The committed personal dictionary joins the concurrent batch, read from the default branch. A
878
+ // rejected read degrades to null so the edit never throws on a missing or unreadable dictionary;
879
+ // the projection below treats null as an empty word list (the editor falls back to dialect-only).
880
+ const [headSha, mainRaw, manifestRaw, mediaRaw, dictionaryRaw] = await Promise.all([
678
881
  branchHeadSha(runtime.backend, branch, token),
679
882
  readRaw(runtime.backend, path, token),
680
883
  readRaw(runtime.backend, runtime.manifestPath, token),
681
884
  runtime.resolvedAssets.enabled
682
885
  ? readRaw(runtime.backend, runtime.mediaManifestPath, token).catch(() => null)
683
886
  : Promise.resolve(null),
887
+ readRaw(runtime.backend, dictionaryFilePath(), token).catch(() => null),
684
888
  ]);
685
889
  const pending = headSha !== null;
686
890
  const raw = pending ? await readRaw({ ...runtime.backend, branch }, path, token) : mainRaw;
@@ -737,9 +941,30 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
737
941
  publishedFlash: event.url.searchParams.get('published') === '1',
738
942
  discardedFlash: event.url.searchParams.get('discarded') === '1',
739
943
  preview: resolvePreview(runtime.preview, concept.id),
944
+ // composeRuntime always resolves this from the site config's dialect; default a hand-built
945
+ // runtime that omits it to the US English dictionary so the editor always has a real filename.
946
+ spellcheckDictionary: runtime.spellcheckDictionary ?? dictionaryFileForDialect(undefined),
947
+ // The committed personal-dictionary words, normalized to the canonical sorted, deduplicated set
948
+ // so the editor seeds the Worker's personal layer with a clean list. A missing or unreadable file
949
+ // is an empty list (the dialect-only fallback).
950
+ siteDictionary: mergeDictionaryWords(parseDictionary(dictionaryRaw), []),
951
+ // The editor-tier tidy facts: the master switch, the model (for the head pill), and the resolved
952
+ // conventions (the because-line and category inference read only these). The API key is never
953
+ // exposed here. A site with no tidy block reads disabled with the default conventions.
954
+ tidy: {
955
+ enabled: runtime.tidy?.enabled ?? false,
956
+ model: runtime.tidy?.model || DEFAULT_TIDY_MODEL,
957
+ conventions: resolveTidyConventions(runtime.tidy?.conventions),
958
+ },
740
959
  };
741
960
  }
742
961
 
962
+ /** The repo-relative personal-dictionary path, defaulting a hand-built runtime that omits it to the
963
+ * same `.cairn/` content root the manifests use. composeRuntime always fills `dictionaryPath`. */
964
+ function dictionaryFilePath(): string {
965
+ return runtime.dictionaryPath ?? 'src/content/.cairn/dictionary.txt';
966
+ }
967
+
743
968
  /** Log a failed commit: a conflict is the expected last-writer-wins outcome, so it warns with a
744
969
  * reason; any other error is unexpected and logs at error with the stringified cause. Publish
745
970
  * failures carry the same shape under their own event name. */
@@ -1493,6 +1718,263 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
1493
1718
  throw redirect(303, '/admin/media?deleted=1');
1494
1719
  }
1495
1720
 
1721
/** Bulk safe-delete a multi-select of committed media assets.
 *
 * The batch is gated by ONE shared strict cross-branch usage index, built once per request and never
 * once per item. The posture is fail-closed for the WHOLE batch: if that index cannot be built, the
 * action refuses with fail(503) and commits nothing, rather than risk deleting bytes a branch still
 * references.
 *
 * Skip-and-report, never force: planBulkDelete partitions the selection against the strict index and
 * the manifest into `deletable` hashes and `skipped` entries. An in-use item is skipped and reported,
 * never force-deleted here.
 *
 * The order is load-bearing: ONE commit removes every deletable manifest row FIRST, and only then are
 * the R2 objects deleted. A failure after the commit leaves bytes with no row (a benign orphan)
 * rather than a row pointing at deleted bytes. Each R2 delete is best-effort: a per-object error is
 * recorded in `failed` and never aborts the rest of the batch.
 *
 * @param event the request event; the form carries repeated `hash` fields or a JSON `hashes` array.
 * @returns a fail(503) envelope when media is off, the bucket is unbound, or usage cannot be
 *   verified; otherwise the itemized `{ deleted, skipped, failed }` summary (no redirect). */
async function mediaBulkDelete(event: ContentEvent): Promise<ReturnType<typeof fail> | MediaBulkDeleteResult> {
  const editor = requireSession(event);
  const token = await mintToken(event.platform?.env ?? {});

  // Read the selected hashes from the form. Accept the repeated `hash` field, falling back to a JSON
  // `hashes` array. A value that does not match the hash grammar is dropped silently rather than
  // surfaced as a skip (it was never a real selection).
  const form = await event.request.formData();
  let raw = form.getAll('hash').map(String);
  if (raw.length === 0) {
    const json = form.get('hashes');
    if (typeof json === 'string') {
      try {
        const parsed: unknown = JSON.parse(json);
        if (Array.isArray(parsed)) raw = parsed.map(String);
      } catch {
        raw = [];
      }
    }
  }
  // Deduplicate (first occurrence wins, order preserved): a hash posted twice is still ONE selection,
  // so it must not be planned, counted in the commit message, or reported twice.
  const selected = [...new Set(raw.filter((h) => MEDIA_HASH_RE.test(h)))];

  // Read the fresh media manifest (the deletable rows come from here, by hash).
  const manifest = parseMediaManifest(parseMediaJson(await readRaw(runtime.backend, runtime.mediaManifestPath, token)));

  // Resolve the R2 bucket before any write, so a media-off site or a missing binding refuses before
  // the commit.
  const resolved = runtime.resolvedAssets;
  if (!resolved.enabled) {
    return fail(503, { error: 'Media is not enabled for this site.' } satisfies MediaBulkFailure);
  }
  const platformEnv = (event.platform as { env?: Record<string, unknown> } | undefined)?.env ?? {};
  const rawBucket = platformEnv[resolved.bucketBinding];
  if (!rawBucket) {
    return fail(503, { error: 'The media bucket is not bound.' } satisfies MediaBulkFailure);
  }
  const store = r2Store(rawBucket as R2Bucket);

  // THE fail-closed gate for the whole batch: one shared strict usage index. Any throw from the build
  // (including the content-manifest read it depends on) refuses the whole batch rather than
  // mistaking a still-referenced asset for an orphan. Build exactly one index, never one per item.
  let index: Awaited<ReturnType<typeof buildUsageIndex>>;
  try {
    index = await buildUsageIndex(runtime.backend, token, runtime.concepts, await readManifest(token), { strict: true });
  } catch {
    return fail(503, { error: 'Could not verify where these assets are used. Try again.' } satisfies MediaBulkFailure);
  }

  // The pure partition: membership in the fresh strict index is the gate, never the display count.
  const plan = planBulkDelete(selected, index, manifest);
  // An all-skipped or empty batch is a no-op success: nothing committed, nothing deleted.
  if (plan.deletable.length === 0) {
    return { deleted: [], skipped: plan.skipped, failed: [] } satisfies MediaBulkDeleteResult;
  }

  // ONE commit removing EVERY deletable row, folded over removeMediaEntry.
  let next = manifest;
  for (const hash of plan.deletable) next = removeMediaEntry(next, hash);
  const commitFields = { concept: 'media', id: 'bulk', editor: editor.email };
  try {
    await commitFiles(
      runtime.backend,
      [{ path: runtime.mediaManifestPath, content: serializeMediaManifest(next) }],
      { message: `Delete ${plan.deletable.length} media assets`, author: { name: editor.displayName, email: editor.email } },
      token,
    );
    log.info('commit.succeeded', commitFields);
  } catch (err) {
    // NOTE(review): commitFailure is presumed never to return (it must throw or redirect); otherwise
    // the R2 deletes below would run after a failed commit. Confirm against its definition.
    commitFailure(commitFields, err, '/admin/media',
      'The media manifest changed since you opened it. Reload and try again.');
  }

  // THEN delete each deletable hash's R2 object (the load-bearing order, see the docstring). Best
  // effort and batch-resilient: a thrown key derivation or a delete error is reported in `failed`
  // and the loop continues.
  const deleted: string[] = [];
  const failed: { hash: string; error: string }[] = [];
  for (const hash of plan.deletable) {
    try {
      // Look the row up in the pre-commit manifest: `next` no longer carries it.
      const row = manifest[hash];
      await store.delete(r2Key(row.hash, row.ext));
      deleted.push(hash);
    } catch (err) {
      failed.push({ hash, error: err instanceof Error ? err.message : String(err) });
    }
  }

  // Record the failed count too, so an R2 delete that left orphaned bytes has a server-side trace.
  log.info('media.bulk_deleted', { editor: editor.email, deleted: deleted.length, skipped: plan.skipped.length, failed: failed.length });
  return { deleted, skipped: plan.skipped, failed } satisfies MediaBulkDeleteResult;
}
1830
+
1831
/** On-demand orphan scan: a read-only reconcile of the stored R2 objects against the media manifest,
 * joined with one strict cross-branch usage index.
 *
 * Fail-closed at detection time: the reconcile and the strict usage build share a single try/catch,
 * and a throw from either refuses the whole scan with fail(503) instead of returning a partial
 * result. A partial bucket listing or a partial branch read could produce a wrong orphan verdict,
 * and that verdict feeds the irreversible purge.
 *
 * @param event the request event (a session is required).
 * @returns a fail(503) envelope when media is off, the bucket is unbound, or either read fails;
 *   otherwise the OrphanScan projection built by buildOrphanScan. */
async function mediaOrphanScan(event: ContentEvent): Promise<ReturnType<typeof fail> | OrphanScan> {
  requireSession(event);
  const token = await mintToken(event.platform?.env ?? {});

  // Resolve the raw R2 binding first; the reconcile is handed the raw bucket, not a MediaStore.
  const assets = runtime.resolvedAssets;
  if (!assets.enabled) {
    return fail(503, { error: 'Media is not enabled for this site.' } satisfies MediaBulkFailure);
  }
  const env = (event.platform as { env?: Record<string, unknown> } | undefined)?.env ?? {};
  const bucket = env[assets.bucketBinding];
  if (!bucket) {
    return fail(503, { error: 'The media bucket is not bound.' } satisfies MediaBulkFailure);
  }

  // The manifest side of the reconcile, read fresh.
  const manifestRaw = await readRaw(runtime.backend, runtime.mediaManifestPath, token);
  const manifest = parseMediaManifest(parseMediaJson(manifestRaw));

  // Both reads must complete in full; either one throwing refuses the scan.
  let reconciled: Awaited<ReturnType<typeof runReconcile>>;
  let usage: Awaited<ReturnType<typeof buildUsageIndex>>;
  try {
    reconciled = await runReconcile(bucket as unknown as ReconcileBucket, manifest);
    const contentManifest = await readManifest(token);
    usage = await buildUsageIndex(runtime.backend, token, runtime.concepts, contentManifest, { strict: true });
  } catch {
    return fail(503, { error: 'Could not check where files are used, so the scan was not run. Try again.' } satisfies MediaBulkFailure);
  }

  return buildOrphanScan(reconciled, manifest, usage);
}
1880
+
1881
/** Purge orphaned R2 objects: the one IRREVERSIBLE media action (the bytes live only in R2, so a
 * purged object cannot be restored from git history).
 *
 * Gates, in order:
 *   1. Media must be enabled and the bucket bound, else fail(503).
 *   2. The typed confirm must equal the number of posted keys and the set must be non-empty, else
 *      fail(400) and nothing is deleted.
 *   3. ONE strict cross-branch usage index must build, else fail(503) for the whole batch.
 *
 * The posted keys are never trusted as a fresh scan result. For each key the hash is parsed from the
 * key grammar: a non-matching key is dropped silently; a key whose hash now has a manifest row, or is
 * present in the usage index, goes to `skippedClaimed` and its bytes survive; only a key absent from
 * both is deleted. This closes the window between the scan and the purge.
 *
 * There is no commit (an orphan has no manifest row). Each delete is best-effort: a per-object error
 * lands in `failed` and the loop continues.
 *
 * @param event the request event; the form carries repeated `key` fields and a `confirm` count.
 * @returns a fail envelope on a refused gate, otherwise `{ purged, skippedClaimed, failed }`. */
async function mediaPurgeOrphans(event: ContentEvent): Promise<ReturnType<typeof fail> | MediaOrphanPurgeResult> {
  const editor = requireSession(event);
  const token = await mintToken(event.platform?.env ?? {});

  // Same media-off / missing-binding refusals as the scan; deletes go through the MediaStore seam.
  const assets = runtime.resolvedAssets;
  if (!assets.enabled) {
    return fail(503, { error: 'Media is not enabled for this site.' } satisfies MediaBulkFailure);
  }
  const env = (event.platform as { env?: Record<string, unknown> } | undefined)?.env ?? {};
  const bucket = env[assets.bucketBinding];
  if (!bucket) {
    return fail(503, { error: 'The media bucket is not bound.' } satisfies MediaBulkFailure);
  }
  const store = r2Store(bucket as R2Bucket);

  // The selected keys and the typed confirm.
  const form = await event.request.formData();
  const keys = form.getAll('key').map(String);
  const confirm = String(form.get('confirm') ?? '');

  // The irreversible gate: a non-empty selection whose size matches the typed count.
  const confirmed = keys.length > 0 && confirm === String(keys.length);
  if (!confirmed) {
    return fail(400, { error: 'Type the number of files to confirm the purge.' } satisfies MediaBulkFailure);
  }

  // Re-derive against the current manifest, so a key claimed since the scan is never purged.
  const manifestRaw = await readRaw(runtime.backend, runtime.mediaManifestPath, token);
  const manifest = parseMediaManifest(parseMediaJson(manifestRaw));

  // One strict usage index for the whole batch; a failed build refuses the purge outright.
  let usage: Awaited<ReturnType<typeof buildUsageIndex>>;
  try {
    const contentManifest = await readManifest(token);
    usage = await buildUsageIndex(runtime.backend, token, runtime.concepts, contentManifest, { strict: true });
  } catch {
    return fail(503, { error: 'Could not verify where these files are used. Try again.' } satisfies MediaBulkFailure);
  }

  const purged: string[] = [];
  const skippedClaimed: string[] = [];
  const failed: { key: string; error: string }[] = [];
  for (const key of keys) {
    const hash = MEDIA_KEY_RE.exec(key)?.[1];
    // Not a key in the media grammar: never a real orphan key, so drop it without reporting.
    if (hash === undefined) continue;
    // Claimed since the scan, either by a manifest row or by a reference in the usage index.
    if (manifest[hash] || usage.has(hash)) {
      skippedClaimed.push(key);
      continue;
    }
    // Still orphaned: delete the object directly.
    try {
      await store.delete(key);
      purged.push(key);
    } catch (err) {
      failed.push({ key, error: err instanceof Error ? err.message : String(err) });
    }
  }

  log.info('media.orphans_purged', { editor: editor.email, purged: purged.length });
  return { purged, skippedClaimed, failed } satisfies MediaOrphanPurgeResult;
}
1977
+
1496
1978
  /** Edit a committed asset's metadata: its display name, slug, and default alt. A single media.json
1497
1979
  * row commit, with NO reference rewrite: the resolver and the delivery route key on the hash, so a
1498
1980
  * rename never breaks an existing `media:` reference. The default alt is the asset's value for the
@@ -1881,7 +2363,313 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
1881
2363
  throw redirect(303, '/admin/media?altPropagated=1');
1882
2364
  }
1883
2365
 
1884
- return { layoutLoad, indexRedirect, listLoad, mediaLibraryLoad, createAction, editLoad, saveAction, publishAction, publishAllAction, discardAction, deleteAction, listDeleteAction, renameAction, uploadAction, mediaDeleteAction, mediaUpdateAction, mediaReplacePreview, mediaReplaceApply, mediaAltPreview, mediaAltApply, mintToken };
2366
/** Upper bound on the length of one personal-dictionary word, passed to isValidDictionaryWord. It
 * only bounds an abusive input; the per-character validation is the real authority that keeps a
 * request body from injecting an extra line into the committed file. */
const MAX_DICTIONARY_WORD = 64;
/** Upper bound on how many words one add request may carry. Anything past this many valid words is
 * dropped. */
const MAX_DICTIONARY_BATCH = 100;
2373
+
2374
/** Read the committed personal dictionary, merge the additions into it, and commit the result.
 *
 * Used for both the first attempt and the post-conflict retry: every call re-reads the file and
 * re-merges the same additions, so a word another editor committed in the meantime is kept.
 *
 * @param token the token used for the read and the commit.
 * @param additions the already-validated words to add.
 * @param editor the session editor, recorded as the commit author.
 * @returns the merged word list (also when nothing was committed).
 * @throws whatever commitFiles throws, including the conflict error the caller retries on. */
async function mergeAndCommitDictionary(token: string, additions: string[], editor: Editor): Promise<string[]> {
  const path = dictionaryFilePath();
  // Normalize the committed file through the same merge the commit would write, so the no-op check
  // below compares like with like.
  const committedRaw = await readRaw(runtime.backend, path, token);
  const before = mergeDictionaryWords(parseDictionary(committedRaw), []);
  const after = mergeDictionaryWords(before, additions);

  // Commit only when the set actually grew; an idempotent add must not push a commit. The merged
  // list is returned either way so the client can clear its pending additions.
  const grew = after.length !== before.length;
  if (grew) {
    await commitFiles(
      runtime.backend,
      [{ path, content: serializeDictionary(after) }],
      { message: `Add to dictionary: ${additions.join(', ')}`, author: { name: editor.displayName, email: editor.email } },
      token,
    );
  }
  return after;
}
2398
+
2399
/** Resolve the repo-relative site-config path that the settings save reads and commits.
 *
 * Uses the configured nav menu's `configPath` when there is one, otherwise the default path.
 *
 * @returns the site-config path. */
function siteConfigPath(): string {
  const fromNav = runtime.navMenu?.configPath;
  if (fromNav !== undefined && fromNav !== null) return fromNav;
  return DEFAULT_SITE_CONFIG_PATH;
}
2405
+
2406
/** Report whether a non-empty `ANTHROPIC_API_KEY` string exists in the event's platform env.
 *
 * This is a presence flag only: the key's value is never returned.
 *
 * @param event the request event whose `platform.env` is inspected.
 * @returns true when the key is a string of length greater than zero. */
function keyConfigured(event: ContentEvent): boolean {
  const env = (event.platform?.env ?? {}) as Record<string, unknown>;
  const key = env.ANTHROPIC_API_KEY;
  if (typeof key !== 'string') return false;
  return key.length > 0;
}
2413
+
2414
/** Load the data for the tidy settings screen.
 *
 * `enabled` is true only when `tidy.enabled` is set AND the API key is present; `tidyEnabled` and
 * `keyConfigured` expose the two halves separately. Only a presence flag for the key is returned,
 * never the key. The conventions are resolved from the runtime config, and `saved` / `error` are
 * read from the query string.
 *
 * @param event the request event (a session is required).
 * @returns the SettingsData for the screen. */
function settingsLoad(event: ContentEvent): SettingsData {
  requireSession(event);

  const config = runtime.tidy;
  const switchedOn = config?.enabled === true;
  const hasKey = keyConfigured(event);
  // An unset or empty model falls back to the default.
  const model = config?.model || DEFAULT_TIDY_MODEL;
  const query = event.url.searchParams;

  return {
    enabled: switchedOn && hasKey,
    tidyEnabled: switchedOn,
    keyConfigured: hasKey,
    model,
    modelLabel: tidyModelLabel(model),
    conventions: resolveTidyConventions(config?.conventions),
    saved: query.get('saved') === '1',
    error: query.get('error'),
  };
}
2437
+
2438
/** Save the editor-tier tidy conventions into the committed site config.
 *
 * Flow: require a session; refuse with a 404 when tidy is not enabled; validate the posted
 * `conventions` JSON; read the site config and parse it (so a malformed file fails before any
 * write); then commit the config with the conventions applied by setTidy, authored by the session
 * editor. A validation failure or a commit conflict redirects back to the settings screen with the
 * message in `?error=`; success redirects with `?saved=1`.
 *
 * @param event the request event; the form carries a `conventions` JSON string.
 * @returns never: every path throws a redirect or an error. */
async function settingsSave(event: ContentEvent): Promise<never> {
  const editor = requireSession(event);
  // With tidy off there is no editable surface to commit, so the save is refused up front.
  if (runtime.tidy?.enabled !== true) throw error(404, 'Tidy is not enabled for this site');

  // Both refusal paths send the editor back to the settings screen with the message attached.
  const bounce = (message: string) => redirect(303, `/admin/settings?error=${encodeURIComponent(message)}`);

  const form = await event.request.formData();
  let conventions: TidyConventions;
  try {
    const posted = String(form.get('conventions') ?? '{}');
    conventions = validateTidyConventions(JSON.parse(posted));
  } catch (err) {
    // Only a TidyConventionsError carries a message meant for the editor.
    throw bounce(err instanceof TidyConventionsError ? err.message : 'Invalid tidy settings');
  }

  const configPath = siteConfigPath();
  const token = await mintToken(event.platform?.env ?? {});
  const raw = await readRaw(runtime.backend, configPath, token);
  if (raw === null) throw error(404, 'Site config not found');
  // Parse for its throw only: a malformed base must fail before anything is committed onto it.
  parseSiteConfig(raw);

  const commitFields = { concept: 'settings', id: 'tidy', editor: editor.email };
  try {
    await commitFile(
      runtime.backend,
      configPath,
      setTidy(raw, conventions),
      { message: 'Update tidy settings', author: { name: editor.displayName, email: editor.email } },
      token,
    );
    log.info('commit.succeeded', commitFields);
  } catch (err) {
    if (!isConflict(err)) {
      log.error('commit.failed', { ...commitFields, error: String(err) });
      throw err;
    }
    // A conflict is the expected stale-base outcome: warn and ask the editor to reload.
    log.warn('commit.failed', { ...commitFields, reason: 'conflict' });
    throw bounce('The site config changed since you opened it. Reload and reapply your edits.');
  }

  throw redirect(303, '/admin/settings?saved=1');
}
2489
+
2490
/** Add a word (or a batch of words) to the git-committed personal dictionary.
 *
 * Transport: a raw-body POST whose JSON body is `{ word }` and/or `{ words }`, with the CSRF header
 * checked by validateCsrfHeader BEFORE the session is read or any other work is done.
 *
 * Validation is load-bearing because this commits request input to the repo: the body must be a
 * JSON object, only string candidates are kept, each is checked by isValidDictionaryWord against the
 * length cap, the survivors are deduplicated, and the batch is capped. A body that yields no valid
 * word refuses with a 400 and commits nothing.
 *
 * Conflicts: mergeAndCommitDictionary is attempted at most twice. When the first attempt fails with
 * a conflict, the second re-reads the file and re-merges the same additions. A second conflict gives
 * up with a 409 rather than looping. Any non-conflict error is rethrown.
 *
 * @param event the request event carrying the JSON body and the CSRF header.
 * @returns `{ words }` (the merged list) on success, or a fail envelope: 403 on CSRF, 400 on an
 *   unreadable body or no valid word, 409 after two conflicts. */
async function addDictionaryWord(event: ContentEvent): Promise<ReturnType<typeof fail> | DictionaryAddResult> {
  // CSRF first: a failed check refuses before the session read or any backend call.
  if (!event.cookies || !validateCsrfHeader({ url: event.url, request: event.request, cookies: event.cookies })) {
    return fail(403, { error: 'csrf' } satisfies DictionaryAddFailure);
  }
  const editor = requireSession(event);

  let parsed: unknown;
  try {
    parsed = JSON.parse(await event.request.text());
  } catch {
    return fail(400, { error: 'Could not read the dictionary request.' } satisfies DictionaryAddFailure);
  }
  // JSON.parse accepts non-object documents; `null` in particular would make the property reads
  // below throw a TypeError, surfacing as an unhandled error instead of a 400. Refuse them here.
  if (typeof parsed !== 'object' || parsed === null) {
    return fail(400, { error: 'Could not read the dictionary request.' } satisfies DictionaryAddFailure);
  }
  const payload = parsed as { word?: unknown; words?: unknown };

  // Collect the candidates from `word` and/or `words`, keep only the strings, validate each, dedupe,
  // and cap the batch.
  const raw = [
    ...(typeof payload.word === 'string' ? [payload.word] : []),
    ...(Array.isArray(payload.words) ? payload.words.filter((w): w is string => typeof w === 'string') : []),
  ];
  const additions = [...new Set(raw.filter((w) => isValidDictionaryWord(w, MAX_DICTIONARY_WORD)))].slice(0, MAX_DICTIONARY_BATCH);
  if (additions.length === 0) {
    return fail(400, { error: 'No valid word to add to the dictionary.' } satisfies DictionaryAddFailure);
  }

  const token = await mintToken(event.platform?.env ?? {});

  // One attempt plus one retry. Each attempt re-reads the file and re-merges the same additions, so
  // a word that landed concurrently is preserved.
  const MAX_ATTEMPTS = 2;
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
    try {
      const words = await mergeAndCommitDictionary(token, additions, editor);
      log.info(
        'dictionary.added',
        attempt === 1
          ? { editor: editor.email, words: additions }
          : { editor: editor.email, words: additions, retried: true },
      );
      return { words };
    } catch (err) {
      // Only a conflict is retryable; anything else propagates unchanged.
      if (!isConflict(err)) throw err;
    }
  }

  // Every attempt conflicted: give up rather than loop, and tell the client so it can try again.
  log.warn('dictionary.add_conflict', { editor: editor.email, words: additions });
  return fail(409, { error: 'The dictionary changed while saving. It will retry on the next save.' } satisfies DictionaryAddFailure);
}
2556
+
2557
+ /** Tidy: a light LLM copy-edit of the author's markdown (spec 2.1). The first remote model call in
2558
+ * the library, so this is the highest-blast-radius server action: untrusted content and the Anthropic
2559
+ * API key. The transport mirrors the media raw-body actions (a `text/plain` POST carrying JSON
2560
+ * `{ text, scope }`, the CSRF token in `X-Cairn-CSRF`, the response deserialized by the client), with
2561
+ * abort/timeout/deadline the media calls did not need: a tidy call to Sonnet on a full entry can run
2562
+ * many seconds.
2563
+ *
2564
+ * Gate order (every refusal happens before the next step, so a refused request spends nothing):
2565
+ * 1. validateCsrfHeader FIRST (the header witness is the authority for a raw-body POST).
2566
+ * 2. requireSession (an expired session throws the manual-redirect 303 the client reads as status-0).
2567
+ * 3. Read the key and config; refuse fail(503) if tidy is disabled or the key is missing.
2568
+ * 4. Parse and bound the body; refuse fail(400) on malformed JSON, fail(413) on an over-long text.
2569
+ * 5. Only then build the prompt and call the model, bounded by the Worker deadline.
2570
+ *
2571
+ * The untrusted text rides as the user message, never interpolated into the system prompt; the
2572
+ * prompt's injection framing (Task 10) treats it as data. The API key never leaves the action: it is
2573
+ * not returned and not logged, and the log line carries no content. The action commits NOTHING, so a
2574
+ * failed, aborted, or refused tidy can never corrupt the entry; the diff is computed on the client
2575
+ * (Task 12), so the server stays a thin model-call boundary. */
2576
async function tidyAction(event: ContentEvent): Promise<ReturnType<typeof fail> | TidyResult> {
  // Result contract, as implemented below:
  //   403 - the CSRF header check failed (or the event carries no cookies)
  //   503 - tidy is disabled in the runtime config, or ANTHROPIC_API_KEY is absent from platform.env
  //   400 - the body is not a readable JSON object, or it carries no non-empty `text` string
  //   413 - `text` is longer than MAX_TIDY_CHARS
  //   502 - the model call threw or was aborted, or the model returned no text
  //   422 - the model stopped with a refusal
  // On success the corrected text is returned with the model id and usage the response reports.

  // CSRF first: a raw-body (JSON) POST, so the header witness is the authority. A failed check refuses
  // before the session read and before any model call.
  if (!event.cookies || !validateCsrfHeader({ url: event.url, request: event.request, cookies: event.cookies })) {
    return fail(403, { error: 'csrf' } satisfies TidyFailure);
  }
  const editor = requireSession(event);

  // Fail-fast: refuse before any model call if tidy is off or the key is missing. The model is read
  // from config; a missing key is the "not configured" refusal. No secret is ever returned or logged.
  const tidy = runtime.tidy;
  if (!tidy?.enabled) {
    return fail(503, { error: 'Tidy is not enabled for this site.' } satisfies TidyFailure);
  }
  const env = (event.platform?.env ?? {}) as Record<string, unknown>;
  const apiKey = typeof env.ANTHROPIC_API_KEY === 'string' ? env.ANTHROPIC_API_KEY : '';
  if (!apiKey) {
    return fail(503, { error: 'Tidy is not configured: the Anthropic API key is missing.' } satisfies TidyFailure);
  }

  // Parse and bound the body before the call. A malformed body refuses 400; an over-long text refuses
  // 413 (tidy a selection instead), so no over-long input ever spends a token or risks the deadline.
  let parsed: unknown;
  try {
    parsed = JSON.parse(await event.request.text());
  } catch {
    return fail(400, { error: 'Could not read the tidy request.' } satisfies TidyFailure);
  }
  // JSON.parse accepts the literal `null` (and bare primitives) without throwing. Reading `.text` off
  // `null` would throw outside the try above and escape as an unhandled error, so anything that is
  // not an object is refused here as an unreadable request.
  if (typeof parsed !== 'object' || parsed === null) {
    return fail(400, { error: 'Could not read the tidy request.' } satisfies TidyFailure);
  }
  const payload = parsed as { text?: unknown; scope?: unknown };
  const text = typeof payload.text === 'string' ? payload.text : '';
  if (text.length === 0) {
    return fail(400, { error: 'No text to tidy.' } satisfies TidyFailure);
  }
  if (text.length > MAX_TIDY_CHARS) {
    return fail(413, { error: 'This is too long to tidy at once. Select a passage and tidy that instead.' } satisfies TidyFailure);
  }

  // Build the system prompt from the resolved conventions. The prompt is built from config, never
  // from the author's text, so the untrusted text cannot reshape the instructions.
  const system = buildTidyPrompt(resolveTidyConventions(tidy.conventions));
  // `||` rather than `??` on purpose: an empty-string model in config also falls back to the default.
  const model = tidy.model || DEFAULT_TIDY_MODEL;
  // Output budget for the call; a proofread runs at roughly input length, so this is sized well above
  // the capped input rather than lowballed.
  const maxTokens = 16_000;

  // Bound the model call with the action's own deadline, so a slow call becomes a retryable fail(502).
  // Any abort of the call is mapped to that same fail(502) by the catch below.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), tidyTimeoutMs);
  let message: Awaited<ReturnType<TidyClient['messages']['create']>>;
  try {
    const client = anthropicClient({ apiKey });
    message = await client.messages.create(
      {
        model,
        max_tokens: maxTokens,
        system,
        // The untrusted text rides as the user message; it is never interpolated into `system`.
        messages: [{ role: 'user', content: text }],
      },
      // The signal rides the request options, so the deadline timer above actually cancels the call.
      { signal: controller.signal },
    );
  } catch {
    // A deadline overrun, an abort, or a model error all map to the retryable fail(502). The thrown
    // error is deliberately neither logged nor surfaced (it may carry internal detail); the log line
    // carries the editor, the model, and whether the deadline fired, never the key or the content.
    log.warn('tidy.error', { editor: editor.email, model, aborted: controller.signal.aborted });
    return fail(502, { error: 'Tidy could not finish. Try again.' } satisfies TidyFailure);
  } finally {
    // Always release the deadline timer, whether the call resolved or threw.
    clearTimeout(timer);
  }

  // A model refusal is a clean fail(422): nothing is committed by this action, so the author's text
  // is untouched and the editor can leave it as-is.
  if (message.stop_reason === 'refusal') {
    log.warn('tidy.refused', { editor: editor.email, model });
    return fail(422, { error: 'Tidy declined to edit this text.' } satisfies TidyFailure);
  }
  // NOTE(review): a response that stopped because it hit max_tokens would be returned below as if it
  // were the complete corrected text. Confirm whether a truncated result should be refused instead.

  // Read the output as plain text: concatenate the text blocks (a normal response is one). An empty
  // result is treated as a model error rather than silently returning an empty document.
  const corrected = message.content
    .filter((block) => block.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text ?? '')
    .join('');
  if (corrected.length === 0) {
    log.warn('tidy.empty', { editor: editor.email, model });
    return fail(502, { error: 'Tidy returned nothing. Try again.' } satisfies TidyFailure);
  }

  // Report the model id the response names (not the requested one) together with its usage.
  log.info('tidy.done', { editor: editor.email, model: message.model, usage: message.usage });
  return { corrected, model: message.model, usage: message.usage };
}
2671
+
2672
+ return { layoutLoad, indexRedirect, listLoad, mediaLibraryLoad, settingsLoad, settingsSave, createAction, editLoad, saveAction, publishAction, publishAllAction, discardAction, deleteAction, listDeleteAction, renameAction, uploadAction, mediaDeleteAction, mediaBulkDelete, mediaOrphanScan, mediaPurgeOrphans, mediaUpdateAction, mediaReplacePreview, mediaReplaceApply, mediaAltPreview, mediaAltApply, addDictionaryWord, tidyAction, mintToken };
1885
2673
  }
1886
2674
 
1887
2675
  /** The cap, in characters, on the stored alt text. The human fields are display copy, not content,