@glw907/cairn-cms 0.59.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +47 -0
  2. package/dist/components/CairnAdmin.svelte +3 -0
  3. package/dist/components/CairnTidySettings.svelte +553 -0
  4. package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
  5. package/dist/components/EditPage.svelte +371 -2
  6. package/dist/components/MarkdownEditor.svelte +168 -1
  7. package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
  8. package/dist/components/TidyReview.svelte +463 -0
  9. package/dist/components/TidyReview.svelte.d.ts +47 -0
  10. package/dist/components/cairn-admin.css +764 -0
  11. package/dist/components/editor-tidy.d.ts +31 -0
  12. package/dist/components/editor-tidy.js +199 -0
  13. package/dist/components/index.d.ts +1 -0
  14. package/dist/components/index.js +1 -0
  15. package/dist/components/markdown-directives.d.ts +16 -0
  16. package/dist/components/markdown-directives.js +34 -0
  17. package/dist/components/objective-errors.d.ts +30 -0
  18. package/dist/components/objective-errors.js +113 -0
  19. package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  20. package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  21. package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  22. package/dist/components/spellcheck-worker.d.ts +80 -0
  23. package/dist/components/spellcheck-worker.js +161 -0
  24. package/dist/components/spellcheck.d.ts +146 -0
  25. package/dist/components/spellcheck.js +541 -0
  26. package/dist/components/tidy-categorize.d.ts +67 -0
  27. package/dist/components/tidy-categorize.js +392 -0
  28. package/dist/components/tidy-diff.d.ts +60 -0
  29. package/dist/components/tidy-diff.js +147 -0
  30. package/dist/components/tidy-validate.d.ts +37 -0
  31. package/dist/components/tidy-validate.js +174 -0
  32. package/dist/content/compose.d.ts +1 -1
  33. package/dist/content/compose.js +11 -0
  34. package/dist/content/site-dictionary.d.ts +31 -0
  35. package/dist/content/site-dictionary.js +82 -0
  36. package/dist/content/types.d.ts +25 -0
  37. package/dist/doctor/checks-local.d.ts +1 -0
  38. package/dist/doctor/checks-local.js +55 -6
  39. package/dist/doctor/index.js +2 -1
  40. package/dist/log/events.d.ts +1 -1
  41. package/dist/nav/site-config.d.ts +98 -0
  42. package/dist/nav/site-config.js +132 -0
  43. package/dist/sveltekit/admin-dispatch.d.ts +2 -0
  44. package/dist/sveltekit/admin-dispatch.js +6 -2
  45. package/dist/sveltekit/cairn-admin.d.ts +13 -1
  46. package/dist/sveltekit/cairn-admin.js +22 -3
  47. package/dist/sveltekit/content-routes.d.ts +135 -1
  48. package/dist/sveltekit/content-routes.js +351 -3
  49. package/dist/sveltekit/tidy-prompt.d.ts +11 -0
  50. package/dist/sveltekit/tidy-prompt.js +118 -0
  51. package/package.json +10 -1
  52. package/src/lib/components/CairnAdmin.svelte +3 -0
  53. package/src/lib/components/CairnTidySettings.svelte +553 -0
  54. package/src/lib/components/EditPage.svelte +371 -2
  55. package/src/lib/components/MarkdownEditor.svelte +168 -1
  56. package/src/lib/components/TidyReview.svelte +463 -0
  57. package/src/lib/components/cairn-admin.css +25 -0
  58. package/src/lib/components/editor-tidy.ts +241 -0
  59. package/src/lib/components/index.ts +1 -0
  60. package/src/lib/components/markdown-directives.ts +35 -0
  61. package/src/lib/components/objective-errors.ts +155 -0
  62. package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  63. package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  64. package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  65. package/src/lib/components/spellcheck-worker.ts +279 -0
  66. package/src/lib/components/spellcheck.ts +679 -0
  67. package/src/lib/components/tidy-categorize.ts +460 -0
  68. package/src/lib/components/tidy-diff.ts +196 -0
  69. package/src/lib/components/tidy-validate.ts +202 -0
  70. package/src/lib/content/compose.ts +11 -1
  71. package/src/lib/content/site-dictionary.ts +84 -0
  72. package/src/lib/content/types.ts +25 -0
  73. package/src/lib/doctor/checks-local.ts +59 -5
  74. package/src/lib/doctor/index.ts +2 -0
  75. package/src/lib/log/events.ts +7 -1
  76. package/src/lib/nav/site-config.ts +197 -0
  77. package/src/lib/sveltekit/admin-dispatch.ts +7 -3
  78. package/src/lib/sveltekit/cairn-admin.ts +32 -4
  79. package/src/lib/sveltekit/content-routes.ts +504 -4
  80. package/src/lib/sveltekit/tidy-prompt.ts +153 -0
@@ -10,13 +10,23 @@ import { deriveExcerpt } from '../content/excerpt.js';
10
10
  import { asString } from '../content/identity.js';
11
11
  import { isValidId, slugify, filenameFromId, composeDatedId, slugFromId, renameId } from '../content/ids.js';
12
12
  import { appCredentials, type GithubKeyEnv } from '../github/credentials.js';
13
- import { listMarkdown, readRaw, commitFiles, type FileChange } from '../github/repo.js';
13
+ import { listMarkdown, readRaw, commitFile, commitFiles, type FileChange } from '../github/repo.js';
14
14
  import { branchHeadSha, createBranch, deleteBranch, listBranches } from '../github/branches.js';
15
15
  import { PENDING_PREFIX, pendingBranch, parsePendingBranch } from '../content/pending.js';
16
16
  import { cachedInstallationToken } from '../github/signing.js';
17
17
  import { emptyManifest, manifestEntryFromFile, parseManifest, serializeManifest, upsertEntry, removeEntry, inboundLinks, type Manifest, type LinkTarget, type InboundLink } from '../content/manifest.js';
18
18
  import { isConflict } from '../github/types.js';
19
19
  import { log } from '../log/index.js';
20
+ import { dictionaryFileForDialect, DEFAULT_TIDY_MODEL, resolveTidyConventions, parseSiteConfig, setTidy, validateTidyConventions, TidyConventionsError } from '../nav/site-config.js';
21
+ import type { TidyConventions } from '../nav/site-config.js';
22
+ import { buildTidyPrompt } from './tidy-prompt.js';
23
+ // Server-only: the Anthropic SDK ships the API-key path and never reaches a browser bundle. It is
24
+ // imported only here (a Worker module no component imports statically), and the server-only-deps test
25
+ // guards that boundary. The default export is the Anthropic client class; the structural TidyClient
26
+ // type below keeps the action's surface small and the test seam injectable, so the SDK's deep types
27
+ // never leak into a public signature.
28
+ import Anthropic from '@anthropic-ai/sdk';
29
+ import { parseDictionary, mergeDictionaryWords, serializeDictionary, isValidDictionaryWord } from '../content/site-dictionary.js';
20
30
  import { issueCsrfToken, validateCsrfHeader } from './csrf.js';
21
31
  import { requireSession } from './guard.js';
22
32
  import { sniffMediaType, isDeniedUpload, extForMediaType } from '../media/sniff.js';
@@ -142,6 +152,22 @@ export interface EditData {
142
152
  * when one exists, applied over the top-level values); null when the site sets none, which
143
153
  * leaves the frame rendering unstyled markup behind a hint. */
144
154
  preview: ResolvedPreview | null;
155
+ /** The spellcheck dictionary file for the site's configured dialect (default US English), resolved
156
+ * once at compose. The editor resolves it to a real asset URL on the main thread and hands that URL
157
+ * to the spellcheck Worker's `init`, the same way `mediaLibrary` is threaded in. Just the filename,
158
+ * e.g. "dictionary-en-us.txt". */
159
+ spellcheckDictionary: string;
160
+ /** The committed personal-dictionary words for the site (spec 1.6): the durable, shared, reviewable
161
+ * layer the editor seeds the spellcheck Worker's personal set from, the way `mediaLibrary` is handed
162
+ * in. Read from the git-committed `dictionary.txt` at editor load; empty when the file is absent or
163
+ * unreadable (the editor degrades to dialect-only). The dialect dictionary and the session ignore
164
+ * list are the other two layers; only this one is committed. */
165
+ siteDictionary: string[];
166
+ /** The editor-tier tidy facts the review surface needs (spec 2.5): whether tidy is enabled, the model
167
+ * that runs (for the head pill), and the RESOLVED conventions (the only data source for a
168
+ * normalization's because-line and the local category inference). The API key never appears here, it
169
+ * is a Worker secret. `enabled` false hides the Tidy control. */
170
+ tidy: { enabled: boolean; model: string; conventions: TidyConventions };
145
171
  }
146
172
 
147
173
  /** One asset's where-used overlay, kept separate from MediaLibraryEntry so the picker's shared
@@ -174,6 +200,43 @@ export interface MediaLibraryData {
174
200
  flashError: string | null;
175
201
  }
176
202
 
203
+ /** The two-tier tidy settings load (spec 2.8, Task 15). The developer tier is read-only: `enabled`,
204
+ * `keyConfigured`, and `model`/`modelLabel` are deploy-time facts the editor sees but cannot change.
205
+ * The editor tier is the resolved `conventions` block, written back through the save. The visibility
206
+ * gate is truthful: `enabled` is true only when `tidy.enabled` is set AND the API key is present, so
207
+ * the screen renders the convention list only then and the honest gate note otherwise. The key is a
208
+ * Worker secret, so `keyConfigured` is the presence of `ANTHROPIC_API_KEY` in the load's env, never
209
+ * the key itself; nothing here returns or logs the secret. */
210
+ export interface SettingsData {
211
+ /** The truthful gate: tidy is enabled AND the API key is present. The screen renders the editor
212
+ * tier only when this is true, and the honest gate note (a labelled region, no disabled controls)
213
+ * otherwise. */
214
+ enabled: boolean;
215
+ /** Whether `tidy.enabled` is set in the site config, independent of the key. The gate note's
216
+ * checklist reads this to show which deploy-time step is still open. */
217
+ tidyEnabled: boolean;
218
+ /** Whether the API key secret is present in the Worker env. A presence flag, never the key. */
219
+ keyConfigured: boolean;
220
+ /** The model id (a developer-tier fact, read-only on the screen). */
221
+ model: string;
222
+ /** A plain-language label for the model id ("Claude Sonnet"), so the read-only fact is not a bare
223
+ * jargon token. Falls back to the raw id for an unknown model. */
224
+ modelLabel: string;
225
+ /** The resolved editor-tier conventions: every field concrete, the screen's initial control state.
226
+ * Always populated: the load resolves it whether or not the gate is open; the screen simply does not render it behind a closed gate. */
227
+ conventions: TidyConventions;
228
+ /** The success flash a redirected save carries (`?saved=1`). */
229
+ saved: boolean;
230
+ /** A redirected save's validation or conflict error read from `?error=`. */
231
+ error: string | null;
232
+ }
233
+
234
+ /** A refused settings save: a conflict bounce or a malformed conventions payload. Just the one-line
235
+ * summary; the save commits nothing on a refusal. */
236
+ export interface SettingsSaveFailure {
237
+ error: string;
238
+ }
239
+
177
240
  /** The structural event the content routes read; a real SvelteKit RequestEvent satisfies it. */
178
241
  export interface ContentEvent extends EventBase<GithubKeyEnv> {
179
242
  params: Record<string, string>;
@@ -183,12 +246,96 @@ export interface ContentEvent extends EventBase<GithubKeyEnv> {
183
246
  }
184
247
 
185
248
  /** Injectable dependencies; tests stub the token mint to avoid signing a real key. */
249
+ /** The minimal Anthropic client surface the tidy action uses, typed structurally so the SDK's deep
250
+ * generics never reach a public signature and so the integration test can inject a fake whose
251
+ * `messages.create` it stubs. The real factory builds `new Anthropic({ apiKey })`, which satisfies
252
+ * this shape. The success path reads only the text blocks, the model, the stop reason, and the usage
253
+ * counts. */
254
+ export interface TidyClient {
255
+ messages: {
256
+ create(
257
+ body: {
258
+ model: string;
259
+ max_tokens: number;
260
+ system: string;
261
+ messages: { role: 'user'; content: string }[];
262
+ },
263
+ // The SDK signature is create(body, options). The abort signal belongs in the second argument
264
+ // (RequestOptions), not the body, so the request actually cancels when the deadline fires.
265
+ options?: { signal?: AbortSignal },
266
+ ): Promise<{
267
+ content: { type: string; text?: string }[];
268
+ model: string;
269
+ stop_reason: string | null;
270
+ usage: { input_tokens: number; output_tokens: number };
271
+ }>;
272
+ };
273
+ }
274
+
186
275
  export interface ContentRoutesDeps {
187
276
  /** Mint a GitHub App installation token from the Worker env. Defaults to the real signer.
188
277
  * A bare string works too; the routes await whatever comes back. */
189
278
  mintToken?: (env: GithubKeyEnv) => string | Promise<string>;
279
+ /** Build the Anthropic client for the tidy action from the resolved API key. Defaults to the real
280
+ * SDK client. Injected in tests so `messages.create` is stubbed and no network call (or real key)
281
+ * is ever needed. The factory runs only after the key is read from the env, so a disabled or
282
+ * unconfigured site never constructs a client. */
283
+ anthropic?: (opts: { apiKey: string }) => TidyClient;
284
+ /** The tidy action's own request deadline in milliseconds, set shorter than the platform limit so a
285
+ * slow model call becomes a clean retryable fail(502) rather than a platform timeout. Defaults to
286
+ * {@link DEFAULT_TIDY_TIMEOUT_MS}. Overridable in tests to assert the deadline path without waiting. */
287
+ tidyTimeoutMs?: number;
288
+ }
289
+
290
+ /** The successful tidy outcome (spec 2.1): the corrected markdown, the model that produced it, and the
291
+ * token usage. The diff is computed on the client (Task 12), so the server returns the plain text and
292
+ * commits nothing. Admin-internal: consumed by the editor's review surface, not on the package's
293
+ * sveltekit subpath, so it carries no reference page. */
294
+ export interface TidyResult {
295
+ corrected: string;
296
+ model: string;
297
+ usage: { input_tokens: number; output_tokens: number };
298
+ }
299
+
300
+ /** A refused tidy: `fail(403)` on a failed CSRF check, `fail(503)` when tidy is disabled or the API
301
+ * key is missing, `fail(413)` for an over-long body, `fail(502)` for a deadline overrun, abort, or
302
+ * model error (all retryable), `fail(422)` for a model refusal, `fail(400)` for a malformed body. Just
303
+ * the one-line summary; the action commits nothing, so a refusal can never corrupt the entry. */
304
+ export interface TidyFailure {
305
+ error: string;
306
+ }
307
+
308
+ /** The Worker-side request deadline for the tidy model call: 30 seconds. A tidy call to Sonnet on a
309
+ * full entry can run many seconds, so the action bounds it with an AbortSignal and maps the overrun to
310
+ * a retryable fail(502). This sits well under Cloudflare's per-request wall-clock ceiling (a Worker
311
+ * invocation can run far longer, but a single subrequest left open near that ceiling would surface as a
312
+ * platform timeout the action could not shape into a clean retry). 30s comfortably covers a proofread
313
+ * of the bounded input (see MAX_TIDY_CHARS) while leaving headroom under the platform limit. */
314
+ const DEFAULT_TIDY_TIMEOUT_MS = 30_000;
315
+
316
+ /** The fallback site-config path when no nav menu names one: the convention every scaffolded site
317
+ * uses. The settings save edits the same committed YAML the nav editor does, so it resolves the path
318
+ * from the configured nav menu first and falls back to this default. */
319
+ const DEFAULT_SITE_CONFIG_PATH = 'src/lib/site.config.yaml';
320
+
321
+ /** Plain-language labels for the known tidy models, so the read-only model fact reads as a name rather
322
+ * than a bare id. An unknown id falls back to itself. */
323
+ const TIDY_MODEL_LABELS: Record<string, string> = {
324
+ 'claude-sonnet-4-6': 'Claude Sonnet',
325
+ 'claude-haiku-4-5': 'Claude Haiku',
326
+ };
327
+
328
+ /** The display label for a tidy model id, falling back to the raw id for an unknown model. */
329
+ function tidyModelLabel(model: string): string {
330
+ return TIDY_MODEL_LABELS[model] ?? model;
190
331
  }
191
332
 
333
+ /** The input cap for a single tidy request: 24000 characters (~6k input tokens). A proofread runs at
334
+ * roughly input length, so this stays comfortably inside the 30s deadline; a longer entry refuses with
335
+ * fail(413) and the author tidies a selection instead. The cap is enforced BEFORE the model call, so an
336
+ * over-long body never spends a token or risks the deadline. */
337
+ const MAX_TIDY_CHARS = 24_000;
338
+
192
339
  /** A blocked save or publish: `fail(400)` when the body links to a target absent from main. */
193
340
  export interface SaveFailure {
194
341
  /** The one-line human summary every content action failure carries. */
@@ -253,6 +400,21 @@ export interface MediaAltPropagateFailure {
253
400
  error: string;
254
401
  }
255
402
 
403
+ /** The personal-dictionary add outcome (spec 1.6): the merged, canonical sorted word list after the
404
+ * add landed. The client reconciles its pending-additions set against this (a word now in the list is
405
+ * committed and dropped from pending). Admin-internal: exported for the editor host's reconcile, not
406
+ * on the package's sveltekit subpath, so it carries no reference page. */
407
+ export interface DictionaryAddResult {
408
+ words: string[];
409
+ }
410
+
411
+ /** A refused personal-dictionary add: `fail(403)` on a failed CSRF check, `fail(400)` on a body that
412
+ * carries no valid word. The client keeps its pending additions for the session and re-attempts on
413
+ * the next save, so the word is never silently dropped. Just the one-line summary. */
414
+ export interface DictionaryAddFailure {
415
+ error: string;
416
+ }
417
+
256
418
  /** A refused media bulk delete or orphan purge: `fail(503)` for the fail-closed strict-usage refusal
257
419
  * (the whole batch refuses) or media-off / a missing bucket binding. The per-item outcomes ride the
258
420
  * returned summary, not a fail. */
@@ -341,7 +503,7 @@ export interface MediaAltPreviewPlan {
341
503
  * `form` prop carries a `?/mediaDelete`, `?/mediaUpdate`, `?/mediaReplace`, or `?/mediaAltPropagate`
342
504
  * refusal without a second type. */
343
505
  export type ContentFormFailure = Partial<
344
- SaveFailure & DeleteRefusal & RenameFailure & MediaDeleteRefusal & MediaUpdateFailure & MediaReplaceFailure & MediaAltPropagateFailure & MediaBulkFailure
506
+ SaveFailure & DeleteRefusal & RenameFailure & MediaDeleteRefusal & MediaUpdateFailure & MediaReplaceFailure & MediaAltPropagateFailure & MediaBulkFailure & TidyFailure
345
507
  >;
346
508
 
347
509
  /** The successful upload's response (`uploadAction`). The server-owned `record` rides the editor's
@@ -379,6 +541,13 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
379
541
  const mintToken =
380
542
  deps.mintToken ?? ((env: GithubKeyEnv) => cachedInstallationToken(appCredentials(runtime.backend, env)));
381
543
 
544
+ // The default Anthropic factory builds the real SDK client from the resolved key. Tests inject a fake
545
+ // (deps.anthropic) so messages.create is stubbed and no network call or real key is ever needed. The
546
+ // SDK client satisfies TidyClient structurally; the cast names that to the compiler.
547
+ const anthropicClient =
548
+ deps.anthropic ?? ((opts: { apiKey: string }) => new Anthropic({ apiKey: opts.apiKey }) as unknown as TidyClient);
549
+ const tidyTimeoutMs = deps.tidyTimeoutMs ?? DEFAULT_TIDY_TIMEOUT_MS;
550
+
382
551
  /** Main's manifest, parsed. A missing file starts empty (a fresh repo before the first commit).
383
552
  * Always read from main: pending branches carry no manifest copy. */
384
553
  async function readManifest(token: string): Promise<Manifest> {
@@ -705,13 +874,17 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
705
874
  // The media manifest joins the concurrent batch only when media is on, read from the default
706
875
  // branch (pending branches carry no copy). A rejected media read degrades to null so the edit
707
876
  // never throws on a missing or unreadable media.json; the projection below treats null as empty.
708
- const [headSha, mainRaw, manifestRaw, mediaRaw] = await Promise.all([
877
+ // The committed personal dictionary joins the concurrent batch, read from the default branch. A
878
+ // rejected read degrades to null so the edit never throws on a missing or unreadable dictionary;
879
+ // the projection below treats null as an empty word list (the editor falls back to dialect-only).
880
+ const [headSha, mainRaw, manifestRaw, mediaRaw, dictionaryRaw] = await Promise.all([
709
881
  branchHeadSha(runtime.backend, branch, token),
710
882
  readRaw(runtime.backend, path, token),
711
883
  readRaw(runtime.backend, runtime.manifestPath, token),
712
884
  runtime.resolvedAssets.enabled
713
885
  ? readRaw(runtime.backend, runtime.mediaManifestPath, token).catch(() => null)
714
886
  : Promise.resolve(null),
887
+ readRaw(runtime.backend, dictionaryFilePath(), token).catch(() => null),
715
888
  ]);
716
889
  const pending = headSha !== null;
717
890
  const raw = pending ? await readRaw({ ...runtime.backend, branch }, path, token) : mainRaw;
@@ -768,9 +941,30 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
768
941
  publishedFlash: event.url.searchParams.get('published') === '1',
769
942
  discardedFlash: event.url.searchParams.get('discarded') === '1',
770
943
  preview: resolvePreview(runtime.preview, concept.id),
944
+ // composeRuntime always resolves this from the site config's dialect; default a hand-built
945
+ // runtime that omits it to the US English dictionary so the editor always has a real filename.
946
+ spellcheckDictionary: runtime.spellcheckDictionary ?? dictionaryFileForDialect(undefined),
947
+ // The committed personal-dictionary words, normalized to the canonical sorted, deduplicated set
948
+ // so the editor seeds the Worker's personal layer with a clean list. A missing or unreadable file
949
+ // is an empty list (the dialect-only fallback).
950
+ siteDictionary: mergeDictionaryWords(parseDictionary(dictionaryRaw), []),
951
+ // The editor-tier tidy facts: the master switch, the model (for the head pill), and the resolved
952
+ // conventions (the because-line and category inference read only these). The API key is never
953
+ // exposed here. A site with no tidy block reads disabled with the default conventions.
954
+ tidy: {
955
+ enabled: runtime.tidy?.enabled ?? false,
956
+ model: runtime.tidy?.model || DEFAULT_TIDY_MODEL,
957
+ conventions: resolveTidyConventions(runtime.tidy?.conventions),
958
+ },
771
959
  };
772
960
  }
773
961
 
962
+ /** The repo-relative personal-dictionary path, defaulting a hand-built runtime that omits it to the
963
+ * same `.cairn/` content root the manifests use. composeRuntime always fills `dictionaryPath`. */
964
+ function dictionaryFilePath(): string {
965
+ return runtime.dictionaryPath ?? 'src/content/.cairn/dictionary.txt';
966
+ }
967
+
774
968
  /** Log a failed commit: a conflict is the expected last-writer-wins outcome, so it warns with a
775
969
  * reason; any other error is unexpected and logs at error with the stringified cause. Publish
776
970
  * failures carry the same shape under their own event name. */
@@ -2169,7 +2363,313 @@ export function createContentRoutes(runtime: CairnRuntime, deps: ContentRoutesDe
2169
2363
  throw redirect(303, '/admin/media?altPropagated=1');
2170
2364
  }
2171
2365
 
2172
- return { layoutLoad, indexRedirect, listLoad, mediaLibraryLoad, createAction, editLoad, saveAction, publishAction, publishAllAction, discardAction, deleteAction, listDeleteAction, renameAction, uploadAction, mediaDeleteAction, mediaBulkDelete, mediaOrphanScan, mediaPurgeOrphans, mediaUpdateAction, mediaReplacePreview, mediaReplaceApply, mediaAltPreview, mediaAltApply, mintToken };
2366
+ /** The cap on a personal-dictionary word, matched by isValidDictionaryWord. A word is one line, so
2367
+ * this bounds an abusive input; the real authority is the per-character validation, which rejects
2368
+ * whitespace and control bytes so a body can never inject an extra line into the committed file. */
2369
+ const MAX_DICTIONARY_WORD = 64;
2370
+ /** The cap on the words a single add request carries: an editor adds a handful at save time, never
2371
+ * a flood. Past this the body is treated as abusive and the surplus is dropped. */
2372
+ const MAX_DICTIONARY_BATCH = 100;
2373
+
2374
+ /** Read the committed personal dictionary, merge the validated additions in sorted order, and commit
2375
+ * the canonical file back. Shared by the first attempt and the post-conflict retry, so both re-read
2376
+ * the head and re-merge the same additions; the merge is order-independent, so a concurrent editor's
2377
+ * word that already landed is preserved and the result is the same sorted set regardless of order.
2378
+ * Returns the merged word list. Throws CommitConflictError (via commitFiles) when the branch moves
2379
+ * under the commit, which the caller catches to retry once. */
2380
+ async function mergeAndCommitDictionary(token: string, additions: string[], editor: Editor): Promise<string[]> {
2381
+ const path = dictionaryFilePath();
2382
+ // The existing file as its canonical sorted set, so a no-op add is detected against the same
2383
+ // normalization the commit would write (an already-sorted file never re-commits just to reorder).
2384
+ const canonicalExisting = mergeDictionaryWords(parseDictionary(await readRaw(runtime.backend, path, token)), []);
2385
+ const merged = mergeDictionaryWords(canonicalExisting, additions);
2386
+ // Nothing new (every addition was already present): skip the commit so an idempotent add never
2387
+ // pushes an empty commit that would redeploy the site. The merged set is still returned so the
2388
+ // client reconciles its pending additions away.
2389
+ if (merged.length === canonicalExisting.length) return merged;
2390
+ await commitFiles(
2391
+ runtime.backend,
2392
+ [{ path, content: serializeDictionary(merged) }],
2393
+ { message: `Add to dictionary: ${additions.join(', ')}`, author: { name: editor.displayName, email: editor.email } },
2394
+ token,
2395
+ );
2396
+ return merged;
2397
+ }
2398
+
2399
+ /** The repo-relative site-config path the settings save reads and commits. It is the same committed
2400
+ * YAML the nav editor edits, so it comes from the configured nav menu first and falls back to the
2401
+ * scaffold default when no menu is configured. */
2402
+ function siteConfigPath(): string {
2403
+ return runtime.navMenu?.configPath ?? DEFAULT_SITE_CONFIG_PATH;
2404
+ }
2405
+
2406
+ /** Read whether the Anthropic API key secret is present in the load's env. A presence flag for the
2407
+ * truthful visibility gate, never the key itself: the key is a Worker secret, so this only reports
2408
+ * that a non-empty `ANTHROPIC_API_KEY` exists and the value never leaves the server. */
2409
+ function keyConfigured(event: ContentEvent): boolean {
2410
+ const env = (event.platform?.env ?? {}) as Record<string, unknown>;
2411
+ return typeof env.ANTHROPIC_API_KEY === 'string' && env.ANTHROPIC_API_KEY.length > 0;
2412
+ }
2413
+
2414
+ /** Load the two-tier tidy settings (spec 2.8, Task 15). The developer tier (enabled, key, model) is
2415
+ * read-only; the editor tier is the resolved conventions block. The visibility gate is truthful: the
2416
+ * `enabled` flag is true only when `tidy.enabled` is set AND the key is present, so the screen renders
2417
+ * the convention list only then and the honest gate note otherwise. No secret is returned: only a
2418
+ * presence flag for the key. The conventions come straight from the runtime config (the same source
2419
+ * the tidy action's prompt reads), so the screen and the prompt can never diverge. */
2420
+ function settingsLoad(event: ContentEvent): SettingsData {
2421
+ requireSession(event);
2422
+ const tidy = runtime.tidy;
2423
+ const tidyEnabled = tidy?.enabled === true;
2424
+ const keyPresent = keyConfigured(event);
2425
+ const model = tidy?.model || DEFAULT_TIDY_MODEL;
2426
+ return {
2427
+ enabled: tidyEnabled && keyPresent,
2428
+ tidyEnabled,
2429
+ keyConfigured: keyPresent,
2430
+ model,
2431
+ modelLabel: tidyModelLabel(model),
2432
+ conventions: resolveTidyConventions(tidy?.conventions),
2433
+ saved: event.url.searchParams.get('saved') === '1',
2434
+ error: event.url.searchParams.get('error'),
2435
+ };
2436
+ }
2437
+
2438
+ /** Save the editor-tier tidy conventions: validate the posted block, then read-modify-commit it into
2439
+ * the same committed YAML the nav editor writes, with the session editor as author. The transport is
2440
+ * the nav save's exactly: a form POST carrying the conventions JSON, the read-modify-commit through
2441
+ * `commitFile`, and a stale-SHA `isConflict` bounced back as a reload prompt. Only the conventions
2442
+ * block is written (setTidy leaves `tidy.enabled` and `tidy.model` untouched), so an editor's save can
2443
+ * never flip the developer-tier deploy facts. The save refuses before any commit when tidy is not
2444
+ * enabled, so conventions can never be committed while `tidy.enabled` is off. Only that flag is checked here; a missing API key does not block the save. */
2445
+ async function settingsSave(event: ContentEvent): Promise<never> {
2446
+ const editor = requireSession(event);
2447
+ // The editor tier does not exist when tidy is off, so a save in that state is a 404 (no editable
2448
+ // surface to commit), the server half of the truthful gate.
2449
+ if (runtime.tidy?.enabled !== true) throw error(404, 'Tidy is not enabled for this site');
2450
+
2451
+ const form = await event.request.formData();
2452
+ let conventions: TidyConventions;
2453
+ try {
2454
+ conventions = validateTidyConventions(JSON.parse(String(form.get('conventions') ?? '{}')));
2455
+ } catch (err) {
2456
+ const message = err instanceof TidyConventionsError ? err.message : 'Invalid tidy settings';
2457
+ throw redirect(303, `/admin/settings?error=${encodeURIComponent(message)}`);
2458
+ }
2459
+
2460
+ const path = siteConfigPath();
2461
+ const token = await mintToken(event.platform?.env ?? {});
2462
+ const raw = await readRaw(runtime.backend, path, token);
2463
+ if (raw === null) throw error(404, 'Site config not found');
2464
+ // Parse first so a malformed file fails before the write rather than committing onto a broken base.
2465
+ parseSiteConfig(raw);
2466
+
2467
+ const commitFields = { concept: 'settings', id: 'tidy', editor: editor.email };
2468
+ try {
2469
+ await commitFile(
2470
+ runtime.backend,
2471
+ path,
2472
+ setTidy(raw, conventions),
2473
+ { message: 'Update tidy settings', author: { name: editor.displayName, email: editor.email } },
2474
+ token,
2475
+ );
2476
+ log.info('commit.succeeded', commitFields);
2477
+ } catch (err) {
2478
+ if (isConflict(err)) {
2479
+ log.warn('commit.failed', { ...commitFields, reason: 'conflict' });
2480
+ const message = 'The site config changed since you opened it. Reload and reapply your edits.';
2481
+ throw redirect(303, `/admin/settings?error=${encodeURIComponent(message)}`);
2482
+ }
2483
+ log.error('commit.failed', { ...commitFields, error: String(err) });
2484
+ throw err;
2485
+ }
2486
+
2487
+ throw redirect(303, '/admin/settings?saved=1');
2488
+ }
2489
+
2490
/**
 * Add a word (or a batch of words) to the git-committed personal dictionary (spec 1.6).
 *
 * Transport: mirrors the media raw-body actions. The request is a `text/plain` POST whose body is
 * JSON `{ word }` or `{ words }`, with the CSRF token in `X-Cairn-CSRF` validated by
 * validateCsrfHeader. CSRF is checked first, then the session, so a forged request is refused
 * before the session read or any GitHub call.
 *
 * Validation is load-bearing: this commits to the repo from request input. Only string candidates
 * are kept, each must pass isValidDictionaryWord (bounded by MAX_DICTIONARY_WORD), duplicates are
 * dropped, and the batch is capped at MAX_DICTIONARY_BATCH. A body that yields no valid word is
 * refused with a 400 and commits nothing.
 *
 * Concurrency: the read-merge-commit step is delegated to mergeAndCommitDictionary. When it fails
 * with a conflict (the branch moved under the commit), the same additions are merged and committed
 * once more against the new head. A second conflict gives up with a 409 rather than looping; the
 * client keeps the words pending and re-attempts on the next save.
 *
 * @param event - The request event (cookies, url, request, platform env).
 * @returns `{ words }` (the merged list returned by mergeAndCommitDictionary) on success, or a
 *   `fail` envelope: 403 (csrf), 400 (unreadable body / no valid word), 409 (two conflicts in a row).
 * @throws Whatever requireSession throws for a missing session, and any non-conflict commit error,
 *   rethrown unchanged.
 */
async function addDictionaryWord(event: ContentEvent): Promise<ReturnType<typeof fail> | DictionaryAddResult> {
  // CSRF first: a raw-body (JSON) POST, so the header witness is the authority, like the upload and
  // media actions. A failed check refuses before the session read or any GitHub call.
  if (!event.cookies || !validateCsrfHeader({ url: event.url, request: event.request, cookies: event.cookies })) {
    return fail(403, { error: 'csrf' } satisfies DictionaryAddFailure);
  }
  const editor = requireSession(event);

  let payload: { word?: unknown; words?: unknown };
  try {
    // `JSON.parse('null')` succeeds and returns null; coalesce it to an empty object so the property
    // reads below cannot throw. A null body then takes the same "no valid word" 400 path as any other
    // JSON value that carries no usable `word`/`words`.
    payload = JSON.parse(await event.request.text()) ?? {};
  } catch {
    return fail(400, { error: 'Could not read the dictionary request.' } satisfies DictionaryAddFailure);
  }

  // Collect the candidate words from `word` and/or `words`, keep only the strings, validate each
  // against the one-line word grammar, dedupe, and cap the batch. A body with no valid word refuses.
  const candidates = [
    ...(typeof payload.word === 'string' ? [payload.word] : []),
    ...(Array.isArray(payload.words) ? payload.words.filter((w): w is string => typeof w === 'string') : []),
  ];
  const additions = [...new Set(candidates.filter((w) => isValidDictionaryWord(w, MAX_DICTIONARY_WORD)))].slice(0, MAX_DICTIONARY_BATCH);
  if (additions.length === 0) {
    return fail(400, { error: 'No valid word to add to the dictionary.' } satisfies DictionaryAddFailure);
  }

  const token = await mintToken(event.platform?.env ?? {});

  // One initial attempt plus one retry. Only a conflict is retried: the retry re-merges the same
  // additions against the moved head, so a concurrent editor's word is preserved.
  const maxAttempts = 2;
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    try {
      const words = await mergeAndCommitDictionary(token, additions, editor);
      log.info(
        'dictionary.added',
        attempt === 0
          ? { editor: editor.email, words: additions }
          : { editor: editor.email, words: additions, retried: true },
      );
      return { words };
    } catch (err) {
      // Anything other than a conflict is not retryable here; surface it unchanged.
      if (!isConflict(err)) throw err;
    }
  }

  // A second conflict: give up rather than loop. The client keeps the words in its pending set
  // for the session and re-attempts on the next save, so the word is never silently dropped.
  log.warn('dictionary.add_conflict', { editor: editor.email, words: additions });
  return fail(409, { error: 'The dictionary changed while saving. It will retry on the next save.' } satisfies DictionaryAddFailure);
}
2556
+
2557
/**
 * Tidy: a light LLM copy-edit of the author's markdown (spec 2.1). The first remote model call in
 * the library, so this is the highest-blast-radius server action: it handles untrusted content and
 * the Anthropic API key. The transport mirrors the media raw-body actions (a `text/plain` POST
 * carrying JSON `{ text, scope }`, the CSRF token in `X-Cairn-CSRF`), plus a server-side deadline
 * because a tidy call on a full entry can run many seconds.
 *
 * Gate order (every refusal happens before the next step, so a refused request spends nothing):
 *   1. validateCsrfHeader first (the header witness is the authority for a raw-body POST) -> fail(403).
 *   2. requireSession (throws for a missing/expired session).
 *   3. Tidy must be enabled in runtime config and ANTHROPIC_API_KEY present in the env -> fail(503).
 *   4. Parse and bound the body -> fail(400) on unreadable/empty text, fail(413) over MAX_TIDY_CHARS.
 *   5. Only then build the prompt and call the model, bounded by `tidyTimeoutMs`.
 *
 * The untrusted text rides as the user message and is never interpolated into the system prompt.
 * The API key is neither returned nor logged, and log lines carry no content. The action commits
 * nothing, so a failed, aborted, or refused tidy cannot alter the entry.
 *
 * @param event - The request event (cookies, url, request, platform env).
 * @returns `{ corrected, model, usage }` on success, or a `fail` envelope: 403 (csrf), 503 (disabled
 *   or key missing), 400 (bad/empty body), 413 (too long), 502 (model error, abort, or empty output),
 *   422 (model refusal).
 * @throws Whatever requireSession throws for a missing session.
 */
async function tidyAction(event: ContentEvent): Promise<ReturnType<typeof fail> | TidyResult> {
  // CSRF first: a raw-body (JSON) POST, so the header witness is the authority. A failed check refuses
  // before the session read and before any model call.
  if (!event.cookies || !validateCsrfHeader({ url: event.url, request: event.request, cookies: event.cookies })) {
    return fail(403, { error: 'csrf' } satisfies TidyFailure);
  }
  const editor = requireSession(event);

  // Fail fast: refuse before any model call if tidy is off or the key is missing. No secret is ever
  // returned or logged.
  const tidy = runtime.tidy;
  if (!tidy?.enabled) {
    return fail(503, { error: 'Tidy is not enabled for this site.' } satisfies TidyFailure);
  }
  const env = (event.platform?.env ?? {}) as Record<string, unknown>;
  const apiKey = typeof env.ANTHROPIC_API_KEY === 'string' ? env.ANTHROPIC_API_KEY : '';
  if (!apiKey) {
    return fail(503, { error: 'Tidy is not configured: the Anthropic API key is missing.' } satisfies TidyFailure);
  }

  // Parse and bound the body before the call. A malformed body refuses 400; an over-long text refuses
  // 413, so no over-long input ever spends a token or risks the deadline.
  let payload: { text?: unknown; scope?: unknown };
  try {
    // `JSON.parse('null')` succeeds and returns null; coalesce it to an empty object so reading
    // `payload.text` below cannot throw. A null body then refuses with the "No text to tidy." 400.
    payload = JSON.parse(await event.request.text()) ?? {};
  } catch {
    return fail(400, { error: 'Could not read the tidy request.' } satisfies TidyFailure);
  }
  const text = typeof payload.text === 'string' ? payload.text : '';
  if (text.length === 0) {
    return fail(400, { error: 'No text to tidy.' } satisfies TidyFailure);
  }
  if (text.length > MAX_TIDY_CHARS) {
    return fail(413, { error: 'This is too long to tidy at once. Select a passage and tidy that instead.' } satisfies TidyFailure);
  }

  // Build the system prompt from the resolved conventions. The prompt is built from config, never
  // from the author's text, so the untrusted text cannot reshape the instructions.
  const system = buildTidyPrompt(resolveTidyConventions(tidy.conventions));
  const model = tidy.model || DEFAULT_TIDY_MODEL;
  // Output budget for the call. A proofread runs at roughly input length, so this is sized with
  // headroom over the capped input rather than lowballed.
  const maxTokens = 16_000;

  // Bound the model call with our own deadline so a slow call becomes a retryable fail(502). The
  // abort signal rides the request options, so the timer actually cancels the in-flight call.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), tidyTimeoutMs);
  let message: Awaited<ReturnType<TidyClient['messages']['create']>>;
  try {
    const client = anthropicClient({ apiKey });
    message = await client.messages.create(
      {
        model,
        max_tokens: maxTokens,
        system,
        messages: [{ role: 'user', content: text }],
      },
      { signal: controller.signal },
    );
  } catch {
    // A deadline overrun, an abort, or a model error all map to the retryable fail(502). The error
    // itself is deliberately not surfaced or logged (it may carry internal detail); the log line
    // carries the editor, the model, and whether our deadline fired, never the key or the content.
    log.warn('tidy.error', { editor: editor.email, model, aborted: controller.signal.aborted });
    return fail(502, { error: 'Tidy could not finish. Try again.' } satisfies TidyFailure);
  } finally {
    // Always release the deadline timer, whether the call resolved, rejected, or was aborted.
    clearTimeout(timer);
  }

  // A model refusal is a clean fail(422): the author's text is untouched, so the editor can leave it
  // as-is.
  if (message.stop_reason === 'refusal') {
    log.warn('tidy.refused', { editor: editor.email, model });
    return fail(422, { error: 'Tidy declined to edit this text.' } satisfies TidyFailure);
  }
  // NOTE(review): a `stop_reason` of 'max_tokens' is not distinguished here, so a truncated response
  // would be returned as `corrected`. Presumably the input cap keeps this out of reach; confirm.

  // Read the output as plain text: concatenate the text blocks (a normal response is one). An empty
  // result is treated as a model error rather than silently returning an empty document.
  const corrected = message.content
    .filter((block) => block.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text ?? '')
    .join('');
  if (corrected.length === 0) {
    log.warn('tidy.empty', { editor: editor.email, model });
    return fail(502, { error: 'Tidy returned nothing. Try again.' } satisfies TidyFailure);
  }

  // Report the model and usage the API echoed back (not the requested model string).
  log.info('tidy.done', { editor: editor.email, model: message.model, usage: message.usage });
  return { corrected, model: message.model, usage: message.usage };
}
2671
+
2672
+ return { layoutLoad, indexRedirect, listLoad, mediaLibraryLoad, settingsLoad, settingsSave, createAction, editLoad, saveAction, publishAction, publishAllAction, discardAction, deleteAction, listDeleteAction, renameAction, uploadAction, mediaDeleteAction, mediaBulkDelete, mediaOrphanScan, mediaPurgeOrphans, mediaUpdateAction, mediaReplacePreview, mediaReplaceApply, mediaAltPreview, mediaAltApply, addDictionaryWord, tidyAction, mintToken };
2173
2673
  }
2174
2674
 
2175
2675
  /** The cap, in characters, on the stored alt text. The human fields are display copy, not content,