@glw907/cairn-cms 0.59.0 → 0.60.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/CHANGELOG.md +60 -0
  2. package/dist/components/AdminLayout.svelte +130 -229
  3. package/dist/components/CairnAdmin.svelte +12 -41
  4. package/dist/components/CairnLogo.svelte +1 -6
  5. package/dist/components/CairnMediaLibrary.svelte +821 -1210
  6. package/dist/components/CairnTidySettings.svelte +486 -0
  7. package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
  8. package/dist/components/ComponentForm.svelte +110 -185
  9. package/dist/components/ComponentInsertDialog.svelte +163 -283
  10. package/dist/components/ConceptList.svelte +111 -191
  11. package/dist/components/ConfirmPage.svelte +5 -12
  12. package/dist/components/CsrfField.svelte +5 -11
  13. package/dist/components/DeleteDialog.svelte +15 -42
  14. package/dist/components/EditPage.svelte +786 -918
  15. package/dist/components/EditorToolbar.svelte +108 -170
  16. package/dist/components/IconPicker.svelte +23 -53
  17. package/dist/components/LinkPicker.svelte +34 -58
  18. package/dist/components/LoginPage.svelte +14 -27
  19. package/dist/components/ManageEditors.svelte +3 -15
  20. package/dist/components/MarkdownEditor.svelte +688 -789
  21. package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
  22. package/dist/components/MarkdownHelpDialog.svelte +8 -12
  23. package/dist/components/MediaCaptureCard.svelte +18 -57
  24. package/dist/components/MediaFigureControl.svelte +32 -71
  25. package/dist/components/MediaHeroField.svelte +210 -329
  26. package/dist/components/MediaInsertPopover.svelte +156 -283
  27. package/dist/components/MediaPicker.svelte +67 -131
  28. package/dist/components/NavTree.svelte +46 -78
  29. package/dist/components/RenameDialog.svelte +16 -43
  30. package/dist/components/ShortcutsDialog.svelte +9 -13
  31. package/dist/components/ShortcutsGrid.svelte +1 -2
  32. package/dist/components/TidyReview.svelte +355 -0
  33. package/dist/components/TidyReview.svelte.d.ts +47 -0
  34. package/dist/components/WebLinkDialog.svelte +19 -40
  35. package/dist/components/cairn-admin.css +768 -0
  36. package/dist/components/editor-tidy.d.ts +31 -0
  37. package/dist/components/editor-tidy.js +199 -0
  38. package/dist/components/index.d.ts +1 -0
  39. package/dist/components/index.js +1 -0
  40. package/dist/components/markdown-directives.d.ts +16 -0
  41. package/dist/components/markdown-directives.js +34 -0
  42. package/dist/components/objective-errors.d.ts +30 -0
  43. package/dist/components/objective-errors.js +113 -0
  44. package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  45. package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  46. package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  47. package/dist/components/spellcheck-worker.d.ts +80 -0
  48. package/dist/components/spellcheck-worker.js +161 -0
  49. package/dist/components/spellcheck.d.ts +148 -0
  50. package/dist/components/spellcheck.js +553 -0
  51. package/dist/components/tidy-categorize.d.ts +67 -0
  52. package/dist/components/tidy-categorize.js +392 -0
  53. package/dist/components/tidy-diff.d.ts +60 -0
  54. package/dist/components/tidy-diff.js +147 -0
  55. package/dist/components/tidy-validate.d.ts +37 -0
  56. package/dist/components/tidy-validate.js +174 -0
  57. package/dist/content/compose.d.ts +1 -1
  58. package/dist/content/compose.js +11 -0
  59. package/dist/content/site-dictionary.d.ts +31 -0
  60. package/dist/content/site-dictionary.js +82 -0
  61. package/dist/content/types.d.ts +25 -0
  62. package/dist/delivery/CairnHead.svelte +8 -11
  63. package/dist/doctor/checks-local.d.ts +1 -0
  64. package/dist/doctor/checks-local.js +55 -6
  65. package/dist/doctor/index.js +2 -1
  66. package/dist/log/events.d.ts +1 -1
  67. package/dist/nav/site-config.d.ts +98 -0
  68. package/dist/nav/site-config.js +132 -0
  69. package/dist/sveltekit/admin-dispatch.d.ts +2 -0
  70. package/dist/sveltekit/admin-dispatch.js +6 -2
  71. package/dist/sveltekit/cairn-admin.d.ts +13 -1
  72. package/dist/sveltekit/cairn-admin.js +22 -3
  73. package/dist/sveltekit/content-routes.d.ts +135 -1
  74. package/dist/sveltekit/content-routes.js +351 -3
  75. package/dist/sveltekit/tidy-prompt.d.ts +11 -0
  76. package/dist/sveltekit/tidy-prompt.js +118 -0
  77. package/package.json +11 -2
  78. package/src/lib/components/CairnAdmin.svelte +3 -0
  79. package/src/lib/components/CairnTidySettings.svelte +553 -0
  80. package/src/lib/components/EditPage.svelte +371 -2
  81. package/src/lib/components/MarkdownEditor.svelte +168 -1
  82. package/src/lib/components/TidyReview.svelte +463 -0
  83. package/src/lib/components/cairn-admin.css +25 -0
  84. package/src/lib/components/editor-tidy.ts +241 -0
  85. package/src/lib/components/index.ts +1 -0
  86. package/src/lib/components/markdown-directives.ts +35 -0
  87. package/src/lib/components/objective-errors.ts +155 -0
  88. package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  89. package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  90. package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  91. package/src/lib/components/spellcheck-worker.ts +279 -0
  92. package/src/lib/components/spellcheck.ts +693 -0
  93. package/src/lib/components/tidy-categorize.ts +460 -0
  94. package/src/lib/components/tidy-diff.ts +196 -0
  95. package/src/lib/components/tidy-validate.ts +202 -0
  96. package/src/lib/content/compose.ts +11 -1
  97. package/src/lib/content/site-dictionary.ts +84 -0
  98. package/src/lib/content/types.ts +25 -0
  99. package/src/lib/doctor/checks-local.ts +59 -5
  100. package/src/lib/doctor/index.ts +2 -0
  101. package/src/lib/log/events.ts +7 -1
  102. package/src/lib/nav/site-config.ts +197 -0
  103. package/src/lib/sveltekit/admin-dispatch.ts +7 -3
  104. package/src/lib/sveltekit/cairn-admin.ts +32 -4
  105. package/src/lib/sveltekit/content-routes.ts +504 -4
  106. package/src/lib/sveltekit/tidy-prompt.ts +153 -0
@@ -0,0 +1,174 @@
1
+ // The tidy output validation: the safety backstop that proves a tidy result is a proofread and not
2
+ // a restructure (spec 2.6) or a successful prompt injection (spec 2.3.3). A pure module taking the
3
+ // captured original and the model's corrected string and returning either the validated change set
4
+ // (the Task 12 diff) or a typed rejection reason. A rejected result is discarded by the caller with
5
+ // an honest message and the document is left untouched; nothing here mutates the buffer.
6
+ //
7
+ // Four of the five rejection categories are EXACT and are the real structural backstop: structure
8
+ // (the directive sequence and depths, and the heading count and levels), the byte-for-byte
9
+ // frontmatter, the media-hash multiset, and code (every code span and code block, which also pins
10
+ // the block count). The fifth, the divergence bound, is the only fuzzy one, and it is a
11
+ // rewrite/injection backstop only, never a voice safeguard. The config-driven prompt is what protects voice.
12
+ import { unified } from 'unified';
13
+ import remarkParse from 'remark-parse';
14
+ import remarkGfm from 'remark-gfm';
15
+ import { visit } from 'unist-util-visit';
16
+ import { fenceScan, frontmatterSpan } from './markdown-directives.js';
17
+ import { parseMediaToken } from '../media/reference.js';
18
+ import { diffTokens, diffChanges } from './tidy-diff.js';
19
+ /** The honest author-facing message a rejection maps to. The same message for every reason, by
20
+ * design: an author does not need the validator's internal taxonomy, only that the result was
21
+ * discarded and their text is safe. */
22
+ export const TIDY_REJECTION_MESSAGE = 'Tidy returned a result that changed more than the wording, so it was discarded. Your text is unchanged.';
23
+ // The divergence bound. The floor allows a fixed number of changed tokens regardless of fraction so
24
+ // a legitimate heavy proofread of a SHORT input is not penalized: a short paragraph with a typo in
25
+ // nearly every word is a real proofread, not a rewrite. The fraction catches a wholesale rewrite of
26
+ // a LONG input, where a large absolute count is past any honest copy-edit. A result is rejected only
27
+ // when it exceeds BOTH the floor and the fraction, so a short input rides the floor and a long input
28
+ // rides the fraction. The values are deliberate: 60 tokens of change covers a dense proofread of a
29
+ // few short paragraphs, and 0.5 of the total tokens marks the point where more than half the text
30
+ // changed, which no proofread does but a rewrite or a successful injection always does.
31
+ const DIVERGENCE_TOKEN_FLOOR = 60;
32
+ const DIVERGENCE_FRACTION = 0.5;
33
+ // Every `media:` token anywhere in the text, hash and slug forms alike. The validator scans the raw
34
+ // text rather than going through extractMediaRefs for two reasons. First, a true MULTISET is the
35
+ // invariant a backstop wants: extractMediaRefs dedups by hash, so a doubled token collapsing to one
36
+ // would read as equal, and the validator must catch a dropped duplicate. Second, the raw scan covers
37
+ // the whole text including frontmatter without threading the concept's FrontmatterField[] to the call
38
+ // site, which the validator otherwise has no reason to know. A token mangled inside a code fence is
39
+ // caught here too, redundantly with the code check, which is the right posture for a backstop.
40
+ const MEDIA_TOKEN = /media:[A-Za-z0-9.-]+/g;
41
+ /** The sorted multiset of valid media hashes in the text. Each `media:` occurrence is parsed; a
42
+ * malformed token (a broken hash, an illegal slug) parses to null and is dropped, so a tidy that
43
+ * CORRUPTED a hash drops it from the multiset and the comparison fails. Sorted so two multisets
44
+ * compare by value, order-independent. */
45
+ function mediaHashes(text) {
46
+ const hashes = [];
47
+ for (const m of text.matchAll(MEDIA_TOKEN)) {
48
+ const ref = parseMediaToken(m[0]);
49
+ if (ref)
50
+ hashes.push(ref.hash);
51
+ }
52
+ return hashes.sort();
53
+ }
54
+ /** The directive structure signature: each opener or closer in document order, paired with the depth
55
+ * the fence scan assigned it. Two texts share a directive structure when these signatures are equal,
56
+ * so an added, removed, or relevelled container fails the comparison. A fence-shaped line inside a
57
+ * code block is already disowned by the scan (its role is null), so a documented `:::` example does
58
+ * not enter the signature. */
59
+ function directiveSignature(text) {
60
+ const { depths, roles } = fenceScan(text.split('\n'));
61
+ const parts = [];
62
+ for (let i = 0; i < roles.length; i++) {
63
+ if (roles[i] !== null)
64
+ parts.push(`${roles[i]}@${depths[i]}`);
65
+ }
66
+ return parts.join(',');
67
+ }
68
+ /** The heading signature: every heading's level in document order. Parsed as mdast, whose
69
+ * `heading` node covers ATX and setext forms alike, so a `#` inside a code block or an escaped one
70
+ * is never counted and the level is the parser's own depth. Two texts share a heading structure
71
+ * when these are equal, so an added, removed, or relevelled heading fails the comparison. */
72
+ function headingSignature(text) {
73
+ const tree = unified().use(remarkParse).use(remarkGfm).parse(text);
74
+ const levels = [];
75
+ visit(tree, 'heading', (node) => {
76
+ if (typeof node.depth === 'number')
77
+ levels.push(node.depth);
78
+ });
79
+ return levels.join(',');
80
+ }
81
+ /** Every code span and fenced or indented code block in the text, as a sorted multiset of values.
82
+ * Parsed as mdast so the comparison sees exactly what the parser treats as code, the same authority
83
+ * the media body scan uses. Sorted so the comparison is order-independent: the divergence and
84
+ * structure checks own ordering, this check owns the contents. A `code` node is a block, an
85
+ * `inlineCode` node is a span. */
86
+ function codeContents(text) {
87
+ const tree = unified().use(remarkParse).use(remarkGfm).parse(text);
88
+ const values = [];
89
+ visit(tree, (node) => {
90
+ if ((node.type === 'code' || node.type === 'inlineCode') && typeof node.value === 'string') {
91
+ values.push(`${node.type}:${node.value}`);
92
+ }
93
+ });
94
+ return values.sort();
95
+ }
96
+ /** True when two string multisets are equal: same length and same sorted contents. */
97
+ function multisetEqual(a, b) {
98
+ if (a.length !== b.length)
99
+ return false;
100
+ for (let i = 0; i < a.length; i++) {
101
+ if (a[i] !== b[i])
102
+ return false;
103
+ }
104
+ return true;
105
+ }
106
+ // The changed token amount: the count of tokens the diff marked inserted or deleted, against the
107
+ // total tokens in the original. An equal run contributes nothing; an inserted or deleted run counts
108
+ // its own tokens. This is the rewrite measure, deliberately coarse, since the structure/token/code
109
+ // checks are the exact backstop and this only catches a wholesale rewrite that slipped past them.
110
+ function divergence(original, corrected) {
111
+ const runs = diffTokens(original, corrected);
112
+ // Count tokens by splitting each run's text on the same word/non-word boundary the diff uses; a
113
+ // run's token count is its number of word-or-nonword matches. The original's total is the equal
114
+ // plus deleted token count.
115
+ const countTokens = (s) => (s.match(/[A-Za-z0-9_]+(?:['’][A-Za-z0-9_]+)*|[^A-Za-z0-9_]+/g) ?? []).length;
116
+ let changed = 0;
117
+ let total = 0;
118
+ for (const run of runs) {
119
+ const tokens = countTokens(run.text);
120
+ if (run.kind === 'inserted' || run.kind === 'deleted')
121
+ changed += tokens;
122
+ if (run.kind === 'equal' || run.kind === 'deleted')
123
+ total += tokens;
124
+ }
125
+ return { changed, total };
126
+ }
127
+ /**
128
+ * Validate a tidy result against the captured original. Runs the exact structural checks first (a
129
+ * restructure or a token or code edit is a hard reject regardless of how little else changed), then
130
+ * the length-aware divergence bound. On success returns the Task 12 change set for the review
131
+ * surface; on failure returns the typed reason and the one honest message.
132
+ *
133
+ * The checks, in order: the directive opener/closer sequence and depths, the ATX heading count and
134
+ * levels, the fenced-code-block count (folded into the code-contents multiset), the byte-for-byte
135
+ * frontmatter via the shared frontmatterSpan helper, the media-hash multiset, the code-span and
136
+ * code-block contents, and finally the divergence bound. A pure function: it reads the two strings
137
+ * and nothing else, and it never mutates the buffer.
138
+ */
139
+ export function validateTidy(original, corrected) {
140
+ // Directive structure: the opener/closer sequence and depths must match exactly.
141
+ if (directiveSignature(original) !== directiveSignature(corrected)) {
142
+ return { ok: false, reason: 'structure', message: TIDY_REJECTION_MESSAGE };
143
+ }
144
+ // Headings: the same ATX headings at the same levels, in order.
145
+ if (headingSignature(original) !== headingSignature(corrected)) {
146
+ return { ok: false, reason: 'structure', message: TIDY_REJECTION_MESSAGE };
147
+ }
148
+ // Frontmatter: byte-for-byte equal, via the same helper the spellcheck skip uses. A null span
149
+ // (no frontmatter) on both sides slices to the empty string on both, so a body-only document
150
+ // passes; a span on one side and not the other diverges.
151
+ const fmOriginal = frontmatterSpan(original);
152
+ const fmCorrected = frontmatterSpan(corrected);
153
+ const fmTextOriginal = fmOriginal ? original.slice(fmOriginal.from, fmOriginal.to) : '';
154
+ const fmTextCorrected = fmCorrected ? corrected.slice(fmCorrected.from, fmCorrected.to) : '';
155
+ if (fmTextOriginal !== fmTextCorrected) {
156
+ return { ok: false, reason: 'frontmatter', message: TIDY_REJECTION_MESSAGE };
157
+ }
158
+ // Media: the exact same multiset of hashes across the whole text.
159
+ if (!multisetEqual(mediaHashes(original), mediaHashes(corrected))) {
160
+ return { ok: false, reason: 'media', message: TIDY_REJECTION_MESSAGE };
161
+ }
162
+ // Code: every code span and fenced or indented block identical. The block count is folded in
163
+ // here: a multiset of block-and-span values that differs in count or contents fails.
164
+ if (!multisetEqual(codeContents(original), codeContents(corrected))) {
165
+ return { ok: false, reason: 'code', message: TIDY_REJECTION_MESSAGE };
166
+ }
167
+ // Divergence: rejected only when the changed amount exceeds BOTH the absolute floor and the
168
+ // fraction of the total. A short input rides the floor; a long input rides the fraction.
169
+ const { changed, total } = divergence(original, corrected);
170
+ if (changed > DIVERGENCE_TOKEN_FLOOR && changed > total * DIVERGENCE_FRACTION) {
171
+ return { ok: false, reason: 'divergence', message: TIDY_REJECTION_MESSAGE };
172
+ }
173
+ return { ok: true, changes: diffChanges(original, corrected) };
174
+ }
@@ -1,5 +1,5 @@
1
1
  import type { CairnAdapter, CairnExtension, CairnRuntime } from './types.js';
2
- import type { SiteConfig } from '../nav/site-config.js';
2
+ import { type SiteConfig } from '../nav/site-config.js';
3
3
  /** The input to {@link composeRuntime}. `siteConfig` is required so the per-concept URL policy is
4
4
  * always derived from one source and can never be silently dropped. `extensions` fold in after the
5
5
  * adapter's concepts. */
@@ -1,5 +1,6 @@
1
1
  import { resolveConcepts } from './concepts.js';
2
2
  import { normalizeAssets } from '../media/config.js';
3
+ import { dictionaryFileForDialect } from '../nav/site-config.js';
3
4
  /**
4
5
  * Fold an adapter and any extensions into the composed runtime (seam 2). The per-concept URL policy
5
6
  * is derived from the site config, the same source the delivery path uses, so the runtime and
@@ -36,6 +37,16 @@ export function composeRuntime({ adapter, siteConfig, extensions = [] }) {
36
37
  assets: adapter.assets,
37
38
  resolvedAssets: normalizeAssets(adapter.assets),
38
39
  mediaManifestPath: adapter.mediaManifestPath ?? 'src/content/.cairn/media.json',
40
+ // The personal dictionary sits beside the manifests under the same `.cairn/` content root, so the
41
+ // spec's `content/.cairn/dictionary.txt` resolves the same configurable way the manifest paths do.
42
+ dictionaryPath: adapter.dictionaryPath ?? 'src/content/.cairn/dictionary.txt',
43
+ // The spellcheck dictionary is resolved once here from the site config's dialect (default US),
44
+ // so the runtime and the editor never re-derive it. The site config is the one home for the
45
+ // dialect; the editor resolves this filename to a real asset URL on the main thread.
46
+ spellcheckDictionary: dictionaryFileForDialect(siteConfig.spellcheck?.dialect),
47
+ // The tidy block passes through from the site config; the tidy action reads enabled/model at call
48
+ // time and builds its prompt from conventions. Absent means tidy is off.
49
+ tidy: siteConfig.tidy,
39
50
  adminPanels,
40
51
  fieldTypes,
41
52
  };
@@ -0,0 +1,31 @@
1
+ /** True when a word is a single valid dictionary line (no whitespace, no control characters, non-empty
2
+ * and within the length bound). A leading "#" is rejected: parseDictionary re-reads such a line as a
3
+ * comment, so committing it would silently drop the word on the next read. The action uses this to
4
+ * reject untrusted input before the merge, so a newline or a control byte can never inject an extra
5
+ * line into the committed file. */
6
+ export declare function isValidDictionaryWord(word: string, maxLength?: number): boolean;
7
+ /**
8
+ * Parse the committed dictionary file text into its word list. Comment lines (a `#` after optional
9
+ * leading whitespace) and blank lines are dropped; every other line is trimmed and kept. A null or
10
+ * empty file yields an empty list. The result preserves the file's order and is not deduplicated or
11
+ * sorted here, so a caller can see exactly what the file held; `mergeDictionaryWords` is the path that
12
+ * normalizes to the sorted, deduplicated set.
13
+ */
14
+ export declare function parseDictionary(text: string | null): string[];
15
+ /**
16
+ * Merge `additions` into the `existing` word list, returning the canonical sorted, deduplicated set.
17
+ * The merge is case-insensitive (a duplicate add of an existing word, in any case, collapses) and
18
+ * order-independent: the inputs are unioned by lowercased key and sorted, so re-merging the same
19
+ * additions at a moved head produces the same set. The first-seen casing of each word wins, so an
20
+ * existing "Cairn" is kept over a later "cairn". Invalid words from either list (whitespace, control
21
+ * characters, empty, over-long, or a leading "#") are skipped here as a backstop; the action validates first.
22
+ */
23
+ export declare function mergeDictionaryWords(existing: readonly string[], additions: readonly string[]): string[];
24
+ /**
25
+ * Serialize a word list to the canonical committed file text: the header comment, then one word per
26
+ * line sorted case-insensitively, with a trailing newline. The input is run through the same dedup
27
+ * and sort as the merge, so serializing an unsorted or duplicate-bearing list still yields the
28
+ * canonical form. An empty word list serializes to just the header (so the file stays a valid,
29
+ * recognizable dictionary rather than vanishing).
30
+ */
31
+ export declare function serializeDictionary(words: readonly string[]): string;
@@ -0,0 +1,82 @@
1
+ // cairn-cms: the git-committed per-site personal dictionary (spec 1.6). One word per line,
2
+ // sorted, with comment lines (starting with #) and blank lines tolerated on read. This module is
3
+ // pure: it parses the committed file text, inserts words in sorted order, and serializes the
4
+ // canonical form. The insert is order-independent, so the action's commit-and-retry can re-merge
5
+ // the pending additions at a new head and reach the same sorted set regardless of insertion order.
6
+ //
7
+ // The canonical serialization keeps a single leading header comment and one sorted word per line.
8
+ // An inbound file's other comment lines are dropped on serialize (the header is regenerated), so the
9
+ // committed file stays a clean, diffable, sorted word list; a maintainer who wants a richer comment
10
+ // edits it in git, and the next add through here normalizes it back to the header.
11
+ /** The header comment the canonical serialization writes above the sorted words. */
12
+ const HEADER = '# cairn personal dictionary: one word per line, sorted, kept in git.';
13
+ // A dictionary word: a single line carrying no whitespace and no ASCII control characters, so it can
14
+ // never inject an extra line into the committed file. Hyphens and apostrophes are allowed, since real
15
+ // words carry them ("well-known", "O'Brien"); a non-ASCII surname or place name validates too, since
16
+ // the test is for whitespace and control bytes rather than an allow-list of letters. The action runs
17
+ // inbound words through this before a merge.
18
+ const WORD_RE = /^[^\s\p{Cc}]+$/u;
19
+ /** True when a word is a single valid dictionary line (no whitespace, no control characters, non-empty
20
+ * and within the length bound). A leading "#" is rejected: parseDictionary re-reads such a line as a
21
+ * comment, so committing it would silently drop the word on the next read. The action uses this to
22
+ * reject untrusted input before the merge, so a newline or a control byte can never inject an extra
23
+ * line into the committed file. */
24
+ export function isValidDictionaryWord(word, maxLength = 64) {
25
+ if (word.startsWith('#'))
26
+ return false;
27
+ return word.length > 0 && word.length <= maxLength && WORD_RE.test(word);
28
+ }
29
+ /**
30
+ * Parse the committed dictionary file text into its word list. Comment lines (a `#` after optional
31
+ * leading whitespace) and blank lines are dropped; every other line is trimmed and kept. A null or
32
+ * empty file yields an empty list. The result preserves the file's order and is not deduplicated or
33
+ * sorted here, so a caller can see exactly what the file held; `mergeDictionaryWords` is the path that
34
+ * normalizes to the sorted, deduplicated set.
35
+ */
36
+ export function parseDictionary(text) {
37
+ if (!text)
38
+ return [];
39
+ const words = [];
40
+ for (const line of text.split('\n')) {
41
+ const trimmed = line.trim();
42
+ if (trimmed === '' || trimmed.startsWith('#'))
43
+ continue;
44
+ words.push(trimmed);
45
+ }
46
+ return words;
47
+ }
48
+ /** Case-insensitive comparator for the canonical sort: both words are lowercased, then compared with
49
+ * localeCompare. No locale is passed, so the order of non-ASCII words follows the runtime's default locale. */
50
+ function byWord(a, b) {
51
+ return a.toLowerCase().localeCompare(b.toLowerCase());
52
+ }
53
+ /**
54
+ * Merge `additions` into the `existing` word list, returning the canonical sorted, deduplicated set.
55
+ * The merge is case-insensitive (a duplicate add of an existing word, in any case, collapses) and
56
+ * order-independent: the inputs are unioned by lowercased key and sorted, so re-merging the same
57
+ * additions at a moved head produces the same set. The first-seen casing of each word wins, so an
58
+ * existing "Cairn" is kept over a later "cairn". Invalid words from either list (whitespace, control
59
+ * characters, empty, over-long, or a leading "#") are skipped here as a backstop; the action validates first.
60
+ */
61
+ export function mergeDictionaryWords(existing, additions) {
62
+ const byKey = new Map();
63
+ for (const word of [...existing, ...additions]) {
64
+ if (!isValidDictionaryWord(word))
65
+ continue;
66
+ const key = word.toLowerCase();
67
+ if (!byKey.has(key))
68
+ byKey.set(key, word);
69
+ }
70
+ return [...byKey.values()].sort(byWord);
71
+ }
72
+ /**
73
+ * Serialize a word list to the canonical committed file text: the header comment, then one word per
74
+ * line sorted case-insensitively, with a trailing newline. The input is run through the same dedup
75
+ * and sort as the merge, so serializing an unsorted or duplicate-bearing list still yields the
76
+ * canonical form. An empty word list serializes to just the header (so the file stays a valid,
77
+ * recognizable dictionary rather than vanishing).
78
+ */
79
+ export function serializeDictionary(words) {
80
+ const sorted = mergeDictionaryWords(words, []);
81
+ return [HEADER, ...sorted].join('\n') + '\n';
82
+ }
@@ -238,6 +238,11 @@ export interface CairnAdapter {
238
238
  /** Repo-relative path to the committed media manifest. Defaults to src/content/.cairn/media.json,
239
239
  * applied in composeRuntime. Sits outside any concept directory, like the content manifest. */
240
240
  mediaManifestPath?: string;
241
+ /** Repo-relative path to the committed personal dictionary file. Defaults to
242
+ * src/content/.cairn/dictionary.txt, applied in composeRuntime: the same `.cairn/` content root the
243
+ * manifests use, so the spec's `content/.cairn/dictionary.txt` resolves the same configurable way the
244
+ * manifest paths do. One word per line, sorted, comment lines allowed (see site-dictionary.ts). */
245
+ dictionaryPath?: string;
241
246
  /** Directive component registry; the renderer and the future palette derive from it (seam 3). */
242
247
  registry?: ComponentRegistry;
243
248
  /** The site's glyph name to SVG path-data map, for the admin icon picker and the renderer. */
@@ -345,6 +350,13 @@ export interface CairnRuntime {
345
350
  manifestPath: string;
346
351
  /** The repo-relative path to the committed media manifest, defaulted in composeRuntime. */
347
352
  mediaManifestPath: string;
353
+ /** The repo-relative path to the committed personal dictionary file (one word per line, sorted),
354
+ * defaulted in composeRuntime to src/content/.cairn/dictionary.txt: the same `.cairn/` content root
355
+ * the manifests use. The edit load reads it and threads its words onto EditData; the
356
+ * addDictionaryWord action reads, merges, and commits it. Optional on the runtime so a hand-built
357
+ * runtime need not set it; composeRuntime always fills it, and the edit load and the action default
358
+ * a missing value to the same content-root path. */
359
+ dictionaryPath?: string;
348
360
  /** The adapter's asset config resolved once at compose: `{ enabled: false }` for a no-media site,
349
361
  * otherwise the filled config the upload, storage, delivery, and resolver paths read. */
350
362
  resolvedAssets: import('../media/config.js').ResolvedAssetConfig;
@@ -355,6 +367,19 @@ export interface CairnRuntime {
355
367
  /** The live site's content styling for the preview frame; passed through from the adapter. */
356
368
  preview?: PreviewConfig;
357
369
  assets?: AssetConfig;
370
+ /** The editor's spellcheck dictionary file, resolved once at compose from the site config's
371
+ * `spellcheck.dialect` (defaulting to US English). The edit load threads it onto EditData and the
372
+ * editor resolves it to a real asset URL on the main thread, so the Worker receives the URL and
373
+ * never reads config. Just the filename, e.g. "dictionary-en-us.txt". Optional on the runtime so a
374
+ * hand-built runtime need not set it; composeRuntime always fills it, and the edit load defaults a
375
+ * missing value to the US English dictionary. */
376
+ spellcheckDictionary?: string;
377
+ /** The editor tidy (LLM copy-edit) settings, passed through from the site config. Optional on the
378
+ * runtime so a hand-built runtime need not set it; composeRuntime threads it from
379
+ * `siteConfig.tidy`. The tidy action reads `enabled` and `model` at call time, and builds its prompt
380
+ * from `conventions`. Absent (or `enabled` false) means tidy is off, and the action refuses with a
381
+ * fail(503) before any model call. */
382
+ tidy?: import('../nav/site-config.js').TidyConfig;
358
383
  /** Admin panels contributed by extensions (Mode 2). Empty until Plan 09 wires the dispatch route. */
359
384
  adminPanels?: AdminPanel[];
360
385
  /** Field types contributed by extensions (Mode 2). Empty until Plan 09 wires the form dispatch. */
@@ -5,17 +5,14 @@ tags, and one escaped JSON-LD script. The title renders from seo.title by defaul
5
5
  lets the site own the <title>, and a string overrides it. It carries no CSS, so it pulls in no
6
6
  admin styles.
7
7
  -->
8
- <script lang="ts">
9
- import type { SeoMeta } from './seo.js';
10
- import { jsonLdScript } from './json-ld.js';
11
-
12
- let {
13
- /** The plain-data head to render. */
14
- seo,
15
- /** Title override: a string replaces seo.title, false lets the site own <title>. */
16
- title,
17
- }: { seo: SeoMeta; title?: string | false } = $props();
18
- const titleText = $derived(title === undefined ? seo.title : title);
8
+ <script lang="ts">import { jsonLdScript } from "./json-ld.js";
9
+ let {
10
+ /** The plain-data head to render. */
11
+ seo,
12
+ /** Title override: a string replaces seo.title, false lets the site own <title>. */
13
+ title
14
+ } = $props();
15
+ const titleText = $derived(title === void 0 ? seo.title : title);
19
16
  </script>
20
17
 
21
18
  <svelte:head>
@@ -5,3 +5,4 @@ export declare const configObservability: DoctorCheck;
5
5
  export declare const configCsrfDisable: DoctorCheck;
6
6
  export declare const configPublicOrigin: DoctorCheck;
7
7
  export declare const configSiteConfig: DoctorCheck;
8
+ export declare const configTidyKey: DoctorCheck;
@@ -129,17 +129,21 @@ export const configPublicOrigin = {
129
129
  // evaluate, so the check probes the conventional spots instead (the repo root and the two
130
130
  // src locations the production sites use).
131
131
  const SITE_CONFIG_PATHS = ['site.config.yaml', 'src/lib/site.config.yaml', 'src/site.config.yaml'];
132
+ // Read the first site.config.yaml that exists in a conventional spot, or null when none does.
133
+ async function readSiteConfigText(ctx) {
134
+ for (const path of SITE_CONFIG_PATHS) {
135
+ const text = await ctx.readFile(path);
136
+ if (text !== null)
137
+ return text;
138
+ }
139
+ return null;
140
+ }
132
141
  export const configSiteConfig = {
133
142
  id: 'config.site-config',
134
143
  conditionId: 'config.site-config-invalid',
135
144
  title: 'Site config',
136
145
  async run(ctx) {
137
- let text = null;
138
- for (const path of SITE_CONFIG_PATHS) {
139
- text = await ctx.readFile(path);
140
- if (text !== null)
141
- break;
142
- }
146
+ const text = await readSiteConfigText(ctx);
143
147
  if (text === null)
144
148
  return skip(`no site.config.yaml found (looked in ${SITE_CONFIG_PATHS.join(', ')})`);
145
149
  try {
@@ -157,3 +161,48 @@ export const configSiteConfig = {
157
161
  }
158
162
  },
159
163
  };
164
+ // A site enables tidy with `tidy.enabled: true` in the committed config. A config that fails to parse
165
+ // is already reported by configSiteConfig, so a parse error here reads as "not enabled" and the check
166
+ // skips rather than doubling the failure.
167
+ function tidyEnabled(text) {
168
+ let config;
169
+ try {
170
+ config = parseSiteConfig(text);
171
+ }
172
+ catch {
173
+ return false;
174
+ }
175
+ return config.tidy?.enabled === true;
176
+ }
177
+ // The Anthropic key is a Worker secret, so the doctor cannot prove it is unset (it is in neither the
178
+ // committed wrangler config nor anything readFile reaches). It CAN read the two spots where a key
179
+ // would appear if set as a plain var: the wrangler config text and .dev.vars. A bare presence-by-name read
180
+ // is enough for the heuristic; the runtime fail(503) and --probe are the real truth checks.
181
+ function keyAppearsIn(text) {
182
+ return text !== null && text.includes('ANTHROPIC_API_KEY');
183
+ }
184
+ // The tidy secret heuristic. It reuses the config.bindings-missing condition rather than registering a
185
+ // new one, so the readiness count holds (the same pattern configMediaBucket uses). A failing result here is not a
186
+ // definitive unset claim: it asks the operator to verify the secret, since a wrangler secret is
187
+ // invisible to the CLI.
188
+ export const configTidyKey = {
189
+ id: 'config.tidy-key',
190
+ conditionId: 'config.bindings-missing',
191
+ title: 'Tidy API key',
192
+ async run(ctx) {
193
+ const text = await readSiteConfigText(ctx);
194
+ if (text === null)
195
+ return skip('no site.config.yaml found, so tidy enablement is unknown');
196
+ if (!tidyEnabled(text))
197
+ return skip('tidy is not enabled in the site config');
198
+ const wrangler = (await ctx.readFile('wrangler.jsonc')) ?? (await ctx.readFile('wrangler.toml'));
199
+ if (keyAppearsIn(wrangler)) {
200
+ return pass('ANTHROPIC_API_KEY appears in the wrangler vars (verify it is the real key, not a placeholder)');
201
+ }
202
+ const devVars = await ctx.readFile('.dev.vars');
203
+ if (keyAppearsIn(devVars)) {
204
+ return pass('ANTHROPIC_API_KEY appears in .dev.vars (the local override; verify the Worker secret is set for production)');
205
+ }
206
+ return fail('tidy is enabled but ANTHROPIC_API_KEY is in neither the wrangler vars nor .dev.vars; verify the secret is configured with wrangler secret put ANTHROPIC_API_KEY');
207
+ },
208
+ };
@@ -1,4 +1,4 @@
1
- import { configBindings, configMediaBucket, configObservability, configCsrfDisable, configSiteConfig, configPublicOrigin, } from './checks-local.js';
1
+ import { configBindings, configMediaBucket, configObservability, configCsrfDisable, configSiteConfig, configPublicOrigin, configTidyKey, } from './checks-local.js';
2
2
  import { configDependencyFloors } from './check-floors.js';
3
3
  import { emailSenderOnboarded, edgeHttpsForced, edgeHsts, authStore } from './checks-cloudflare.js';
4
4
  import { githubApp } from './checks-github.js';
@@ -108,6 +108,7 @@ export function defaultChecks() {
108
108
  configCsrfDisable,
109
109
  configSiteConfig,
110
110
  configPublicOrigin,
111
+ configTidyKey,
111
112
  configDependencyFloors,
112
113
  emailSenderOnboarded,
113
114
  edgeHttpsForced,
@@ -1 +1 @@
1
- export type CairnLogEvent = 'auth.link.requested' | 'auth.link.send_failed' | 'auth.token.minted' | 'auth.token.confirmed' | 'auth.session.created' | 'auth.session.destroyed' | 'commit.succeeded' | 'commit.failed' | 'config.invalid' | 'entry.published' | 'entry.discarded' | 'publish.failed' | 'github.unreachable' | 'guard.rejected' | 'media.uploaded' | 'media.upload_failed' | 'media.delivery_failed' | 'media.orphan_reconcile' | 'media.resolve_missing' | 'media.deleted' | 'media.delete_blocked' | 'media.bulk_deleted' | 'media.orphans_purged' | 'media.replaced' | 'media.replace_blocked' | 'media.alt_propagated';
1
+ export type CairnLogEvent = 'auth.link.requested' | 'auth.link.send_failed' | 'auth.token.minted' | 'auth.token.confirmed' | 'auth.session.created' | 'auth.session.destroyed' | 'commit.succeeded' | 'commit.failed' | 'config.invalid' | 'entry.published' | 'entry.discarded' | 'publish.failed' | 'github.unreachable' | 'guard.rejected' | 'media.uploaded' | 'media.upload_failed' | 'media.delivery_failed' | 'media.orphan_reconcile' | 'media.resolve_missing' | 'media.deleted' | 'media.delete_blocked' | 'media.bulk_deleted' | 'media.orphans_purged' | 'media.replaced' | 'media.replace_blocked' | 'media.alt_propagated' | 'dictionary.added' | 'dictionary.add_conflict' | 'tidy.done' | 'tidy.error' | 'tidy.refused' | 'tidy.empty';
@@ -34,8 +34,96 @@ export interface SiteConfig {
34
34
  menus?: Record<string, unknown>;
35
35
  /** Per-concept URL policy: the permalink pattern and date-prefix granularity, keyed by concept id. */
36
36
  content?: Record<string, ConceptUrlPolicy>;
37
+ /** The editor spellcheck settings. The dialect is declared once per site (spec 1.2), so a British
38
+ * site loads the British word list and "colour" reads as correct. Today only US English ships, so an
39
+ * unset or unknown dialect resolves to it. */
40
+ spellcheck?: {
41
+ dialect?: string;
42
+ };
43
+ /** The editor tidy (LLM copy-edit) settings. Opt-in at the site level (spec 2.8): tidy is a remote,
44
+ * costly model call, so the whole block is optional and `enabled` defaults false. The model is a
45
+ * developer-tier fact; the `conventions` block is the editor-tier per-convention config that builds
46
+ * the prompt's CONVENTIONS section. The Anthropic API key is a Worker secret, never config. */
47
+ tidy?: TidyConfig;
37
48
  [key: string]: unknown;
38
49
  }
50
+ /**
51
+ * The tidy block on the site config. Every field is optional so the YAML can carry as little as
52
+ * `tidy: { enabled: true }` and the defaults fill the rest.
53
+ */
54
+ export interface TidyConfig {
55
+ /** Master switch. Default false; tidy is opt-in (spec 2.8, decision 1). */
56
+ enabled?: boolean;
57
+ /** The model id. Default `claude-sonnet-4-6`; the alternative is `claude-haiku-4-5` (spec 2.2). */
58
+ model?: string;
59
+ /** The per-convention toggles that build the prompt's CONVENTIONS section. */
60
+ conventions?: Partial<TidyConventions>;
61
+ }
62
+ /** The default tidy model when a site sets none: Sonnet, the judgment floor for a light copy-edit. */
63
+ export declare const DEFAULT_TIDY_MODEL = "claude-sonnet-4-6";
64
+ /**
65
+ * The corrected convention set (spec "The corrected convention set"), the resolved shape the prompt
66
+ * builder consumes. Every field carries a concrete value; `resolveTidyConventions` fills the defaults
67
+ * from a partial config. The Fixes group is the objective fixes (default on, governed by the always-on
68
+ * core); the style tier defaults off (a falsy variant means off); the advanced tier defaults off.
69
+ * Sentence spacing is dropped on purpose and regional spelling is `spellcheck.dialect`, not a toggle.
70
+ */
71
+ export interface TidyConventions {
72
+ /** The objective Fixes group (spelling, grammar, doubled words, whitespace, capitals, terminal
73
+ * punctuation). Default on. The always-on core governs it; this toggle lets the screen turn the
74
+ * group off. */
75
+ fixes: boolean;
76
+ /** Oxford comma position. Off when undefined; `always` | `complex-only` (AP) | `never`. */
77
+ oxfordComma?: 'always' | 'complex-only' | 'never';
78
+ /** Number style threshold. Off when undefined; the always-numeral exception sets (ages, dates,
79
+ * measurements, percentages) apply at any threshold. */
80
+ numberStyle?: 'under-ten' | 'under-hundred' | 'always-numerals';
81
+ /** Measurement notation only (never the system, never the number). Off when undefined. */
82
+ measurements?: 'abbreviate' | 'spell-out';
83
+ /** Percent rendering. Off when undefined; `sign` is "%", `word` is "percent". */
84
+ percent?: 'sign' | 'word';
85
+ /** Em-dash spacing. Off when undefined. */
86
+ emDash?: 'spaced' | 'closed';
87
+ /** Turn a hyphen between two numbers into an en dash. Default off. */
88
+ enDashRanges: boolean;
89
+ /** Ellipsis rendering. Off when undefined. */
90
+ ellipsis?: 'single-char' | 'three-dots';
91
+ /** Time format. Off when undefined. */
92
+ timeFormat?: '5 PM' | '5pm' | '5 p.m.';
93
+ /** Advanced: convert straight quotes to curly with the full apostrophe rule set. Default off. */
94
+ smartQuotes: boolean;
95
+ /** Advanced: correct brand and proper-noun capitalization on a curated list only. Default off. */
96
+ brandCaps: boolean;
97
+ }
98
+ /** The resting tidy convention set: Fixes on, every style and advanced toggle off. */
99
+ export declare function defaultTidyConventions(): TidyConventions;
100
+ /**
101
+ * Resolve a partial conventions config (from the YAML) into the concrete TidyConventions the prompt
102
+ * builder consumes. An absent field falls to its default: Fixes on, the style and advanced toggles
103
+ * off. A multi-position toggle stays undefined (off) unless the config names a variant.
104
+ */
105
+ export declare function resolveTidyConventions(partial: Partial<TidyConventions> | undefined): TidyConventions;
106
+ export declare class TidyConventionsError extends Error {
107
+ /** A malformed settings payload maps to the same diagnostic as a malformed config. */
108
+ readonly conditionId = "config.site-config-invalid";
109
+ constructor(message: string);
110
+ }
111
+ /**
112
+ * Validate and normalize an untrusted conventions object (from the settings form) into a concrete
113
+ * TidyConventions. This input is committed to the repo, so every field is bounded to its known set:
114
+ * a boolean toggle must be a boolean, and a multi-position toggle must be one of its listed variants
115
+ * or absent (off). An unknown key is dropped rather than carried, so the committed block can never
116
+ * grow a junk key. Throws TidyConventionsError on a value outside its allowed set.
117
+ */
118
+ export declare function validateTidyConventions(value: unknown): TidyConventions;
119
+ /** The dialect string when a site sets none: US English, the only dictionary that ships today. */
120
+ export declare const DEFAULT_DIALECT = "en-US";
121
+ /**
122
+ * The dictionary asset file for a site's configured dialect, defaulting to US English. The main thread
123
+ * resolves this filename to a real URL (the spike's out-of-bundle asset) and hands it to the Worker in
124
+ * the `init` message; the Worker never reads config. An unknown dialect falls back to the default file.
125
+ */
126
+ export declare function dictionaryFileForDialect(dialect: string | undefined): string;
39
127
  export declare class SiteConfigError extends Error {
40
128
  /** The registered diagnostic condition a malformed site config maps to (mirrors CairnError). */
41
129
  readonly conditionId = "config.site-config-invalid";
@@ -54,3 +142,13 @@ export declare function urlPolicyFrom(config: SiteConfig): Record<string, Concep
54
142
  * serializes without `url`/`children` keys.
55
143
  */
56
144
  export declare function setMenu(raw: string, name: string, tree: NavNode[]): string;
145
+ /**
146
+ * Write the editor-tier tidy conventions into the YAML site-config text and reserialize, preserving
147
+ * every other top-level key and the file's comments and key order (parseDocument round-trips both,
148
+ * the same machinery setMenu uses). Only the `tidy.conventions` block is touched: the developer-tier
149
+ * `tidy.enabled` and `tidy.model` are read-only in the screen, so this leaves them as they are and a
150
+ * save can never silently flip the deploy-time facts. A convention whose value is undefined (a
151
+ * collapsed multi-position toggle, off) is dropped, so the committed block carries only the on
152
+ * toggles, the same shape from which `resolveTidyConventions` fills the defaults back in on read.
153
+ */
154
+ export declare function setTidy(raw: string, conventions: Partial<TidyConventions>): string;