@glw907/cairn-cms 0.59.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +47 -0
  2. package/dist/components/CairnAdmin.svelte +3 -0
  3. package/dist/components/CairnTidySettings.svelte +553 -0
  4. package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
  5. package/dist/components/EditPage.svelte +371 -2
  6. package/dist/components/MarkdownEditor.svelte +168 -1
  7. package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
  8. package/dist/components/TidyReview.svelte +463 -0
  9. package/dist/components/TidyReview.svelte.d.ts +47 -0
  10. package/dist/components/cairn-admin.css +764 -0
  11. package/dist/components/editor-tidy.d.ts +31 -0
  12. package/dist/components/editor-tidy.js +199 -0
  13. package/dist/components/index.d.ts +1 -0
  14. package/dist/components/index.js +1 -0
  15. package/dist/components/markdown-directives.d.ts +16 -0
  16. package/dist/components/markdown-directives.js +34 -0
  17. package/dist/components/objective-errors.d.ts +30 -0
  18. package/dist/components/objective-errors.js +113 -0
  19. package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  20. package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  21. package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  22. package/dist/components/spellcheck-worker.d.ts +80 -0
  23. package/dist/components/spellcheck-worker.js +161 -0
  24. package/dist/components/spellcheck.d.ts +146 -0
  25. package/dist/components/spellcheck.js +541 -0
  26. package/dist/components/tidy-categorize.d.ts +67 -0
  27. package/dist/components/tidy-categorize.js +392 -0
  28. package/dist/components/tidy-diff.d.ts +60 -0
  29. package/dist/components/tidy-diff.js +147 -0
  30. package/dist/components/tidy-validate.d.ts +37 -0
  31. package/dist/components/tidy-validate.js +174 -0
  32. package/dist/content/compose.d.ts +1 -1
  33. package/dist/content/compose.js +11 -0
  34. package/dist/content/site-dictionary.d.ts +31 -0
  35. package/dist/content/site-dictionary.js +82 -0
  36. package/dist/content/types.d.ts +25 -0
  37. package/dist/doctor/checks-local.d.ts +1 -0
  38. package/dist/doctor/checks-local.js +55 -6
  39. package/dist/doctor/index.js +2 -1
  40. package/dist/log/events.d.ts +1 -1
  41. package/dist/nav/site-config.d.ts +98 -0
  42. package/dist/nav/site-config.js +132 -0
  43. package/dist/sveltekit/admin-dispatch.d.ts +2 -0
  44. package/dist/sveltekit/admin-dispatch.js +6 -2
  45. package/dist/sveltekit/cairn-admin.d.ts +13 -1
  46. package/dist/sveltekit/cairn-admin.js +22 -3
  47. package/dist/sveltekit/content-routes.d.ts +135 -1
  48. package/dist/sveltekit/content-routes.js +351 -3
  49. package/dist/sveltekit/tidy-prompt.d.ts +11 -0
  50. package/dist/sveltekit/tidy-prompt.js +118 -0
  51. package/package.json +10 -1
  52. package/src/lib/components/CairnAdmin.svelte +3 -0
  53. package/src/lib/components/CairnTidySettings.svelte +553 -0
  54. package/src/lib/components/EditPage.svelte +371 -2
  55. package/src/lib/components/MarkdownEditor.svelte +168 -1
  56. package/src/lib/components/TidyReview.svelte +463 -0
  57. package/src/lib/components/cairn-admin.css +25 -0
  58. package/src/lib/components/editor-tidy.ts +241 -0
  59. package/src/lib/components/index.ts +1 -0
  60. package/src/lib/components/markdown-directives.ts +35 -0
  61. package/src/lib/components/objective-errors.ts +155 -0
  62. package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
  63. package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
  64. package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
  65. package/src/lib/components/spellcheck-worker.ts +279 -0
  66. package/src/lib/components/spellcheck.ts +679 -0
  67. package/src/lib/components/tidy-categorize.ts +460 -0
  68. package/src/lib/components/tidy-diff.ts +196 -0
  69. package/src/lib/components/tidy-validate.ts +202 -0
  70. package/src/lib/content/compose.ts +11 -1
  71. package/src/lib/content/site-dictionary.ts +84 -0
  72. package/src/lib/content/types.ts +25 -0
  73. package/src/lib/doctor/checks-local.ts +59 -5
  74. package/src/lib/doctor/index.ts +2 -0
  75. package/src/lib/log/events.ts +7 -1
  76. package/src/lib/nav/site-config.ts +197 -0
  77. package/src/lib/sveltekit/admin-dispatch.ts +7 -3
  78. package/src/lib/sveltekit/cairn-admin.ts +32 -4
  79. package/src/lib/sveltekit/content-routes.ts +504 -4
  80. package/src/lib/sveltekit/tidy-prompt.ts +153 -0
@@ -0,0 +1,202 @@
1
+ // The tidy output validation: the safety backstop that proves a tidy result is a proofread and not
2
+ // a restructure (spec 2.6) or a successful prompt injection (spec 2.3.3). A pure module taking the
3
+ // captured original and the model's corrected string and returning either the validated change set
4
+ // (the Task 12 diff) or a typed rejection reason. A rejected result is discarded by the caller with
5
+ // an honest message and the document is left untouched; nothing here mutates the buffer.
6
+ //
7
+ // Four of the five checks are EXACT and are the real structural backstop: the structure check (the
8
+ // directive sequence and the heading count and levels), the byte-for-byte frontmatter, the
9
+ // media-hash multiset, and the code check (every code span and code block, which also pins the
10
+ // block count). The fifth, the divergence bound, is the only fuzzy one, and it is a
11
+ // rewrite/injection backstop only, never a voice safeguard. The config-driven prompt is what protects voice.
12
+
13
+ import { unified } from 'unified';
14
+ import remarkParse from 'remark-parse';
15
+ import remarkGfm from 'remark-gfm';
16
+ import { visit } from 'unist-util-visit';
17
+ import { fenceScan, frontmatterSpan } from './markdown-directives.js';
18
+ import { parseMediaToken } from '../media/reference.js';
19
+ import { diffTokens, diffChanges } from './tidy-diff.js';
20
+ import type { Change } from './tidy-diff.js';
21
+
22
+ /** The reason a tidy result was rejected. Task 14 branches on this; every value maps to the one
23
+ * honest author-facing message, so the reason is for logging and tests, not the user surface.
24
+ * - `structure`: the directive opener/closer sequence, or the heading count or levels, diverged
25
+ * (the result restructured the document).
26
+ * - `frontmatter`: the frontmatter block is not byte-for-byte equal.
27
+ * - `media`: the multiset of `media:` hashes differs (a hash was altered, dropped, or invented).
28
+ * - `code`: a code span or code block was edited, added, or removed (a changed block count lands here).
29
+ * - `divergence`: the changed-token amount exceeds the length-aware bound (a wholesale rewrite). */
30
+ export type TidyRejectionReason = 'structure' | 'frontmatter' | 'media' | 'code' | 'divergence';
31
+
32
+ /** The honest author-facing message a rejection maps to. The same message for every reason, by
33
+ * design: an author does not need the validator's internal taxonomy, only that the result was
34
+ * discarded and their text is safe. */
35
+ export const TIDY_REJECTION_MESSAGE =
36
+ 'Tidy returned a result that changed more than the wording, so it was discarded. Your text is unchanged.';
37
+
38
+ /** The outcome of validating a tidy result. On success it carries the Task 12 change set the review
39
+ * surface accepts and rejects against; on failure it carries the typed reason and the message. */
40
+ export type TidyValidation =
41
+ | { ok: true; changes: Change[] }
42
+ | { ok: false; reason: TidyRejectionReason; message: string };
43
+
44
+ // The divergence bound. The floor allows a fixed number of changed tokens regardless of fraction so
45
+ // a legitimate heavy proofread of a SHORT input is not penalized: a short paragraph with a typo in
46
+ // nearly every word is a real proofread, not a rewrite. The fraction catches a wholesale rewrite of
47
+ // a LONG input, where a large absolute count is past any honest copy-edit. A result is rejected only
48
+ // when it exceeds BOTH the floor and the fraction, so a short input rides the floor and a long input
49
+ // rides the fraction. The values are deliberate: 60 tokens of change covers a dense proofread of a
50
+ // few short paragraphs, and 0.5 of the total tokens marks the point where more than half the text
51
+ // changed, which no proofread does but a rewrite or a successful injection always does.
52
+ const DIVERGENCE_TOKEN_FLOOR = 60;
53
+ const DIVERGENCE_FRACTION = 0.5;
54
+
55
+ // Every `media:` token anywhere in the text, hash and slug forms alike. The validator scans the raw
56
+ // text rather than going through extractMediaRefs for two reasons. First, a true MULTISET is the
57
+ // invariant a backstop wants: extractMediaRefs dedups by hash, so a doubled token collapsing to one
58
+ // would read as equal, and the validator must catch a dropped duplicate. Second, the raw scan covers
59
+ // the whole text including frontmatter without threading the concept's FrontmatterField[] to the call
60
+ // site, which the validator otherwise has no reason to know. A token mangled inside a code fence is
61
+ // caught here too, redundantly with the code check, which is the right posture for a backstop.
62
+ const MEDIA_TOKEN = /media:[A-Za-z0-9.-]+/g;
63
+
64
+ /** The sorted multiset of valid media hashes in the text. Each `media:` occurrence is parsed; a
65
+ * malformed token (a broken hash, an illegal slug) parses to null and is dropped, so a tidy that
66
+ * CORRUPTED a hash drops it from the multiset and the comparison fails. Sorted so two multisets
67
+ * compare by value, order-independent. */
68
+ function mediaHashes(text: string): string[] {
69
+ const hashes: string[] = [];
70
+ for (const m of text.matchAll(MEDIA_TOKEN)) {
71
+ const ref = parseMediaToken(m[0]);
72
+ if (ref) hashes.push(ref.hash);
73
+ }
74
+ return hashes.sort();
75
+ }
76
+
77
+ /** The directive structure signature: each opener or closer in document order, paired with the depth
78
+ * the fence scan assigned it. Two texts share a directive structure when these signatures are equal,
79
+ * so an added, removed, or relevelled container fails the comparison. A fence-shaped line inside a
80
+ * code block is already disowned by the scan (its role is null), so a documented `:::` example does
81
+ * not enter the signature. */
82
+ function directiveSignature(text: string): string {
83
+ const { depths, roles } = fenceScan(text.split('\n'));
84
+ const parts: string[] = [];
85
+ for (let i = 0; i < roles.length; i++) {
86
+ if (roles[i] !== null) parts.push(`${roles[i]}@${depths[i]}`);
87
+ }
88
+ return parts.join(',');
89
+ }
90
+
91
+ /** The heading signature: every ATX heading's level in document order. Parsed as mdast so a `#`
92
+ * inside a code block or an escaped one is never counted, and the level is the parser's own depth.
93
+ * Two texts share a heading structure when these are equal, so an added, removed, or relevelled
94
+ * heading fails the comparison. */
95
+ function headingSignature(text: string): string {
96
+ const tree = unified().use(remarkParse).use(remarkGfm).parse(text);
97
+ const levels: number[] = [];
98
+ visit(tree, 'heading', (node: { depth?: number }) => {
99
+ if (typeof node.depth === 'number') levels.push(node.depth);
100
+ });
101
+ return levels.join(',');
102
+ }
103
+
104
+ /** Every code span and fenced or indented code block in the text, as a sorted multiset of values.
105
+ * Parsed as mdast so the comparison sees exactly what the parser treats as code, the same authority
106
+ * the media body scan uses. Sorted so the comparison is order-independent: the divergence and
107
+ * structure checks own ordering, this check owns the contents. A `code` node is a block, an
108
+ * `inlineCode` node is a span. */
109
+ function codeContents(text: string): string[] {
110
+ const tree = unified().use(remarkParse).use(remarkGfm).parse(text);
111
+ const values: string[] = [];
112
+ visit(tree, (node: { type: string; value?: string }) => {
113
+ if ((node.type === 'code' || node.type === 'inlineCode') && typeof node.value === 'string') {
114
+ values.push(`${node.type}:${node.value}`);
115
+ }
116
+ });
117
+ return values.sort();
118
+ }
119
+
120
+ /** True when two string multisets are equal: same length and same sorted contents. */
121
+ function multisetEqual(a: string[], b: string[]): boolean {
122
+ if (a.length !== b.length) return false;
123
+ for (let i = 0; i < a.length; i++) {
124
+ if (a[i] !== b[i]) return false;
125
+ }
126
+ return true;
127
+ }
128
+
129
+ // The changed token amount: the count of tokens the diff marked inserted or deleted, against the
130
+ // total tokens in the original. An equal run contributes nothing; an inserted or deleted run counts
131
+ // its own tokens. This is the rewrite measure, deliberately coarse, since the structure/token/code
132
+ // checks are the exact backstop and this only catches a wholesale rewrite that slipped past them.
133
+ function divergence(original: string, corrected: string): { changed: number; total: number } {
134
+ const runs = diffTokens(original, corrected);
135
+ // Count tokens by splitting each run's text on the same word/non-word boundary the diff uses; a
136
+ // run's token count is its number of word-or-nonword matches. The original's total is the equal
137
+ // plus deleted token count.
138
+ const countTokens = (s: string) => (s.match(/[A-Za-z0-9_]+(?:['’][A-Za-z0-9_]+)*|[^A-Za-z0-9_]+/g) ?? []).length;
139
+ let changed = 0;
140
+ let total = 0;
141
+ for (const run of runs) {
142
+ const tokens = countTokens(run.text);
143
+ if (run.kind === 'inserted' || run.kind === 'deleted') changed += tokens;
144
+ if (run.kind === 'equal' || run.kind === 'deleted') total += tokens;
145
+ }
146
+ return { changed, total };
147
+ }
148
+
149
+ /**
150
+ * Validate a tidy result against the captured original. Runs the exact structural checks first (a
151
+ * restructure or a token or code edit is a hard reject regardless of how little else changed), then
152
+ * the length-aware divergence bound. On success returns the Task 12 change set for the review
153
+ * surface; on failure returns the typed reason and the one honest message.
154
+ *
155
+ * The checks, in order: the directive opener/closer sequence and depths, the ATX heading count and
156
+ * levels, the fenced-code-block count (folded into the code-contents multiset), the byte-for-byte
157
+ * frontmatter via the shared frontmatterSpan helper, the media-hash multiset, the code-span and
158
+ * code-block contents, and finally the divergence bound. A pure function: it reads the two strings
159
+ * and nothing else, and it never mutates the buffer.
160
+ */
161
+ export function validateTidy(original: string, corrected: string): TidyValidation {
162
+ // Directive structure: the opener/closer sequence and depths must match exactly.
163
+ if (directiveSignature(original) !== directiveSignature(corrected)) {
164
+ return { ok: false, reason: 'structure', message: TIDY_REJECTION_MESSAGE };
165
+ }
166
+
167
+ // Headings: the same ATX headings at the same levels, in order.
168
+ if (headingSignature(original) !== headingSignature(corrected)) {
169
+ return { ok: false, reason: 'structure', message: TIDY_REJECTION_MESSAGE };
170
+ }
171
+
172
+ // Frontmatter: byte-for-byte equal, via the same helper the spellcheck skip uses. A null span
173
+ // (no frontmatter) on both sides slices to the empty string on both, so a body-only document
174
+ // passes; a span on one side and not the other diverges.
175
+ const fmOriginal = frontmatterSpan(original);
176
+ const fmCorrected = frontmatterSpan(corrected);
177
+ const fmTextOriginal = fmOriginal ? original.slice(fmOriginal.from, fmOriginal.to) : '';
178
+ const fmTextCorrected = fmCorrected ? corrected.slice(fmCorrected.from, fmCorrected.to) : '';
179
+ if (fmTextOriginal !== fmTextCorrected) {
180
+ return { ok: false, reason: 'frontmatter', message: TIDY_REJECTION_MESSAGE };
181
+ }
182
+
183
+ // Media: the exact same multiset of hashes across the whole text.
184
+ if (!multisetEqual(mediaHashes(original), mediaHashes(corrected))) {
185
+ return { ok: false, reason: 'media', message: TIDY_REJECTION_MESSAGE };
186
+ }
187
+
188
+ // Code: every code span and fenced or indented block identical. The block count is folded in
189
+ // here: a multiset of block-and-span values that differs in count or contents fails.
190
+ if (!multisetEqual(codeContents(original), codeContents(corrected))) {
191
+ return { ok: false, reason: 'code', message: TIDY_REJECTION_MESSAGE };
192
+ }
193
+
194
+ // Divergence: rejected only when the changed amount exceeds BOTH the absolute floor and the
195
+ // fraction of the total. A short input rides the floor; a long input rides the fraction.
196
+ const { changed, total } = divergence(original, corrected);
197
+ if (changed > DIVERGENCE_TOKEN_FLOOR && changed > total * DIVERGENCE_FRACTION) {
198
+ return { ok: false, reason: 'divergence', message: TIDY_REJECTION_MESSAGE };
199
+ }
200
+
201
+ return { ok: true, changes: diffChanges(original, corrected) };
202
+ }
@@ -6,7 +6,7 @@
6
6
  import type { AdminPanel, CairnAdapter, CairnExtension, CairnRuntime, ConceptConfig, FieldTypeDef } from './types.js';
7
7
  import { resolveConcepts } from './concepts.js';
8
8
  import { normalizeAssets } from '../media/config.js';
9
- import type { SiteConfig } from '../nav/site-config.js';
9
+ import { dictionaryFileForDialect, type SiteConfig } from '../nav/site-config.js';
10
10
 
11
11
  /** The input to {@link composeRuntime}. `siteConfig` is required so the per-concept URL policy is
12
12
  * always derived from one source and can never be silently dropped. `extensions` fold in after the
@@ -49,6 +49,16 @@ export function composeRuntime({ adapter, siteConfig, extensions = [] }: Compose
49
49
  assets: adapter.assets,
50
50
  resolvedAssets: normalizeAssets(adapter.assets),
51
51
  mediaManifestPath: adapter.mediaManifestPath ?? 'src/content/.cairn/media.json',
52
+ // The personal dictionary sits beside the manifests under the same `.cairn/` content root, so the
53
+ // spec's `content/.cairn/dictionary.txt` resolves the same configurable way the manifest paths do.
54
+ dictionaryPath: adapter.dictionaryPath ?? 'src/content/.cairn/dictionary.txt',
55
+ // The spellcheck dictionary is resolved once here from the site config's dialect (default US),
56
+ // so the runtime and the editor never re-derive it. The site config is the one home for the
57
+ // dialect; the editor resolves this filename to a real asset URL on the main thread.
58
+ spellcheckDictionary: dictionaryFileForDialect(siteConfig.spellcheck?.dialect),
59
+ // The tidy block passes through from the site config; the tidy action reads enabled/model at call
60
+ // time and builds its prompt from conventions. Absent means tidy is off.
61
+ tidy: siteConfig.tidy,
52
62
  adminPanels,
53
63
  fieldTypes,
54
64
  };
@@ -0,0 +1,84 @@
1
+ // cairn-cms: the git-committed per-site personal dictionary (spec 1.6). One word per line,
2
+ // sorted, with comment lines (starting with #) and blank lines tolerated on read. This module is
3
+ // pure: it parses the committed file text, inserts words in sorted order, and serializes the
4
+ // canonical form. The insert is order-independent, so the action's commit-and-retry can re-merge
5
+ // the pending additions at a new head and reach the same sorted set regardless of insertion order.
6
+ //
7
+ // The canonical serialization keeps a single leading header comment and one sorted word per line.
8
+ // An inbound file's other comment lines are dropped on serialize (the header is regenerated), so the
9
+ // committed file stays a clean, diffable, sorted word list; a maintainer who wants a richer comment
10
+ // edits it in git, and the next add through here normalizes it back to the header.
11
+
12
+ /** The header comment the canonical serialization writes above the sorted words. */
13
+ const HEADER = '# cairn personal dictionary: one word per line, sorted, kept in git.';
14
+
15
+ // A dictionary word: a single line carrying no whitespace and no ASCII control characters, so it can
16
+ // never inject an extra line into the committed file. Hyphens and apostrophes are allowed, since real
17
+ // words carry them ("well-known", "O'Brien"); a non-ASCII surname or place name validates too, since
18
+ // the test is for whitespace and control bytes rather than an allow-list of letters. The action runs
19
+ // inbound words through this before a merge.
20
+ const WORD_RE = /^[^\s\p{Cc}]+$/u;
21
+
22
+ /** True when a word is a single valid dictionary line (no whitespace, no control characters, non-empty
23
+ * and within the length bound). A leading "#" is rejected: parseDictionary re-reads such a line as a
24
+ * comment, so committing it would silently drop the word on the next read. The action uses this to
25
+ * reject untrusted input before the merge, so a newline or a control byte can never inject an extra
26
+ * line into the committed file. */
27
+ export function isValidDictionaryWord(word: string, maxLength = 64): boolean {
28
+ if (word.startsWith('#')) return false;
29
+ return word.length > 0 && word.length <= maxLength && WORD_RE.test(word);
30
+ }
31
+
32
+ /**
33
+ * Parse the committed dictionary file text into its word list. Comment lines (a `#` after optional
34
+ * leading whitespace) and blank lines are dropped; every other line is trimmed and kept. A null or
35
+ * empty file yields an empty list. The result preserves the file's order and is not deduplicated or
36
+ * sorted here, so a caller can see exactly what the file held; `mergeDictionaryWords` is the path that
37
+ * normalizes to the sorted, deduplicated set.
38
+ */
39
+ export function parseDictionary(text: string | null): string[] {
40
+ if (!text) return [];
41
+ const words: string[] = [];
42
+ for (const line of text.split('\n')) {
43
+ const trimmed = line.trim();
44
+ if (trimmed === '' || trimmed.startsWith('#')) continue;
45
+ words.push(trimmed);
46
+ }
47
+ return words;
48
+ }
49
+
50
/**
 * Case-insensitive comparator for the canonical dictionary sort.
 *
 * Both words are lowercased before comparison, so "Cairn" and "cairn" compare equal. The collation
 * locale is pinned to `en` rather than left to the runtime default: with no locale argument,
 * `localeCompare` follows the host's default locale, so the same word list could be committed in a
 * different order depending on where the sort ran. Pinning the locale keeps the committed file's
 * order the same everywhere.
 *
 * @param a The first word.
 * @param b The second word.
 * @returns A negative number when `a` sorts first, positive when `b` does, 0 when they are equal.
 */
function byWord(a: string, b: string): number {
  return a.toLowerCase().localeCompare(b.toLowerCase(), 'en');
}
55
+
56
+ /**
57
+ * Merge `additions` into the `existing` word list, returning the canonical sorted, deduplicated set.
58
+ * The merge is case-insensitive (a duplicate add of an existing word, in any case, collapses) and
59
+ * order-independent: the inputs are unioned by lowercased key and sorted, so re-merging the same
60
+ * additions at a moved head produces the same set. The first-seen casing of each word wins, so an
61
+ * existing "Cairn" is kept over a later "cairn". Invalid additions (whitespace, control characters,
62
+ * empty) are skipped here as a backstop; the action validates before this is reached.
63
+ */
64
+ export function mergeDictionaryWords(existing: readonly string[], additions: readonly string[]): string[] {
65
+ const byKey = new Map<string, string>();
66
+ for (const word of [...existing, ...additions]) {
67
+ if (!isValidDictionaryWord(word)) continue;
68
+ const key = word.toLowerCase();
69
+ if (!byKey.has(key)) byKey.set(key, word);
70
+ }
71
+ return [...byKey.values()].sort(byWord);
72
+ }
73
+
74
+ /**
75
+ * Serialize a word list to the canonical committed file text: the header comment, then one word per
76
+ * line sorted case-insensitively, with a trailing newline. The input is run through the same dedup
77
+ * and sort as the merge, so serializing an unsorted or duplicate-bearing list still yields the
78
+ * canonical form. An empty word list serializes to just the header (so the file stays a valid,
79
+ * recognizable dictionary rather than vanishing).
80
+ */
81
+ export function serializeDictionary(words: readonly string[]): string {
82
+ const sorted = mergeDictionaryWords(words, []);
83
+ return [HEADER, ...sorted].join('\n') + '\n';
84
+ }
@@ -264,6 +264,11 @@ export interface CairnAdapter {
264
264
  /** Repo-relative path to the committed media manifest. Defaults to src/content/.cairn/media.json,
265
265
  * applied in composeRuntime. Sits outside any concept directory, like the content manifest. */
266
266
  mediaManifestPath?: string;
267
+ /** Repo-relative path to the committed personal dictionary file. Defaults to
268
+ * src/content/.cairn/dictionary.txt, applied in composeRuntime: the same `.cairn/` content root the
269
+ * manifests use, so the spec's `content/.cairn/dictionary.txt` resolves the same configurable way the
270
+ * manifest paths do. One word per line, sorted, comment lines allowed (see site-dictionary.ts). */
271
+ dictionaryPath?: string;
267
272
  /** Directive component registry; the renderer and the future palette derive from it (seam 3). */
268
273
  registry?: ComponentRegistry;
269
274
  /** The site's glyph name to SVG path-data map, for the admin icon picker and the renderer. */
@@ -380,6 +385,13 @@ export interface CairnRuntime {
380
385
  manifestPath: string;
381
386
  /** The repo-relative path to the committed media manifest, defaulted in composeRuntime. */
382
387
  mediaManifestPath: string;
388
+ /** The repo-relative path to the committed personal dictionary file (one word per line, sorted),
389
+ * defaulted in composeRuntime to src/content/.cairn/dictionary.txt: the same `.cairn/` content root
390
+ * the manifests use. The edit load reads it and threads its words onto EditData; the
391
+ * addDictionaryWord action reads, merges, and commits it. Optional on the runtime so a hand-built
392
+ * runtime need not set it; composeRuntime always fills it, and the edit load and the action default
393
+ * a missing value to the same content-root path. */
394
+ dictionaryPath?: string;
383
395
  /** The adapter's asset config resolved once at compose: `{ enabled: false }` for a no-media site,
384
396
  * otherwise the filled config the upload, storage, delivery, and resolver paths read. */
385
397
  resolvedAssets: import('../media/config.js').ResolvedAssetConfig;
@@ -390,6 +402,19 @@ export interface CairnRuntime {
390
402
  /** The live site's content styling for the preview frame; passed through from the adapter. */
391
403
  preview?: PreviewConfig;
392
404
  assets?: AssetConfig;
405
+ /** The editor's spellcheck dictionary file, resolved once at compose from the site config's
406
+ * `spellcheck.dialect` (defaulting to US English). The edit load threads it onto EditData and the
407
+ * editor resolves it to a real asset URL on the main thread, so the Worker receives the URL and
408
+ * never reads config. Just the filename, e.g. "dictionary-en-us.txt". Optional on the runtime so a
409
+ * hand-built runtime need not set it; composeRuntime always fills it, and the edit load defaults a
410
+ * missing value to the US English dictionary. */
411
+ spellcheckDictionary?: string;
412
+ /** The editor tidy (LLM copy-edit) settings, passed through from the site config. Optional on the
413
+ * runtime so a hand-built runtime need not set it; composeRuntime threads it from
414
+ * `siteConfig.tidy`. The tidy action reads `enabled` and `model` at call time, and builds its prompt
415
+ * from `conventions`. Absent (or `enabled` false) means tidy is off, and the action refuses with a
416
+ * fail(503) before any model call. */
417
+ tidy?: import('../nav/site-config.js').TidyConfig;
393
418
  /** Admin panels contributed by extensions (Mode 2). Empty until Plan 09 wires the dispatch route. */
394
419
  adminPanels?: AdminPanel[];
395
420
  /** Field types contributed by extensions (Mode 2). Empty until Plan 09 wires the form dispatch. */
@@ -6,6 +6,7 @@ import type { CheckResult, DoctorCheck, DoctorContext } from './types.js';
6
6
  import { readWranglerConfig } from './wrangler-config.js';
7
7
  import { requireOrigin } from '../env.js';
8
8
  import { parseSiteConfig, urlPolicyFrom } from '../nav/site-config.js';
9
+ import type { SiteConfig } from '../nav/site-config.js';
9
10
  import { normalizeConcepts } from '../content/concepts.js';
10
11
  import { defineFields } from '../content/schema.js';
11
12
  import type { ConceptConfig } from '../content/types.js';
@@ -138,16 +139,21 @@ export const configPublicOrigin: DoctorCheck = {
138
139
  // src locations the production sites use).
139
140
  const SITE_CONFIG_PATHS = ['site.config.yaml', 'src/lib/site.config.yaml', 'src/site.config.yaml'];
140
141
 
142
+ // Read the first site.config.yaml that exists in a conventional spot, or null when none does.
143
+ async function readSiteConfigText(ctx: DoctorContext): Promise<string | null> {
144
+ for (const path of SITE_CONFIG_PATHS) {
145
+ const text = await ctx.readFile(path);
146
+ if (text !== null) return text;
147
+ }
148
+ return null;
149
+ }
150
+
141
151
  export const configSiteConfig: DoctorCheck = {
142
152
  id: 'config.site-config',
143
153
  conditionId: 'config.site-config-invalid',
144
154
  title: 'Site config',
145
155
  async run(ctx: DoctorContext): Promise<CheckResult> {
146
- let text: string | null = null;
147
- for (const path of SITE_CONFIG_PATHS) {
148
- text = await ctx.readFile(path);
149
- if (text !== null) break;
150
- }
156
+ const text = await readSiteConfigText(ctx);
151
157
  if (text === null) return skip(`no site.config.yaml found (looked in ${SITE_CONFIG_PATHS.join(', ')})`);
152
158
  try {
153
159
  const policy = urlPolicyFrom(parseSiteConfig(text));
@@ -165,3 +171,51 @@ export const configSiteConfig: DoctorCheck = {
165
171
  }
166
172
  },
167
173
  };
174
+
175
+ // A site enables tidy with `tidy.enabled: true` in the committed config; ignore a config the rest of
176
+ // the doctor reports through configSiteConfig, so a parse error here just skips rather than doubling
177
+ // the failure.
178
+ function tidyEnabled(text: string): boolean {
179
+ let config: SiteConfig;
180
+ try {
181
+ config = parseSiteConfig(text);
182
+ } catch {
183
+ return false;
184
+ }
185
+ return config.tidy?.enabled === true;
186
+ }
187
+
188
+ // The Anthropic key is a Worker secret, so the doctor cannot prove it is unset (it is in neither the
189
+ // committed wrangler config nor anything readFile reaches). It CAN read the two spots a key would also
190
+ // appear if set as a plain var: the wrangler config text and .dev.vars. A bare presence-by-name read
191
+ // is enough for the heuristic; the runtime fail(503) and --probe are the real truth checks.
192
+ function keyAppearsIn(text: string | null): boolean {
193
+ return text !== null && text.includes('ANTHROPIC_API_KEY');
194
+ }
195
+
196
+ // The tidy secret heuristic. It reuses the config.bindings-missing condition rather than registering a
197
+ // new one, so the readiness count holds (the same pattern configMediaBucket uses). A warn here is not a
198
+ // definitive unset claim: it asks the operator to verify the secret, since a wrangler secret is
199
+ // invisible to the CLI.
200
+ export const configTidyKey: DoctorCheck = {
201
+ id: 'config.tidy-key',
202
+ conditionId: 'config.bindings-missing',
203
+ title: 'Tidy API key',
204
+ async run(ctx: DoctorContext): Promise<CheckResult> {
205
+ const text = await readSiteConfigText(ctx);
206
+ if (text === null) return skip('no site.config.yaml found, so tidy enablement is unknown');
207
+ if (!tidyEnabled(text)) return skip('tidy is not enabled in the site config');
208
+ const wrangler =
209
+ (await ctx.readFile('wrangler.jsonc')) ?? (await ctx.readFile('wrangler.toml'));
210
+ if (keyAppearsIn(wrangler)) {
211
+ return pass('ANTHROPIC_API_KEY appears in the wrangler vars (verify it is the real key, not a placeholder)');
212
+ }
213
+ const devVars = await ctx.readFile('.dev.vars');
214
+ if (keyAppearsIn(devVars)) {
215
+ return pass('ANTHROPIC_API_KEY appears in .dev.vars (the local override; verify the Worker secret is set for production)');
216
+ }
217
+ return fail(
218
+ 'tidy is enabled but ANTHROPIC_API_KEY is in neither the wrangler vars nor .dev.vars; verify the secret is configured with wrangler secret put ANTHROPIC_API_KEY'
219
+ );
220
+ },
221
+ };
@@ -9,6 +9,7 @@ import {
9
9
  configCsrfDisable,
10
10
  configSiteConfig,
11
11
  configPublicOrigin,
12
+ configTidyKey,
12
13
  } from './checks-local.js';
13
14
  import { configDependencyFloors } from './check-floors.js';
14
15
  import { emailSenderOnboarded, edgeHttpsForced, edgeHsts, authStore } from './checks-cloudflare.js';
@@ -162,6 +163,7 @@ export function defaultChecks(): DoctorCheck[] {
162
163
  configCsrfDisable,
163
164
  configSiteConfig,
164
165
  configPublicOrigin,
166
+ configTidyKey,
165
167
  configDependencyFloors,
166
168
  emailSenderOnboarded,
167
169
  edgeHttpsForced,
@@ -27,4 +27,10 @@ export type CairnLogEvent =
27
27
  | 'media.orphans_purged'
28
28
  | 'media.replaced'
29
29
  | 'media.replace_blocked'
30
- | 'media.alt_propagated';
30
+ | 'media.alt_propagated'
31
+ | 'dictionary.added'
32
+ | 'dictionary.add_conflict'
33
+ | 'tidy.done'
34
+ | 'tidy.error'
35
+ | 'tidy.refused'
36
+ | 'tidy.empty';