@glw907/cairn-cms 0.59.0 → 0.60.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/dist/components/AdminLayout.svelte +130 -229
- package/dist/components/CairnAdmin.svelte +12 -41
- package/dist/components/CairnLogo.svelte +1 -6
- package/dist/components/CairnMediaLibrary.svelte +821 -1210
- package/dist/components/CairnTidySettings.svelte +486 -0
- package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
- package/dist/components/ComponentForm.svelte +110 -185
- package/dist/components/ComponentInsertDialog.svelte +163 -283
- package/dist/components/ConceptList.svelte +111 -191
- package/dist/components/ConfirmPage.svelte +5 -12
- package/dist/components/CsrfField.svelte +5 -11
- package/dist/components/DeleteDialog.svelte +15 -42
- package/dist/components/EditPage.svelte +786 -918
- package/dist/components/EditorToolbar.svelte +108 -170
- package/dist/components/IconPicker.svelte +23 -53
- package/dist/components/LinkPicker.svelte +34 -58
- package/dist/components/LoginPage.svelte +14 -27
- package/dist/components/ManageEditors.svelte +3 -15
- package/dist/components/MarkdownEditor.svelte +688 -789
- package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
- package/dist/components/MarkdownHelpDialog.svelte +8 -12
- package/dist/components/MediaCaptureCard.svelte +18 -57
- package/dist/components/MediaFigureControl.svelte +32 -71
- package/dist/components/MediaHeroField.svelte +210 -329
- package/dist/components/MediaInsertPopover.svelte +156 -283
- package/dist/components/MediaPicker.svelte +67 -131
- package/dist/components/NavTree.svelte +46 -78
- package/dist/components/RenameDialog.svelte +16 -43
- package/dist/components/ShortcutsDialog.svelte +9 -13
- package/dist/components/ShortcutsGrid.svelte +1 -2
- package/dist/components/TidyReview.svelte +355 -0
- package/dist/components/TidyReview.svelte.d.ts +47 -0
- package/dist/components/WebLinkDialog.svelte +19 -40
- package/dist/components/cairn-admin.css +768 -0
- package/dist/components/editor-tidy.d.ts +31 -0
- package/dist/components/editor-tidy.js +199 -0
- package/dist/components/index.d.ts +1 -0
- package/dist/components/index.js +1 -0
- package/dist/components/markdown-directives.d.ts +16 -0
- package/dist/components/markdown-directives.js +34 -0
- package/dist/components/objective-errors.d.ts +30 -0
- package/dist/components/objective-errors.js +113 -0
- package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
- package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
- package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
- package/dist/components/spellcheck-worker.d.ts +80 -0
- package/dist/components/spellcheck-worker.js +161 -0
- package/dist/components/spellcheck.d.ts +148 -0
- package/dist/components/spellcheck.js +553 -0
- package/dist/components/tidy-categorize.d.ts +67 -0
- package/dist/components/tidy-categorize.js +392 -0
- package/dist/components/tidy-diff.d.ts +60 -0
- package/dist/components/tidy-diff.js +147 -0
- package/dist/components/tidy-validate.d.ts +37 -0
- package/dist/components/tidy-validate.js +174 -0
- package/dist/content/compose.d.ts +1 -1
- package/dist/content/compose.js +11 -0
- package/dist/content/site-dictionary.d.ts +31 -0
- package/dist/content/site-dictionary.js +82 -0
- package/dist/content/types.d.ts +25 -0
- package/dist/delivery/CairnHead.svelte +8 -11
- package/dist/doctor/checks-local.d.ts +1 -0
- package/dist/doctor/checks-local.js +55 -6
- package/dist/doctor/index.js +2 -1
- package/dist/log/events.d.ts +1 -1
- package/dist/nav/site-config.d.ts +98 -0
- package/dist/nav/site-config.js +132 -0
- package/dist/sveltekit/admin-dispatch.d.ts +2 -0
- package/dist/sveltekit/admin-dispatch.js +6 -2
- package/dist/sveltekit/cairn-admin.d.ts +13 -1
- package/dist/sveltekit/cairn-admin.js +22 -3
- package/dist/sveltekit/content-routes.d.ts +135 -1
- package/dist/sveltekit/content-routes.js +351 -3
- package/dist/sveltekit/tidy-prompt.d.ts +11 -0
- package/dist/sveltekit/tidy-prompt.js +118 -0
- package/package.json +11 -2
- package/src/lib/components/CairnAdmin.svelte +3 -0
- package/src/lib/components/CairnTidySettings.svelte +553 -0
- package/src/lib/components/EditPage.svelte +371 -2
- package/src/lib/components/MarkdownEditor.svelte +168 -1
- package/src/lib/components/TidyReview.svelte +463 -0
- package/src/lib/components/cairn-admin.css +25 -0
- package/src/lib/components/editor-tidy.ts +241 -0
- package/src/lib/components/index.ts +1 -0
- package/src/lib/components/markdown-directives.ts +35 -0
- package/src/lib/components/objective-errors.ts +155 -0
- package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
- package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
- package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
- package/src/lib/components/spellcheck-worker.ts +279 -0
- package/src/lib/components/spellcheck.ts +693 -0
- package/src/lib/components/tidy-categorize.ts +460 -0
- package/src/lib/components/tidy-diff.ts +196 -0
- package/src/lib/components/tidy-validate.ts +202 -0
- package/src/lib/content/compose.ts +11 -1
- package/src/lib/content/site-dictionary.ts +84 -0
- package/src/lib/content/types.ts +25 -0
- package/src/lib/doctor/checks-local.ts +59 -5
- package/src/lib/doctor/index.ts +2 -0
- package/src/lib/log/events.ts +7 -1
- package/src/lib/nav/site-config.ts +197 -0
- package/src/lib/sveltekit/admin-dispatch.ts +7 -3
- package/src/lib/sveltekit/cairn-admin.ts +32 -4
- package/src/lib/sveltekit/content-routes.ts +504 -4
- package/src/lib/sveltekit/tidy-prompt.ts +153 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// The tidy output validation: the safety backstop that proves a tidy result is a proofread and not
|
|
2
|
+
// a restructure (spec 2.6) or a successful prompt injection (spec 2.3.3). A pure module taking the
|
|
3
|
+
// captured original and the model's corrected string and returning either the validated change set
|
|
4
|
+
// (the Task 12 diff) or a typed rejection reason. A rejected result is discarded by the caller with
|
|
5
|
+
// an honest message and the document is left untouched; nothing here mutates the buffer.
|
|
6
|
+
//
|
|
7
|
+
// Four of the five checks are EXACT and are the real structural backstop: the directive structure,
|
|
8
|
+
// the heading count and levels, the fenced-code-block count, the byte-for-byte frontmatter, the
|
|
9
|
+
// media-hash multiset, and every code span and fenced block. The fifth, the divergence bound, is
|
|
10
|
+
// the only fuzzy one, and it is a rewrite/injection backstop only, never a voice safeguard. The
|
|
11
|
+
// config-driven prompt is what protects voice.
|
|
12
|
+
|
|
13
|
+
import { unified } from 'unified';
|
|
14
|
+
import remarkParse from 'remark-parse';
|
|
15
|
+
import remarkGfm from 'remark-gfm';
|
|
16
|
+
import { visit } from 'unist-util-visit';
|
|
17
|
+
import { fenceScan, frontmatterSpan } from './markdown-directives.js';
|
|
18
|
+
import { parseMediaToken } from '../media/reference.js';
|
|
19
|
+
import { diffTokens, diffChanges } from './tidy-diff.js';
|
|
20
|
+
import type { Change } from './tidy-diff.js';
|
|
21
|
+
|
|
22
|
+
/** The reason a tidy result was rejected. Task 14 branches on this; every value maps to the one
|
|
23
|
+
* honest author-facing message, so the reason is for logging and tests, not the user surface.
|
|
24
|
+
* - `structure`: a directive opener/closer sequence, a heading count or level, or a fenced-code
|
|
25
|
+
* count diverged (the result restructured the document).
|
|
26
|
+
* - `frontmatter`: the frontmatter block is not byte-for-byte equal.
|
|
27
|
+
* - `media`: the multiset of `media:` hashes differs (a hash was altered, dropped, or invented).
|
|
28
|
+
* - `code`: a code span or fenced code block was edited.
|
|
29
|
+
* - `divergence`: the changed-token amount exceeds the length-aware bound (a wholesale rewrite). */
|
|
30
|
+
export type TidyRejectionReason = 'structure' | 'frontmatter' | 'media' | 'code' | 'divergence';
|
|
31
|
+
|
|
32
|
+
/** The honest author-facing message a rejection maps to. The same message for every reason, by
|
|
33
|
+
* design: an author does not need the validator's internal taxonomy, only that the result was
|
|
34
|
+
* discarded and their text is safe. */
|
|
35
|
+
export const TIDY_REJECTION_MESSAGE =
|
|
36
|
+
'Tidy returned a result that changed more than the wording, so it was discarded. Your text is unchanged.';
|
|
37
|
+
|
|
38
|
+
/** The outcome of validating a tidy result. On success it carries the Task 12 change set the review
|
|
39
|
+
* surface accepts and rejects against; on failure it carries the typed reason and the message. */
|
|
40
|
+
export type TidyValidation =
|
|
41
|
+
| { ok: true; changes: Change[] }
|
|
42
|
+
| { ok: false; reason: TidyRejectionReason; message: string };
|
|
43
|
+
|
|
44
|
+
// The divergence bound. The floor allows a fixed number of changed tokens regardless of fraction so
|
|
45
|
+
// a legitimate heavy proofread of a SHORT input is not penalized: a short paragraph with a typo in
|
|
46
|
+
// nearly every word is a real proofread, not a rewrite. The fraction catches a wholesale rewrite of
|
|
47
|
+
// a LONG input, where a large absolute count is past any honest copy-edit. A result is rejected only
|
|
48
|
+
// when it exceeds BOTH the floor and the fraction, so a short input rides the floor and a long input
|
|
49
|
+
// rides the fraction. The values are deliberate: 60 tokens of change covers a dense proofread of a
|
|
50
|
+
// few short paragraphs, and 0.5 of the total tokens marks the point where more than half the text
|
|
51
|
+
// changed, which no proofread does but a rewrite or a successful injection always does.
|
|
52
|
+
const DIVERGENCE_TOKEN_FLOOR = 60;
|
|
53
|
+
const DIVERGENCE_FRACTION = 0.5;
|
|
54
|
+
|
|
55
|
+
// Every `media:` token anywhere in the text, hash and slug forms alike. The validator scans the raw
|
|
56
|
+
// text rather than going through extractMediaRefs for two reasons. First, a true MULTISET is the
|
|
57
|
+
// invariant a backstop wants: extractMediaRefs dedups by hash, so a doubled token collapsing to one
|
|
58
|
+
// would read as equal, and the validator must catch a dropped duplicate. Second, the raw scan covers
|
|
59
|
+
// the whole text including frontmatter without threading the concept's FrontmatterField[] to the call
|
|
60
|
+
// site, which the validator otherwise has no reason to know. A token mangled inside a code fence is
|
|
61
|
+
// caught here too, redundantly with the code check, which is the right posture for a backstop.
|
|
62
|
+
const MEDIA_TOKEN = /media:[A-Za-z0-9.-]+/g;
|
|
63
|
+
|
|
64
|
+
/** Collect the hash of every accepted `media:` token in `text`, duplicates included, and return the
 * hashes sorted so two results can be compared position by position regardless of document order.
 * Each regex match is handed to parseMediaToken; a match it does not accept (a falsy result) adds
 * nothing, so a token that was corrupted by the tidy shrinks the multiset and the comparison against
 * the original fails. */
function mediaHashes(text: string): string[] {
	const found = [...text.matchAll(MEDIA_TOKEN)].flatMap((match) => {
		const parsed = parseMediaToken(match[0]);
		return parsed ? [parsed.hash] : [];
	});
	found.sort();
	return found;
}
|
|
76
|
+
|
|
77
|
+
/** Build the directive structure signature of `text`: one `role@depth` entry, in document order, for
 * every line the fence scan gave a non-null role, joined with commas. Two texts have the same
 * directive structure exactly when their signatures are equal, so adding, removing, or re-nesting a
 * container changes the string. Lines with a null role are skipped; per the note carried with this
 * helper, that is presumably how a fence-shaped line inside a code block stays out of the signature
 * (fenceScan itself is defined elsewhere; confirm there). */
function directiveSignature(text: string): string {
	const { depths, roles } = fenceScan(text.split('\n'));
	const entries: string[] = [];
	for (let line = 0; line < roles.length; line += 1) {
		const role = roles[line];
		if (role === null) continue;
		entries.push(`${role}@${depths[line]}`);
	}
	return entries.join(',');
}
|
|
90
|
+
|
|
91
|
+
/** Build the heading signature of `text`: the depth of every mdast `heading` node in document order,
 * joined with commas. The text is parsed with remark (plus GFM), so only what the parser itself
 * treats as a heading is counted and the level is the parser's own `depth`. Note that mdast emits a
 * `heading` node for setext headings as well as ATX ones, so both forms contribute. Two texts share
 * a heading structure exactly when the signatures are equal; an added, removed, or relevelled heading
 * changes the string. */
function headingSignature(text: string): string {
	const depths: number[] = [];
	const root = unified().use(remarkParse).use(remarkGfm).parse(text);
	visit(root, 'heading', (heading: { depth?: number }) => {
		const { depth } = heading;
		if (typeof depth === 'number') depths.push(depth);
	});
	return depths.join(',');
}
|
|
103
|
+
|
|
104
|
+
/** Collect the value of every code block (`code` node) and code span (`inlineCode` node) in `text`
 * as a sorted list of `type:value` strings. The text is parsed with remark (plus GFM), so the list
 * holds exactly what the parser treats as code. The node type is part of each entry, so a span and a
 * block with the same text stay distinct. Sorting makes the comparison independent of document
 * order; this check is about the contents only. */
function codeContents(text: string): string[] {
	const root = unified().use(remarkParse).use(remarkGfm).parse(text);
	const entries: string[] = [];
	visit(root, (node: { type: string; value?: string }) => {
		const isCode = node.type === 'code' || node.type === 'inlineCode';
		if (!isCode || typeof node.value !== 'string') return;
		entries.push(`${node.type}:${node.value}`);
	});
	entries.sort();
	return entries;
}
|
|
119
|
+
|
|
120
|
+
/** True when two string multisets are equal: the same values with the same multiplicities, in any
 * order. Both inputs are copied and sorted before the element-wise comparison, so the result does
 * not depend on the callers having sorted them first and neither argument is mutated. For inputs
 * that are already sorted with the default `Array.prototype.sort` order the result is unchanged.
 *
 * @param a The first multiset.
 * @param b The second multiset.
 * @returns Whether `a` and `b` hold the same strings the same number of times. */
function multisetEqual(a: string[], b: string[]): boolean {
	// Different sizes can never be equal; skip the sort entirely.
	if (a.length !== b.length) return false;
	const left = [...a].sort();
	const right = [...b].sort();
	return left.every((value, index) => value === right[index]);
}
|
|
128
|
+
|
|
129
|
+
// Measure how much of the text the tidy changed. `changed` is the number of tokens in runs the diff
// marked inserted or deleted; `total` is the number of tokens in the original, which is the equal
// runs plus the deleted runs. A run's token count is the number of word-or-nonword matches in its
// text. The measure is deliberately coarse: it only exists to catch a wholesale rewrite that got
// past the exact checks.
function divergence(original: string, corrected: string): { changed: number; total: number } {
	// A word (with inner apostrophes) or a run of non-word characters counts as one token.
	const tokenPattern = /[A-Za-z0-9_]+(?:['’][A-Za-z0-9_]+)*|[^A-Za-z0-9_]+/g;
	let changed = 0;
	let total = 0;
	for (const { kind, text } of diffTokens(original, corrected)) {
		const size = text.match(tokenPattern)?.length ?? 0;
		switch (kind) {
			case 'inserted':
				// Present only in the corrected text: changed, but not part of the original's total.
				changed += size;
				break;
			case 'deleted':
				// Present only in the original: changed, and part of the original's total.
				changed += size;
				total += size;
				break;
			case 'equal':
				total += size;
				break;
		}
	}
	return { changed, total };
}
|
|
148
|
+
|
|
149
|
+
/**
 * Validate a tidy result against the captured original and return either the change set or a typed
 * rejection. The exact checks run first and any one of them failing is a hard reject, however small
 * the rest of the edit is; the length-aware divergence bound runs last.
 *
 * Order of checks: directive opener/closer sequence and depths (`structure`), heading levels in
 * order (`structure`), frontmatter text (`frontmatter`), media-hash multiset (`media`), code span and
 * code block contents, which also covers their count (`code`), and finally the divergence bound
 * (`divergence`). Every rejection carries the same TIDY_REJECTION_MESSAGE.
 *
 * The function only reads its two string arguments and returns a new result object.
 *
 * @param original The text captured before the tidy ran.
 * @param corrected The text the model returned.
 * @returns `{ ok: true, changes }` with the diff of the two texts, or `{ ok: false, reason, message }`.
 */
export function validateTidy(original: string, corrected: string): TidyValidation {
	const reject = (reason: TidyRejectionReason): TidyValidation => ({
		ok: false,
		reason,
		message: TIDY_REJECTION_MESSAGE,
	});

	// Directive containers must open and close in the same sequence at the same depths.
	if (directiveSignature(original) !== directiveSignature(corrected)) return reject('structure');

	// Headings must appear in the same order at the same levels.
	if (headingSignature(original) !== headingSignature(corrected)) return reject('structure');

	// Frontmatter must be identical. A text with no frontmatter span contributes the empty string, so
	// two body-only documents pass and a span on only one side fails.
	const frontmatterOf = (text: string): string => {
		const span = frontmatterSpan(text);
		return span ? text.slice(span.from, span.to) : '';
	};
	if (frontmatterOf(original) !== frontmatterOf(corrected)) return reject('frontmatter');

	// The same media hashes, the same number of times, anywhere in the text.
	if (!multisetEqual(mediaHashes(original), mediaHashes(corrected))) return reject('media');

	// The same code spans and blocks, by value and by count.
	if (!multisetEqual(codeContents(original), codeContents(corrected))) return reject('code');

	// Rejected only when the change is past BOTH the absolute floor and the fraction of the original.
	const { changed, total } = divergence(original, corrected);
	const pastFloor = changed > DIVERGENCE_TOKEN_FLOOR;
	const pastFraction = changed > total * DIVERGENCE_FRACTION;
	if (pastFloor && pastFraction) return reject('divergence');

	return { ok: true, changes: diffChanges(original, corrected) };
}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import type { AdminPanel, CairnAdapter, CairnExtension, CairnRuntime, ConceptConfig, FieldTypeDef } from './types.js';
|
|
7
7
|
import { resolveConcepts } from './concepts.js';
|
|
8
8
|
import { normalizeAssets } from '../media/config.js';
|
|
9
|
-
import type
|
|
9
|
+
import { dictionaryFileForDialect, type SiteConfig } from '../nav/site-config.js';
|
|
10
10
|
|
|
11
11
|
/** The input to {@link composeRuntime}. `siteConfig` is required so the per-concept URL policy is
|
|
12
12
|
* always derived from one source and can never be silently dropped. `extensions` fold in after the
|
|
@@ -49,6 +49,16 @@ export function composeRuntime({ adapter, siteConfig, extensions = [] }: Compose
|
|
|
49
49
|
assets: adapter.assets,
|
|
50
50
|
resolvedAssets: normalizeAssets(adapter.assets),
|
|
51
51
|
mediaManifestPath: adapter.mediaManifestPath ?? 'src/content/.cairn/media.json',
|
|
52
|
+
// The personal dictionary sits beside the manifests under the same `.cairn/` content root, so the
|
|
53
|
+
// spec's `content/.cairn/dictionary.txt` resolves the same configurable way the manifest paths do.
|
|
54
|
+
dictionaryPath: adapter.dictionaryPath ?? 'src/content/.cairn/dictionary.txt',
|
|
55
|
+
// The spellcheck dictionary is resolved once here from the site config's dialect (default US),
|
|
56
|
+
// so the runtime and the editor never re-derive it. The site config is the one home for the
|
|
57
|
+
// dialect; the editor resolves this filename to a real asset URL on the main thread.
|
|
58
|
+
spellcheckDictionary: dictionaryFileForDialect(siteConfig.spellcheck?.dialect),
|
|
59
|
+
// The tidy block passes through from the site config; the tidy action reads enabled/model at call
|
|
60
|
+
// time and builds its prompt from conventions. Absent means tidy is off.
|
|
61
|
+
tidy: siteConfig.tidy,
|
|
52
62
|
adminPanels,
|
|
53
63
|
fieldTypes,
|
|
54
64
|
};
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
// cairn-cms: the git-committed per-site personal dictionary (spec 1.6). One word per line,
|
|
2
|
+
// sorted, with comment lines (starting with #) and blank lines tolerated on read. This module is
|
|
3
|
+
// pure: it parses the committed file text, inserts words in sorted order, and serializes the
|
|
4
|
+
// canonical form. The insert is order-independent, so the action's commit-and-retry can re-merge
|
|
5
|
+
// the pending additions at a new head and reach the same sorted set regardless of insertion order.
|
|
6
|
+
//
|
|
7
|
+
// The canonical serialization keeps a single leading header comment and one sorted word per line.
|
|
8
|
+
// An inbound file's other comment lines are dropped on serialize (the header is regenerated), so the
|
|
9
|
+
// committed file stays a clean, diffable, sorted word list; a maintainer who wants a richer comment
|
|
10
|
+
// edits it in git, and the next add through here normalizes it back to the header.
|
|
11
|
+
|
|
12
|
+
/** The header comment the canonical serialization writes above the sorted words. */
|
|
13
|
+
const HEADER = '# cairn personal dictionary: one word per line, sorted, kept in git.';
|
|
14
|
+
|
|
15
|
+
// A dictionary word: a single line carrying no whitespace and no control characters (Unicode `Cc`), so it can
|
|
16
|
+
// never inject an extra line into the committed file. Hyphens and apostrophes are allowed, since real
|
|
17
|
+
// words carry them ("well-known", "O'Brien"); a non-ASCII surname or place name validates too, since
|
|
18
|
+
// the test is for whitespace and control bytes rather than an allow-list of letters. The action runs
|
|
19
|
+
// inbound words through this before a merge.
|
|
20
|
+
const WORD_RE = /^[^\s\p{Cc}]+$/u;
|
|
21
|
+
|
|
22
|
+
/** Decide whether `word` may be written as one line of the dictionary file. It must be non-empty, at
 * most `maxLength` UTF-16 code units long (64 by default), match WORD_RE, and not start with "#".
 * The "#" rule exists because parseDictionary reads a line starting with "#" as a comment, so such a
 * word would be lost on the next read. The action calls this on untrusted input before merging. */
export function isValidDictionaryWord(word: string, maxLength = 64): boolean {
	const looksLikeComment = word.startsWith('#');
	const withinLength = word.length > 0 && word.length <= maxLength;
	return !looksLikeComment && withinLength && WORD_RE.test(word);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Read the word list out of the committed dictionary file text. The text is split on newlines and
 * each line is trimmed; blank lines and lines whose first non-whitespace character is `#` are
 * dropped, and every other line is kept as trimmed. A null or empty text gives an empty list. The
 * words come back in file order, with duplicates intact and unsorted, so the caller sees what the
 * file actually held; `mergeDictionaryWords` is the step that sorts and deduplicates.
 */
export function parseDictionary(text: string | null): string[] {
	if (!text) return [];
	return text
		.split('\n')
		.map((line) => line.trim())
		.filter((entry) => entry !== '' && !entry.startsWith('#'));
}
|
|
49
|
+
|
|
50
|
+
/** Case-insensitive comparator for the canonical sort, deterministic across runtimes. Both words are
 * lowercased with the locale-independent `toLowerCase`, so "Cairn" and "cairn" compare as equal.
 * The collation locale is pinned to `'en'` rather than left to the runtime default, so the committed
 * file sorts the same way wherever the merge runs. When the collator reports two different
 * lowercased strings as equal (canonically equivalent spellings such as a precomposed and a
 * decomposed accent), a plain code-unit comparison breaks the tie so the order never depends on
 * insertion order.
 *
 * @param a The first word.
 * @param b The second word.
 * @returns A negative number, zero, or a positive number, as `Array.prototype.sort` expects. */
function byWord(a: string, b: string): number {
	const left = a.toLowerCase();
	const right = b.toLowerCase();
	const collated = left.localeCompare(right, 'en');
	if (collated !== 0) return collated;
	if (left === right) return 0;
	// Distinct strings the collator considers equivalent: fall back to a total order.
	return left < right ? -1 : 1;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Union `existing` and `additions` into the canonical word list: deduplicated case-insensitively and
 * sorted with `byWord`. Words are keyed by their lowercased form, `existing` is walked before
 * `additions`, and the first spelling seen for a key is the one kept, so an existing "Cairn" wins
 * over a later "cairn". Any word from either list that fails `isValidDictionaryWord` is skipped; the
 * action validates earlier, so this is only a backstop. Neither input is modified.
 */
export function mergeDictionaryWords(existing: readonly string[], additions: readonly string[]): string[] {
	const firstSeen = new Map<string, string>();
	const record = (candidate: string): void => {
		if (!isValidDictionaryWord(candidate)) return;
		const folded = candidate.toLowerCase();
		if (firstSeen.has(folded)) return;
		firstSeen.set(folded, candidate);
	};
	for (const word of existing) record(word);
	for (const word of additions) record(word);
	return Array.from(firstSeen.values()).sort(byWord);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Produce the canonical file text for a word list: the HEADER comment line, then one word per line,
 * ending with a newline. The words go through `mergeDictionaryWords` first, so an unsorted list or
 * one with duplicates still comes out deduplicated and sorted. An empty list serializes to the
 * header line alone, so the file still exists and is recognizable.
 */
export function serializeDictionary(words: readonly string[]): string {
	const lines = [HEADER];
	for (const word of mergeDictionaryWords(words, [])) lines.push(word);
	return `${lines.join('\n')}\n`;
}
|
package/src/lib/content/types.ts
CHANGED
|
@@ -264,6 +264,11 @@ export interface CairnAdapter {
|
|
|
264
264
|
/** Repo-relative path to the committed media manifest. Defaults to src/content/.cairn/media.json,
|
|
265
265
|
* applied in composeRuntime. Sits outside any concept directory, like the content manifest. */
|
|
266
266
|
mediaManifestPath?: string;
|
|
267
|
+
/** Repo-relative path to the committed personal dictionary file. Defaults to
|
|
268
|
+
* src/content/.cairn/dictionary.txt, applied in composeRuntime: the same `.cairn/` content root the
|
|
269
|
+
* manifests use, so the spec's `content/.cairn/dictionary.txt` resolves the same configurable way the
|
|
270
|
+
* manifest paths do. One word per line, sorted, comment lines allowed (see site-dictionary.ts). */
|
|
271
|
+
dictionaryPath?: string;
|
|
267
272
|
/** Directive component registry; the renderer and the future palette derive from it (seam 3). */
|
|
268
273
|
registry?: ComponentRegistry;
|
|
269
274
|
/** The site's glyph name to SVG path-data map, for the admin icon picker and the renderer. */
|
|
@@ -380,6 +385,13 @@ export interface CairnRuntime {
|
|
|
380
385
|
manifestPath: string;
|
|
381
386
|
/** The repo-relative path to the committed media manifest, defaulted in composeRuntime. */
|
|
382
387
|
mediaManifestPath: string;
|
|
388
|
+
/** The repo-relative path to the committed personal dictionary file (one word per line, sorted),
|
|
389
|
+
* defaulted in composeRuntime to src/content/.cairn/dictionary.txt: the same `.cairn/` content root
|
|
390
|
+
* the manifests use. The edit load reads it and threads its words onto EditData; the
|
|
391
|
+
* addDictionaryWord action reads, merges, and commits it. Optional on the runtime so a hand-built
|
|
392
|
+
* runtime need not set it; composeRuntime always fills it, and the edit load and the action default
|
|
393
|
+
* a missing value to the same content-root path. */
|
|
394
|
+
dictionaryPath?: string;
|
|
383
395
|
/** The adapter's asset config resolved once at compose: `{ enabled: false }` for a no-media site,
|
|
384
396
|
* otherwise the filled config the upload, storage, delivery, and resolver paths read. */
|
|
385
397
|
resolvedAssets: import('../media/config.js').ResolvedAssetConfig;
|
|
@@ -390,6 +402,19 @@ export interface CairnRuntime {
|
|
|
390
402
|
/** The live site's content styling for the preview frame; passed through from the adapter. */
|
|
391
403
|
preview?: PreviewConfig;
|
|
392
404
|
assets?: AssetConfig;
|
|
405
|
+
/** The editor's spellcheck dictionary file, resolved once at compose from the site config's
|
|
406
|
+
* `spellcheck.dialect` (defaulting to US English). The edit load threads it onto EditData and the
|
|
407
|
+
* editor resolves it to a real asset URL on the main thread, so the Worker receives the URL and
|
|
408
|
+
* never reads config. Just the filename, e.g. "dictionary-en-us.txt". Optional on the runtime so a
|
|
409
|
+
* hand-built runtime need not set it; composeRuntime always fills it, and the edit load defaults a
|
|
410
|
+
* missing value to the US English dictionary. */
|
|
411
|
+
spellcheckDictionary?: string;
|
|
412
|
+
/** The editor tidy (LLM copy-edit) settings, passed through from the site config. Optional on the
|
|
413
|
+
* runtime so a hand-built runtime need not set it; composeRuntime threads it from
|
|
414
|
+
* `siteConfig.tidy`. The tidy action reads `enabled` and `model` at call time, and builds its prompt
|
|
415
|
+
* from `conventions`. Absent (or `enabled` false) means tidy is off, and the action refuses with a
|
|
416
|
+
* fail(503) before any model call. */
|
|
417
|
+
tidy?: import('../nav/site-config.js').TidyConfig;
|
|
393
418
|
/** Admin panels contributed by extensions (Mode 2). Empty until Plan 09 wires the dispatch route. */
|
|
394
419
|
adminPanels?: AdminPanel[];
|
|
395
420
|
/** Field types contributed by extensions (Mode 2). Empty until Plan 09 wires the form dispatch. */
|
|
@@ -6,6 +6,7 @@ import type { CheckResult, DoctorCheck, DoctorContext } from './types.js';
|
|
|
6
6
|
import { readWranglerConfig } from './wrangler-config.js';
|
|
7
7
|
import { requireOrigin } from '../env.js';
|
|
8
8
|
import { parseSiteConfig, urlPolicyFrom } from '../nav/site-config.js';
|
|
9
|
+
import type { SiteConfig } from '../nav/site-config.js';
|
|
9
10
|
import { normalizeConcepts } from '../content/concepts.js';
|
|
10
11
|
import { defineFields } from '../content/schema.js';
|
|
11
12
|
import type { ConceptConfig } from '../content/types.js';
|
|
@@ -138,16 +139,21 @@ export const configPublicOrigin: DoctorCheck = {
|
|
|
138
139
|
// src locations the production sites use).
|
|
139
140
|
const SITE_CONFIG_PATHS = ['site.config.yaml', 'src/lib/site.config.yaml', 'src/site.config.yaml'];
|
|
140
141
|
|
|
142
|
+
// Probe each conventional site.config.yaml location in order and hand back the text of the first
// one that ctx.readFile can produce. Resolves to null when every location comes back null.
async function readSiteConfigText(ctx: DoctorContext): Promise<string | null> {
  let found: string | null = null;
  for (const candidate of SITE_CONFIG_PATHS) {
    found = await ctx.readFile(candidate);
    // Stop at the first hit so later locations are never read.
    if (found !== null) break;
  }
  return found;
}
|
|
150
|
+
|
|
141
151
|
export const configSiteConfig: DoctorCheck = {
|
|
142
152
|
id: 'config.site-config',
|
|
143
153
|
conditionId: 'config.site-config-invalid',
|
|
144
154
|
title: 'Site config',
|
|
145
155
|
async run(ctx: DoctorContext): Promise<CheckResult> {
|
|
146
|
-
|
|
147
|
-
for (const path of SITE_CONFIG_PATHS) {
|
|
148
|
-
text = await ctx.readFile(path);
|
|
149
|
-
if (text !== null) break;
|
|
150
|
-
}
|
|
156
|
+
const text = await readSiteConfigText(ctx);
|
|
151
157
|
if (text === null) return skip(`no site.config.yaml found (looked in ${SITE_CONFIG_PATHS.join(', ')})`);
|
|
152
158
|
try {
|
|
153
159
|
const policy = urlPolicyFrom(parseSiteConfig(text));
|
|
@@ -165,3 +171,51 @@ export const configSiteConfig: DoctorCheck = {
|
|
|
165
171
|
}
|
|
166
172
|
},
|
|
167
173
|
};
|
|
174
|
+
|
|
175
|
+
// Reports whether the committed site config switches tidy on (`tidy.enabled: true`). A config that
// does not parse counts as "not enabled": configSiteConfig already reports that failure, so this
// helper stays quiet instead of reporting it a second time.
function tidyEnabled(text: string): boolean {
  let parsed: SiteConfig;
  try {
    parsed = parseSiteConfig(text);
  } catch {
    // Unparseable config: leave the report to configSiteConfig.
    return false;
  }
  const tidy = parsed.tidy;
  // Only a literal `true` counts; an absent tidy section or any other value is "off".
  return tidy != null && tidy.enabled === true;
}
|
|
187
|
+
|
|
188
|
+
// The Anthropic key is a Worker secret, which lives in neither the committed wrangler config nor
// anything readFile reaches, so the doctor cannot prove it is unset. What it can do is look in the
// two places the key name would show up if it were set as a plain var: the wrangler config text and
// .dev.vars. A plain by-name substring match is enough for this heuristic; the runtime fail(503)
// and --probe are the real truth checks.
function keyAppearsIn(text: string | null): boolean {
  // A file that could not be read cannot mention the key.
  if (text === null) {
    return false;
  }
  return text.includes('ANTHROPIC_API_KEY');
}
|
|
195
|
+
|
|
196
|
+
// The tidy secret heuristic. It reuses the config.bindings-missing condition rather than registering a
// new one, so the readiness count holds (the same pattern configMediaBucket uses).
//
// Flow: skip when no site config is found or tidy is not enabled; otherwise look for the
// ANTHROPIC_API_KEY name in the wrangler config text, then in .dev.vars, and pass on the first hit.
// A miss is not a definitive unset claim: a wrangler secret is invisible to the CLI, so the message
// asks the operator to verify the secret.
// NOTE(review): this was described as a "warn", but the miss returns fail(...); confirm the intended
// severity.
export const configTidyKey: DoctorCheck = {
  id: 'config.tidy-key',
  conditionId: 'config.bindings-missing',
  title: 'Tidy API key',
  async run(ctx: DoctorContext): Promise<CheckResult> {
    const text = await readSiteConfigText(ctx);
    if (text === null) return skip('no site.config.yaml found, so tidy enablement is unknown');
    if (!tidyEnabled(text)) return skip('tidy is not enabled in the site config');
    // Wrangler also accepts a plain wrangler.json config file. It is tried last so the existing
    // wrangler.jsonc / wrangler.toml precedence is unchanged; only projects that have neither of
    // those files gain coverage.
    const wrangler =
      (await ctx.readFile('wrangler.jsonc')) ??
      (await ctx.readFile('wrangler.toml')) ??
      (await ctx.readFile('wrangler.json'));
    if (keyAppearsIn(wrangler)) {
      return pass('ANTHROPIC_API_KEY appears in the wrangler vars (verify it is the real key, not a placeholder)');
    }
    // .dev.vars is only consulted when the wrangler config does not mention the key.
    const devVars = await ctx.readFile('.dev.vars');
    if (keyAppearsIn(devVars)) {
      return pass('ANTHROPIC_API_KEY appears in .dev.vars (the local override; verify the Worker secret is set for production)');
    }
    return fail(
      'tidy is enabled but ANTHROPIC_API_KEY is in neither the wrangler vars nor .dev.vars; verify the secret is configured with wrangler secret put ANTHROPIC_API_KEY'
    );
  },
};
|
package/src/lib/doctor/index.ts
CHANGED
|
@@ -9,6 +9,7 @@ import {
|
|
|
9
9
|
configCsrfDisable,
|
|
10
10
|
configSiteConfig,
|
|
11
11
|
configPublicOrigin,
|
|
12
|
+
configTidyKey,
|
|
12
13
|
} from './checks-local.js';
|
|
13
14
|
import { configDependencyFloors } from './check-floors.js';
|
|
14
15
|
import { emailSenderOnboarded, edgeHttpsForced, edgeHsts, authStore } from './checks-cloudflare.js';
|
|
@@ -162,6 +163,7 @@ export function defaultChecks(): DoctorCheck[] {
|
|
|
162
163
|
configCsrfDisable,
|
|
163
164
|
configSiteConfig,
|
|
164
165
|
configPublicOrigin,
|
|
166
|
+
configTidyKey,
|
|
165
167
|
configDependencyFloors,
|
|
166
168
|
emailSenderOnboarded,
|
|
167
169
|
edgeHttpsForced,
|
package/src/lib/log/events.ts
CHANGED
|
@@ -27,4 +27,10 @@ export type CairnLogEvent =
|
|
|
27
27
|
| 'media.orphans_purged'
|
|
28
28
|
| 'media.replaced'
|
|
29
29
|
| 'media.replace_blocked'
|
|
30
|
-
| 'media.alt_propagated'
|
|
30
|
+
| 'media.alt_propagated'
|
|
31
|
+
| 'dictionary.added'
|
|
32
|
+
| 'dictionary.add_conflict'
|
|
33
|
+
| 'tidy.done'
|
|
34
|
+
| 'tidy.error'
|
|
35
|
+
| 'tidy.refused'
|
|
36
|
+
| 'tidy.empty';
|