npm - @glw907/cairn-cms - Versions diffs - 0.58.0 → 0.60.0 - Mend

@glw907/cairn-cms 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

package/CHANGELOG.md +84 -0
package/dist/components/CairnAdmin.svelte +3 -0
package/dist/components/CairnMediaLibrary.svelte +1101 -27
package/dist/components/CairnMediaLibrary.svelte.d.ts +10 -2
package/dist/components/CairnTidySettings.svelte +553 -0
package/dist/components/CairnTidySettings.svelte.d.ts +32 -0
package/dist/components/EditPage.svelte +371 -2
package/dist/components/MarkdownEditor.svelte +168 -1
package/dist/components/MarkdownEditor.svelte.d.ts +44 -0
package/dist/components/TidyReview.svelte +463 -0
package/dist/components/TidyReview.svelte.d.ts +47 -0
package/dist/components/admin-icons.d.ts +1 -0
package/dist/components/admin-icons.js +1 -0
package/dist/components/cairn-admin.css +913 -2
package/dist/components/editor-tidy.d.ts +31 -0
package/dist/components/editor-tidy.js +199 -0
package/dist/components/index.d.ts +1 -0
package/dist/components/index.js +1 -0
package/dist/components/markdown-directives.d.ts +16 -0
package/dist/components/markdown-directives.js +34 -0
package/dist/components/objective-errors.d.ts +30 -0
package/dist/components/objective-errors.js +113 -0
package/dist/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
package/dist/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
package/dist/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
package/dist/components/spellcheck-worker.d.ts +80 -0
package/dist/components/spellcheck-worker.js +161 -0
package/dist/components/spellcheck.d.ts +146 -0
package/dist/components/spellcheck.js +541 -0
package/dist/components/tidy-categorize.d.ts +67 -0
package/dist/components/tidy-categorize.js +392 -0
package/dist/components/tidy-diff.d.ts +60 -0
package/dist/components/tidy-diff.js +147 -0
package/dist/components/tidy-validate.d.ts +37 -0
package/dist/components/tidy-validate.js +174 -0
package/dist/content/compose.d.ts +1 -1
package/dist/content/compose.js +11 -0
package/dist/content/site-dictionary.d.ts +31 -0
package/dist/content/site-dictionary.js +82 -0
package/dist/content/types.d.ts +25 -0
package/dist/doctor/checks-local.d.ts +1 -0
package/dist/doctor/checks-local.js +55 -6
package/dist/doctor/index.js +2 -1
package/dist/log/events.d.ts +1 -1
package/dist/media/bulk-delete-plan.d.ts +24 -0
package/dist/media/bulk-delete-plan.js +25 -0
package/dist/media/orphan-scan.d.ts +37 -0
package/dist/media/orphan-scan.js +42 -0
package/dist/media/reconcile.d.ts +3 -0
package/dist/media/reconcile.js +3 -2
package/dist/nav/site-config.d.ts +98 -0
package/dist/nav/site-config.js +132 -0
package/dist/sveltekit/admin-dispatch.d.ts +2 -0
package/dist/sveltekit/admin-dispatch.js +6 -2
package/dist/sveltekit/cairn-admin.d.ts +16 -1
package/dist/sveltekit/cairn-admin.js +28 -3
package/dist/sveltekit/content-routes.d.ts +171 -4
package/dist/sveltekit/content-routes.js +597 -3
package/dist/sveltekit/index.d.ts +1 -1
package/dist/sveltekit/tidy-prompt.d.ts +11 -0
package/dist/sveltekit/tidy-prompt.js +118 -0
package/package.json +10 -1
package/src/lib/components/CairnAdmin.svelte +3 -0
package/src/lib/components/CairnMediaLibrary.svelte +1101 -27
package/src/lib/components/CairnTidySettings.svelte +553 -0
package/src/lib/components/EditPage.svelte +371 -2
package/src/lib/components/MarkdownEditor.svelte +168 -1
package/src/lib/components/TidyReview.svelte +463 -0
package/src/lib/components/admin-icons.ts +1 -0
package/src/lib/components/cairn-admin.css +25 -0
package/src/lib/components/editor-tidy.ts +241 -0
package/src/lib/components/index.ts +1 -0
package/src/lib/components/markdown-directives.ts +35 -0
package/src/lib/components/objective-errors.ts +155 -0
package/src/lib/components/spellcheck-assets/dictionary-en-us.txt +104743 -0
package/src/lib/components/spellcheck-assets/spellchecker-wasm-LICENSE.txt +21 -0
package/src/lib/components/spellcheck-assets/spellchecker-wasm.wasm +0 -0
package/src/lib/components/spellcheck-worker.ts +279 -0
package/src/lib/components/spellcheck.ts +679 -0
package/src/lib/components/tidy-categorize.ts +460 -0
package/src/lib/components/tidy-diff.ts +196 -0
package/src/lib/components/tidy-validate.ts +202 -0
package/src/lib/content/compose.ts +11 -1
package/src/lib/content/site-dictionary.ts +84 -0
package/src/lib/content/types.ts +25 -0
package/src/lib/doctor/checks-local.ts +59 -5
package/src/lib/doctor/index.ts +2 -0
package/src/lib/log/events.ts +9 -1
package/src/lib/media/bulk-delete-plan.ts +54 -0
package/src/lib/media/orphan-scan.ts +74 -0
package/src/lib/media/reconcile.ts +3 -2
package/src/lib/nav/site-config.ts +197 -0
package/src/lib/sveltekit/admin-dispatch.ts +7 -3
package/src/lib/sveltekit/cairn-admin.ts +38 -4
package/src/lib/sveltekit/content-routes.ts +795 -7
package/src/lib/sveltekit/index.ts +1 -0
package/src/lib/sveltekit/tidy-prompt.ts +153 -0

package/dist/components/tidy-categorize.js ADDED Viewed

@@ -0,0 +1,392 @@
+// The local tidy category taxonomy and the because-line builder (spec 2.5, decision 9). The tidy
+// action returns a corrected STRING; the diff (Task 12) turns it into changes; this module infers each
+// change's category and safety rank LOCALLY from the diff shape and the enabled config, never from a
+// claim the model made and never from a count of the author's own usage. It is pure: the inputs are a
+// change, the captured original, and the resolved conventions, and the outputs are a category and an
+// optional because-line. Approximate by design, so it is unit-tested rather than trusted.
+//
+// The safety rank is the spine. Objective categories (spelling, typo, doubled word, whitespace) read
+// quiet and are swept by Accept-fixes. Judgment categories (a declared normalization, or a grammar fix
+// that reworded more than one token) carry the review-this treatment and are never swept until the
+// author confirms each. The category alone decides the rank, so the surface and the bulk action agree.
+/** True for the objective categories: the safe, pre-kept, Accept-fixes-swept rank. A judgment
+ *  category (`normalization` or `grammar`) returns false. The bulk action and the surface both read
+ *  this, so the safety rank is one source of truth. */
+export function isObjective(category) {
+    return (category.kind === 'spelling' ||
+        category.kind === 'typo' ||
+        category.kind === 'doubled' ||
+        category.kind === 'whitespace');
+}
+// The token boundary the diff uses (a word with inner apostrophes, or a run of non-word characters), so a change's word/non-word token count here matches the diff's.
+const TOKEN = /[A-Za-z0-9_]+(?:['’][A-Za-z0-9_]+)*|[^A-Za-z0-9_]+/g;
+// The en-dash (U+2013) and em-dash (U+2014) characters, named here so the comments below never type the literal glyph
+// (the prose-guard rejects a literal dash even in a comment). Used by the punctuation conventions.
+const EN_DASH = '–';
+const EM_DASH = '—';
+function tokens(text) {
+    return text.match(TOKEN) ?? [];
+}
+// The spelled-out number words the numberStyle convention recognizes against a numeral, the conservative
+// clear cases only, all lowercase (matchNormalization lowercases each side before the lookup). A swap is
+// claimed as a numberStyle normalization only when one side is one of these words and the other side is a
+// plain integer numeral; a compound spelled number ("twenty-five") or any other word is left to the shape rules.
+const NUMBER_WORDS = new Set([
+    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
+    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen',
+    'nineteen', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety',
+    'hundred', 'thousand', 'million', 'billion',
+]);
+// The unit notation pairs the measurements convention recognizes: each lowercase abbreviation and its
+// spelled-out forms, singular and plural, with both the -er and -re (or -am and -amme) spellings where
+// the list carries them. A swap is claimed as a measurements normalization only when one side is a known
+// abbreviation and the other its spelled-out form, the number untouched (the diff isolates the unit token).
+// The list is deliberately a curated common set, so a unit outside it is left to the shape rules rather than guessed at.
+const UNIT_FORMS = [
+    { abbr: 'cm', words: ['centimeter', 'centimeters', 'centimetre', 'centimetres'] },
+    { abbr: 'mm', words: ['millimeter', 'millimeters', 'millimetre', 'millimetres'] },
+    { abbr: 'm', words: ['meter', 'meters', 'metre', 'metres'] },
+    { abbr: 'km', words: ['kilometer', 'kilometers', 'kilometre', 'kilometres'] },
+    { abbr: 'in', words: ['inch', 'inches'] },
+    { abbr: 'ft', words: ['foot', 'feet'] },
+    { abbr: 'yd', words: ['yard', 'yards'] },
+    { abbr: 'mi', words: ['mile', 'miles'] },
+    { abbr: 'g', words: ['gram', 'grams', 'gramme', 'grammes'] },
+    { abbr: 'kg', words: ['kilogram', 'kilograms', 'kilogramme', 'kilogrammes'] },
+    { abbr: 'mg', words: ['milligram', 'milligrams'] },
+    { abbr: 'lb', words: ['pound', 'pounds'] },
+    { abbr: 'oz', words: ['ounce', 'ounces'] },
+    { abbr: 'l', words: ['liter', 'liters', 'litre', 'litres'] },
+    { abbr: 'ml', words: ['milliliter', 'milliliters', 'millilitre', 'millilitres'] },
+];
+// True when `a` and `b` are the two notations of one measurement unit (one the abbreviation, the other a
+// spelled-out form). Case-insensitive on the word side; the abbreviation is compared exactly so a stray
+// word like "in" the preposition is not mistaken for the inch abbreviation unless the other side is its
+// spelled-out unit. Order-independent: either side may be the abbreviation.
+function isUnitNotationPair(a, b) {
+    for (const u of UNIT_FORMS) {
+        const aAbbr = a === u.abbr;
+        const bAbbr = b === u.abbr;
+        const aWord = u.words.includes(a.toLowerCase());
+        const bWord = u.words.includes(b.toLowerCase());
+        if ((aAbbr && bWord) || (bAbbr && aWord))
+            return true;
+    }
+    return false;
+}
+// The clock-time signature for a token: its digits and meridiem reduced to a canonical key, or null when
+// the token does not read as a time. Whitespace and the periods in "p.m." are dropped and the letters are
+// lowercased, so "5pm", "5 PM", and "5 p.m." all reduce to "5pm" and a reshape between any two of them is
+// recognized as the same time in a different format.
+function timeKey(token) {
+    const compact = token.replace(/[\s.]/g, '').toLowerCase();
+    const m = /^(\d{1,2})(:\d{2})?(am|pm)$/.exec(compact);
+    if (!m)
+        return null;
+    return `${m[1]}${m[2] ?? ''}${m[3]}`;
+}
+function words(text) {
+    return tokens(text).filter((t) => /[A-Za-z0-9_]/.test(t));
+}
+function isWhitespaceOnly(text) {
+    return text.length > 0 && /^\s+$/.test(text);
+}
+function isPunctuationOnly(text) {
+    return text.length > 0 && /^[^A-Za-z0-9_\s]+$/.test(text);
+}
+/** The word ending immediately before `offset` in `text`, skipping any whitespace just before the
+ *  offset, or null when none. The doubled-word rule reads it to confirm the deleted word repeats the
+ *  one before it. Pure text inspection, never a count. */
+function precedingWord(text, offset) {
+    let i = offset;
+    while (i > 0 && /\s/.test(text[i - 1]))
+        i--;
+    let j = i;
+    while (j > 0 && /[A-Za-z0-9_'’]/.test(text[j - 1]))
+        j--;
+    return j < i ? text.slice(j, i) : null;
+}
+/** The word starting immediately after `offset` in `text`, skipping any whitespace just after the
+ *  offset, or null when none. The doubled-word rule reads it as the other half of the look-around. */
+function followingWord(text, offset) {
+    let i = offset;
+    while (i < text.length && /\s/.test(text[i]))
+        i++;
+    let j = i;
+    while (j < text.length && /[A-Za-z0-9_'’]/.test(text[j]))
+        j++;
+    return j > i ? text.slice(i, j) : null;
+}
+/**
+ * Categorize one change against the captured original and the resolved conventions. The rules are
+ * deterministic and are tried in this order, the first match winning:
+ *   1. a pure whitespace change (both sides whitespace, or a whitespace insert/delete) is whitespace;
+ *   2. a removed repeated word (a deleted word equal to its neighbor, or two equal words collapsing
+ *      to one) is doubled;
+ *   3. a change matching an ENABLED config convention's signature is that convention's normalization;
+ *   4. a single-token punctuation-only swap, or a lone punctuation insert or delete, is a typo;
+ *   5. a single word replaced by another single word is spelling;
+ *   6. anything else (a multi-token reword) is grammar.
+ * The normalization match runs before the typo and spelling rules so that an enabled convention
+ * claims its change first. A change that looks like a normalization but whose convention is not
+ * enabled falls through to typo, spelling, or grammar by its shape, never to a normalization it
+ * cannot name. So the surface never offers a normalization that cannot cite an enabled setting.
+ *
+ * @param change the change to label; its `from`, `to`, and `replacement` are read.
+ * @param original the captured original text that `change.from` and `change.to` index into.
+ * @param conventions the resolved conventions, handed unchanged to the normalization matcher.
+ * @returns an object with a `kind`, plus a `convention` key when the kind is `normalization`.
+ */
+export function categorize(change, original, conventions) {
+    const removed = original.slice(change.from, change.to);
+    const added = change.replacement;
+    // Whitespace: the removed and added runs are each whitespace-only or empty, and at least one is
+    // non-empty whitespace. A trailing-space trim (whitespace removed, nothing added) or a run
+    // collapsed to a single space both land here.
+    const removedWs = removed === '' || isWhitespaceOnly(removed);
+    const addedWs = added === '' || isWhitespaceOnly(added);
+    if (removedWs && addedWs && (isWhitespaceOnly(removed) || isWhitespaceOnly(added))) {
+        return { kind: 'whitespace' };
+    }
+    // Doubled word: a repeated word collapses to one. The diff keeps one copy and deletes the other,
+    // so the change reads as a deletion of one word (with any adjoining non-word run) and an empty
+    // replacement, where that word equals the word immediately before or after the change in the
+    // original. The look-around at the neighbor is what tells a doubled word from a plain deletion; it reads the original text, never
+    // a usage count. (A change whose own run already holds both copies, "word word" to "word", is the
+    // fallback form, handled by the separate two-word test further down.)
+    const removedWords = words(removed);
+    const addedWords = words(added);
+    if (removedWords.length === 1 && addedWords.length === 0 && /\S/.test(removed)) {
+        const w = removedWords[0].toLowerCase();
+        // The diff may delete either copy of the pair: the surviving copy is the word just before or just
+        // after the deleted run in the original. Either match (case-insensitive) confirms a doubled word.
+        const before = precedingWord(original, change.from);
+        const after = followingWord(original, change.to);
+        if ((before && before.toLowerCase() === w) || (after && after.toLowerCase() === w)) {
+            return { kind: 'doubled' };
+        }
+    }
+    if (removedWords.length === 2 &&
+        addedWords.length === 1 &&
+        removedWords[0].toLowerCase() === removedWords[1].toLowerCase() &&
+        addedWords[0].toLowerCase() === removedWords[0].toLowerCase()) {
+        return { kind: 'doubled' };
+    }
+    // The single-token shape: exactly one token removed and one token added (a clean replacement),
+    // which is how a typo fix and a spelling fix both read.
+    const removedTokens = tokens(removed);
+    const addedTokens = tokens(added);
+    const singleSwap = removedTokens.length === 1 && addedTokens.length === 1;
+    // A declared normalization: the change matches an enabled convention's signature. Checked before the
+    // typo and spelling rules; the matcher claims only narrow punctuation and notation signatures, so a
+    // plain misspelling is not expected to be miscategorized as a normalization. A normalization is offered
+    // ONLY when its config variant is enabled.
+    const norm = matchNormalization(removed, added, conventions);
+    if (norm)
+        return { kind: 'normalization', convention: norm };
+    // A single-token punctuation-only change (a stray or wrong mark fixed) is a typo. Reached only after
+    // the normalization check, so an enabled punctuation convention claims its change first.
+    if (singleSwap && isPunctuationOnly(removed) && isPunctuationOnly(added)) {
+        return { kind: 'typo' };
+    }
+    // A punctuation insert or delete (a missing period added, say) with no other token is also a typo.
+    if ((removed === '' && addedTokens.length === 1 && isPunctuationOnly(added)) ||
+        (added === '' && removedTokens.length === 1 && isPunctuationOnly(removed))) {
+        return { kind: 'typo' };
+    }
+    // A single word replaced by another single word is a spelling fix.
+    if (singleSwap && removedWords.length === 1 && addedWords.length === 1) {
+        return { kind: 'spelling' };
+    }
+    // Anything else is a grammar reword: a multi-token change the author should review.
+    return { kind: 'grammar' };
+}
+// Match a change against the enabled conventions' signatures. Returns the convention key when the
+// change's shape is what that convention produces AND the config has it enabled, else null. The
+// signatures are deliberately narrow: each recognizes only the unambiguous form of its convention, so
+// a false match is rare and a missed match falls to the shape-based category (never to a normalization
+// the config did not authorize). Never counts the author's own usage; the only gate is the config `c`.
+function matchNormalization(removed, added, c) {
+    // Oxford comma: a serial comma added before the final conjunction (a space becomes a comma then a
+    // space) or removed. The diff isolates the punctuation run, so the signature is a comma appearing or
+    // disappearing with the surrounding space; the conjunction itself is not inspected here.
+    if (c.oxfordComma === 'always' && /^\s*$/.test(removed) && /^,\s*$/.test(added)) {
+        return 'oxfordComma';
+    }
+    if (c.oxfordComma === 'never' && /^,\s*$/.test(removed) && /^\s*$/.test(added)) {
+        return 'oxfordComma';
+    }
+    // Percent: the word to the sign or back, the whole token swapped.
+    if (c.percent === 'sign' && /^percent$/i.test(removed.trim()) && added.trim() === '%') {
+        return 'percent';
+    }
+    if (c.percent === 'word' && removed.trim() === '%' && /^percent$/i.test(added.trim())) {
+        return 'percent';
+    }
+    // Ellipsis: three dots to the single character or back.
+    if (c.ellipsis === 'single-char' && removed.includes('...') && added.includes('…')) {
+        return 'ellipsis';
+    }
+    if (c.ellipsis === 'three-dots' && removed.includes('…') && added.includes('...')) {
+        return 'ellipsis';
+    }
+    // En-dash ranges: a hyphen between two numbers becomes an en dash. The diff isolates the separator
+    // token between the numbers, so the signature is a hyphen run becoming an en-dash run.
+    if (c.enDashRanges && removed.trim() === '-' && added.trim() === EN_DASH) {
+        return 'enDashRanges';
+    }
+    // Em-dash spacing: the spacing around an em dash changes. The dash stays; only the whitespace around
+    // it moves, so the two runs must be equal once all whitespace is stripped from both.
+    if (c.emDash !== undefined && removed.includes(EM_DASH) && added.includes(EM_DASH) && removed !== added) {
+        if (removed.replace(/\s/g, '') === added.replace(/\s/g, ''))
+            return 'emDash';
+    }
+    // Smart quotes: a straight quote becomes a curly one (or an apostrophe). The signature is a straight
+    // quote in the removed run and its curly counterpart in the added run, the letters preserved.
+    if (c.smartQuotes &&
+        /['"]/.test(removed) &&
+        /[‘’“”]/.test(added) &&
+        removed.replace(/['"]/g, '') === added.replace(/[‘’“”]/g, '')) {
+        return 'smartQuotes';
+    }
+    // Number style: a spelled-out number word swapped for a plain integer numeral, or back. The diff
+    // isolates the single number token, so the signature is one trimmed side a known number word and the
+    // other a digit run. Only the clear single-word cases are claimed; a compound spelled number is left to
+    // the shape rules. The always-numeral exception sets (ages, dates, measurements, percentages) are the
+    // model's job in the prompt; this categorizer only labels the swap that landed.
+    if (c.numberStyle !== undefined) {
+        const r = removed.trim().toLowerCase();
+        const a = added.trim().toLowerCase();
+        const wordToNumeral = NUMBER_WORDS.has(r) && /^\d+$/.test(a);
+        const numeralToWord = /^\d+$/.test(r) && NUMBER_WORDS.has(a);
+        if (wordToNumeral || numeralToWord)
+            return 'numberStyle';
+    }
+    // Measurements: a unit abbreviation swapped for its spelled-out form, or back, the number untouched.
+    // The diff isolates the unit token, so the signature is the two trimmed sides forming one unit's
+    // notation pair. Notation only, never the system and never the number, exactly the convention's scope.
+    if (c.measurements !== undefined && isUnitNotationPair(removed.trim(), added.trim())) {
+        return 'measurements';
+    }
+    // Time format: a clock time reshaped between "5pm", "5 PM", and "5 p.m." styles. This claims only the
+    // case where the diff isolates the whole time as one change, so both sides reduce to the same time key.
+    // When a reshape leaves the digits and the space untouched, the change holds only the letters
+    // (for example "5 PM" to "5 p.m."); that case is left to the shape rules, where it stays a judgment
+    // hunk that defaults to undecided, so it is still never swept by Accept-fixes.
+    if (c.timeFormat !== undefined) {
+        const rKey = timeKey(removed.trim());
+        const aKey = timeKey(added.trim());
+        if (rKey !== null && rKey === aKey && removed.trim() !== added.trim())
+            return 'timeFormat';
+    }
+    return null;
+}
+/**
+ * Build the because-line for a normalization category. Its ONLY data source is the config-declared
+ * setting that authorized the hunk: the convention key indexes the enabled variant on the conventions,
+ * and the line names that setting and variant. It NEVER counts the author's own usage. Counting the
+ * author's habit to justify a change is the harmonize-to-author judgment cairn must never make, so no
+ * code path here reads the buffer or any usage statistic; the conventions are the sole input. Returns
+ * null when the convention is somehow not enabled (defensive: categorize never produces such a hunk).
+ */
+export function buildBecause(key, conventions) {
+    switch (key) {
+        case 'oxfordComma': {
+            if (conventions.oxfordComma === undefined)
+                return null;
+            const variant = conventions.oxfordComma;
+            let effect;
+            switch (variant) {
+                case 'always':
+                    effect = 'tidy adds the serial comma before the final "and"';
+                    break;
+                case 'never':
+                    effect = 'tidy removes the serial comma before the final "and"';
+                    break;
+                default:
+                    effect = 'tidy applies the serial comma to a complex series';
+            }
+            return { label: 'Oxford-comma', variant, effect };
+        }
+        case 'numberStyle': {
+            if (conventions.numberStyle === undefined)
+                return null;
+            return { label: 'number-style', variant: conventions.numberStyle, effect: 'tidy applies your number style' };
+        }
+        case 'measurements': {
+            if (conventions.measurements === undefined)
+                return null;
+            return {
+                label: 'measurement',
+                variant: conventions.measurements,
+                effect: 'tidy applies your measurement notation',
+            };
+        }
+        case 'percent': {
+            if (conventions.percent === undefined)
+                return null;
+            const variant = conventions.percent === 'sign' ? 'the sign' : 'the word';
+            const effect = conventions.percent === 'sign' ? 'tidy uses the "%" sign' : 'tidy uses the word "percent"';
+            return { label: 'percent', variant, effect };
+        }
+        case 'emDash': {
+            if (conventions.emDash === undefined)
+                return null;
+            return { label: 'em-dash', variant: conventions.emDash, effect: 'tidy applies your em-dash spacing' };
+        }
+        case 'enDashRanges': {
+            if (!conventions.enDashRanges)
+                return null;
+            return { label: 'number-range', variant: 'en dash', effect: 'tidy uses an en dash between numbers' };
+        }
+        case 'ellipsis': {
+            if (conventions.ellipsis === undefined)
+                return null;
+            return { label: 'ellipsis', variant: conventions.ellipsis, effect: 'tidy applies your ellipsis style' };
+        }
+        case 'timeFormat': {
+            if (conventions.timeFormat === undefined)
+                return null;
+            return { label: 'time-format', variant: conventions.timeFormat, effect: 'tidy renders the time that way' };
+        }
+        case 'smartQuotes': {
+            if (!conventions.smartQuotes)
+                return null;
+            return { label: 'smart-quotes', variant: 'on', effect: 'tidy curls the straight quote' };
+        }
+    }
+}
+/** The human badge label for a category, the word shown in the hunk's category pill. A normalization's
+ *  label is the convention's display name (its comma style, its time format), never "consistency" and
+ *  never a count. */
+export function categoryLabel(category) {
+    switch (category.kind) {
+        case 'spelling':
+            return 'Spelling';
+        case 'typo':
+            return 'Punctuation';
+        case 'doubled':
+            return 'Doubled word';
+        case 'whitespace':
+            return 'Whitespace';
+        case 'grammar':
+            return 'Grammar';
+        case 'normalization':
+            return normalizationLabel(category.convention);
+    }
+}
+function normalizationLabel(key) {
+    switch (key) {
+        case 'oxfordComma':
+            return 'Comma style';
+        case 'numberStyle':
+            return 'Number style';
+        case 'measurements':
+            return 'Measurements';
+        case 'percent':
+            return 'Percent';
+        case 'emDash':
+            return 'Em-dash style';
+        case 'enDashRanges':
+            return 'Number range';
+        case 'ellipsis':
+            return 'Ellipsis';
+        case 'timeFormat':
+            return 'Time format';
+        case 'smartQuotes':
+            return 'Smart quotes';
+    }
+}

package/dist/components/tidy-diff.d.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * One run of the token diff. A run is contiguous tokens of a single kind. Both offsets index the
+ * captured ORIGINAL text; only `text` can hold corrected text (for an `inserted` run).
+ */
+export interface DiffRange {
+    /**
+     * `equal` for tokens kept from the original, `deleted` for tokens removed from the original,
+     * `inserted` for tokens that appear only in the corrected text.
+     */
+    kind: 'equal' | 'inserted' | 'deleted';
+    /**
+     * The offset into the captured ORIGINAL where this run begins. For `equal` and `deleted` runs
+     * this is the start of the run's text in the original. For an `inserted` run there is no original
+     * span, so `from === to`: the offset is the insertion point in the original.
+     */
+    from: number;
+    /** The offset into the captured ORIGINAL where this run ends. For an `inserted` run, equal to `from`. */
+    to: number;
+    /** The actual token text of this run (original text for equal/deleted, corrected text for inserted). */
+    text: string;
+}
+/**
+ * A change: the unit the review UI accepts and rejects. A change is a deletion, an insertion, or a
+ * deletion immediately followed by an insertion that reads as a replacement. Each change is a faithful
+ * edit recipe against the captured original: splice `replacement` over the original span `[from, to)`.
+ * The span is half-open, so `to` is the first offset NOT replaced and `to - from` is the number of
+ * original characters removed.
+ */
+export interface Change {
+    /** A stable, gap-free index (0, 1, 2, ...) assigned in document order. */
+    index: number;
+    /** The start offset of the change's span in the captured ORIGINAL. */
+    from: number;
+    /**
+     * The end offset of the change's span in the captured ORIGINAL. A pure insertion has a zero-width
+     * span (`from === to`); a pure deletion has a non-empty span with an empty `replacement`.
+     */
+    to: number;
+    /** The text to splice over `[from, to)`. Empty for a pure deletion. */
+    replacement: string;
+}
+/**
+ * Diff the original against the corrected text and return runs of equal, inserted, and deleted tokens.
+ * Both strings are tokenized into words plus the whitespace and punctuation between them, an LCS over
+ * the token sequences finds the kept tokens, and the gaps become deleted and inserted runs.
+ *
+ * Run offsets index the captured ORIGINAL: an `equal` or `deleted` run spans its original text, an
+ * `inserted` run carries a zero-width original span at the insertion point. Concatenating the equal
+ * and deleted runs rebuilds the original; concatenating the equal and inserted runs rebuilds the
+ * corrected text.
+ *
+ * @param original the text captured before the correction; every run offset indexes into it.
+ * @param corrected the corrected text to compare against the original.
+ * @returns the runs of the diff.
+ */
+export declare function diffTokens(original: string, corrected: string): DiffRange[];
+/**
+ * Group the token diff into changes, the unit the review UI accepts and rejects. A run of deletions,
+ * a run of insertions, or a deletion run immediately followed by an insertion run (a replacement) all
+ * collapse into one change. Equal runs separate changes. Each change carries the original span to
+ * replace and the replacement text, with a stable index in document order.
+ *
+ * @param original the text captured before the correction; every change span indexes into it.
+ * @param corrected the corrected text to compare against the original.
+ * @returns the changes, indexed 0, 1, 2, ... in document order.
+ */
+export declare function diffChanges(original: string, corrected: string): Change[];
+/**
+ * The 1-based line number of an offset in the original, computed by counting newlines before it. The
+ * review surface derives every line label this way, from the offset against the captured original, so
+ * a label can never drift from the source or depend on a count the model supplied.
+ *
+ * @param original the captured original text the offset indexes into.
+ * @param offset a character offset into `original`.
+ * @returns the 1-based number of the line that holds `offset`.
+ */
+export declare function lineLabel(original: string, offset: number): number;

package/dist/components/tidy-diff.js ADDED Viewed

@@ -0,0 +1,147 @@
+// The tidy diff: a Longest Common Subsequence over tokens, poplar's DiffRanges model rebuilt in
+// TypeScript (spec 2.4). A small pure module, not a diff library. The tidy action returns only the
+// corrected string; this module owns every range, offset, and line label. It is the sole source of
+// positional truth for the review surface (Tasks 13 and 14 consume its output), so all positions are
+// computed locally from this diff against the captured original, never taken from the model.
+//
+// Token granularity is the right unit for a copy-edit: a one-letter fix like "it's" to "its" reads
+// as a whole-word replacement an author accepts or rejects as a unit, not a confusing single-character
+// flip. The diff is computed against the original captured at request time; tidy is single-author and
+// on-demand, so there is no rebasing and no three-way merge.
+// A token is either a word (a run of word characters, apostrophes kept inside so "it's" is one token)
+// or a non-word run (whitespace and punctuation between words). Splitting at the word boundary gives
+// whole-word granularity: a homophone or typo fix lands on the word, not a single character.
+// Word characters are ASCII letters, digits, and underscore only, so any other character (accented
+// letters included) falls into the non-word alternative. Both the straight (') and the curly (’)
+// apostrophe join word parts, and only when word characters sit on both sides of it. The two
+// alternatives together match every character, so a string's tokens concatenate back to the string.
+const TOKEN = /[A-Za-z0-9_]+(?:['’][A-Za-z0-9_]+)*|[^A-Za-z0-9_]+/g;
+function tokenize(text) {
+    const tokens = [];
+    for (const m of text.matchAll(TOKEN)) {
+        tokens.push({ text: m[0], offset: m.index });
+    }
+    return tokens;
+}
+/**
+ * Diff the original against the corrected text and return runs of equal, inserted, and deleted tokens.
+ * Both strings are tokenized into words plus the whitespace and punctuation between them, an LCS over
+ * the token sequences finds the kept tokens, and the gaps become deleted and inserted runs.
+ *
+ * Run offsets index the captured ORIGINAL: an `equal` or `deleted` run spans its original text, an
+ * `inserted` run carries a zero-width original span at the insertion point. Concatenating the equal
+ * and deleted runs rebuilds the original; concatenating the equal and inserted runs rebuilds the
+ * corrected text.
+ */
+export function diffTokens(original, corrected) {
+    const a = tokenize(original);
+    const b = tokenize(corrected);
+    const n = a.length;
+    const m = b.length;
+    // Standard LCS table over the token sequences.
+    const lcs = Array.from({ length: n + 1 }, () => new Array(m + 1).fill(0));
+    for (let i = 1; i <= n; i++) {
+        for (let j = 1; j <= m; j++) {
+            if (a[i - 1].text === b[j - 1].text) {
+                lcs[i][j] = lcs[i - 1][j - 1] + 1;
+            }
+            else if (lcs[i - 1][j] >= lcs[i][j - 1]) {
+                lcs[i][j] = lcs[i - 1][j];
+            }
+            else {
+                lcs[i][j] = lcs[i][j - 1];
+            }
+        }
+    }
+    const reversed = [];
+    let i = n;
+    let j = m;
+    while (i > 0 || j > 0) {
+        if (i > 0 && j > 0 && a[i - 1].text === b[j - 1].text) {
+            reversed.push({ kind: 'equal', from: a[i - 1].offset, to: a[i - 1].offset + a[i - 1].text.length, text: a[i - 1].text });
+            i--;
+            j--;
+        }
+        else if (j > 0 && (i === 0 || lcs[i][j - 1] >= lcs[i - 1][j])) {
+            // The original offset of an insertion is the start of the next kept original token (the one
+            // at index i), or the end of the original when nothing more remains.
+            const at = i < n ? a[i].offset : original.length;
+            reversed.push({ kind: 'inserted', from: at, to: at, text: b[j - 1].text });
+            j--;
+        }
+        else {
+            reversed.push({ kind: 'deleted', from: a[i - 1].offset, to: a[i - 1].offset + a[i - 1].text.length, text: a[i - 1].text });
+            i--;
+        }
+    }
+    const ops = reversed.reverse();
+    // Coalesce adjacent ops of the same kind into runs. A run's text is the concatenation of its
+    // tokens; offsets span from the first token's `from` to the last token's `to`.
+    const runs = [];
+    for (const op of ops) {
+        const last = runs[runs.length - 1];
+        if (last && last.kind === op.kind && last.to === op.from) {
+            last.to = op.to;
+            last.text += op.text;
+        }
+        else {
+            runs.push({ kind: op.kind, from: op.from, to: op.to, text: op.text });
+        }
+    }
+    return runs;
+}
+/**
+ * Group the token diff into changes, the unit the review UI accepts and rejects. A run of deletions,
+ * a run of insertions, or a deletion run immediately followed by an insertion run (a replacement) all
+ * collapse into one change. Equal runs separate changes. Each change carries the original span to
+ * replace and the replacement text, with a stable index in document order.
+ */
+export function diffChanges(original, corrected) {
+    const runs = diffTokens(original, corrected);
+    const changes = [];
+    let k = 0;
+    while (k < runs.length) {
+        const run = runs[k];
+        if (run.kind === 'equal') {
+            k++;
+            continue;
+        }
+        // Start a change at the first non-equal run and absorb the contiguous deleted/inserted block.
+        // A deletion immediately followed by an insertion reads as a replacement; either alone is a
+        // pure deletion or insertion.
+        let from = run.from;
+        let to = run.from;
+        let replacement = '';
+        while (k < runs.length && runs[k].kind !== 'equal') {
+            const r = runs[k];
+            if (r.kind === 'deleted') {
+                // A deleted run spans original text; extend the original span to cover it.
+                if (replacement === '' && to === from)
+                    from = r.from;
+                to = r.to;
+            }
+            else {
+                // An inserted run contributes replacement text and pins the span start at its insertion
+                // point when no deletion has set it yet (a pure insertion is zero-width).
+                if (to === from) {
+                    from = r.from;
+                    to = r.from;
+                }
+                replacement += r.text;
+            }
+            k++;
+        }
+        changes.push({ index: changes.length, from, to, replacement });
+    }
+    return changes;
+}
+/**
+ * The 1-based line number of an offset in the original, computed by counting newlines before it. The
+ * review surface derives every line label this way, from the offset against the captured original, so
+ * a label can never drift from the source or depend on a count the model supplied.
+ */
+export function lineLabel(original, offset) {
+    let line = 1;
+    const end = Math.min(offset, original.length);
+    for (let i = 0; i < end; i++) {
+        if (original[i] === '\n')
+            line++;
+    }
+    return line;
+}

package/dist/components/tidy-validate.d.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import type { Change } from './tidy-diff.js';
+/** The reason a tidy result was rejected. Task 14 branches on this; every value maps to the one
+ *  honest author-facing message, so the reason is for logging and tests, not the user surface.
+ *  The type is a union of string literals, one per bullet below.
+ *  - `structure`: a directive opener/closer sequence, a heading count or level, or a fenced-code
+ *    count diverged (the result restructured the document).
+ *  - `frontmatter`: the frontmatter block is not byte-for-byte equal.
+ *  - `media`: the multiset of `media:` hashes differs (a hash was altered, dropped, or invented).
+ *  - `code`: a code span or fenced code block was edited.
+ *  - `divergence`: the changed-token amount exceeds the length-aware bound (a wholesale rewrite). */
+export type TidyRejectionReason = 'structure' | 'frontmatter' | 'media' | 'code' | 'divergence';
+/** The honest author-facing message a rejection maps to. The same message for every reason, by
+ *  design: an author does not need the validator's internal taxonomy, only that the result was
+ *  discarded and their text is safe. The declaration carries the literal text, so the constant's
+ *  type is that exact string rather than `string`. */
+export declare const TIDY_REJECTION_MESSAGE = "Tidy returned a result that changed more than the wording, so it was discarded. Your text is unchanged.";
+/** The outcome of validating a tidy result. On success it carries the Task 12 change set the review
+ *  surface accepts and rejects against; on failure it carries the typed reason and the message.
+ *  The two shapes are discriminated on `ok`: check it before reading `changes` (present only when
+ *  `ok` is true) or `reason` and `message` (present only when `ok` is false). */
+export type TidyValidation = {
+    ok: true;
+    changes: Change[];
+} | {
+    ok: false;
+    reason: TidyRejectionReason;
+    message: string;
+};
+/**
+ * Validate a tidy result against the captured original. Runs the exact structural checks first (a
+ * restructure or a token or code edit is a hard reject regardless of how little else changed), then
+ * the length-aware divergence bound. On success returns the Task 12 change set for the review
+ * surface; on failure returns the typed reason and the one honest message.
+ *
+ * The checks, in order: the directive opener/closer sequence and depths, the ATX heading count and
+ * levels, the fenced-code-block count (folded into the code-contents multiset), the byte-for-byte
+ * frontmatter via the shared frontmatterSpan helper, the media-hash multiset, the code-span and
+ * code-block contents, and finally the divergence bound. A pure function: it reads the two strings
+ * and nothing else, and it never mutates the buffer.
+ *
+ * @param original The text captured before the tidy request.
+ * @param corrected The text the tidy action returned.
+ * @returns `{ ok: true, changes }` when every check passes, otherwise `{ ok: false, reason, message }`.
+ */
+export declare function validateTidy(original: string, corrected: string): TidyValidation;