npm - openwriter - Versions diffs - 0.15.0 → 0.17.0 - Mend

openwriter 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/dist/client/assets/index-0ttVnjRp.css +1 -0
package/dist/client/assets/{index-B5MXw2pg.js → index-BZ7LCzrR.js} +64 -64
package/dist/client/index.html +2 -2
package/dist/plugins/authors-voice/dist/index.d.ts +41 -0
package/dist/plugins/authors-voice/dist/index.js +206 -0
package/dist/plugins/authors-voice/package.json +23 -0
package/dist/plugins/image-gen/dist/index.d.ts +35 -0
package/dist/plugins/image-gen/dist/index.js +141 -0
package/dist/plugins/image-gen/package.json +26 -0
package/dist/plugins/publish/dist/helpers.d.ts +66 -0
package/dist/plugins/publish/dist/helpers.js +199 -0
package/dist/plugins/publish/dist/index.d.ts +3 -0
package/dist/plugins/publish/dist/index.js +1130 -0
package/dist/plugins/publish/dist/newsletter-tools.d.ts +2 -0
package/dist/plugins/publish/dist/newsletter-tools.js +394 -0
package/dist/plugins/publish/package.json +31 -0
package/dist/plugins/x-api/dist/index.d.ts +27 -0
package/dist/plugins/x-api/dist/index.js +240 -0
package/dist/plugins/x-api/package.json +27 -0
package/dist/server/compact.js +28 -2
package/dist/server/documents.js +234 -3
package/dist/server/enrichment.js +125 -0
package/dist/server/export-routes.js +2 -0
package/dist/server/install-skill.js +15 -0
package/dist/server/markdown-parse.js +153 -14
package/dist/server/markdown-serialize.js +100 -17
package/dist/server/mcp.js +291 -25
package/dist/server/node-blocks.js +41 -1
package/dist/server/node-fingerprint.js +347 -73
package/dist/server/node-matcher.js +19 -44
package/dist/server/pending-overlay.js +21 -4
package/dist/server/state.js +225 -41
package/dist/server/workspaces.js +27 -5
package/dist/server/ws.js +10 -0
package/package.json +2 -1
package/skill/SKILL.md +38 -7
package/skill/agents/openwriter-enrichment-minion.md +177 -0
package/skill/docs/enrichment.md +179 -0
package/skill/docs/footnotes.md +178 -0
package/dist/client/assets/index-B3iORmCT.css +0 -1

package/dist/server/node-fingerprint.js CHANGED Viewed

@@ -1,40 +1,45 @@
 /**
  * Per-block fingerprint computation for node identity tracking.
  *
- * The math-first signal hierarchy (push math before words):
+ * In-memory shape (Fingerprint) is rich — it carries position, neighbor types,
+ * counts, container children, etc. so matcher rules read what they need
+ * directly. Disk shape is ultra-lean — only fields the matcher cannot
+ * recompute from the body tree + per-block stored signals get persisted.
  *
- *   MATH SIGNALS (exact integers and chars):
- *     - type, charCount, sentenceCount, wordCount, level, language, structureSig
- *     - sentences[]: per-sentence tuples {c, f, l, t, wls, w}
- *         c   = char count (excluding terminator + trailing space)
- *         f   = first PREFIX_LEN chars of sentence (3-char prefix)
- *         l   = last PREFIX_LEN chars before terminator (3-char suffix)
- *         t   = terminator type ('D'|'E'|'Q'|'-')
- *         wls = word length sequence (array of integers)
- *         w   = word array — defense-in-depth disambiguator when math collides
+ *   PER-SENTENCE: bare hash string. simpleHash(text + terminator) folds the
+ *   terminator type into the hash, so "Hello?" and "Hello." produce distinct
+ *   hashes without needing a separate field. Unsigned hex, up to 8 chars.
  *
- *   CONTEXT SIGNALS:
- *     - prevType, nextType, parentType
+ *   PER-BLOCK on disk (tuple array, position-indexed):
+ *     paragraph (default):  [id, sentences[], marks?]
+ *     empty paragraph:      [id]
+ *     heading:              [id, "h1".."h6", sentences[], marks?]
+ *     codeBlock:            [id, "code", language, contentHash]
+ *     horizontalRule:       [id, "hr"]
+ *     image:                [id, "img"]
+ *     table:                [id, "tbl"]
+ *     bulletList:           [id, "ul", childTypes[]]
+ *     orderedList:          [id, "ol", childTypes[]]
+ *     taskList:             [id, "tl", childTypes[]]
+ *     blockquote:           [id, "bq", childTypes[]]
+ *     listItem:             [id, "li", childTypes[]]
+ *     taskItem:             [id, "ti", childTypes[]]
  *
- *   FALLBACK WORD SIGNALS (only when math is ambiguous):
- *     - firstWords, lastWords (sequence of strings)
+ *   `marks` is a compact object with non-zero entries only: {b?, i?, l?, c?}.
+ *   childTypes uses the same compact tags as the type position itself.
  *
- * Two sentences match deterministically if their tuples are equal (math + words).
- * Two blocks match deterministically if their sentence arrays are equal.
- * Splits/merges are detected via array prefix/suffix/concatenation of sentence tuples.
+ *   Derived at enrich time (never on disk): position, parentPosition,
+ *   ordinalInParent, prevType, nextType, parentType. Each is a function of the
+ *   array index + sibling tree at the time of read. charCount, sentenceCount
+ *   and wordCount are no longer part of the Fingerprint shape at all.
  *
- * Prefix/suffix length 3 is chosen because:
- *   - 1 char (single first/last) collided too easily — "Bee ate the deck" vs
- *     "Bug ate the desk" hashed identically.
- *   - 3 chars captures the first/last whole short word in most sentences and
- *     reduces realistic collisions to near zero.
- *   - Longer prefixes (5+) approach "encoding the first word" rather than
- *     a math signal; we get diminishing returns past 3.
+ * Two sentences are equal iff their hashes are equal (string ==).
+ * Two blocks match exactly iff type matches, sentence arrays are equal,
+ * non-zero structureSig is equal, and any container child-type array is equal.
+ * Splits/merges are detected via prefix/suffix/concatenation of sentence arrays.
  *
  * adr: adr/node-identity-matcher.md
  */
-const WORD_FALLBACK_WINDOW = 5;
-const PREFIX_LEN = 3;
 const CONTAINER_TYPES = new Set([
     'bulletList',
     'orderedList',
@@ -44,26 +49,21 @@ const CONTAINER_TYPES = new Set([
     'listItem',
     'taskItem',
 ]);
+const ZERO_MARKS = { bold: 0, italic: 0, links: 0, code: 0 };
 /** Compute a fingerprint for a single block, given its position in the block list. */
 export function fingerprint(block, allBlocks) {
     const text = block.text || '';
-    const sentences = splitSentences(text);
-    const words = tokenizeWords(text);
+    const sentences = splitSentences(text).map(sentenceHash);
     const fp = {
         type: block.type,
         position: block.position,
         parentPosition: block.parentPosition,
         ordinalInParent: block.ordinalInParent,
-        charCount: text.length,
-        sentenceCount: sentences.length,
-        wordCount: words.length,
-        sentences: sentences.map(sentenceTuple),
+        sentences,
         structureSig: block.inlineMarks || { bold: 0, italic: 0, links: 0, code: 0 },
         prevType: allBlocks[block.position - 1]?.type || null,
         nextType: allBlocks[block.position + 1]?.type || null,
         parentType: block.parentPosition != null ? allBlocks[block.parentPosition]?.type ?? null : null,
-        firstWords: words.slice(0, WORD_FALLBACK_WINDOW),
-        lastWords: words.slice(-WORD_FALLBACK_WINDOW),
     };
     if (block.type === 'heading')
         fp.level = block.level;
@@ -78,23 +78,9 @@ export function fingerprint(block, allBlocks) {
     }
     return fp;
 }
-/**
- * Build the per-sentence tuple. Math fields (c, f, l, t, wls) form the primary
- * fingerprint. Prefix/suffix `f` and `l` are PREFIX_LEN chars each. Words (`w`)
- * are defense-in-depth for the rare case where math still collides under
- * richer prefixes.
- */
-function sentenceTuple(sentence) {
-    const t = sentence.text;
-    const words = tokenizeWords(t);
-    return {
-        c: t.length,
-        f: t.slice(0, PREFIX_LEN),
-        l: t.slice(-PREFIX_LEN),
-        t: sentence.terminator,
-        wls: words.map((w) => w.length),
-        w: words,
-    };
+/** Hash one sentence's text including its terminator so "X." and "X?" don't collide. */
+function sentenceHash(sentence) {
+    return simpleHash(sentence.text + sentence.terminator);
 }
 export function fingerprintAll(blocks) {
     return blocks.map((b) => fingerprint(b, blocks));
@@ -135,18 +121,20 @@ export function tokenizeWords(text) {
         .map((w) => w.replace(/^[^\w]+|[^\w]+$/g, ''))
         .filter((w) => w.length > 0);
 }
+/** 32-bit unsigned content hash → 1-8 hex chars, no sign prefix. */
 export function simpleHash(s) {
     let h = 0;
     for (let i = 0; i < s.length; i++) {
         h = ((h << 5) - h) + s.charCodeAt(i);
         h |= 0;
     }
-    return h.toString(16);
+    return (h >>> 0).toString(16);
 }
 /**
- * The strongest possible match: every math dimension equal AND word arrays
- * equal. Pure determinism — adversaries cannot fake exact match without
- * literally using the same content.
+ * Exact match: type + content fingerprint + structure agree. Used by Phase 1
+ * pinning and graveyard-restore. Sentence-array equality implies same sentence
+ * count and, barring 32-bit hash collisions, same content text; charCount and
+ * wordCount are therefore redundant and were removed from the Fingerprint shape.
  */
 export function isExactMatch(a, b) {
     if (a.type !== b.type)
@@ -155,12 +143,6 @@ export function isExactMatch(a, b) {
         return false;
     if (a.language !== b.language)
         return false;
-    if (a.charCount !== b.charCount)
-        return false;
-    if (a.sentenceCount !== b.sentenceCount)
-        return false;
-    if (a.wordCount !== b.wordCount)
-        return false;
     if (!sentenceArraysEqual(a.sentences, b.sentences))
         return false;
     if (!structureEqual(a.structureSig, b.structureSig))
@@ -195,19 +177,14 @@ export function sentenceArraysEqual(a, b) {
     if (a.length !== b.length)
         return false;
     for (let i = 0; i < a.length; i++) {
-        if (!sentenceTuplesEqual(a[i], b[i]))
+        if (a[i] !== b[i])
             return false;
     }
     return true;
 }
-/** Math + words full equality. The disambiguator for math collisions. */
+/** Backwards-compatible alias — sentence equality is now string equality. */
 export function sentenceTuplesEqual(a, b) {
-    return (a.c === b.c &&
-        a.f === b.f &&
-        a.l === b.l &&
-        a.t === b.t &&
-        arraysEqual(a.wls, b.wls) &&
-        arraysEqual(a.w, b.w));
+    return a === b;
 }
 export function isSentencePrefix(short, long) {
     if (!Array.isArray(short) || !Array.isArray(long))
@@ -215,7 +192,7 @@ export function isSentencePrefix(short, long) {
     if (short.length === 0 || short.length > long.length)
         return false;
     for (let i = 0; i < short.length; i++) {
-        if (!sentenceTuplesEqual(short[i], long[i]))
+        if (short[i] !== long[i])
             return false;
     }
     return true;
@@ -227,7 +204,7 @@ export function isSentenceSuffix(short, long) {
         return false;
     const offset = long.length - short.length;
     for (let i = 0; i < short.length; i++) {
-        if (!sentenceTuplesEqual(short[i], long[i + offset]))
+        if (short[i] !== long[i + offset])
             return false;
     }
     return true;
@@ -238,15 +215,312 @@ export function isSentenceConcat(combined, first, second) {
     if (combined.length !== first.length + second.length)
         return false;
     for (let i = 0; i < first.length; i++) {
-        if (!sentenceTuplesEqual(combined[i], first[i]))
+        if (combined[i] !== first[i])
             return false;
     }
     for (let i = 0; i < second.length; i++) {
-        if (!sentenceTuplesEqual(combined[first.length + i], second[i]))
+        if (combined[first.length + i] !== second[i])
             return false;
     }
     return true;
 }
+/** Compact type tags as written to disk. */
+const SHORT_TAG = {
+    bulletList: 'ul',
+    orderedList: 'ol',
+    taskList: 'tl',
+    blockquote: 'bq',
+    listItem: 'li',
+    taskItem: 'ti',
+    horizontalRule: 'hr',
+    image: 'img',
+    table: 'tbl',
+};
+const FULL_TYPE = {
+    ul: 'bulletList',
+    ol: 'orderedList',
+    tl: 'taskList',
+    bq: 'blockquote',
+    li: 'listItem',
+    ti: 'taskItem',
+    hr: 'horizontalRule',
+    img: 'image',
+    tbl: 'table',
+};
+function slimMarks(sig) {
+    if (!sig)
+        return null;
+    const out = {};
+    if (sig.bold)
+        out.b = sig.bold;
+    if (sig.italic)
+        out.i = sig.italic;
+    if (sig.links)
+        out.l = sig.links;
+    if (sig.code)
+        out.c = sig.code;
+    return Object.keys(out).length > 0 ? out : null;
+}
+function enrichMarks(raw) {
+    if (!raw || typeof raw !== 'object')
+        return { ...ZERO_MARKS };
+    return {
+        bold: raw.b || 0,
+        italic: raw.i || 0,
+        links: raw.l || 0,
+        code: raw.c || 0,
+    };
+}
+/** Encode a rich Fingerprint + id into the slim disk tuple. */
+export function slimEntry(id, fp) {
+    const marks = slimMarks(fp.structureSig);
+    if (fp.type === 'paragraph') {
+        const out = [id];
+        if (fp.sentences && fp.sentences.length > 0)
+            out.push(fp.sentences);
+        if (marks) {
+            if (out.length === 1)
+                out.push([]);
+            out.push(marks);
+        }
+        return out;
+    }
+    if (fp.type === 'heading') {
+        const tag = `h${fp.level || 1}`;
+        const out = [id, tag, fp.sentences || []];
+        if (marks)
+            out.push(marks);
+        return out;
+    }
+    if (fp.type === 'codeBlock') {
+        return [id, 'code', fp.language || '', fp.contentHash || ''];
+    }
+    if (CONTAINER_TYPES.has(fp.type)) {
+        const tag = SHORT_TAG[fp.type] || fp.type;
+        const out = [id, tag];
+        if (fp.childTypes && fp.childTypes.length > 0) {
+            out.push(fp.childTypes.map((t) => SHORT_TAG[t] || t));
+        }
+        return out;
+    }
+    // Atomic blocks: horizontalRule, image, table
+    const tag = SHORT_TAG[fp.type] || fp.type;
+    return [id, tag];
+}
+/**
+ * Decode a slim disk tuple's content fields (type, sentences, marks, etc.)
+ * without any positional/structural context. Used by both block-context
+ * enrichment (legacy fallback) and slim-array-walker enrichment.
+ */
+function decodeSlimTuple(slim) {
+    if (!Array.isArray(slim) || slim.length === 0 || typeof slim[0] !== 'string')
+        return null;
+    const id = slim[0];
+    const second = slim[1];
+    let type = 'paragraph';
+    let sentences = [];
+    let level;
+    let language;
+    let contentHash;
+    let childTypes;
+    let marksRaw = null;
+    if (slim.length === 1) {
+        // Empty paragraph
+    }
+    else if (Array.isArray(second)) {
+        // Paragraph with sentences
+        sentences = second.filter((x) => typeof x === 'string');
+        if (slim.length >= 3 && !Array.isArray(slim[2]))
+            marksRaw = slim[2];
+    }
+    else if (typeof second === 'string') {
+        const tag = second;
+        if (/^h([1-6])$/.test(tag)) {
+            type = 'heading';
+            level = parseInt(tag.slice(1), 10);
+            if (Array.isArray(slim[2]))
+                sentences = slim[2].filter((x) => typeof x === 'string');
+            if (slim.length >= 4 && !Array.isArray(slim[3]))
+                marksRaw = slim[3];
+        }
+        else if (tag === 'code') {
+            type = 'codeBlock';
+            language = typeof slim[2] === 'string' ? slim[2] : '';
+            contentHash = typeof slim[3] === 'string' ? slim[3] : '';
+        }
+        else if (FULL_TYPE[tag]) {
+            type = FULL_TYPE[tag];
+            if (CONTAINER_TYPES.has(type) && Array.isArray(slim[2])) {
+                childTypes = slim[2].map((t) => FULL_TYPE[t] || t);
+            }
+        }
+        else {
+            // Unknown short tag — carry through verbatim
+            type = tag;
+        }
+    }
+    return { id, type, sentences, level, language, contentHash, childTypes, structureSig: enrichMarks(marksRaw) };
+}
+/**
+ * Decode a slim disk tuple back into {id, fingerprint}. Block context (the
+ * block at this entry's position in the freshly-parsed body, plus the full
+ * block list for neighbor lookups) supplies the derived fields. If no block
+ * context is available (graveyard entries — deleted blocks have no body),
+ * derived position/neighbor fields default to safe values; matcher rules
+ * for graveyard restore only consult type + sentences + marks + childTypes,
+ * which are all carried in slim.
+ */
+export function enrichEntry(slim, block, allBlocks) {
+    const decoded = decodeSlimTuple(slim);
+    if (!decoded)
+        return null;
+    const { id, type, sentences, level, language, contentHash, childTypes, structureSig } = decoded;
+    // Derived from block context, with safe fallbacks for graveyard entries
+    // (where `block` is null — the deleted block is gone from the body).
+    const fingerprint = {
+        type,
+        position: block ? block.position : -1,
+        parentPosition: block ? block.parentPosition : null,
+        ordinalInParent: block ? block.ordinalInParent : undefined,
+        sentences,
+        structureSig,
+        prevType: block ? allBlocks[block.position - 1]?.type || null : null,
+        nextType: block ? allBlocks[block.position + 1]?.type || null : null,
+        parentType: block && block.parentPosition != null
+            ? allBlocks[block.parentPosition]?.type ?? null
+            : null,
+    };
+    if (type === 'heading' && level !== undefined)
+        fingerprint.level = level;
+    if (type === 'codeBlock') {
+        fingerprint.language = language || '';
+        fingerprint.contentHash = contentHash || '';
+    }
+    if (CONTAINER_TYPES.has(type)) {
+        if (childTypes !== undefined) {
+            fingerprint.childCount = childTypes.length;
+            fingerprint.childTypes = childTypes;
+        }
+        else if (block) {
+            // No childTypes in slim (older entry) — derive from current tree.
+            const children = allBlocks.filter((b) => b.parentPosition === block.position);
+            fingerprint.childCount = children.length;
+            fingerprint.childTypes = children.map((c) => c.type);
+        }
+        else {
+            fingerprint.childCount = 0;
+            fingerprint.childTypes = [];
+        }
+    }
+    return { id, fingerprint };
+}
+/**
+ * Enrich slim disk entries WITHOUT parsing the body. The slim array IS the
+ * previous state — position is the array index, parent is the most-recent
+ * unfilled container (tracked via stack), neighbor types come from slim[i±1].
+ *
+ * This avoids the O(N words) markdown re-parse that block-context enrichment
+ * needs for derived fields. The slim array already encodes everything; the
+ * body parse was reconstructing what was implicit.
+ */
+export function enrichSlimArray(slimList) {
+    const out = [];
+    // Stack of open containers: position + how many child slots declared + consumed so far.
+    // A container's children are the next `expected` entries that land while it's on the stack.
+    const stack = [];
+    // First pass: decode all tuples (we need types up-front for neighbor lookups).
+    const decoded = slimList.map((s) => decodeSlimTuple(s));
+    for (let i = 0; i < slimList.length; i++) {
+        const d = decoded[i];
+        if (!d)
+            continue;
+        // Pop containers whose declared child slots are fully consumed.
+        while (stack.length > 0 && stack[stack.length - 1].consumed >= stack[stack.length - 1].expected) {
+            stack.pop();
+        }
+        const parent = stack.length > 0 ? stack[stack.length - 1] : null;
+        const parentPosition = parent ? parent.position : null;
+        const parentType = parent ? parent.type : null;
+        const ordinalInParent = parent ? parent.consumed : i;
+        const prevType = i > 0 ? (decoded[i - 1]?.type ?? null) : null;
+        const nextType = i < slimList.length - 1 ? (decoded[i + 1]?.type ?? null) : null;
+        const fingerprint = {
+            type: d.type,
+            position: i,
+            parentPosition,
+            ordinalInParent,
+            sentences: d.sentences,
+            structureSig: d.structureSig,
+            prevType,
+            nextType,
+            parentType,
+        };
+        if (d.type === 'heading' && d.level !== undefined)
+            fingerprint.level = d.level;
+        if (d.type === 'codeBlock') {
+            fingerprint.language = d.language || '';
+            fingerprint.contentHash = d.contentHash || '';
+        }
+        if (CONTAINER_TYPES.has(d.type)) {
+            fingerprint.childCount = d.childTypes ? d.childTypes.length : 0;
+            fingerprint.childTypes = d.childTypes || [];
+        }
+        out.push({ id: d.id, fingerprint });
+        // Mark one of the parent's child slots as consumed.
+        if (parent)
+            parent.consumed++;
+        // If this entry is a container with declared children, push onto stack.
+        if (d.childTypes !== undefined && d.childTypes.length > 0) {
+            stack.push({ position: i, type: d.type, expected: d.childTypes.length, consumed: 0 });
+        }
+    }
+    return out;
+}
+/**
+ * Slim a list of {id, fingerprint} entries for disk. Every non-paragraph block
+ * carries a compact type tag; paragraphs are the untagged default. Caller passes
+ * them in the same order they appear in the block tree (matcher output naturally is).
+ */
+export function slimEntries(entries) {
+    return entries.map((e) => slimEntry(e.id, e.fingerprint));
+}
+/**
+ * Enrich slim disk entries against the freshly-parsed block list. Each slim
+ * entry at index i is paired with blocks[i]. Entries past the end of `blocks`
+ * use null context (typical for graveyard).
+ */
+export function enrichEntries(slimList, blocks) {
+    const out = [];
+    for (let i = 0; i < slimList.length; i++) {
+        const slim = slimList[i];
+        const block = blocks[i] || null;
+        const enriched = enrichEntry(slim, block, blocks);
+        if (enriched)
+            out.push(enriched);
+    }
+    return out;
+}
+// ----------------------------------------------------------------------
+// Legacy format detection (v0.14 / v0.15 → ultra-lean)
+// ----------------------------------------------------------------------
+/**
+ * Detect legacy (pre-ultra-lean) format at the frontmatter raw-parse layer.
+ * Legacy entries are objects with `id` + `fp` keys (or `firstWords`/`w`/`wls`
+ * within sentences). Ultra-lean entries are arrays. Mixed input is rare but
+ * tolerated by the legacy-migration path, which re-fingerprints positionally.
+ */
+export function isLegacyRawEntry(raw) {
+    return raw != null && typeof raw === 'object' && !Array.isArray(raw);
+}
+export function anyLegacyRaw(rawList) {
+    if (!Array.isArray(rawList))
+        return false;
+    for (const r of rawList) {
+        if (isLegacyRawEntry(r))
+            return true;
+    }
+    return false;
+}
 function arraysEqual(a, b) {
     if (!Array.isArray(a) || !Array.isArray(b))
         return false;

package/dist/server/node-matcher.js CHANGED Viewed

@@ -19,14 +19,14 @@
  *     - Insert (any block still unmatched → fresh ID)
  *   Phase 3: orphans = previousNodes entries no rule claimed (= deletes)
  *
- * Fingerprints use math signals (per-sentence char count, 3-char prefix/suffix,
- * terminator, word-length sequence) plus full word arrays for math-collision
- * disambiguation. Documented in node-fingerprint.ts.
+ * Fingerprints carry one bare hash string per sentence; the hash folds the
+ * sentence text and its terminator together, so hash equality identifies
+ * "same sentence" in at most 8 hex chars. Documented in node-fingerprint.ts.
  *
  * adr: adr/node-identity-matcher.md
  */
 import { generateNodeId } from './helpers.js';
-import { fingerprintAll, isExactMatch, isSameContent, sentenceArraysEqual, sentenceTuplesEqual, } from './node-fingerprint.js';
+import { fingerprintAll, isExactMatch, isSameContent, sentenceArraysEqual, } from './node-fingerprint.js';
 /**
  * Run the matcher.
  *
@@ -459,36 +459,22 @@ function applySlotContinuityRule(unmatched, previousNodes, claimedPrevIds, pinne
     }
 }
 /**
- * Lightweight content overlap signal used by slot-continuity scoring.
- * Per sentence-pair: +1 f, +1 l, +1 t, +2 wls-equal, +3×shared-words,
- * +10 full word-array equality. Word-level overlap is the disambiguator
- * when math signals collide.
+ * Lightweight content overlap signal used by slot-continuity scoring to
+ * disambiguate between multiple candidate orphans in the same slot range.
+ *
+ * +1 for each sentence hash in `b` that also appears in `a` (duplicate
+ * hashes in `b` each count). Since hashes fold sentence text + terminator
+ * together, this counts the fully-shared sentences between the two blocks —
+ * the matcher's only meaningful similarity question.
  */
 function sentenceSignalOverlapScore(a, b) {
     if (!a.sentences || !b.sentences)
         return 0;
+    const seen = new Set(a.sentences);
     let score = 0;
-    for (const sa of a.sentences) {
-        for (const sb of b.sentences) {
-            if (sa.f === sb.f)
-                score++;
-            if (sa.l === sb.l)
-                score++;
-            if (sa.t === sb.t)
-                score++;
-            if (arraysEqual(sa.wls, sb.wls))
-                score += 2;
-            if (Array.isArray(sa.w) && Array.isArray(sb.w)) {
-                const aSet = new Set(sa.w);
-                let shared = 0;
-                for (const w of sb.w)
-                    if (aSet.has(w))
-                        shared++;
-                score += shared * 3;
-                if (arraysEqual(sa.w, sb.w))
-                    score += 10;
-            }
-        }
+    for (const h of b.sentences) {
+        if (seen.has(h))
+            score++;
     }
     return score;
 }
@@ -596,21 +582,10 @@ function slotHighBound(previousNodes, claimedPrevIds, pinned, orphanIdx) {
 function shareAnySentenceTuple(a, b) {
     if (!Array.isArray(a) || !Array.isArray(b))
         return false;
-    for (const sa of a) {
-        for (const sb of b) {
-            if (sentenceTuplesEqual(sa, sb))
-                return true;
-        }
+    const seen = new Set(a);
+    for (const sb of b) {
+        if (seen.has(sb))
+            return true;
     }
     return false;
 }
-function arraysEqual(a, b) {
-    if (!Array.isArray(a) || !Array.isArray(b))
-        return false;
-    if (a.length !== b.length)
-        return false;
-    for (let i = 0; i < a.length; i++)
-        if (a[i] !== b[i])
-            return false;
-    return true;
-}

package/dist/server/pending-overlay.js CHANGED Viewed

@@ -630,6 +630,23 @@ export function applyOverlayPure(canonical, entries) {
     }
     // Inserts: idempotency check FIRST. If a node with this ID already exists,
     // refresh its pending marker but do NOT splice another copy.
+    //
+    // Idempotency MUST account for descendant IDs too: when a container entry
+    // places its newContent (a subtree of listItems/paragraphs/etc.), those
+    // descendants land in canonical but aren't in nodeById until we re-index.
+    // Without that, the descendants' own entries don't see the existing
+    // placement and would splice duplicate copies. adr: adr/pending-overlay-model.md
+    function indexSubtree(node) {
+        if (!node)
+            return;
+        const id = node?.attrs?.id;
+        if (id)
+            nodeById.set(id, node);
+        if (Array.isArray(node?.content)) {
+            for (const child of node.content)
+                indexSubtree(child);
+        }
+    }
     for (const entry of entries) {
         if (entry.status !== 'insert')
             continue;
@@ -654,7 +671,7 @@ export function applyOverlayPure(canonical, entries) {
             const loc = findNodeWithParent(entry.afterNodeId);
             if (loc) {
                 loc.parent.splice(loc.index + 1, 0, newNode);
-                nodeById.set(entry.nodeId, newNode);
+                indexSubtree(newNode);
                 placed = true;
             }
         }
@@ -664,21 +681,21 @@ export function applyOverlayPure(canonical, entries) {
                 const parent = parentLoc.parent[parentLoc.index];
                 parent.content = parent.content || [];
                 parent.content.unshift(newNode);
-                nodeById.set(entry.nodeId, newNode);
+                indexSubtree(newNode);
                 placed = true;
             }
         }
         if (!placed && entry.afterNodeId === null && entry.parentNodeId === null) {
             merged.content = merged.content || [];
             merged.content.unshift(newNode);
-            nodeById.set(entry.nodeId, newNode);
+            indexSubtree(newNode);
             placed = true;
         }
         if (!placed) {
             newNode.attrs.pendingOrphan = true;
             merged.content = merged.content || [];
             merged.content.push(newNode);
-            nodeById.set(entry.nodeId, newNode);
+            indexSubtree(newNode);
         }
     }
     return merged;