openwriter 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dist/client/assets/index-0ttVnjRp.css +1 -0
  2. package/dist/client/assets/{index-B5MXw2pg.js → index-BZ7LCzrR.js} +64 -64
  3. package/dist/client/index.html +2 -2
  4. package/dist/plugins/authors-voice/dist/index.d.ts +41 -0
  5. package/dist/plugins/authors-voice/dist/index.js +206 -0
  6. package/dist/plugins/authors-voice/package.json +23 -0
  7. package/dist/plugins/image-gen/dist/index.d.ts +35 -0
  8. package/dist/plugins/image-gen/dist/index.js +141 -0
  9. package/dist/plugins/image-gen/package.json +26 -0
  10. package/dist/plugins/publish/dist/helpers.d.ts +66 -0
  11. package/dist/plugins/publish/dist/helpers.js +199 -0
  12. package/dist/plugins/publish/dist/index.d.ts +3 -0
  13. package/dist/plugins/publish/dist/index.js +1130 -0
  14. package/dist/plugins/publish/dist/newsletter-tools.d.ts +2 -0
  15. package/dist/plugins/publish/dist/newsletter-tools.js +394 -0
  16. package/dist/plugins/publish/package.json +31 -0
  17. package/dist/plugins/x-api/dist/index.d.ts +27 -0
  18. package/dist/plugins/x-api/dist/index.js +240 -0
  19. package/dist/plugins/x-api/package.json +27 -0
  20. package/dist/server/compact.js +28 -2
  21. package/dist/server/documents.js +234 -3
  22. package/dist/server/enrichment.js +125 -0
  23. package/dist/server/export-routes.js +2 -0
  24. package/dist/server/install-skill.js +15 -0
  25. package/dist/server/markdown-parse.js +153 -14
  26. package/dist/server/markdown-serialize.js +100 -17
  27. package/dist/server/mcp.js +291 -25
  28. package/dist/server/node-blocks.js +41 -1
  29. package/dist/server/node-fingerprint.js +347 -73
  30. package/dist/server/node-matcher.js +19 -44
  31. package/dist/server/pending-overlay.js +21 -4
  32. package/dist/server/state.js +225 -41
  33. package/dist/server/workspaces.js +27 -5
  34. package/dist/server/ws.js +10 -0
  35. package/package.json +2 -1
  36. package/skill/SKILL.md +38 -7
  37. package/skill/agents/openwriter-enrichment-minion.md +177 -0
  38. package/skill/docs/enrichment.md +179 -0
  39. package/skill/docs/footnotes.md +178 -0
  40. package/dist/client/assets/index-B3iORmCT.css +0 -1
@@ -1,40 +1,45 @@
1
1
  /**
2
2
  * Per-block fingerprint computation for node identity tracking.
3
3
  *
4
- * The math-first signal hierarchy (push math before words):
4
+ * In-memory shape (Fingerprint) is rich — it carries position, neighbor types,
5
+ * counts, container children, etc. so matcher rules read what they need
6
+ * directly. Disk shape is ultra-lean — only fields the matcher cannot
7
+ * recompute from the body tree + per-block stored signals get persisted.
5
8
  *
6
- * MATH SIGNALS (exact integers and chars):
7
- * - type, charCount, sentenceCount, wordCount, level, language, structureSig
8
- * - sentences[]: per-sentence tuples {c, f, l, t, wls, w}
9
- * c = char count (excluding terminator + trailing space)
10
- * f = first PREFIX_LEN chars of sentence (3-char prefix)
11
- * l = last PREFIX_LEN chars before terminator (3-char suffix)
12
- * t = terminator type ('D'|'E'|'Q'|'-')
13
- * wls = word length sequence (array of integers)
14
- * w = word array — defense-in-depth disambiguator when math collides
9
+ * PER-SENTENCE: bare hash string. simpleHash(text + terminator) folds the
10
+ * terminator type into the hash, so "Hello?" and "Hello." produce distinct
11
+ * hashes without needing a separate field. Unsigned hex, up to 8 chars.
15
12
  *
16
- * CONTEXT SIGNALS:
17
- * - prevType, nextType, parentType
13
+ * PER-BLOCK on disk (tuple array, position-indexed):
14
+ * paragraph (default): [id, sentences[], marks?]
15
+ * empty paragraph: [id]
16
+ * heading: [id, "h1".."h6", sentences[], marks?]
17
+ * codeBlock: [id, "code", language, contentHash]
18
+ * horizontalRule: [id, "hr"]
19
+ * image: [id, "img"]
20
+ * table: [id, "tbl"]
21
+ * bulletList: [id, "ul", childTypes[]]
22
+ * orderedList: [id, "ol", childTypes[]]
23
+ * taskList: [id, "tl", childTypes[]]
24
+ * blockquote: [id, "bq", childTypes[]]
25
+ * listItem: [id, "li", childTypes[]]
26
+ * taskItem: [id, "ti", childTypes[]]
18
27
  *
19
- * FALLBACK WORD SIGNALS (only when math is ambiguous):
20
- * - firstWords, lastWords (sequence of strings)
28
+ * `marks` is a compact object with non-zero entries only: {b?, i?, l?, c?}.
29
+ * childTypes uses the same compact tags as the type position itself.
21
30
  *
22
- * Two sentences match deterministically if their tuples are equal (math + words).
23
- * Two blocks match deterministically if their sentence arrays are equal.
24
- * Splits/merges are detected via array prefix/suffix/concatenation of sentence tuples.
31
+ * Derived at enrich time (never on disk): position, parentPosition,
32
+ * ordinalInParent, charCount, sentenceCount, wordCount, prevType, nextType,
33
+ * parentType. Each is a function of the array index + sibling tree at the
34
+ * time of read.
25
35
  *
26
- * Prefix/suffix length 3 is chosen because:
27
- * - 1 char (single first/last) collided too easily "Bee ate the deck" vs
28
- * "Bug ate the desk" hashed identically.
29
- * - 3 chars captures the first/last whole short word in most sentences and
30
- * reduces realistic collisions to near zero.
31
- * - Longer prefixes (5+) approach "encoding the first word" rather than
32
- * a math signal; we get diminishing returns past 3.
36
+ * Two sentences are equal iff their hashes are equal (string ==).
37
+ * Two blocks match exactly iff type matches, sentences arrays are equal,
38
+ * non-zero structureSig is equal, and any container child-type array is equal.
39
+ * Splits/merges are detected via prefix/suffix/concatenation of sentence arrays.
33
40
  *
34
41
  * adr: adr/node-identity-matcher.md
35
42
  */
36
- const WORD_FALLBACK_WINDOW = 5;
37
- const PREFIX_LEN = 3;
38
43
  const CONTAINER_TYPES = new Set([
39
44
  'bulletList',
40
45
  'orderedList',
@@ -44,26 +49,21 @@ const CONTAINER_TYPES = new Set([
44
49
  'listItem',
45
50
  'taskItem',
46
51
  ]);
52
+ const ZERO_MARKS = { bold: 0, italic: 0, links: 0, code: 0 };
47
53
  /** Compute a fingerprint for a single block, given its position in the block list. */
48
54
  export function fingerprint(block, allBlocks) {
49
55
  const text = block.text || '';
50
- const sentences = splitSentences(text);
51
- const words = tokenizeWords(text);
56
+ const sentences = splitSentences(text).map(sentenceHash);
52
57
  const fp = {
53
58
  type: block.type,
54
59
  position: block.position,
55
60
  parentPosition: block.parentPosition,
56
61
  ordinalInParent: block.ordinalInParent,
57
- charCount: text.length,
58
- sentenceCount: sentences.length,
59
- wordCount: words.length,
60
- sentences: sentences.map(sentenceTuple),
62
+ sentences,
61
63
  structureSig: block.inlineMarks || { bold: 0, italic: 0, links: 0, code: 0 },
62
64
  prevType: allBlocks[block.position - 1]?.type || null,
63
65
  nextType: allBlocks[block.position + 1]?.type || null,
64
66
  parentType: block.parentPosition != null ? allBlocks[block.parentPosition]?.type ?? null : null,
65
- firstWords: words.slice(0, WORD_FALLBACK_WINDOW),
66
- lastWords: words.slice(-WORD_FALLBACK_WINDOW),
67
67
  };
68
68
  if (block.type === 'heading')
69
69
  fp.level = block.level;
@@ -78,23 +78,9 @@ export function fingerprint(block, allBlocks) {
78
78
  }
79
79
  return fp;
80
80
  }
81
- /**
82
- * Build the per-sentence tuple. Math fields (c, f, l, t, wls) form the primary
83
- * fingerprint. Prefix/suffix `f` and `l` are PREFIX_LEN chars each. Words (`w`)
84
- * are defense-in-depth for the rare case where math still collides under
85
- * richer prefixes.
86
- */
87
- function sentenceTuple(sentence) {
88
- const t = sentence.text;
89
- const words = tokenizeWords(t);
90
- return {
91
- c: t.length,
92
- f: t.slice(0, PREFIX_LEN),
93
- l: t.slice(-PREFIX_LEN),
94
- t: sentence.terminator,
95
- wls: words.map((w) => w.length),
96
- w: words,
97
- };
81
+ /** Hash one sentence's text including its terminator so "X." and "X?" don't collide. */
82
+ function sentenceHash(sentence) {
83
+ return simpleHash(sentence.text + sentence.terminator);
98
84
  }
99
85
  export function fingerprintAll(blocks) {
100
86
  return blocks.map((b) => fingerprint(b, blocks));
@@ -135,18 +121,20 @@ export function tokenizeWords(text) {
135
121
  .map((w) => w.replace(/^[^\w]+|[^\w]+$/g, ''))
136
122
  .filter((w) => w.length > 0);
137
123
  }
124
+ /** 32-bit unsigned content hash → 1-8 hex chars, no sign prefix. */
138
125
  export function simpleHash(s) {
139
126
  let h = 0;
140
127
  for (let i = 0; i < s.length; i++) {
141
128
  h = ((h << 5) - h) + s.charCodeAt(i);
142
129
  h |= 0;
143
130
  }
144
- return h.toString(16);
131
+ return (h >>> 0).toString(16);
145
132
  }
146
133
  /**
147
- * The strongest possible match: every math dimension equal AND word arrays
148
- * equal. Pure determinism adversaries cannot fake exact match without
149
- * literally using the same content.
134
+ * Exact match: type + content fingerprint + structure agree. Used by Phase 1
135
+ * pinning and graveyard-restore. Sentence-array equality implies same sentence
136
+ * count and same content text; charCount/wordCount are redundant once hashes
137
+ * line up and have been removed from the Fingerprint shape.
150
138
  */
151
139
  export function isExactMatch(a, b) {
152
140
  if (a.type !== b.type)
@@ -155,12 +143,6 @@ export function isExactMatch(a, b) {
155
143
  return false;
156
144
  if (a.language !== b.language)
157
145
  return false;
158
- if (a.charCount !== b.charCount)
159
- return false;
160
- if (a.sentenceCount !== b.sentenceCount)
161
- return false;
162
- if (a.wordCount !== b.wordCount)
163
- return false;
164
146
  if (!sentenceArraysEqual(a.sentences, b.sentences))
165
147
  return false;
166
148
  if (!structureEqual(a.structureSig, b.structureSig))
@@ -195,19 +177,14 @@ export function sentenceArraysEqual(a, b) {
195
177
  if (a.length !== b.length)
196
178
  return false;
197
179
  for (let i = 0; i < a.length; i++) {
198
- if (!sentenceTuplesEqual(a[i], b[i]))
180
+ if (a[i] !== b[i])
199
181
  return false;
200
182
  }
201
183
  return true;
202
184
  }
203
- /** Math + words full equality. The disambiguator for math collisions. */
185
+ /** Backwards-compatible alias: sentence equality is now plain string equality. */
204
186
  export function sentenceTuplesEqual(a, b) {
205
- return (a.c === b.c &&
206
- a.f === b.f &&
207
- a.l === b.l &&
208
- a.t === b.t &&
209
- arraysEqual(a.wls, b.wls) &&
210
- arraysEqual(a.w, b.w));
187
+ return a === b;
211
188
  }
212
189
  export function isSentencePrefix(short, long) {
213
190
  if (!Array.isArray(short) || !Array.isArray(long))
@@ -215,7 +192,7 @@ export function isSentencePrefix(short, long) {
215
192
  if (short.length === 0 || short.length > long.length)
216
193
  return false;
217
194
  for (let i = 0; i < short.length; i++) {
218
- if (!sentenceTuplesEqual(short[i], long[i]))
195
+ if (short[i] !== long[i])
219
196
  return false;
220
197
  }
221
198
  return true;
@@ -227,7 +204,7 @@ export function isSentenceSuffix(short, long) {
227
204
  return false;
228
205
  const offset = long.length - short.length;
229
206
  for (let i = 0; i < short.length; i++) {
230
- if (!sentenceTuplesEqual(short[i], long[i + offset]))
207
+ if (short[i] !== long[i + offset])
231
208
  return false;
232
209
  }
233
210
  return true;
@@ -238,15 +215,312 @@ export function isSentenceConcat(combined, first, second) {
238
215
  if (combined.length !== first.length + second.length)
239
216
  return false;
240
217
  for (let i = 0; i < first.length; i++) {
241
- if (!sentenceTuplesEqual(combined[i], first[i]))
218
+ if (combined[i] !== first[i])
242
219
  return false;
243
220
  }
244
221
  for (let i = 0; i < second.length; i++) {
245
- if (!sentenceTuplesEqual(combined[first.length + i], second[i]))
222
+ if (combined[first.length + i] !== second[i])
246
223
  return false;
247
224
  }
248
225
  return true;
249
226
  }
227
+ /** Compact type tags as written to disk. */
228
+ const SHORT_TAG = {
229
+ bulletList: 'ul',
230
+ orderedList: 'ol',
231
+ taskList: 'tl',
232
+ blockquote: 'bq',
233
+ listItem: 'li',
234
+ taskItem: 'ti',
235
+ horizontalRule: 'hr',
236
+ image: 'img',
237
+ table: 'tbl',
238
+ };
239
+ const FULL_TYPE = {
240
+ ul: 'bulletList',
241
+ ol: 'orderedList',
242
+ tl: 'taskList',
243
+ bq: 'blockquote',
244
+ li: 'listItem',
245
+ ti: 'taskItem',
246
+ hr: 'horizontalRule',
247
+ img: 'image',
248
+ tbl: 'table',
249
+ };
250
+ function slimMarks(sig) {
251
+ if (!sig)
252
+ return null;
253
+ const out = {};
254
+ if (sig.bold)
255
+ out.b = sig.bold;
256
+ if (sig.italic)
257
+ out.i = sig.italic;
258
+ if (sig.links)
259
+ out.l = sig.links;
260
+ if (sig.code)
261
+ out.c = sig.code;
262
+ return Object.keys(out).length > 0 ? out : null;
263
+ }
264
+ function enrichMarks(raw) {
265
+ if (!raw || typeof raw !== 'object')
266
+ return { ...ZERO_MARKS };
267
+ return {
268
+ bold: raw.b || 0,
269
+ italic: raw.i || 0,
270
+ links: raw.l || 0,
271
+ code: raw.c || 0,
272
+ };
273
+ }
274
+ /** Encode a rich Fingerprint + id into the slim disk tuple. */
275
+ export function slimEntry(id, fp) {
276
+ const marks = slimMarks(fp.structureSig);
277
+ if (fp.type === 'paragraph') {
278
+ const out = [id];
279
+ if (fp.sentences && fp.sentences.length > 0)
280
+ out.push(fp.sentences);
281
+ if (marks) {
282
+ if (out.length === 1)
283
+ out.push([]);
284
+ out.push(marks);
285
+ }
286
+ return out;
287
+ }
288
+ if (fp.type === 'heading') {
289
+ const tag = `h${fp.level || 1}`;
290
+ const out = [id, tag, fp.sentences || []];
291
+ if (marks)
292
+ out.push(marks);
293
+ return out;
294
+ }
295
+ if (fp.type === 'codeBlock') {
296
+ return [id, 'code', fp.language || '', fp.contentHash || ''];
297
+ }
298
+ if (CONTAINER_TYPES.has(fp.type)) {
299
+ const tag = SHORT_TAG[fp.type] || fp.type;
300
+ const out = [id, tag];
301
+ if (fp.childTypes && fp.childTypes.length > 0) {
302
+ out.push(fp.childTypes.map((t) => SHORT_TAG[t] || t));
303
+ }
304
+ return out;
305
+ }
306
+ // Atomic blocks: horizontalRule, image, table
307
+ const tag = SHORT_TAG[fp.type] || fp.type;
308
+ return [id, tag];
309
+ }
310
+ /**
311
+ * Decode a slim disk tuple's content fields (type, sentences, marks, etc.)
312
+ * without any positional/structural context. Used by both block-context
313
+ * enrichment (legacy fallback) and slim-array-walker enrichment.
314
+ */
315
+ function decodeSlimTuple(slim) {
316
+ if (!Array.isArray(slim) || slim.length === 0 || typeof slim[0] !== 'string')
317
+ return null;
318
+ const id = slim[0];
319
+ const second = slim[1];
320
+ let type = 'paragraph';
321
+ let sentences = [];
322
+ let level;
323
+ let language;
324
+ let contentHash;
325
+ let childTypes;
326
+ let marksRaw = null;
327
+ if (slim.length === 1) {
328
+ // Empty paragraph
329
+ }
330
+ else if (Array.isArray(second)) {
331
+ // Paragraph with sentences
332
+ sentences = second.filter((x) => typeof x === 'string');
333
+ if (slim.length >= 3 && !Array.isArray(slim[2]))
334
+ marksRaw = slim[2];
335
+ }
336
+ else if (typeof second === 'string') {
337
+ const tag = second;
338
+ if (/^h([1-6])$/.test(tag)) {
339
+ type = 'heading';
340
+ level = parseInt(tag.slice(1), 10);
341
+ if (Array.isArray(slim[2]))
342
+ sentences = slim[2].filter((x) => typeof x === 'string');
343
+ if (slim.length >= 4 && !Array.isArray(slim[3]))
344
+ marksRaw = slim[3];
345
+ }
346
+ else if (tag === 'code') {
347
+ type = 'codeBlock';
348
+ language = typeof slim[2] === 'string' ? slim[2] : '';
349
+ contentHash = typeof slim[3] === 'string' ? slim[3] : '';
350
+ }
351
+ else if (FULL_TYPE[tag]) {
352
+ type = FULL_TYPE[tag];
353
+ if (CONTAINER_TYPES.has(type) && Array.isArray(slim[2])) {
354
+ childTypes = slim[2].map((t) => FULL_TYPE[t] || t);
355
+ }
356
+ }
357
+ else {
358
+ // Unknown short tag — carry through verbatim
359
+ type = tag;
360
+ }
361
+ }
362
+ return { id, type, sentences, level, language, contentHash, childTypes, structureSig: enrichMarks(marksRaw) };
363
+ }
364
+ /**
365
+ * Decode a slim disk tuple back into {id, fingerprint}. Block context (the
366
+ * block at this entry's position in the freshly-parsed body, plus the full
367
+ * block list for neighbor lookups) supplies the derived fields. If no block
368
+ * context is available (graveyard entries — deleted blocks have no body),
369
+ * derived position/neighbor fields default to safe values; matcher rules
370
+ * for graveyard restore only consult type + sentences + marks + childTypes,
371
+ * which are all carried in slim.
372
+ */
373
+ export function enrichEntry(slim, block, allBlocks) {
374
+ const decoded = decodeSlimTuple(slim);
375
+ if (!decoded)
376
+ return null;
377
+ const { id, type, sentences, level, language, contentHash, childTypes, structureSig } = decoded;
378
+ // Derived from block context, with safe fallbacks for graveyard entries
379
+ // (where `block` is null — the deleted block is gone from the body).
380
+ const fingerprint = {
381
+ type,
382
+ position: block ? block.position : -1,
383
+ parentPosition: block ? block.parentPosition : null,
384
+ ordinalInParent: block ? block.ordinalInParent : undefined,
385
+ sentences,
386
+ structureSig,
387
+ prevType: block ? allBlocks[block.position - 1]?.type || null : null,
388
+ nextType: block ? allBlocks[block.position + 1]?.type || null : null,
389
+ parentType: block && block.parentPosition != null
390
+ ? allBlocks[block.parentPosition]?.type ?? null
391
+ : null,
392
+ };
393
+ if (type === 'heading' && level !== undefined)
394
+ fingerprint.level = level;
395
+ if (type === 'codeBlock') {
396
+ fingerprint.language = language || '';
397
+ fingerprint.contentHash = contentHash || '';
398
+ }
399
+ if (CONTAINER_TYPES.has(type)) {
400
+ if (childTypes !== undefined) {
401
+ fingerprint.childCount = childTypes.length;
402
+ fingerprint.childTypes = childTypes;
403
+ }
404
+ else if (block) {
405
+ // No childTypes in slim (older entry) — derive from current tree.
406
+ const children = allBlocks.filter((b) => b.parentPosition === block.position);
407
+ fingerprint.childCount = children.length;
408
+ fingerprint.childTypes = children.map((c) => c.type);
409
+ }
410
+ else {
411
+ fingerprint.childCount = 0;
412
+ fingerprint.childTypes = [];
413
+ }
414
+ }
415
+ return { id, fingerprint };
416
+ }
417
+ /**
418
+ * Enrich slim disk entries WITHOUT parsing the body. The slim array IS the
419
+ * previous state — position is the array index, parent is the most-recent
420
+ * unfilled container (tracked via stack), neighbor types come from slim[i±1].
421
+ *
422
+ * This avoids the O(N words) markdown re-parse that block-context enrichment
423
+ * needs for derived fields. The slim array already encodes everything; the
424
+ * body parse was reconstructing what was implicit.
425
+ */
426
+ export function enrichSlimArray(slimList) {
427
+ const out = [];
428
+ // Stack of open containers. Each frame records the container's position and type, how many child slots it declared (`expected`), and how many have been filled so far (`consumed`).
429
+ // A container's children are the next `expected` entries that land while it's on the stack.
430
+ const stack = [];
431
+ // First pass: decode all tuples (we need types up-front for neighbor lookups).
432
+ const decoded = slimList.map((s) => decodeSlimTuple(s));
433
+ for (let i = 0; i < slimList.length; i++) {
434
+ const d = decoded[i];
435
+ if (!d)
436
+ continue;
437
+ // Pop containers whose declared child slots are fully consumed.
438
+ while (stack.length > 0 && stack[stack.length - 1].consumed >= stack[stack.length - 1].expected) {
439
+ stack.pop();
440
+ }
441
+ const parent = stack.length > 0 ? stack[stack.length - 1] : null;
442
+ const parentPosition = parent ? parent.position : null;
443
+ const parentType = parent ? parent.type : null;
444
+ const ordinalInParent = parent ? parent.consumed : i;
445
+ const prevType = i > 0 ? (decoded[i - 1]?.type ?? null) : null;
446
+ const nextType = i < slimList.length - 1 ? (decoded[i + 1]?.type ?? null) : null;
447
+ const fingerprint = {
448
+ type: d.type,
449
+ position: i,
450
+ parentPosition,
451
+ ordinalInParent,
452
+ sentences: d.sentences,
453
+ structureSig: d.structureSig,
454
+ prevType,
455
+ nextType,
456
+ parentType,
457
+ };
458
+ if (d.type === 'heading' && d.level !== undefined)
459
+ fingerprint.level = d.level;
460
+ if (d.type === 'codeBlock') {
461
+ fingerprint.language = d.language || '';
462
+ fingerprint.contentHash = d.contentHash || '';
463
+ }
464
+ if (CONTAINER_TYPES.has(d.type)) {
465
+ fingerprint.childCount = d.childTypes ? d.childTypes.length : 0;
466
+ fingerprint.childTypes = d.childTypes || [];
467
+ }
468
+ out.push({ id: d.id, fingerprint });
469
+ // Mark one of the parent's child slots as consumed.
470
+ if (parent)
471
+ parent.consumed++;
472
+ // If this entry is a container with declared children, push onto stack.
473
+ if (d.childTypes !== undefined && d.childTypes.length > 0) {
474
+ stack.push({ position: i, type: d.type, expected: d.childTypes.length, consumed: 0 });
475
+ }
476
+ }
477
+ return out;
478
+ }
479
+ /**
480
+ * Slim a list of {id, fingerprint} entries for disk. Containers and headings
481
+ * carry their type marker; paragraphs default. Caller passes them in the same
482
+ * order they appear in the block tree (matcher output naturally is).
483
+ */
484
+ export function slimEntries(entries) {
485
+ return entries.map((e) => slimEntry(e.id, e.fingerprint));
486
+ }
487
+ /**
488
+ * Enrich slim disk entries against the freshly-parsed block list. Each slim
489
+ * entry at index i is paired with blocks[i]. Entries past the end of `blocks`
490
+ * use null context (typical for graveyard).
491
+ */
492
+ export function enrichEntries(slimList, blocks) {
493
+ const out = [];
494
+ for (let i = 0; i < slimList.length; i++) {
495
+ const slim = slimList[i];
496
+ const block = blocks[i] || null;
497
+ const enriched = enrichEntry(slim, block, blocks);
498
+ if (enriched)
499
+ out.push(enriched);
500
+ }
501
+ return out;
502
+ }
503
+ // ----------------------------------------------------------------------
504
+ // Legacy format detection (v0.14 / v0.15 → ultra-lean)
505
+ // ----------------------------------------------------------------------
506
+ /**
507
+ * Detect legacy (pre-ultra-lean) format at the frontmatter raw-parse layer.
508
+ * Legacy entries are objects with `id` + `fp` keys (or `firstWords`/`w`/`wls`
509
+ * within sentences). Ultra-lean entries are arrays. Mixed input is rare but
510
+ * tolerated by the legacy-migration path, which re-fingerprints positionally.
511
+ */
512
+ export function isLegacyRawEntry(raw) {
513
+ return raw != null && typeof raw === 'object' && !Array.isArray(raw);
514
+ }
515
+ export function anyLegacyRaw(rawList) {
516
+ if (!Array.isArray(rawList))
517
+ return false;
518
+ for (const r of rawList) {
519
+ if (isLegacyRawEntry(r))
520
+ return true;
521
+ }
522
+ return false;
523
+ }
250
524
  function arraysEqual(a, b) {
251
525
  if (!Array.isArray(a) || !Array.isArray(b))
252
526
  return false;
@@ -19,14 +19,14 @@
19
19
  * - Insert (any block still unmatched → fresh ID)
20
20
  * Phase 3: orphans = previousNodes entries no rule claimed (= deletes)
21
21
  *
22
- * Fingerprints use math signals (per-sentence char count, 3-char prefix/suffix,
23
- * terminator, word-length sequence) plus full word arrays for math-collision
24
- * disambiguation. Documented in node-fingerprint.ts.
22
+ * Fingerprints carry one bare hash string per sentence: simpleHash(text +
23
+ * terminator). Hash equality identifies "same sentence text" in at most 8 hex chars.
24
+ * Documented in node-fingerprint.ts.
25
25
  *
26
26
  * adr: adr/node-identity-matcher.md
27
27
  */
28
28
  import { generateNodeId } from './helpers.js';
29
- import { fingerprintAll, isExactMatch, isSameContent, sentenceArraysEqual, sentenceTuplesEqual, } from './node-fingerprint.js';
29
+ import { fingerprintAll, isExactMatch, isSameContent, sentenceArraysEqual, } from './node-fingerprint.js';
30
30
  /**
31
31
  * Run the matcher.
32
32
  *
@@ -459,36 +459,22 @@ function applySlotContinuityRule(unmatched, previousNodes, claimedPrevIds, pinne
459
459
  }
460
460
  }
461
461
  /**
462
- * Lightweight content overlap signal used by slot-continuity scoring.
463
- * Per sentence-pair: +1 f, +1 l, +1 t, +2 wls-equal, +3×shared-words,
464
- * +10 full word-array equality. Word-level overlap is the disambiguator
465
- * when math signals collide.
462
+ * Lightweight content overlap signal used by slot-continuity scoring to
463
+ * disambiguate between multiple candidate orphans in the same slot range.
464
+ *
465
+ * Per sentence pair across both blocks: +1 for each hash that appears in
466
+ * both arrays. Since hashes fold sentence text + terminator together, this
467
+ * counts the number of fully-shared sentences between the two blocks — the
468
+ * matcher's only meaningful similarity question.
466
469
  */
467
470
  function sentenceSignalOverlapScore(a, b) {
468
471
  if (!a.sentences || !b.sentences)
469
472
  return 0;
473
+ const seen = new Set(a.sentences);
470
474
  let score = 0;
471
- for (const sa of a.sentences) {
472
- for (const sb of b.sentences) {
473
- if (sa.f === sb.f)
474
- score++;
475
- if (sa.l === sb.l)
476
- score++;
477
- if (sa.t === sb.t)
478
- score++;
479
- if (arraysEqual(sa.wls, sb.wls))
480
- score += 2;
481
- if (Array.isArray(sa.w) && Array.isArray(sb.w)) {
482
- const aSet = new Set(sa.w);
483
- let shared = 0;
484
- for (const w of sb.w)
485
- if (aSet.has(w))
486
- shared++;
487
- score += shared * 3;
488
- if (arraysEqual(sa.w, sb.w))
489
- score += 10;
490
- }
491
- }
475
+ for (const h of b.sentences) {
476
+ if (seen.has(h))
477
+ score++;
492
478
  }
493
479
  return score;
494
480
  }
@@ -596,21 +582,10 @@ function slotHighBound(previousNodes, claimedPrevIds, pinned, orphanIdx) {
596
582
  function shareAnySentenceTuple(a, b) {
597
583
  if (!Array.isArray(a) || !Array.isArray(b))
598
584
  return false;
599
- for (const sa of a) {
600
- for (const sb of b) {
601
- if (sentenceTuplesEqual(sa, sb))
602
- return true;
603
- }
585
+ const seen = new Set(a);
586
+ for (const sb of b) {
587
+ if (seen.has(sb))
588
+ return true;
604
589
  }
605
590
  return false;
606
591
  }
607
- function arraysEqual(a, b) {
608
- if (!Array.isArray(a) || !Array.isArray(b))
609
- return false;
610
- if (a.length !== b.length)
611
- return false;
612
- for (let i = 0; i < a.length; i++)
613
- if (a[i] !== b[i])
614
- return false;
615
- return true;
616
- }
@@ -630,6 +630,23 @@ export function applyOverlayPure(canonical, entries) {
630
630
  }
631
631
  // Inserts: idempotency check FIRST. If a node with this ID already exists,
632
632
  // refresh its pending marker but do NOT splice another copy.
633
+ //
634
+ // Idempotency MUST account for descendant IDs too: when a container entry
635
+ // places its newContent (a subtree of listItems/paragraphs/etc.), those
636
+ // descendants land in canonical but aren't in nodeById until we re-index.
637
+ // Without that, the descendants' own entries don't see the existing
638
+ // placement and would splice duplicate copies. adr: adr/pending-overlay-model.md
639
+ function indexSubtree(node) {
640
+ if (!node)
641
+ return;
642
+ const id = node?.attrs?.id;
643
+ if (id)
644
+ nodeById.set(id, node);
645
+ if (Array.isArray(node?.content)) {
646
+ for (const child of node.content)
647
+ indexSubtree(child);
648
+ }
649
+ }
633
650
  for (const entry of entries) {
634
651
  if (entry.status !== 'insert')
635
652
  continue;
@@ -654,7 +671,7 @@ export function applyOverlayPure(canonical, entries) {
654
671
  const loc = findNodeWithParent(entry.afterNodeId);
655
672
  if (loc) {
656
673
  loc.parent.splice(loc.index + 1, 0, newNode);
657
- nodeById.set(entry.nodeId, newNode);
674
+ indexSubtree(newNode);
658
675
  placed = true;
659
676
  }
660
677
  }
@@ -664,21 +681,21 @@ export function applyOverlayPure(canonical, entries) {
664
681
  const parent = parentLoc.parent[parentLoc.index];
665
682
  parent.content = parent.content || [];
666
683
  parent.content.unshift(newNode);
667
- nodeById.set(entry.nodeId, newNode);
684
+ indexSubtree(newNode);
668
685
  placed = true;
669
686
  }
670
687
  }
671
688
  if (!placed && entry.afterNodeId === null && entry.parentNodeId === null) {
672
689
  merged.content = merged.content || [];
673
690
  merged.content.unshift(newNode);
674
- nodeById.set(entry.nodeId, newNode);
691
+ indexSubtree(newNode);
675
692
  placed = true;
676
693
  }
677
694
  if (!placed) {
678
695
  newNode.attrs.pendingOrphan = true;
679
696
  merged.content = merged.content || [];
680
697
  merged.content.push(newNode);
681
- nodeById.set(entry.nodeId, newNode);
698
+ indexSubtree(newNode);
682
699
  }
683
700
  }
684
701
  return merged;