openwriter 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/assets/index-0ttVnjRp.css +1 -0
- package/dist/client/assets/{index-B5MXw2pg.js → index-BZ7LCzrR.js} +64 -64
- package/dist/client/index.html +2 -2
- package/dist/plugins/authors-voice/dist/index.d.ts +41 -0
- package/dist/plugins/authors-voice/dist/index.js +206 -0
- package/dist/plugins/authors-voice/package.json +23 -0
- package/dist/plugins/image-gen/dist/index.d.ts +35 -0
- package/dist/plugins/image-gen/dist/index.js +141 -0
- package/dist/plugins/image-gen/package.json +26 -0
- package/dist/plugins/publish/dist/helpers.d.ts +66 -0
- package/dist/plugins/publish/dist/helpers.js +199 -0
- package/dist/plugins/publish/dist/index.d.ts +3 -0
- package/dist/plugins/publish/dist/index.js +1130 -0
- package/dist/plugins/publish/dist/newsletter-tools.d.ts +2 -0
- package/dist/plugins/publish/dist/newsletter-tools.js +394 -0
- package/dist/plugins/publish/package.json +31 -0
- package/dist/plugins/x-api/dist/index.d.ts +27 -0
- package/dist/plugins/x-api/dist/index.js +240 -0
- package/dist/plugins/x-api/package.json +27 -0
- package/dist/server/compact.js +28 -2
- package/dist/server/documents.js +234 -3
- package/dist/server/enrichment.js +125 -0
- package/dist/server/export-routes.js +2 -0
- package/dist/server/install-skill.js +15 -0
- package/dist/server/markdown-parse.js +153 -14
- package/dist/server/markdown-serialize.js +100 -17
- package/dist/server/mcp.js +291 -25
- package/dist/server/node-blocks.js +41 -1
- package/dist/server/node-fingerprint.js +347 -73
- package/dist/server/node-matcher.js +19 -44
- package/dist/server/pending-overlay.js +21 -4
- package/dist/server/state.js +225 -41
- package/dist/server/workspaces.js +27 -5
- package/dist/server/ws.js +10 -0
- package/package.json +2 -1
- package/skill/SKILL.md +38 -7
- package/skill/agents/openwriter-enrichment-minion.md +177 -0
- package/skill/docs/enrichment.md +179 -0
- package/skill/docs/footnotes.md +178 -0
- package/dist/client/assets/index-B3iORmCT.css +0 -1
|
@@ -1,40 +1,45 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Per-block fingerprint computation for node identity tracking.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* In-memory shape (Fingerprint) is rich — it carries position, neighbor types,
|
|
5
|
+
* counts, container children, etc. so matcher rules read what they need
|
|
6
|
+
* directly. Disk shape is ultra-lean — only fields the matcher cannot
|
|
7
|
+
* recompute from the body tree + per-block stored signals get persisted.
|
|
5
8
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* c = char count (excluding terminator + trailing space)
|
|
10
|
-
* f = first PREFIX_LEN chars of sentence (3-char prefix)
|
|
11
|
-
* l = last PREFIX_LEN chars before terminator (3-char suffix)
|
|
12
|
-
* t = terminator type ('D'|'E'|'Q'|'-')
|
|
13
|
-
* wls = word length sequence (array of integers)
|
|
14
|
-
* w = word array — defense-in-depth disambiguator when math collides
|
|
9
|
+
* PER-SENTENCE: bare hash string. simpleHash(text + terminator) folds the
|
|
10
|
+
* terminator type into the hash, so "Hello?" and "Hello." produce distinct
|
|
11
|
+
* hashes without needing a separate field. Unsigned hex, up to 8 chars.
|
|
15
12
|
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
13
|
+
* PER-BLOCK on disk (tuple array, position-indexed):
|
|
14
|
+
* paragraph (default): [id, sentences[], marks?]
|
|
15
|
+
* empty paragraph: [id]
|
|
16
|
+
* heading: [id, "h1".."h6", sentences[], marks?]
|
|
17
|
+
* codeBlock: [id, "code", language, contentHash]
|
|
18
|
+
* horizontalRule: [id, "hr"]
|
|
19
|
+
* image: [id, "img"]
|
|
20
|
+
* table: [id, "tbl"]
|
|
21
|
+
* bulletList: [id, "ul", childTypes[]]
|
|
22
|
+
* orderedList: [id, "ol", childTypes[]]
|
|
23
|
+
* taskList: [id, "tl", childTypes[]]
|
|
24
|
+
* blockquote: [id, "bq", childTypes[]]
|
|
25
|
+
* listItem: [id, "li", childTypes[]]
|
|
26
|
+
* taskItem: [id, "ti", childTypes[]]
|
|
18
27
|
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
28
|
+
* `marks` is a compact object with non-zero entries only: {b?, i?, l?, c?}.
|
|
29
|
+
* childTypes uses the same compact tags as the type position itself.
|
|
21
30
|
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
31
|
+
* Derived at enrich time (never on disk): position, parentPosition,
|
|
32
|
+
* ordinalInParent, charCount, sentenceCount, wordCount, prevType, nextType,
|
|
33
|
+
* parentType. Each is a function of the array index + sibling tree at the
|
|
34
|
+
* time of read.
|
|
25
35
|
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
* reduces realistic collisions to near zero.
|
|
31
|
-
* - Longer prefixes (5+) approach "encoding the first word" rather than
|
|
32
|
-
* a math signal; we get diminishing returns past 3.
|
|
36
|
+
* Two sentences are equal iff their hashes are equal (string ==).
|
|
37
|
+
* Two blocks match exactly iff type matches, sentences arrays are equal,
|
|
38
|
+
* non-zero structureSig is equal, and any container child-type array is equal.
|
|
39
|
+
* Splits/merges are detected via prefix/suffix/concatenation of sentence arrays.
|
|
33
40
|
*
|
|
34
41
|
* adr: adr/node-identity-matcher.md
|
|
35
42
|
*/
|
|
36
|
-
const WORD_FALLBACK_WINDOW = 5;
|
|
37
|
-
const PREFIX_LEN = 3;
|
|
38
43
|
const CONTAINER_TYPES = new Set([
|
|
39
44
|
'bulletList',
|
|
40
45
|
'orderedList',
|
|
@@ -44,26 +49,21 @@ const CONTAINER_TYPES = new Set([
|
|
|
44
49
|
'listItem',
|
|
45
50
|
'taskItem',
|
|
46
51
|
]);
|
|
52
|
+
const ZERO_MARKS = { bold: 0, italic: 0, links: 0, code: 0 };
|
|
47
53
|
/** Compute a fingerprint for a single block, given its position in the block list. */
|
|
48
54
|
export function fingerprint(block, allBlocks) {
|
|
49
55
|
const text = block.text || '';
|
|
50
|
-
const sentences = splitSentences(text);
|
|
51
|
-
const words = tokenizeWords(text);
|
|
56
|
+
const sentences = splitSentences(text).map(sentenceHash);
|
|
52
57
|
const fp = {
|
|
53
58
|
type: block.type,
|
|
54
59
|
position: block.position,
|
|
55
60
|
parentPosition: block.parentPosition,
|
|
56
61
|
ordinalInParent: block.ordinalInParent,
|
|
57
|
-
|
|
58
|
-
sentenceCount: sentences.length,
|
|
59
|
-
wordCount: words.length,
|
|
60
|
-
sentences: sentences.map(sentenceTuple),
|
|
62
|
+
sentences,
|
|
61
63
|
structureSig: block.inlineMarks || { bold: 0, italic: 0, links: 0, code: 0 },
|
|
62
64
|
prevType: allBlocks[block.position - 1]?.type || null,
|
|
63
65
|
nextType: allBlocks[block.position + 1]?.type || null,
|
|
64
66
|
parentType: block.parentPosition != null ? allBlocks[block.parentPosition]?.type ?? null : null,
|
|
65
|
-
firstWords: words.slice(0, WORD_FALLBACK_WINDOW),
|
|
66
|
-
lastWords: words.slice(-WORD_FALLBACK_WINDOW),
|
|
67
67
|
};
|
|
68
68
|
if (block.type === 'heading')
|
|
69
69
|
fp.level = block.level;
|
|
@@ -78,23 +78,9 @@ export function fingerprint(block, allBlocks) {
|
|
|
78
78
|
}
|
|
79
79
|
return fp;
|
|
80
80
|
}
|
|
81
|
-
/**
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
* are defense-in-depth for the rare case where math still collides under
|
|
85
|
-
* richer prefixes.
|
|
86
|
-
*/
|
|
87
|
-
function sentenceTuple(sentence) {
|
|
88
|
-
const t = sentence.text;
|
|
89
|
-
const words = tokenizeWords(t);
|
|
90
|
-
return {
|
|
91
|
-
c: t.length,
|
|
92
|
-
f: t.slice(0, PREFIX_LEN),
|
|
93
|
-
l: t.slice(-PREFIX_LEN),
|
|
94
|
-
t: sentence.terminator,
|
|
95
|
-
wls: words.map((w) => w.length),
|
|
96
|
-
w: words,
|
|
97
|
-
};
|
|
81
|
+
/** Hash one sentence's text including its terminator so "X." and "X?" don't collide. */
|
|
82
|
+
function sentenceHash(sentence) {
|
|
83
|
+
return simpleHash(sentence.text + sentence.terminator);
|
|
98
84
|
}
|
|
99
85
|
export function fingerprintAll(blocks) {
|
|
100
86
|
return blocks.map((b) => fingerprint(b, blocks));
|
|
@@ -135,18 +121,20 @@ export function tokenizeWords(text) {
|
|
|
135
121
|
.map((w) => w.replace(/^[^\w]+|[^\w]+$/g, ''))
|
|
136
122
|
.filter((w) => w.length > 0);
|
|
137
123
|
}
|
|
124
|
+
/** 32-bit unsigned content hash → 1-8 hex chars, no sign prefix. */
|
|
138
125
|
export function simpleHash(s) {
|
|
139
126
|
let h = 0;
|
|
140
127
|
for (let i = 0; i < s.length; i++) {
|
|
141
128
|
h = ((h << 5) - h) + s.charCodeAt(i);
|
|
142
129
|
h |= 0;
|
|
143
130
|
}
|
|
144
|
-
return h.toString(16);
|
|
131
|
+
return (h >>> 0).toString(16);
|
|
145
132
|
}
|
|
146
133
|
/**
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
*
|
|
134
|
+
* Exact match: type + content fingerprint + structure agree. Used by Phase 1
|
|
135
|
+
* pinning and graveyard-restore. Sentence-array equality implies same sentence
|
|
136
|
+
* count and same content text; charCount/wordCount are redundant once hashes
|
|
137
|
+
* line up and have been removed from the Fingerprint shape.
|
|
150
138
|
*/
|
|
151
139
|
export function isExactMatch(a, b) {
|
|
152
140
|
if (a.type !== b.type)
|
|
@@ -155,12 +143,6 @@ export function isExactMatch(a, b) {
|
|
|
155
143
|
return false;
|
|
156
144
|
if (a.language !== b.language)
|
|
157
145
|
return false;
|
|
158
|
-
if (a.charCount !== b.charCount)
|
|
159
|
-
return false;
|
|
160
|
-
if (a.sentenceCount !== b.sentenceCount)
|
|
161
|
-
return false;
|
|
162
|
-
if (a.wordCount !== b.wordCount)
|
|
163
|
-
return false;
|
|
164
146
|
if (!sentenceArraysEqual(a.sentences, b.sentences))
|
|
165
147
|
return false;
|
|
166
148
|
if (!structureEqual(a.structureSig, b.structureSig))
|
|
@@ -195,19 +177,14 @@ export function sentenceArraysEqual(a, b) {
|
|
|
195
177
|
if (a.length !== b.length)
|
|
196
178
|
return false;
|
|
197
179
|
for (let i = 0; i < a.length; i++) {
|
|
198
|
-
if (
|
|
180
|
+
if (a[i] !== b[i])
|
|
199
181
|
return false;
|
|
200
182
|
}
|
|
201
183
|
return true;
|
|
202
184
|
}
|
|
203
|
-
/**
|
|
185
|
+
/** Backwards-compatible alias — sentence equality is now string equality. */
|
|
204
186
|
export function sentenceTuplesEqual(a, b) {
|
|
205
|
-
return
|
|
206
|
-
a.f === b.f &&
|
|
207
|
-
a.l === b.l &&
|
|
208
|
-
a.t === b.t &&
|
|
209
|
-
arraysEqual(a.wls, b.wls) &&
|
|
210
|
-
arraysEqual(a.w, b.w));
|
|
187
|
+
return a === b;
|
|
211
188
|
}
|
|
212
189
|
export function isSentencePrefix(short, long) {
|
|
213
190
|
if (!Array.isArray(short) || !Array.isArray(long))
|
|
@@ -215,7 +192,7 @@ export function isSentencePrefix(short, long) {
|
|
|
215
192
|
if (short.length === 0 || short.length > long.length)
|
|
216
193
|
return false;
|
|
217
194
|
for (let i = 0; i < short.length; i++) {
|
|
218
|
-
if (
|
|
195
|
+
if (short[i] !== long[i])
|
|
219
196
|
return false;
|
|
220
197
|
}
|
|
221
198
|
return true;
|
|
@@ -227,7 +204,7 @@ export function isSentenceSuffix(short, long) {
|
|
|
227
204
|
return false;
|
|
228
205
|
const offset = long.length - short.length;
|
|
229
206
|
for (let i = 0; i < short.length; i++) {
|
|
230
|
-
if (
|
|
207
|
+
if (short[i] !== long[i + offset])
|
|
231
208
|
return false;
|
|
232
209
|
}
|
|
233
210
|
return true;
|
|
@@ -238,15 +215,312 @@ export function isSentenceConcat(combined, first, second) {
|
|
|
238
215
|
if (combined.length !== first.length + second.length)
|
|
239
216
|
return false;
|
|
240
217
|
for (let i = 0; i < first.length; i++) {
|
|
241
|
-
if (
|
|
218
|
+
if (combined[i] !== first[i])
|
|
242
219
|
return false;
|
|
243
220
|
}
|
|
244
221
|
for (let i = 0; i < second.length; i++) {
|
|
245
|
-
if (
|
|
222
|
+
if (combined[first.length + i] !== second[i])
|
|
246
223
|
return false;
|
|
247
224
|
}
|
|
248
225
|
return true;
|
|
249
226
|
}
|
|
227
|
+
/** Compact type tags as written to disk. */
|
|
228
|
+
const SHORT_TAG = {
|
|
229
|
+
bulletList: 'ul',
|
|
230
|
+
orderedList: 'ol',
|
|
231
|
+
taskList: 'tl',
|
|
232
|
+
blockquote: 'bq',
|
|
233
|
+
listItem: 'li',
|
|
234
|
+
taskItem: 'ti',
|
|
235
|
+
horizontalRule: 'hr',
|
|
236
|
+
image: 'img',
|
|
237
|
+
table: 'tbl',
|
|
238
|
+
};
|
|
239
|
+
const FULL_TYPE = {
|
|
240
|
+
ul: 'bulletList',
|
|
241
|
+
ol: 'orderedList',
|
|
242
|
+
tl: 'taskList',
|
|
243
|
+
bq: 'blockquote',
|
|
244
|
+
li: 'listItem',
|
|
245
|
+
ti: 'taskItem',
|
|
246
|
+
hr: 'horizontalRule',
|
|
247
|
+
img: 'image',
|
|
248
|
+
tbl: 'table',
|
|
249
|
+
};
|
|
250
|
+
function slimMarks(sig) {
|
|
251
|
+
if (!sig)
|
|
252
|
+
return null;
|
|
253
|
+
const out = {};
|
|
254
|
+
if (sig.bold)
|
|
255
|
+
out.b = sig.bold;
|
|
256
|
+
if (sig.italic)
|
|
257
|
+
out.i = sig.italic;
|
|
258
|
+
if (sig.links)
|
|
259
|
+
out.l = sig.links;
|
|
260
|
+
if (sig.code)
|
|
261
|
+
out.c = sig.code;
|
|
262
|
+
return Object.keys(out).length > 0 ? out : null;
|
|
263
|
+
}
|
|
264
|
+
function enrichMarks(raw) {
|
|
265
|
+
if (!raw || typeof raw !== 'object')
|
|
266
|
+
return { ...ZERO_MARKS };
|
|
267
|
+
return {
|
|
268
|
+
bold: raw.b || 0,
|
|
269
|
+
italic: raw.i || 0,
|
|
270
|
+
links: raw.l || 0,
|
|
271
|
+
code: raw.c || 0,
|
|
272
|
+
};
|
|
273
|
+
}
|
|
274
|
+
/** Encode a rich Fingerprint + id into the slim disk tuple. */
|
|
275
|
+
export function slimEntry(id, fp) {
|
|
276
|
+
const marks = slimMarks(fp.structureSig);
|
|
277
|
+
if (fp.type === 'paragraph') {
|
|
278
|
+
const out = [id];
|
|
279
|
+
if (fp.sentences && fp.sentences.length > 0)
|
|
280
|
+
out.push(fp.sentences);
|
|
281
|
+
if (marks) {
|
|
282
|
+
if (out.length === 1)
|
|
283
|
+
out.push([]);
|
|
284
|
+
out.push(marks);
|
|
285
|
+
}
|
|
286
|
+
return out;
|
|
287
|
+
}
|
|
288
|
+
if (fp.type === 'heading') {
|
|
289
|
+
const tag = `h${fp.level || 1}`;
|
|
290
|
+
const out = [id, tag, fp.sentences || []];
|
|
291
|
+
if (marks)
|
|
292
|
+
out.push(marks);
|
|
293
|
+
return out;
|
|
294
|
+
}
|
|
295
|
+
if (fp.type === 'codeBlock') {
|
|
296
|
+
return [id, 'code', fp.language || '', fp.contentHash || ''];
|
|
297
|
+
}
|
|
298
|
+
if (CONTAINER_TYPES.has(fp.type)) {
|
|
299
|
+
const tag = SHORT_TAG[fp.type] || fp.type;
|
|
300
|
+
const out = [id, tag];
|
|
301
|
+
if (fp.childTypes && fp.childTypes.length > 0) {
|
|
302
|
+
out.push(fp.childTypes.map((t) => SHORT_TAG[t] || t));
|
|
303
|
+
}
|
|
304
|
+
return out;
|
|
305
|
+
}
|
|
306
|
+
// Atomic blocks: horizontalRule, image, table
|
|
307
|
+
const tag = SHORT_TAG[fp.type] || fp.type;
|
|
308
|
+
return [id, tag];
|
|
309
|
+
}
|
|
310
|
+
/**
|
|
311
|
+
* Decode a slim disk tuple's content fields (type, sentences, marks, etc.)
|
|
312
|
+
* without any positional/structural context. Used by both block-context
|
|
313
|
+
* enrichment (legacy fallback) and slim-array-walker enrichment.
|
|
314
|
+
*/
|
|
315
|
+
function decodeSlimTuple(slim) {
|
|
316
|
+
if (!Array.isArray(slim) || slim.length === 0 || typeof slim[0] !== 'string')
|
|
317
|
+
return null;
|
|
318
|
+
const id = slim[0];
|
|
319
|
+
const second = slim[1];
|
|
320
|
+
let type = 'paragraph';
|
|
321
|
+
let sentences = [];
|
|
322
|
+
let level;
|
|
323
|
+
let language;
|
|
324
|
+
let contentHash;
|
|
325
|
+
let childTypes;
|
|
326
|
+
let marksRaw = null;
|
|
327
|
+
if (slim.length === 1) {
|
|
328
|
+
// Empty paragraph
|
|
329
|
+
}
|
|
330
|
+
else if (Array.isArray(second)) {
|
|
331
|
+
// Paragraph with sentences
|
|
332
|
+
sentences = second.filter((x) => typeof x === 'string');
|
|
333
|
+
if (slim.length >= 3 && !Array.isArray(slim[2]))
|
|
334
|
+
marksRaw = slim[2];
|
|
335
|
+
}
|
|
336
|
+
else if (typeof second === 'string') {
|
|
337
|
+
const tag = second;
|
|
338
|
+
if (/^h([1-6])$/.test(tag)) {
|
|
339
|
+
type = 'heading';
|
|
340
|
+
level = parseInt(tag.slice(1), 10);
|
|
341
|
+
if (Array.isArray(slim[2]))
|
|
342
|
+
sentences = slim[2].filter((x) => typeof x === 'string');
|
|
343
|
+
if (slim.length >= 4 && !Array.isArray(slim[3]))
|
|
344
|
+
marksRaw = slim[3];
|
|
345
|
+
}
|
|
346
|
+
else if (tag === 'code') {
|
|
347
|
+
type = 'codeBlock';
|
|
348
|
+
language = typeof slim[2] === 'string' ? slim[2] : '';
|
|
349
|
+
contentHash = typeof slim[3] === 'string' ? slim[3] : '';
|
|
350
|
+
}
|
|
351
|
+
else if (FULL_TYPE[tag]) {
|
|
352
|
+
type = FULL_TYPE[tag];
|
|
353
|
+
if (CONTAINER_TYPES.has(type) && Array.isArray(slim[2])) {
|
|
354
|
+
childTypes = slim[2].map((t) => FULL_TYPE[t] || t);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
else {
|
|
358
|
+
// Unknown short tag — carry through verbatim
|
|
359
|
+
type = tag;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
return { id, type, sentences, level, language, contentHash, childTypes, structureSig: enrichMarks(marksRaw) };
|
|
363
|
+
}
|
|
364
|
+
/**
|
|
365
|
+
* Decode a slim disk tuple back into {id, fingerprint}. Block context (the
|
|
366
|
+
* block at this entry's position in the freshly-parsed body, plus the full
|
|
367
|
+
* block list for neighbor lookups) supplies the derived fields. If no block
|
|
368
|
+
* context is available (graveyard entries — deleted blocks have no body),
|
|
369
|
+
* derived position/neighbor fields default to safe values; matcher rules
|
|
370
|
+
* for graveyard restore only consult type + sentences + marks + childTypes,
|
|
371
|
+
* which are all carried in slim.
|
|
372
|
+
*/
|
|
373
|
+
export function enrichEntry(slim, block, allBlocks) {
|
|
374
|
+
const decoded = decodeSlimTuple(slim);
|
|
375
|
+
if (!decoded)
|
|
376
|
+
return null;
|
|
377
|
+
const { id, type, sentences, level, language, contentHash, childTypes, structureSig } = decoded;
|
|
378
|
+
// Derived from block context, with safe fallbacks for graveyard entries
|
|
379
|
+
// (where `block` is null — the deleted block is gone from the body).
|
|
380
|
+
const fingerprint = {
|
|
381
|
+
type,
|
|
382
|
+
position: block ? block.position : -1,
|
|
383
|
+
parentPosition: block ? block.parentPosition : null,
|
|
384
|
+
ordinalInParent: block ? block.ordinalInParent : undefined,
|
|
385
|
+
sentences,
|
|
386
|
+
structureSig,
|
|
387
|
+
prevType: block ? allBlocks[block.position - 1]?.type || null : null,
|
|
388
|
+
nextType: block ? allBlocks[block.position + 1]?.type || null : null,
|
|
389
|
+
parentType: block && block.parentPosition != null
|
|
390
|
+
? allBlocks[block.parentPosition]?.type ?? null
|
|
391
|
+
: null,
|
|
392
|
+
};
|
|
393
|
+
if (type === 'heading' && level !== undefined)
|
|
394
|
+
fingerprint.level = level;
|
|
395
|
+
if (type === 'codeBlock') {
|
|
396
|
+
fingerprint.language = language || '';
|
|
397
|
+
fingerprint.contentHash = contentHash || '';
|
|
398
|
+
}
|
|
399
|
+
if (CONTAINER_TYPES.has(type)) {
|
|
400
|
+
if (childTypes !== undefined) {
|
|
401
|
+
fingerprint.childCount = childTypes.length;
|
|
402
|
+
fingerprint.childTypes = childTypes;
|
|
403
|
+
}
|
|
404
|
+
else if (block) {
|
|
405
|
+
// No childTypes in slim (older entry) — derive from current tree.
|
|
406
|
+
const children = allBlocks.filter((b) => b.parentPosition === block.position);
|
|
407
|
+
fingerprint.childCount = children.length;
|
|
408
|
+
fingerprint.childTypes = children.map((c) => c.type);
|
|
409
|
+
}
|
|
410
|
+
else {
|
|
411
|
+
fingerprint.childCount = 0;
|
|
412
|
+
fingerprint.childTypes = [];
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
return { id, fingerprint };
|
|
416
|
+
}
|
|
417
|
+
/**
|
|
418
|
+
* Enrich slim disk entries WITHOUT parsing the body. The slim array IS the
|
|
419
|
+
* previous state — position is the array index, parent is the most-recent
|
|
420
|
+
* unfilled container (tracked via stack), neighbor types come from slim[i±1].
|
|
421
|
+
*
|
|
422
|
+
* This avoids the O(N words) markdown re-parse that block-context enrichment
|
|
423
|
+
* needs for derived fields. The slim array already encodes everything; the
|
|
424
|
+
* body parse was reconstructing what was implicit.
|
|
425
|
+
*/
|
|
426
|
+
export function enrichSlimArray(slimList) {
|
|
427
|
+
const out = [];
|
|
428
|
+
// Stack of open containers: position + how many child slots declared + consumed so far.
|
|
429
|
+
// A container's children are the next `expected` entries that land while it's on the stack.
|
|
430
|
+
const stack = [];
|
|
431
|
+
// First pass: decode all tuples (we need types up-front for neighbor lookups).
|
|
432
|
+
const decoded = slimList.map((s) => decodeSlimTuple(s));
|
|
433
|
+
for (let i = 0; i < slimList.length; i++) {
|
|
434
|
+
const d = decoded[i];
|
|
435
|
+
if (!d)
|
|
436
|
+
continue;
|
|
437
|
+
// Pop containers whose declared child slots are fully consumed.
|
|
438
|
+
while (stack.length > 0 && stack[stack.length - 1].consumed >= stack[stack.length - 1].expected) {
|
|
439
|
+
stack.pop();
|
|
440
|
+
}
|
|
441
|
+
const parent = stack.length > 0 ? stack[stack.length - 1] : null;
|
|
442
|
+
const parentPosition = parent ? parent.position : null;
|
|
443
|
+
const parentType = parent ? parent.type : null;
|
|
444
|
+
const ordinalInParent = parent ? parent.consumed : i;
|
|
445
|
+
const prevType = i > 0 ? (decoded[i - 1]?.type ?? null) : null;
|
|
446
|
+
const nextType = i < slimList.length - 1 ? (decoded[i + 1]?.type ?? null) : null;
|
|
447
|
+
const fingerprint = {
|
|
448
|
+
type: d.type,
|
|
449
|
+
position: i,
|
|
450
|
+
parentPosition,
|
|
451
|
+
ordinalInParent,
|
|
452
|
+
sentences: d.sentences,
|
|
453
|
+
structureSig: d.structureSig,
|
|
454
|
+
prevType,
|
|
455
|
+
nextType,
|
|
456
|
+
parentType,
|
|
457
|
+
};
|
|
458
|
+
if (d.type === 'heading' && d.level !== undefined)
|
|
459
|
+
fingerprint.level = d.level;
|
|
460
|
+
if (d.type === 'codeBlock') {
|
|
461
|
+
fingerprint.language = d.language || '';
|
|
462
|
+
fingerprint.contentHash = d.contentHash || '';
|
|
463
|
+
}
|
|
464
|
+
if (CONTAINER_TYPES.has(d.type)) {
|
|
465
|
+
fingerprint.childCount = d.childTypes ? d.childTypes.length : 0;
|
|
466
|
+
fingerprint.childTypes = d.childTypes || [];
|
|
467
|
+
}
|
|
468
|
+
out.push({ id: d.id, fingerprint });
|
|
469
|
+
// Mark one of the parent's child slots as consumed.
|
|
470
|
+
if (parent)
|
|
471
|
+
parent.consumed++;
|
|
472
|
+
// If this entry is a container with declared children, push onto stack.
|
|
473
|
+
if (d.childTypes !== undefined && d.childTypes.length > 0) {
|
|
474
|
+
stack.push({ position: i, type: d.type, expected: d.childTypes.length, consumed: 0 });
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
return out;
|
|
478
|
+
}
|
|
479
|
+
/**
|
|
480
|
+
* Slim a list of {id, fingerprint} entries for disk. Containers and headings
|
|
481
|
+
* carry their type marker; paragraphs default. Caller passes them in the same
|
|
482
|
+
* order they appear in the block tree (matcher output naturally is).
|
|
483
|
+
*/
|
|
484
|
+
export function slimEntries(entries) {
|
|
485
|
+
return entries.map((e) => slimEntry(e.id, e.fingerprint));
|
|
486
|
+
}
|
|
487
|
+
/**
|
|
488
|
+
* Enrich slim disk entries against the freshly-parsed block list. Each slim
|
|
489
|
+
* entry at index i is paired with blocks[i]. Entries past the end of `blocks`
|
|
490
|
+
* use null context (typical for graveyard).
|
|
491
|
+
*/
|
|
492
|
+
export function enrichEntries(slimList, blocks) {
|
|
493
|
+
const out = [];
|
|
494
|
+
for (let i = 0; i < slimList.length; i++) {
|
|
495
|
+
const slim = slimList[i];
|
|
496
|
+
const block = blocks[i] || null;
|
|
497
|
+
const enriched = enrichEntry(slim, block, blocks);
|
|
498
|
+
if (enriched)
|
|
499
|
+
out.push(enriched);
|
|
500
|
+
}
|
|
501
|
+
return out;
|
|
502
|
+
}
|
|
503
|
+
// ----------------------------------------------------------------------
|
|
504
|
+
// Legacy format detection (v0.14 / v0.15 → ultra-lean)
|
|
505
|
+
// ----------------------------------------------------------------------
|
|
506
|
+
/**
|
|
507
|
+
* Detect legacy (pre-ultra-lean) format at the frontmatter raw-parse layer.
|
|
508
|
+
* Legacy entries are objects with `id` + `fp` keys (or `firstWords`/`w`/`wls`
|
|
509
|
+
* within sentences). Ultra-lean entries are arrays. Mixed input is rare but
|
|
510
|
+
* tolerated by the legacy-migration path, which re-fingerprints positionally.
|
|
511
|
+
*/
|
|
512
|
+
export function isLegacyRawEntry(raw) {
|
|
513
|
+
return raw != null && typeof raw === 'object' && !Array.isArray(raw);
|
|
514
|
+
}
|
|
515
|
+
export function anyLegacyRaw(rawList) {
|
|
516
|
+
if (!Array.isArray(rawList))
|
|
517
|
+
return false;
|
|
518
|
+
for (const r of rawList) {
|
|
519
|
+
if (isLegacyRawEntry(r))
|
|
520
|
+
return true;
|
|
521
|
+
}
|
|
522
|
+
return false;
|
|
523
|
+
}
|
|
250
524
|
function arraysEqual(a, b) {
|
|
251
525
|
if (!Array.isArray(a) || !Array.isArray(b))
|
|
252
526
|
return false;
|
|
@@ -19,14 +19,14 @@
|
|
|
19
19
|
* - Insert (any block still unmatched → fresh ID)
|
|
20
20
|
* Phase 3: orphans = previousNodes entries no rule claimed (= deletes)
|
|
21
21
|
*
|
|
22
|
-
* Fingerprints
|
|
23
|
-
* terminator
|
|
24
|
-
*
|
|
22
|
+
* Fingerprints carry one tuple per sentence: char count, content hash,
|
|
23
|
+
* terminator type. Hash equality identifies "same sentence text" in 8 bytes.
|
|
24
|
+
* Documented in node-fingerprint.ts.
|
|
25
25
|
*
|
|
26
26
|
* adr: adr/node-identity-matcher.md
|
|
27
27
|
*/
|
|
28
28
|
import { generateNodeId } from './helpers.js';
|
|
29
|
-
import { fingerprintAll, isExactMatch, isSameContent, sentenceArraysEqual,
|
|
29
|
+
import { fingerprintAll, isExactMatch, isSameContent, sentenceArraysEqual, } from './node-fingerprint.js';
|
|
30
30
|
/**
|
|
31
31
|
* Run the matcher.
|
|
32
32
|
*
|
|
@@ -459,36 +459,22 @@ function applySlotContinuityRule(unmatched, previousNodes, claimedPrevIds, pinne
|
|
|
459
459
|
}
|
|
460
460
|
}
|
|
461
461
|
/**
|
|
462
|
-
* Lightweight content overlap signal used by slot-continuity scoring
|
|
463
|
-
*
|
|
464
|
-
*
|
|
465
|
-
*
|
|
462
|
+
* Lightweight content overlap signal used by slot-continuity scoring to
|
|
463
|
+
* disambiguate between multiple candidate orphans in the same slot range.
|
|
464
|
+
*
|
|
465
|
+
* Per sentence pair across both blocks: +1 for each hash that appears in
|
|
466
|
+
* both arrays. Since hashes fold sentence text + terminator together, this
|
|
467
|
+
* counts the number of fully-shared sentences between the two blocks — the
|
|
468
|
+
* matcher's only meaningful similarity question.
|
|
466
469
|
*/
|
|
467
470
|
function sentenceSignalOverlapScore(a, b) {
|
|
468
471
|
if (!a.sentences || !b.sentences)
|
|
469
472
|
return 0;
|
|
473
|
+
const seen = new Set(a.sentences);
|
|
470
474
|
let score = 0;
|
|
471
|
-
for (const
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
score++;
|
|
475
|
-
if (sa.l === sb.l)
|
|
476
|
-
score++;
|
|
477
|
-
if (sa.t === sb.t)
|
|
478
|
-
score++;
|
|
479
|
-
if (arraysEqual(sa.wls, sb.wls))
|
|
480
|
-
score += 2;
|
|
481
|
-
if (Array.isArray(sa.w) && Array.isArray(sb.w)) {
|
|
482
|
-
const aSet = new Set(sa.w);
|
|
483
|
-
let shared = 0;
|
|
484
|
-
for (const w of sb.w)
|
|
485
|
-
if (aSet.has(w))
|
|
486
|
-
shared++;
|
|
487
|
-
score += shared * 3;
|
|
488
|
-
if (arraysEqual(sa.w, sb.w))
|
|
489
|
-
score += 10;
|
|
490
|
-
}
|
|
491
|
-
}
|
|
475
|
+
for (const h of b.sentences) {
|
|
476
|
+
if (seen.has(h))
|
|
477
|
+
score++;
|
|
492
478
|
}
|
|
493
479
|
return score;
|
|
494
480
|
}
|
|
@@ -596,21 +582,10 @@ function slotHighBound(previousNodes, claimedPrevIds, pinned, orphanIdx) {
|
|
|
596
582
|
function shareAnySentenceTuple(a, b) {
|
|
597
583
|
if (!Array.isArray(a) || !Array.isArray(b))
|
|
598
584
|
return false;
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
}
|
|
585
|
+
const seen = new Set(a);
|
|
586
|
+
for (const sb of b) {
|
|
587
|
+
if (seen.has(sb))
|
|
588
|
+
return true;
|
|
604
589
|
}
|
|
605
590
|
return false;
|
|
606
591
|
}
|
|
607
|
-
function arraysEqual(a, b) {
|
|
608
|
-
if (!Array.isArray(a) || !Array.isArray(b))
|
|
609
|
-
return false;
|
|
610
|
-
if (a.length !== b.length)
|
|
611
|
-
return false;
|
|
612
|
-
for (let i = 0; i < a.length; i++)
|
|
613
|
-
if (a[i] !== b[i])
|
|
614
|
-
return false;
|
|
615
|
-
return true;
|
|
616
|
-
}
|
|
@@ -630,6 +630,23 @@ export function applyOverlayPure(canonical, entries) {
|
|
|
630
630
|
}
|
|
631
631
|
// Inserts: idempotency check FIRST. If a node with this ID already exists,
|
|
632
632
|
// refresh its pending marker but do NOT splice another copy.
|
|
633
|
+
//
|
|
634
|
+
// Idempotency MUST account for descendant IDs too: when a container entry
|
|
635
|
+
// places its newContent (a subtree of listItems/paragraphs/etc.), those
|
|
636
|
+
// descendants land in canonical but aren't in nodeById until we re-index.
|
|
637
|
+
// Without that, the descendants' own entries don't see the existing
|
|
638
|
+
// placement and would splice duplicate copies. adr: adr/pending-overlay-model.md
|
|
639
|
+
function indexSubtree(node) {
|
|
640
|
+
if (!node)
|
|
641
|
+
return;
|
|
642
|
+
const id = node?.attrs?.id;
|
|
643
|
+
if (id)
|
|
644
|
+
nodeById.set(id, node);
|
|
645
|
+
if (Array.isArray(node?.content)) {
|
|
646
|
+
for (const child of node.content)
|
|
647
|
+
indexSubtree(child);
|
|
648
|
+
}
|
|
649
|
+
}
|
|
633
650
|
for (const entry of entries) {
|
|
634
651
|
if (entry.status !== 'insert')
|
|
635
652
|
continue;
|
|
@@ -654,7 +671,7 @@ export function applyOverlayPure(canonical, entries) {
|
|
|
654
671
|
const loc = findNodeWithParent(entry.afterNodeId);
|
|
655
672
|
if (loc) {
|
|
656
673
|
loc.parent.splice(loc.index + 1, 0, newNode);
|
|
657
|
-
|
|
674
|
+
indexSubtree(newNode);
|
|
658
675
|
placed = true;
|
|
659
676
|
}
|
|
660
677
|
}
|
|
@@ -664,21 +681,21 @@ export function applyOverlayPure(canonical, entries) {
|
|
|
664
681
|
const parent = parentLoc.parent[parentLoc.index];
|
|
665
682
|
parent.content = parent.content || [];
|
|
666
683
|
parent.content.unshift(newNode);
|
|
667
|
-
|
|
684
|
+
indexSubtree(newNode);
|
|
668
685
|
placed = true;
|
|
669
686
|
}
|
|
670
687
|
}
|
|
671
688
|
if (!placed && entry.afterNodeId === null && entry.parentNodeId === null) {
|
|
672
689
|
merged.content = merged.content || [];
|
|
673
690
|
merged.content.unshift(newNode);
|
|
674
|
-
|
|
691
|
+
indexSubtree(newNode);
|
|
675
692
|
placed = true;
|
|
676
693
|
}
|
|
677
694
|
if (!placed) {
|
|
678
695
|
newNode.attrs.pendingOrphan = true;
|
|
679
696
|
merged.content = merged.content || [];
|
|
680
697
|
merged.content.push(newNode);
|
|
681
|
-
|
|
698
|
+
indexSubtree(newNode);
|
|
682
699
|
}
|
|
683
700
|
}
|
|
684
701
|
return merged;
|