openwriter 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,274 @@
1
+ /**
2
+ * TipTap node tree → walker-style block list.
3
+ *
4
+ * The matcher and fingerprint operate on a flat ordered block list (one entry
5
+ * per block, pre-order traversal, with parentPosition pointing back). This
6
+ * adapter converts the TipTap document tree into that shape so the matcher
7
+ * operates on the SAME structural view as the editor — not on a separately
8
+ * re-parsed markdown body.
9
+ *
10
+ * Avoiding a second parse is the architectural defense against walker-vs-
11
+ * parser drift: one block tree feeds both TipTap rendering and node identity
12
+ * tracking.
13
+ *
14
+ * adr: adr/node-identity-matcher.md
15
+ */
16
/**
 * Convert a TipTap document into the flat, pre-order block list consumed by
 * the matcher and the fingerprint code.
 *
 * Every container (list, list item, blockquote, ...) contributes its own entry,
 * and each of its children contributes a separate entry whose `parentPosition`
 * refers back to the container's `position`. A container's text never rolls up
 * its descendants.
 *
 * @param {{content?: Array<object>}} doc - TipTap document JSON (root node).
 * @returns {Array<object>} Newly allocated block list; empty when the
 *   document has no content.
 */
export function tiptapToBlocks(doc) {
    const flattened = [];
    const topLevelNodes = doc.content || [];
    // Top-level blocks have no parent, hence the null parent position.
    walkNodes(topLevelNodes, flattened, null);
    return flattened;
}
28
/**
 * Node types that can wrap other blocks. `walkNodes` falls back to this set
 * for container types it has no dedicated branch for, and `applyIdsToTiptap`
 * uses it when deciding whether a node counts as a block.
 */
const CONTAINER_TYPES = new Set([
    // List wrappers
    'bulletList', 'orderedList', 'taskList',
    // Other top-level wrappers
    'blockquote', 'table',
    // List entries
    'listItem', 'taskItem',
    // Table internals
    'tableRow', 'tableCell', 'tableHeader',
]);
40
/**
 * Append one walker-style block entry per block node in `nodes` (pre-order,
 * depth-first) to `blocks`.
 *
 * Descent rules:
 *  - lists, list items, blockquotes and the remaining `CONTAINER_TYPES`
 *    (except `table`) emit an entry and then recurse with themselves as parent;
 *  - `table` emits a single entry and is NOT descended into (opaque block);
 *  - headings, paragraphs, code blocks, rules and images are leaves;
 *  - any other node that has `content` is a transparent wrapper: nothing is
 *    emitted for it and its children are attributed to the current parent.
 *
 * Children of a transparent wrapper continue the ordinal sequence of the
 * surrounding sibling list, so `ordinalInParent` stays unique among all blocks
 * that share a `parentPosition`.
 *
 * @param {Array<object>} nodes - TipTap nodes at one nesting level.
 * @param {Array<object>} blocks - Output list; mutated in place.
 * @param {number|null} parentPosition - `position` of the enclosing block, or
 *   null at the top level.
 * @param {number} [startOrdinal=0] - Ordinal assigned to the first emitted
 *   block; only non-zero when continuing through a transparent wrapper.
 * @returns {number} The next unused ordinal for `parentPosition`.
 */
function walkNodes(nodes, blocks, parentPosition, startOrdinal = 0) {
    let ordinalInParent = startOrdinal;
    for (const node of nodes) {
        // A block's position is its index in the flat output list.
        const position = blocks.length;
        const id = node.attrs?.id;
        switch (node.type) {
            case 'heading':
                blocks.push({
                    position,
                    type: 'heading',
                    level: node.attrs?.level || 1,
                    text: extractInlineText(node.content || []),
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    inlineMarks: countInlineMarks(node.content || []),
                    id,
                });
                break;
            case 'paragraph':
                blocks.push({
                    position,
                    type: 'paragraph',
                    text: extractInlineText(node.content || []),
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    inlineMarks: countInlineMarks(node.content || []),
                    id,
                });
                break;
            case 'bulletList':
            case 'orderedList':
            case 'taskList':
            case 'blockquote':
                blocks.push({
                    position,
                    type: node.type,
                    text: '',
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    id,
                });
                walkNodes(node.content || [], blocks, position);
                break;
            case 'listItem':
            case 'taskItem':
                // Identity text = first direct paragraph child only, so edits to
                // nested children do not cascade into the item's fingerprint.
                blocks.push({
                    position,
                    type: node.type,
                    text: firstParagraphText(node.content || []),
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    id,
                });
                walkNodes(node.content || [], blocks, position);
                break;
            case 'codeBlock':
                blocks.push({
                    position,
                    type: 'codeBlock',
                    language: node.attrs?.language || '',
                    text: extractInlineText(node.content || []),
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    id,
                });
                break;
            case 'horizontalRule':
                blocks.push({
                    position,
                    type: 'horizontalRule',
                    text: '',
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    id,
                });
                break;
            case 'table':
                // Tables are opaque: one entry, no descent into rows/cells.
                blocks.push({
                    position,
                    type: 'table',
                    text: extractTableText(node),
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    id,
                });
                break;
            case 'image':
                blocks.push({
                    position,
                    type: 'image',
                    text: node.attrs?.alt || '',
                    parentPosition,
                    ordinalInParent: ordinalInParent++,
                    id,
                });
                break;
            default:
                if (CONTAINER_TYPES.has(node.type)) {
                    // Generic container: emit an entry, then descend.
                    blocks.push({
                        position,
                        type: node.type,
                        text: '',
                        parentPosition,
                        ordinalInParent: ordinalInParent++,
                        id,
                    });
                    walkNodes(node.content || [], blocks, position);
                }
                else if (node.content) {
                    // Unknown wrapper: descend without emitting. Its children are
                    // siblings of the surrounding blocks, so keep counting from
                    // the current ordinal instead of restarting at zero.
                    ordinalInParent = walkNodes(node.content, blocks, parentPosition, ordinalInParent);
                }
        }
    }
    return ordinalInParent;
}
171
/**
 * Concatenate the `text` of each inline child, in order, with no separator.
 * Children without a `text` value contribute nothing.
 */
function extractInlineText(nodes) {
    let joined = '';
    for (const inlineNode of nodes) {
        joined += inlineNode.text || '';
    }
    return joined;
}
175
/**
 * Return the inline text of the first DIRECT child of type `paragraph`.
 * Nested nodes are not searched; returns '' when no direct child is a paragraph.
 */
function firstParagraphText(nodes) {
    const firstParagraph = nodes.find((child) => child.type === 'paragraph');
    if (firstParagraph === undefined) {
        return '';
    }
    return extractInlineText(firstParagraph.content || []);
}
183
/**
 * Tally bold / italic / link / code marks over the `text` children of a block.
 * Non-text children and any other mark types are ignored.
 *
 * @returns {{bold: number, italic: number, links: number, code: number}}
 */
function countInlineMarks(nodes) {
    // Mark type -> counter name (note `link` is reported as `links`).
    const counterByMarkType = new Map([
        ['bold', 'bold'],
        ['italic', 'italic'],
        ['link', 'links'],
        ['code', 'code'],
    ]);
    const tally = { bold: 0, italic: 0, links: 0, code: 0 };
    const textNodes = nodes.filter((child) => child.type === 'text');
    textNodes.forEach((textNode) => {
        (textNode.marks || []).forEach((mark) => {
            const counter = counterByMarkType.get(mark.type);
            if (counter !== undefined) {
                tally[counter] += 1;
            }
        });
    });
    return tally;
}
202
/**
 * Collect every text node beneath a table, in document order, and join the
 * pieces with single spaces. Used as the text of the table block, which is
 * fingerprinted as one opaque unit.
 */
function extractTableText(table) {
    // Returns the text pieces found under `nodes`, depth-first, in order.
    const collect = (nodes) => {
        const pieces = [];
        if (!nodes) {
            return pieces;
        }
        for (const child of nodes) {
            if (child.type === 'text') {
                pieces.push(child.text || '');
                continue;
            }
            if (child.content) {
                for (const piece of collect(child.content)) {
                    pieces.push(piece);
                }
            }
        }
        return pieces;
    };
    return collect(table.content).join(' ');
}
218
/**
 * Apply pinned IDs from a matcher result back onto the TipTap node tree.
 *
 * Walks the tree in the same pre-order `walkNodes` uses when flattening and,
 * for every node that `walkNodes` would emit a block for, looks up the pinned
 * ID for that block position and stores it in `node.attrs.id`.
 *
 * @param {{content?: Array<object>}} doc - TipTap document JSON; mutated in place.
 * @param {{get(position: number): (string|undefined)}} pinnedByPosition - Map
 *   from block position to the ID to pin. Positions without an entry (or with
 *   a falsy ID) leave the node untouched.
 * @returns {void}
 */
export function applyIdsToTiptap(doc, pinnedByPosition) {
    // Types `walkNodes` emits one block for WITHOUT descending.
    // CRITICAL: `table` is here because the walker treats tables as opaque.
    // Descending into one would advance the position counter past blocks the
    // walker never emitted, and a pin meant for a later top-level node would
    // land on a node inside the table (the table-row-stole-the-paragraph-id
    // corruption pattern, e.g. `tr:3141ee2a`).
    // adr: adr/node-identity-matcher.md
    const LEAF_BLOCK_TYPES = new Set([
        'heading',
        'paragraph',
        'codeBlock',
        'horizontalRule',
        'table',
        'image',
    ]);
    // Types `walkNodes` has a dedicated emit-then-descend branch for.
    const DEDICATED_CONTAINER_TYPES = new Set([
        'bulletList',
        'orderedList',
        'taskList',
        'listItem',
        'taskItem',
        'blockquote',
    ]);
    let position = 0;
    function walk(nodes) {
        for (const node of nodes) {
            const isLeaf = LEAF_BLOCK_TYPES.has(node.type);
            // `walkNodes` also emits-and-descends for the remaining
            // CONTAINER_TYPES (tableRow / tableCell / tableHeader reached
            // outside a table). Mirror that here so the counters stay in step.
            const isContainer = !isLeaf &&
                (DEDICATED_CONTAINER_TYPES.has(node.type) || CONTAINER_TYPES.has(node.type));
            if (isLeaf || isContainer) {
                const id = pinnedByPosition.get(position);
                if (id) {
                    if (!node.attrs)
                        node.attrs = {};
                    node.attrs.id = id;
                }
                position++;
            }
            // Containers and unknown wrappers are descended into; leaf blocks
            // (including tables) never are.
            if (!isLeaf && node.content)
                walk(node.content);
        }
    }
    walk(doc.content || []);
}
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Per-block fingerprint computation for node identity tracking.
3
+ *
4
+ * The math-first signal hierarchy (push math before words):
5
+ *
6
+ * MATH SIGNALS (exact integers and chars):
7
+ * - type, charCount, sentenceCount, wordCount, level, language, structureSig
8
+ * - sentences[]: per-sentence tuples {c, f, l, t, wls, w}
9
+ * c = char count (excluding terminator + trailing space)
10
+ * f = first PREFIX_LEN chars of sentence (3-char prefix)
11
+ * l = last PREFIX_LEN chars before terminator (3-char suffix)
12
+ * t = terminator type ('D'|'E'|'Q'|'-')
13
+ * wls = word length sequence (array of integers)
14
+ * w = word array — defense-in-depth disambiguator when math collides
15
+ *
16
+ * CONTEXT SIGNALS:
17
+ * - prevType, nextType, parentType
18
+ *
19
+ * FALLBACK WORD SIGNALS (only when math is ambiguous):
20
+ * - firstWords, lastWords (sequence of strings)
21
+ *
22
+ * Two sentences match deterministically if their tuples are equal (math + words).
23
+ * Two blocks match deterministically if their sentence arrays are equal.
24
+ * Splits/merges are detected via array prefix/suffix/concatenation of sentence tuples.
25
+ *
26
+ * Prefix/suffix length 3 is chosen because:
27
+ * - 1 char (single first/last) collided too easily — "Bee ate the deck" vs
28
+ * "Bug ate the desk" hashed identically.
29
+ * - 3 chars captures the first/last whole short word in most sentences and
30
+ * reduces realistic collisions to near zero.
31
+ * - Longer prefixes (5+) approach "encoding the first word" rather than
32
+ * a math signal; we get diminishing returns past 3.
33
+ *
34
+ * adr: adr/node-identity-matcher.md
35
+ */
36
/** How many leading / trailing words are kept as `firstWords` / `lastWords`. */
const WORD_FALLBACK_WINDOW = 5;
/** Length, in characters, of the per-sentence prefix (`f`) and suffix (`l`). */
const PREFIX_LEN = 3;
/** Block types whose fingerprint also records `childCount` and `childTypes`. */
const CONTAINER_TYPES = new Set([
    // List wrappers
    'bulletList', 'orderedList', 'taskList',
    // Other wrappers
    'blockquote', 'table',
    // List entries
    'listItem', 'taskItem',
]);
47
/**
 * Compute the fingerprint of a single block.
 *
 * @param {object} block - Walker-style block (`type`, `position`,
 *   `parentPosition`, `ordinalInParent`, optional `text`, `inlineMarks`,
 *   `level`, `language`).
 * @param {Array<object>} allBlocks - The full flat block list `block` belongs
 *   to, indexed by position; used for neighbour, parent and child lookups.
 * @returns {object} Fingerprint with math signals (counts, sentence tuples,
 *   structure signature), context signals (`prevType`, `nextType`,
 *   `parentType`) and fallback word windows. Headings additionally carry
 *   `level`, code blocks `language` and `contentHash`, containers
 *   `childCount` and `childTypes`.
 */
export function fingerprint(block, allBlocks) {
    // Normalise once: a block without text fingerprints like an empty one.
    const text = block.text || '';
    const sentences = splitSentences(text);
    const words = tokenizeWords(text);
    const fp = {
        type: block.type,
        position: block.position,
        parentPosition: block.parentPosition,
        ordinalInParent: block.ordinalInParent,
        charCount: text.length,
        sentenceCount: sentences.length,
        wordCount: words.length,
        sentences: sentences.map(sentenceTuple),
        structureSig: block.inlineMarks || { bold: 0, italic: 0, links: 0, code: 0 },
        prevType: allBlocks[block.position - 1]?.type || null,
        nextType: allBlocks[block.position + 1]?.type || null,
        parentType: block.parentPosition != null ? allBlocks[block.parentPosition]?.type ?? null : null,
        firstWords: words.slice(0, WORD_FALLBACK_WINDOW),
        lastWords: words.slice(-WORD_FALLBACK_WINDOW),
    };
    if (block.type === 'heading')
        fp.level = block.level;
    if (block.type === 'codeBlock') {
        fp.language = block.language || '';
        // Hash the normalised text, not `block.text`: a code block with no
        // text must hash like an empty one instead of throwing.
        fp.contentHash = simpleHash(text);
    }
    if (CONTAINER_TYPES.has(block.type)) {
        // Direct children only: blocks whose parent is this block.
        const children = allBlocks.filter((b) => b.parentPosition === block.position);
        fp.childCount = children.length;
        fp.childTypes = children.map((c) => c.type);
    }
    return fp;
}
81
/**
 * Turn one split sentence into its fingerprint tuple.
 *
 *   c   - character count of the sentence text
 *   f   - first PREFIX_LEN characters
 *   l   - last PREFIX_LEN characters
 *   t   - terminator code carried over from the sentence
 *   wls - length of every word, in order
 *   w   - the words themselves (tie-breaker when the math fields collide)
 */
function sentenceTuple(sentence) {
    const { text, terminator } = sentence;
    const tokens = tokenizeWords(text);
    const tokenLengths = tokens.map((token) => token.length);
    const head = text.slice(0, PREFIX_LEN);
    const tail = text.slice(-PREFIX_LEN);
    return {
        c: text.length,
        f: head,
        l: tail,
        t: terminator,
        wls: tokenLengths,
        w: tokens,
    };
}
99
/**
 * Fingerprint every block of a flat block list.
 * Each block is fingerprinted against the same list, so neighbour and parent
 * lookups resolve within `blocks`.
 */
export function fingerprintAll(blocks) {
    const fingerprintWithinList = (block) => fingerprint(block, blocks);
    return blocks.map(fingerprintWithinList);
}
102
/**
 * Split text into sentences. Each sentence carries its terminator type:
 *   'D' = declarative (.)
 *   'E' = exclamation (!)
 *   'Q' = question (?)
 *   '-' = no terminator (last fragment with no punctuation)
 *
 * A sentence ends at a run of `.`, `!` or `?` that is followed by whitespace
 * or the end of the text. Terminator characters that are NOT followed by
 * whitespace (as in "example.com" or "3.14") do not end a sentence and stay
 * part of the sentence text. The terminator run itself and the whitespace
 * after it are not part of the sentence text.
 *
 * @param {string} text
 * @returns {Array<{text: string, terminator: 'D'|'E'|'Q'|'-'}>} Sentences in
 *   order; empty array for empty or missing text.
 */
export function splitSentences(text) {
    if (!text)
        return [];
    const TERMINATOR_CODES = { '.': 'D', '!': 'E', '?': 'Q' };
    const sentences = [];
    const re = /([^.!?]+?)([.!?]+)(\s+|$)/g;
    let lastIdx = 0;
    let m;
    while ((m = re.exec(text)) !== null) {
        // The regex is unanchored, so a match may start AFTER `lastIdx` when
        // the text in between contains a terminator not followed by whitespace
        // ("example." in "example.com"). Slice from the end of the previous
        // sentence rather than from the match start so that text is kept.
        const bodyEnd = m.index + m[1].length;
        const sentenceText = text.slice(lastIdx, bodyEnd).trim();
        // Only the first character of a terminator run ("?!", "...") counts.
        const terminator = TERMINATOR_CODES[m[2][0]] ?? '-';
        sentences.push({ text: sentenceText, terminator });
        lastIdx = re.lastIndex;
    }
    if (lastIdx < text.length) {
        // Trailing fragment without a closing terminator.
        const remaining = text.slice(lastIdx).trim();
        if (remaining)
            sentences.push({ text: remaining, terminator: '-' });
    }
    return sentences;
}
130
/**
 * Split text into words: whitespace-separated tokens with leading and trailing
 * punctuation removed. Tokens that consist only of punctuation are dropped.
 * Characters inside a token (apostrophes, hyphens, dots) are kept.
 *
 * Letters, combining marks and digits of any script count as word characters,
 * as does `_`, so non-ASCII words ("café", "Привет") are kept intact. ASCII
 * input tokenizes exactly as it would with `\w`-based trimming.
 *
 * @param {string} text
 * @returns {string[]} Words in order; empty array for empty or missing text.
 */
export function tokenizeWords(text) {
    if (!text)
        return [];
    // Runs of non-word characters anchored at either edge of a token.
    const edgePunctuation = /^[^\p{L}\p{M}\p{N}_]+|[^\p{L}\p{M}\p{N}_]+$/gu;
    return text
        .split(/\s+/)
        .map((w) => w.replace(edgePunctuation, ''))
        .filter((w) => w.length > 0);
}
138
/**
 * 32-bit rolling hash over the UTF-16 code units of `s` (multiplier 31),
 * returned as a signed hexadecimal string (negative values get a leading '-').
 */
export function simpleHash(s) {
    let acc = 0;
    let index = 0;
    while (index < s.length) {
        // Math.imul keeps the multiplication in 32-bit space; `| 0` re-wraps
        // after the addition.
        acc = (Math.imul(acc, 31) + s.charCodeAt(index)) | 0;
        index += 1;
    }
    return acc.toString(16);
}
146
/**
 * Strictest comparison of two fingerprints: type, level, language, all counts,
 * the full sentence tuples (math and words) and the inline-mark signature must
 * agree. When either side is a container, child count and child types must
 * agree as well.
 */
export function isExactMatch(a, b) {
    const scalarFields = ['type', 'level', 'language', 'charCount', 'sentenceCount', 'wordCount'];
    const scalarsDiffer = scalarFields.some((field) => a[field] !== b[field]);
    if (scalarsDiffer) {
        return false;
    }
    const contentMatches = sentenceArraysEqual(a.sentences, b.sentences) &&
        structureEqual(a.structureSig, b.structureSig);
    if (!contentMatches) {
        return false;
    }
    const eitherIsContainer = a.childCount != null || b.childCount != null;
    if (!eitherIsContainer) {
        return true;
    }
    return a.childCount === b.childCount && arraysEqual(a.childTypes, b.childTypes);
}
176
/**
 * Content equality that ignores the block type (used by the type-change rule).
 *
 * Containers are compared by child count and child types; non-containers by
 * their sentence tuples and inline-mark signature. A container never matches
 * a non-container.
 */
export function isSameContent(a, b) {
    const aHasChildren = a.childCount != null;
    const bHasChildren = b.childCount != null;
    if (aHasChildren !== bHasChildren) {
        return false;
    }
    if (aHasChildren) {
        return a.childCount === b.childCount && arraysEqual(a.childTypes, b.childTypes);
    }
    const bothHaveSentences = a.sentences != null && b.sentences != null;
    return bothHaveSentences &&
        sentenceArraysEqual(a.sentences, b.sentences) &&
        structureEqual(a.structureSig, b.structureSig);
}
192
/**
 * True when both arguments are arrays of the same length whose sentence tuples
 * are pairwise equal. Non-array arguments never match.
 */
export function sentenceArraysEqual(a, b) {
    const comparable = Array.isArray(a) && Array.isArray(b) && a.length === b.length;
    if (!comparable) {
        return false;
    }
    for (const [index, tuple] of a.entries()) {
        if (!sentenceTuplesEqual(tuple, b[index])) {
            return false;
        }
    }
    return true;
}
203
/**
 * Full equality of two sentence tuples: the scalar math fields (c, f, l, t),
 * the word-length sequence and the word list must all agree. The word list is
 * what separates sentences whose math fields happen to collide.
 */
export function sentenceTuplesEqual(a, b) {
    const scalarFields = ['c', 'f', 'l', 't'];
    const scalarsAgree = scalarFields.every((field) => a[field] === b[field]);
    return scalarsAgree && arraysEqual(a.wls, b.wls) && arraysEqual(a.w, b.w);
}
212
/**
 * True when `short` is a non-empty leading run of `long`: every tuple of
 * `short` equals the tuple at the same index of `long`. Equal-length arrays
 * qualify; an empty `short` or non-array arguments do not.
 */
export function isSentencePrefix(short, long) {
    const bothArrays = Array.isArray(short) && Array.isArray(long);
    if (!bothArrays) {
        return false;
    }
    const fits = short.length > 0 && short.length <= long.length;
    if (!fits) {
        return false;
    }
    for (const [index, tuple] of short.entries()) {
        if (!sentenceTuplesEqual(tuple, long[index])) {
            return false;
        }
    }
    return true;
}
223
/**
 * True when `short` is a non-empty trailing run of `long`: every tuple of
 * `short` equals the tuple at the matching position counted from the end of
 * `long`. Equal-length arrays qualify; an empty `short` or non-array arguments
 * do not.
 */
export function isSentenceSuffix(short, long) {
    const bothArrays = Array.isArray(short) && Array.isArray(long);
    if (!bothArrays) {
        return false;
    }
    const fits = short.length > 0 && short.length <= long.length;
    if (!fits) {
        return false;
    }
    // Index in `long` where the candidate suffix would begin.
    const start = long.length - short.length;
    for (const [index, tuple] of short.entries()) {
        if (!sentenceTuplesEqual(tuple, long[start + index])) {
            return false;
        }
    }
    return true;
}
235
/**
 * True when `combined` equals `first` followed by `second`, tuple by tuple.
 * All three arguments must be arrays and the lengths must add up exactly;
 * either part may be empty.
 */
export function isSentenceConcat(combined, first, second) {
    const allArrays = [combined, first, second].every((value) => Array.isArray(value));
    if (!allArrays) {
        return false;
    }
    if (combined.length !== first.length + second.length) {
        return false;
    }
    // Lay the two parts end to end and compare position by position.
    const expected = first.concat(second);
    for (let index = 0; index < expected.length; index += 1) {
        if (!sentenceTuplesEqual(combined[index], expected[index])) {
            return false;
        }
    }
    return true;
}
250
/**
 * Shallow, order-sensitive array equality using `===` on each element.
 * Returns false when either argument is not an array.
 */
function arraysEqual(a, b) {
    const comparable = Array.isArray(a) && Array.isArray(b) && a.length === b.length;
    if (!comparable) {
        return false;
    }
    for (const [index, value] of a.entries()) {
        if (value !== b[index]) {
            return false;
        }
    }
    return true;
}
260
/**
 * Compare two inline-mark signatures field by field (bold, italic, links,
 * code). A missing signature on either side never matches.
 */
function structureEqual(a, b) {
    if (!a || !b) {
        return false;
    }
    const markCounters = ['bold', 'italic', 'links', 'code'];
    return markCounters.every((counter) => a[counter] === b[counter]);
}