openwriter 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,24 @@
1
1
  /**
2
2
  * TipTap JSON -> Markdown serialization.
3
3
  * Converts TipTap document to markdown with YAML frontmatter.
4
+ *
5
+ * Node identity persistence:
6
+ * - Frontmatter `nodes` array carries the (id, fingerprint) pair for every
7
+ * block in the document. On load, markdown-parse.ts runs the matcher
8
+ * against this array to reassign IDs to surviving blocks.
9
+ * - Markdown body is COMPLETELY UNDISTURBED — no anchors, no comment
10
+ * sentinels (except the legacy `<!-- -->` empty-paragraph marker which
11
+ * has no semantic ID attached anymore).
12
+ * - Legacy `^id` caret anchors are no longer emitted. Old docs that still
13
+ * contain them are migrated transparently on the next save: parse reads
14
+ * the anchors, matcher pins them, serialize emits the new `nodes`
15
+ * frontmatter and a clean body.
16
+ *
17
+ * adr: adr/node-identity-matcher.md
4
18
  */
5
- import { LEAF_BLOCK_TYPES } from './helpers.js';
19
+ import { generateNodeId, LEAF_BLOCK_TYPES } from './helpers.js';
20
+ import { tiptapToBlocks } from './node-blocks.js';
21
+ import { fingerprintAll } from './node-fingerprint.js';
6
22
  // ============================================================================
7
23
  // TipTap -> Markdown
8
24
  // ============================================================================
@@ -57,21 +73,98 @@ function collectPendingState(doc) {
57
73
  walk(doc.content || []);
58
74
  return Object.keys(pending).length > 0 ? pending : undefined;
59
75
  }
76
/**
 * Produce the value stored under the frontmatter `nodes` key: one
 * `{ id, fp }` record per block, in the order `tiptapToBlocks` yields them.
 *
 * IDs are read from the TipTap tree via `collectBlockIds`, whose result is
 * treated as index-aligned with the block list; fingerprints come from
 * `fingerprintAll` over that same block list (the TipTap tree is the only
 * input — the markdown is never re-parsed here). A slot holding no usable ID
 * (missing or empty) receives a freshly generated one.
 *
 * @param {object} doc - TipTap document JSON.
 * @returns {Array<{id: *, fp: *}>} One entry per block.
 */
function collectNodesFrontmatter(doc) {
    const blockList = tiptapToBlocks(doc);
    const fps = fingerprintAll(blockList);
    const existingIds = collectBlockIds(doc);
    // Index-aligned zip of the parallel arrays, driven by the block list length.
    return Array.from({ length: blockList.length }, (_, idx) => ({
        id: existingIds[idx] || generateNodeId(),
        fp: fps[idx],
    }));
}
97
+ /**
98
+ * Cap graveyard size to avoid frontmatter bloat on docs with many edits.
99
+ * Newest entries (highest position) win — the ones most likely to be
100
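+ * paste-back targets. Older entries expire silently. NOTE(review): the cap is applied as `slice(0, GRAVEYARD_MAX)`, which keeps the FIRST entries of the array — confirm the caller orders the graveyard newest-first.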
101
+ */
102
+ const GRAVEYARD_MAX = 50;
103
/**
 * Gather `attrs.id` for every block node in pre-order — intended to follow
 * the same traversal as `tiptapToBlocks` so the two results line up by index.
 *
 * Traversal rules:
 * - A node whose type is a known block type contributes one slot (its id, or
 *   `''` when it has none). Its children are visited only if it is also a
 *   container type (lists, list items, blockquote).
 * - A node of any other type contributes nothing, but its children are
 *   always visited.
 *
 * @param {object} doc - TipTap document JSON.
 * @returns {Array} IDs in pre-order; `''` marks a block without an id.
 */
function collectBlockIds(doc) {
    const BLOCK = new Set([
        'heading', 'paragraph', 'bulletList', 'orderedList', 'taskList',
        'listItem', 'taskItem', 'blockquote', 'codeBlock', 'horizontalRule',
        'table', 'image', 'tableRow', 'tableCell', 'tableHeader',
    ]);
    const CONTAINER = new Set([
        'bulletList', 'orderedList', 'taskList', 'listItem', 'taskItem', 'blockquote',
    ]);
    const collected = [];
    const visit = (current) => {
        const isBlock = BLOCK.has(current.type);
        if (isBlock) {
            collected.push(current.attrs?.id || '');
        }
        // Blocks are opaque unless they are containers; non-blocks are transparent.
        const descend = isBlock ? CONTAINER.has(current.type) : true;
        if (descend && current.content) {
            for (const child of current.content) {
                visit(child);
            }
        }
    };
    for (const top of doc.content || []) {
        visit(top);
    }
    return collected;
}
60
131
  /**
61
132
  * Convert TipTap document to markdown with JSON frontmatter.
62
133
  * Metadata stored as minified JSON between --- delimiters (valid YAML).
63
134
  * Editor never sees frontmatter — it's stripped on load, regenerated on save.
64
135
  * Pending state is persisted in frontmatter `pending` key.
136
+ * Node identity persisted in frontmatter `nodes` key (id + fingerprint per block).
65
137
  */
66
138
  export function tiptapToMarkdown(doc, title, metadata) {
67
139
  const meta = { ...metadata, title };
68
- // Collect pending state from node attrs into frontmatter
69
- const pendingState = collectPendingState(doc);
70
- if (pendingState) {
71
- meta.pending = pendingState;
140
+ // Disk is canonical only — never emit `pending:` frontmatter. Pending
141
+ // state lives in the sidecar at `_pending/{docId}.json`, separated from
142
+ // the .md file so external markdown editors see clean canonical content.
143
+ //
144
+ // If the caller passed a doc that still has in-memory pending attrs,
145
+ // serialize from a reverted clone so the body is canonical. Callers
146
+ // that have already done the split (writeToDisk's overlay path) pass
147
+ // an already-canonical doc; this revert is a no-op for them.
148
+ // adr: adr/pending-overlay-model.md
149
+ delete meta.pending;
150
+ const canonicalDoc = revertPendingForSerialization(doc);
151
+ // Collect node identity graph (id + fingerprint per block) for next-load matcher
152
+ const nodes = collectNodesFrontmatter(canonicalDoc);
153
+ if (nodes.length > 0) {
154
+ meta.nodes = nodes;
155
+ }
156
+ else {
157
+ delete meta.nodes;
158
+ }
159
+ // Graveyard: recently-orphaned (id, fingerprint) entries kept across saves so
160
+ // paste-back/undo can restore the original ID via exact fingerprint match.
161
+ // The caller (writeToDisk) puts the matcher's nextGraveyard into metadata.graveyard;
162
+ // we cap it here to keep the file small.
163
+ if (Array.isArray(meta.graveyard) && meta.graveyard.length > 0) {
164
+ meta.graveyard = meta.graveyard.slice(0, GRAVEYARD_MAX);
72
165
  }
73
166
  else {
74
- delete meta.pending;
167
+ delete meta.graveyard;
75
168
  }
76
169
  // Strip undefined/null values
77
170
  for (const key of Object.keys(meta)) {
@@ -79,12 +172,58 @@ export function tiptapToMarkdown(doc, title, metadata) {
79
172
  delete meta[key];
80
173
  }
81
174
  const frontmatter = `---\n${JSON.stringify(meta)}\n---\n\n`;
82
- const body = nodesToMarkdown(doc.content || []);
175
+ // Serialize the body from the canonical (reverted) clone — never from the
176
+ // pending-modified live doc, otherwise the on-disk body would contain
177
+ // rewritten prose without the original anywhere to revert to.
178
+ const body = nodesToMarkdown(canonicalDoc.content || []);
83
179
  return frontmatter + body;
84
180
  }
85
- /** Convert TipTap document to markdown body only (no frontmatter). */
181
/**
 * Serialize a TipTap document to its markdown body, without frontmatter.
 *
 * The document is first passed through `revertPendingForSerialization`, so
 * the emitted body is built from the reverted copy rather than from `doc`
 * itself — the same treatment `tiptapToMarkdown` applies to its body.
 *
 * @param {object} doc - TipTap document JSON.
 * @returns {string} Markdown body.
 */
export function tiptapToBody(doc) {
    const { content } = revertPendingForSerialization(doc);
    return nodesToMarkdown(content || []);
}
187
/** Node attribute names that carry pending-edit decoration; all are stripped before serialization. */
const PENDING_KEYS = ['pendingStatus', 'pendingOriginalContent', 'pendingGroupId', 'pendingTextEdits', 'pendingSelectionFrom', 'pendingSelectionTo', 'pendingOriginalFrom', 'pendingOriginalTo', 'pendingOrphan', 'pendingStaleBaseline'];
/**
 * Build a deep copy of `doc` with pending decorations reverted, used by the
 * markdown serializer so that what reaches disk is canonical content. Mirrors
 * state.cloneWithPendingReverted but is kept local to the serializer to
 * avoid a state.ts → markdown-serialize.ts cycle.
 *
 * Per-node handling, applied at every nesting level:
 * - status='insert'  → the node is dropped
 * - status='rewrite' → replaced by its `pendingOriginalContent` (dropped if absent)
 * - status='delete'  → kept, pending attrs cleared
 * - no status        → kept, stray pending attrs cleared
 * - null/undefined entries in a content array are skipped
 *
 * The input is never mutated: the content is deep-cloned once (JSON
 * round-trip) and all stripping happens on that private copy.
 *
 * @param {object} [doc] - TipTap document JSON; a missing doc or missing
 *   `content` yields an empty document.
 * @returns {{type: 'doc', content: Array}} A new document; only `content`
 *   is carried over from the input.
 */
function revertPendingForSerialization(doc) {
    // Clone the whole tree once. Cloning inside the recursion would re-copy
    // every subtree once per ancestor level.
    const privateCopy = JSON.parse(JSON.stringify(doc?.content || []));
    // Strip pending attrs from a node we already own and recurse into its children.
    function strip(node) {
        if (node.attrs) {
            for (const key of PENDING_KEYS) {
                delete node.attrs[key];
            }
        }
        if (node.content) {
            node.content = walk(node.content);
        }
        return node;
    }
    function walk(nodes) {
        const kept = [];
        for (const node of nodes || []) {
            // JSON cloning turns `undefined` array slots into `null`; neither is a node.
            if (node == null) {
                continue;
            }
            const status = node.attrs?.pendingStatus;
            if (status === 'insert') {
                continue;
            }
            if (status === 'rewrite') {
                const original = node.attrs?.pendingOriginalContent;
                if (original) {
                    kept.push(strip(original));
                }
                continue;
            }
            kept.push(strip(node));
        }
        return kept;
    }
    return { type: 'doc', content: walk(privateCopy) };
}
89
228
  function nodesToMarkdown(nodes) {
90
229
  let result = '';
@@ -98,19 +237,16 @@ function nodeToMarkdown(node, indent) {
98
237
  case 'heading': {
99
238
  const level = node.attrs?.level || 1;
100
239
  const prefix = '#'.repeat(level);
101
- const idSuffix = node.attrs?.id ? ` ^${node.attrs.id}` : '';
102
- return `${prefix} ${inlineToMarkdown(node.content)}${idSuffix}\n\n`;
240
+ // Body stays undisturbed — node ID is persisted in frontmatter `nodes`, not as a trailing anchor.
241
+ return `${prefix} ${inlineToMarkdown(node.content)}\n\n`;
103
242
  }
104
243
  case 'paragraph': {
105
244
  const text = inlineToMarkdown(node.content);
106
- const id = node.attrs?.id;
107
245
  if (text) {
108
- const idSuffix = id ? ` ^${id}` : '';
109
- return `${indent}${text}${idSuffix}\n\n`;
246
+ return `${indent}${text}\n\n`;
110
247
  }
111
- // Empty paragraph: embed id in the existing sentinel comment
112
- const emptyMarker = id ? `<!-- ^${id} -->` : '<!-- -->';
113
- return `${indent}${emptyMarker}\n\n`;
248
+ // Empty paragraph: use plain sentinel (frontmatter `nodes` carries the ID).
249
+ return `${indent}<!-- -->\n\n`;
114
250
  }
115
251
  case 'bulletList':
116
252
  return listToMarkdown(node.content, '- ', indent);
@@ -188,28 +324,76 @@ function taskListToMarkdown(items, indent) {
188
324
  }
189
325
  return result + '\n';
190
326
  }
327
/**
 * Serialize a TipTap table node to a GFM pipe table.
 *
 * Invariants this function maintains (dropping any one causes silent
 * table → paragraph or cell loss on round-trip — observed live as
 * `sync-check FAIL: expected table, got paragraph`):
 *
 * 1. The delimiter row `| --- | --- |` is ALWAYS emitted after the first
 *    row, whether or not any cell is a `tableHeader`. GFM only recognizes a
 *    table when the delimiter row is present. (One-time consequence: a
 *    header-less table's first row becomes header cells after the first
 *    round-trip; stable thereafter.)
 *
 * 2. The column count is the width of the WIDEST row, and every row —
 *    including the header — is padded with empty cells up to it. The header
 *    and delimiter rows must agree in width, and GFM ignores body cells
 *    beyond that width, so sizing from the first row alone would lose the
 *    extra cells of any wider row.
 *
 * 3. `|` inside cell text is escaped as `\|` so it does not end the cell.
 *
 * 4. Multiple blocks inside one cell are joined with `<br>`, and literal
 *    newlines become `<br>`, because a pipe-table cell is a single line.
 *
 * 5. The output starts with a newline so the table is never glued to the
 *    preceding block, and ends with a blank line.
 *
 * @param {object} node - TipTap `table` node; `content` holds the rows.
 * @returns {string} Markdown for the table, or `''` when it has no rows.
 */
function tableToMarkdown(node) {
    const rows = node.content || [];
    if (rows.length === 0) {
        return '';
    }
    /** Flatten one cell's block children into a single line of inline markdown. */
    function cellContentToText(cell) {
        const parts = [];
        for (const child of cell.content || []) {
            // A paragraph always contributes a segment (even when empty); any
            // other block contributes only if it has content to hand to the
            // inline serializer.
            if (child.type === 'paragraph' || child.content) {
                parts.push(inlineToMarkdown(child.content));
            }
        }
        // Escape pipes and replace newlines with <br>.
        return parts.join('<br>').replace(/\|/g, '\\|').replace(/\r?\n/g, '<br>');
    }
    const textsByRow = rows.map((row) => (row.content || []).map((cell) => cellContentToText(cell)));
    const columnCount = textsByRow.reduce((widest, texts) => Math.max(widest, texts.length), 0);
    const lines = [];
    for (const [rowIndex, cellTexts] of textsByRow.entries()) {
        // Pad short rows so every line has the same number of columns.
        while (cellTexts.length < columnCount) {
            cellTexts.push('');
        }
        lines.push(`| ${cellTexts.join(' | ')} |`);
        if (rowIndex === 0) {
            // ALWAYS emit the separator — GFM parsing requires it for table
            // recognition. This is the load-bearing invariant.
            lines.push(`| ${Array(columnCount).fill('---').join(' | ')} |`);
        }
    }
    return `\n${lines.join('\n')}\n\n`;
}
214
398
  // ---- Inline mark serialization ----
215
399
  const SERIALIZED_MARKS = ['bold', 'italic', 'code', 'strike', 'underline', 'highlight', 'subscript', 'superscript', 'link'];
@@ -1,6 +1,38 @@
1
1
  /**
2
2
  * Barrel re-export for markdown serialization and parsing.
3
3
  * All existing imports from './markdown.js' continue to work unchanged.
4
+ *
5
+ * Also exposes `tiptapToMarkdownChecked` — a sync-observed wrapper that
6
+ * verifies the TipTap → markdown → TipTap round-trip preserves document
7
+ * shape. Use this when you want to catch silent drift; the unchecked
8
+ * `tiptapToMarkdown` stays the cheap path for hot loops.
4
9
  */
10
+ import { tiptapToMarkdown } from './markdown-serialize.js';
11
+ import { markdownToTiptap } from './markdown-parse.js';
12
+ import { shapeOfTiptap, compareShapes, formatSyncReport, } from './node-sync-check.js';
5
13
  export { tiptapToMarkdown, tiptapToBody, nodeText, inlineToMarkdown } from './markdown-serialize.js';
6
14
  export { markdownToTiptap, markdownToNodes } from './markdown-parse.js';
15
+ export { shapeOfTiptap, computeShape, compareShapes, formatSyncReport, } from './node-sync-check.js';
16
/**
 * Serialize a TipTap document and immediately verify the round-trip.
 *
 * Steps: serialize with `tiptapToMarkdown`, take the shape of the input
 * `doc`, parse the markdown back with `markdownToTiptap`, take the shape of
 * the re-parsed document, and compare the two. A mismatch is written to
 * `console.error` (labelled `serialize:<title>`) but never thrown — callers
 * inspect `syncReport.ok` and decide whether a failed check should block the
 * save or merely be logged.
 *
 * Cost: one extra parse plus two shape computations per call. Fine for
 * user-driven saves; prefer plain `tiptapToMarkdown` inside tight loops.
 *
 * NOTE(review): the expected shape is computed from `doc` as passed in. If
 * the serializer emits a normalized copy (e.g. with pending edits reverted),
 * a doc carrying pending nodes may be reported as diverged — confirm callers
 * pass an already-canonical document.
 *
 * @param {object} doc - TipTap document JSON.
 * @param {string} title - Document title, forwarded to the serializer.
 * @param {object} metadata - Frontmatter metadata, forwarded to the serializer.
 * @returns {{markdown: string, syncReport: object}} Serialized markdown and
 *   the comparison report.
 */
export function tiptapToMarkdownChecked(doc, title, metadata) {
    const markdown = tiptapToMarkdown(doc, title, metadata);
    const shapeBefore = shapeOfTiptap(doc);
    const { document: roundTripped } = markdownToTiptap(markdown);
    const shapeAfter = shapeOfTiptap(roundTripped);
    const syncReport = compareShapes(shapeBefore, shapeAfter);
    const diverged = !syncReport.ok;
    if (diverged) {
        console.error(formatSyncReport(syncReport, `serialize:${title}`));
    }
    return { markdown, syncReport };
}