docrev 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +38 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +68 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/pdf-comments.js +44 -44
  43. package/dist/lib/plugins.js +57 -57
  44. package/dist/lib/pptx-themes.js +115 -115
  45. package/dist/lib/spelling.js +2 -2
  46. package/dist/lib/templates.js +387 -387
  47. package/dist/lib/themes.js +51 -51
  48. package/eslint.config.js +27 -27
  49. package/lib/anchor-match.ts +276 -276
  50. package/lib/annotations.ts +644 -644
  51. package/lib/build.ts +1300 -1251
  52. package/lib/citations.ts +160 -160
  53. package/lib/commands/build.ts +833 -801
  54. package/lib/commands/citations.ts +515 -515
  55. package/lib/commands/comments.ts +1050 -1050
  56. package/lib/commands/context.ts +174 -174
  57. package/lib/commands/core.ts +309 -309
  58. package/lib/commands/doi.ts +435 -435
  59. package/lib/commands/file-ops.ts +372 -372
  60. package/lib/commands/history.ts +320 -320
  61. package/lib/commands/index.ts +87 -87
  62. package/lib/commands/init.ts +259 -259
  63. package/lib/commands/merge-resolve.ts +378 -378
  64. package/lib/commands/preview.ts +178 -178
  65. package/lib/commands/project-info.ts +244 -244
  66. package/lib/commands/quality.ts +517 -517
  67. package/lib/commands/response.ts +454 -454
  68. package/lib/commands/section-boundaries.ts +82 -82
  69. package/lib/commands/sections.ts +451 -451
  70. package/lib/commands/sync.ts +706 -706
  71. package/lib/commands/text-ops.ts +449 -449
  72. package/lib/commands/utilities.ts +448 -448
  73. package/lib/commands/verify-anchors.ts +272 -272
  74. package/lib/commands/word-tools.ts +340 -340
  75. package/lib/comment-realign.ts +517 -517
  76. package/lib/config.ts +84 -84
  77. package/lib/crossref.ts +781 -781
  78. package/lib/csl.ts +191 -191
  79. package/lib/dependencies.ts +98 -98
  80. package/lib/diff-engine.ts +465 -465
  81. package/lib/doi-cache.ts +115 -115
  82. package/lib/doi.ts +897 -897
  83. package/lib/equations.ts +506 -506
  84. package/lib/errors.ts +346 -346
  85. package/lib/format.ts +541 -541
  86. package/lib/git.ts +326 -326
  87. package/lib/grammar.ts +303 -303
  88. package/lib/image-registry.ts +180 -180
  89. package/lib/import.ts +911 -911
  90. package/lib/journals.ts +543 -543
  91. package/lib/merge.ts +633 -633
  92. package/lib/orcid.ts +144 -144
  93. package/lib/pdf-comments.ts +263 -263
  94. package/lib/pdf-import.ts +524 -524
  95. package/lib/plugins.ts +362 -362
  96. package/lib/postprocess.ts +188 -188
  97. package/lib/pptx-color-filter.lua +37 -37
  98. package/lib/pptx-template.ts +469 -469
  99. package/lib/pptx-themes.ts +483 -483
  100. package/lib/protect-restore.ts +520 -520
  101. package/lib/rate-limiter.ts +94 -94
  102. package/lib/response.ts +197 -197
  103. package/lib/restore-references.ts +240 -240
  104. package/lib/review.ts +327 -327
  105. package/lib/schema.ts +417 -417
  106. package/lib/scientific-words.ts +73 -73
  107. package/lib/sections.ts +335 -335
  108. package/lib/slides.ts +756 -756
  109. package/lib/spelling.ts +334 -334
  110. package/lib/templates.ts +526 -526
  111. package/lib/themes.ts +742 -742
  112. package/lib/trackchanges.ts +247 -247
  113. package/lib/tui.ts +450 -450
  114. package/lib/types.ts +550 -550
  115. package/lib/undo.ts +250 -250
  116. package/lib/utils.ts +69 -69
  117. package/lib/variables.ts +179 -179
  118. package/lib/word-extraction.ts +806 -806
  119. package/lib/word.ts +643 -643
  120. package/lib/wordcomments.ts +817 -817
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +28 -28
  123. package/skill/REFERENCE.md +431 -431
  124. package/skill/SKILL.md +258 -258
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
@@ -1,644 +1,644 @@
1
- /**
2
- * CriticMarkup annotation parsing and manipulation
3
- *
4
- * Syntax:
5
- * {++inserted text++} - Insertions
6
- * {--deleted text--} - Deletions
7
- * {~~old~>new~~} - Substitutions
8
- * {>>Author: comment<<} - Comments
9
- * {==text==} - Highlights
10
- */
11
-
12
- import type { Annotation, AnnotationCounts, StripOptions, CommentFilterOptions } from './types.js';
13
-
14
- // =============================================================================
15
- // Constants
16
- // =============================================================================
17
-
18
- /** Window size for context lookup (characters before/after position) */
19
- const CONTEXT_WINDOW_SIZE = 2000;
20
-
21
- /** Characters of context to include in annotation results */
22
- const CONTEXT_SNIPPET_SIZE = 50;
23
-
24
- /** Maximum iterations for nested annotation stripping (safety limit) */
25
- const MAX_STRIP_ITERATIONS = 20;
26
-
27
- /** Maximum author name length in comments */
28
- const MAX_AUTHOR_LENGTH = 30;
29
-
30
- /** Maximum content length before heuristic assumes it's not a comment */
31
- const MAX_COMMENT_CONTENT_LENGTH = 200;
32
-
33
- // =============================================================================
34
- // Patterns
35
- // =============================================================================
36
-
37
- // Patterns for each annotation type
38
- const PATTERNS = {
39
- insert: /\{\+\+(.+?)\+\+\}/gs,
40
- delete: /\{--(.+?)--\}/gs,
41
- substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
42
- comment: /\{>>(.+?)<<\}/gs,
43
- highlight: /\{==(.+?)==\}/gs,
44
- };
45
-
46
- /**
47
- * Check if a potential comment is actually a false positive
48
- * (e.g., figure caption, nested inside other annotation, code block, etc.)
49
- * @param commentContent - The content inside {>>...<<}
50
- * @param fullText - The full document text
51
- * @param position - Position of the comment in the text
52
- * @returns true if this is a false positive (not a real comment)
53
- */
54
- function isCommentFalsePositive(commentContent: string, fullText: string, position: number): boolean {
55
- // Check if inside a code block (fenced or indented)
56
- const textBefore = fullText.slice(Math.max(0, position - CONTEXT_WINDOW_SIZE), position);
57
- const textAfter = fullText.slice(position, Math.min(fullText.length, position + CONTEXT_WINDOW_SIZE));
58
-
59
- // Count unclosed fenced code blocks (``` or ~~~)
60
- const fenceOpens = (textBefore.match(/^```|^~~~/gm) || []).length;
61
- const fenceCloses = (textBefore.match(/```$|~~~$/gm) || []).length;
62
- if (fenceOpens > fenceCloses) return true; // Inside code block
63
-
64
- // Check if on an indented line (4+ spaces or tab at line start = code)
65
- const lineStart = textBefore.lastIndexOf('\n') + 1;
66
- const linePrefix = fullText.slice(lineStart, position);
67
- if (/^(\t| )/.test(linePrefix)) return true; // Indented code
68
-
69
- // Check if inside inline code backticks
70
- const backticksBefore = (linePrefix.match(/`/g) || []).length;
71
- if (backticksBefore % 2 === 1) return true; // Inside inline code
72
-
73
- // Check if nested inside a deletion or insertion block
74
- const nearTextBefore = fullText.slice(Math.max(0, position - 500), position);
75
-
76
- // Count unclosed deletion markers
77
- const delOpens = (nearTextBefore.match(/\{--/g) || []).length;
78
- const delCloses = (nearTextBefore.match(/--\}/g) || []).length;
79
- if (delOpens > delCloses) return true; // Nested inside deletion
80
-
81
- // Count unclosed insertion markers
82
- const insOpens = (nearTextBefore.match(/\{\+\+/g) || []).length;
83
- const insCloses = (nearTextBefore.match(/\+\+\}/g) || []).length;
84
- if (insOpens > insCloses) return true; // Nested inside insertion
85
-
86
- // Heuristics for figure captions and other false positives:
87
-
88
- // Contains image/figure path patterns
89
- if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.pdf/i.test(commentContent)) return true;
90
-
91
- // Contains markdown figure reference syntax
92
- if (/\{#fig:|!\[/.test(commentContent)) return true;
93
-
94
- // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
95
- // periods, and Unicode letters so names like "Jens-Christian Svenning" or
96
- // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
97
- const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(commentContent.trim());
98
- const hasResolvedMark = /^[✓✔]\s/.test(commentContent.trim());
99
-
100
- // Contains URL patterns (likely a link, not a comment) — only filter when
101
- // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
102
- if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;
103
-
104
- // Looks like code (contains programming patterns)
105
- if (/function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) return true;
106
-
107
- // Very long without clear author pattern (likely caption, not comment)
108
- if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;
109
-
110
- // Looks like a figure caption (starts with "Fig" or contains typical caption words)
111
- if (/^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution|Map|Chart|Graph|Plot|Panel)/i.test(commentContent.trim())) {
112
- return true;
113
- }
114
-
115
- // Contains LaTeX-like patterns (likely equation, not comment)
116
- if (/\\[a-z]+\{|\\frac|\\sum|\\int|\\begin\{/.test(commentContent)) return true;
117
-
118
- // Looks like BibTeX entry (not a comment)
119
- if (/@article\{|@book\{|@inproceedings\{/i.test(commentContent)) return true;
120
-
121
- return false;
122
- }
123
-
124
- // Combined pattern for any track change (not comments)
125
- const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
126
-
127
- // =============================================================================
128
- // Public API
129
- // =============================================================================
130
-
131
- /**
132
- * Parse all annotations from text
133
- * @param text - Markdown text containing CriticMarkup annotations
134
- * @returns Array of parsed annotations sorted by position
135
- * @throws TypeError If text is not a string
136
- */
137
- export function parseAnnotations(text: string): Annotation[] {
138
- if (typeof text !== 'string') {
139
- throw new TypeError(`text must be a string, got ${typeof text}`);
140
- }
141
-
142
- const annotations: Annotation[] = [];
143
-
144
- // Build line number lookup
145
- const lines = text.split('\n');
146
- let pos = 0;
147
- const lineStarts = lines.map((line) => {
148
- const start = pos;
149
- pos += line.length + 1;
150
- return start;
151
- });
152
-
153
- function getLine(position: number): number {
154
- for (let i = 0; i < lineStarts.length; i++) {
155
- const start = lineStarts[i];
156
- if (start !== undefined && start > position) return i;
157
- }
158
- return lineStarts.length;
159
- }
160
-
161
- function getContext(position: number, length: number): { before: string; after: string } {
162
- const start = Math.max(0, position - CONTEXT_SNIPPET_SIZE);
163
- const end = Math.min(text.length, position + length + CONTEXT_SNIPPET_SIZE);
164
- const before = text.slice(start, position).split('\n').pop() || '';
165
- const after = text.slice(position + length, end).split('\n')[0] || '';
166
- return { before, after };
167
- }
168
-
169
- // Parse insertions
170
- for (const match of text.matchAll(PATTERNS.insert)) {
171
- if (match.index === undefined) continue;
172
- const ctx = getContext(match.index, match[0].length);
173
- annotations.push({
174
- type: 'insert',
175
- match: match[0],
176
- content: match[1] || '',
177
- position: match.index,
178
- line: getLine(match.index),
179
- ...ctx,
180
- });
181
- }
182
-
183
- // Parse deletions
184
- for (const match of text.matchAll(PATTERNS.delete)) {
185
- if (match.index === undefined) continue;
186
- const ctx = getContext(match.index, match[0].length);
187
- annotations.push({
188
- type: 'delete',
189
- match: match[0],
190
- content: match[1] || '',
191
- position: match.index,
192
- line: getLine(match.index),
193
- ...ctx,
194
- });
195
- }
196
-
197
- // Parse substitutions
198
- for (const match of text.matchAll(PATTERNS.substitute)) {
199
- if (match.index === undefined) continue;
200
- const ctx = getContext(match.index, match[0].length);
201
- annotations.push({
202
- type: 'substitute',
203
- match: match[0],
204
- content: match[1] || '',
205
- replacement: match[2] || '',
206
- position: match.index,
207
- line: getLine(match.index),
208
- ...ctx,
209
- });
210
- }
211
-
212
- // Parse comments (with false positive filtering)
213
- for (const match of text.matchAll(PATTERNS.comment)) {
214
- if (match.index === undefined) continue;
215
- // Skip false positives (figure captions, nested annotations, etc.)
216
- const commentContent = match[1] || '';
217
- if (isCommentFalsePositive(commentContent, text, match.index)) {
218
- continue;
219
- }
220
-
221
- const ctx = getContext(match.index, match[0].length);
222
- let commentText = commentContent;
223
- let author = '';
224
-
225
- // Extract author if present (format: "Author: comment")
226
- const colonIdx = commentText.indexOf(':');
227
- if (colonIdx > 0 && colonIdx < MAX_AUTHOR_LENGTH) {
228
- author = commentText.slice(0, colonIdx).trim();
229
- commentText = commentText.slice(colonIdx + 1).trim();
230
- }
231
-
232
- annotations.push({
233
- type: 'comment',
234
- match: match[0],
235
- content: commentText,
236
- author,
237
- position: match.index,
238
- line: getLine(match.index),
239
- ...ctx,
240
- });
241
- }
242
-
243
- // Sort by position
244
- annotations.sort((a, b) => a.position - b.position);
245
- return annotations;
246
- }
247
-
248
- /**
249
- * Strip annotations from text, applying changes
250
- * Handles nested annotations by iterating until stable
251
- * @param text - Markdown text with CriticMarkup annotations
252
- * @param options - Strip options
253
- * @returns Clean text with annotations applied/removed
254
- * @throws TypeError If text is not a string
255
- */
256
- export function stripAnnotations(text: string, options: StripOptions = {}): string {
257
- if (typeof text !== 'string') {
258
- throw new TypeError(`text must be a string, got ${typeof text}`);
259
- }
260
-
261
- const { keepComments = false } = options;
262
-
263
- // Iterate until no more changes (handles nested annotations)
264
- let prev: string;
265
- let iterations = 0;
266
-
267
- do {
268
- prev = text;
269
-
270
- // Apply substitutions: {~~old~>new~~} → new
271
- text = text.replace(PATTERNS.substitute, '$2');
272
-
273
- // Apply insertions: {++text++} → text
274
- text = text.replace(PATTERNS.insert, '$1');
275
-
276
- // Apply deletions: {--text--} → nothing
277
- // Don't touch surrounding whitespace - just remove the annotation
278
- text = text.replace(PATTERNS.delete, '');
279
-
280
- // Remove highlights: {==text==} → text
281
- text = text.replace(PATTERNS.highlight, '$1');
282
-
283
- // Remove comments unless keeping
284
- if (!keepComments) {
285
- text = text.replace(PATTERNS.comment, '');
286
- }
287
-
288
- // Strip pandoc highlight spans: [text]{.mark} → text.
289
- // When `keepComments=true`, preserve `[anchor]{.mark}` that is the
290
- // anchor of a kept `{>>...<<}` comment. The dual-build flow runs
291
- // stripAnnotations() before prepareMarkdownWithMarkers(), and stripping
292
- // the anchor span here would leave the marker generator with no anchor
293
- // text — collapsing every multi-word anchor to a single fallback word
294
- // in the rebuilt docx.
295
- text = keepComments
296
- ? text.replace(/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1')
297
- : text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');
298
-
299
- // Clean up partial/orphaned markers within the loop
300
- // This handles cases where nested annotations leave behind fragments
301
-
302
- // Empty annotations (from nested stripping)
303
- text = text.replace(/\{----\}/g, '');
304
- text = text.replace(/\{\+\+\+\+\}/g, '');
305
- text = text.replace(/\{--\s*--\}/g, '');
306
- text = text.replace(/\{\+\+\s*\+\+\}/g, '');
307
-
308
- // Orphaned substitution fragments: ~>text~~} or {~~text (no proper pairs)
309
- text = text.replace(/~>[^{]*?~~\}/g, '');
310
- text = text.replace(/\{~~[^~}]*$/gm, '');
311
-
312
- // Handle malformed substitution from nested: {~~{~~old → just strip the {~~
313
- text = text.replace(/\{~~\{~~/g, '{~~');
314
- text = text.replace(/~~\}~~\}/g, '~~}');
315
-
316
- iterations++;
317
- } while (text !== prev && iterations < MAX_STRIP_ITERATIONS);
318
-
319
- // Final cleanup of any remaining orphaned markers
320
- // Orphaned closing markers
321
- text = text.replace(/--\}(?:--\})+/g, '');
322
- text = text.replace(/\+\+\}(?:\+\+\})+/g, '');
323
- text = text.replace(/~~\}(?:~~\})+/g, '');
324
- text = text.replace(/--\}/g, '');
325
- text = text.replace(/\+\+\}/g, '');
326
- text = text.replace(/~~\}/g, '');
327
-
328
- // Orphaned opening markers
329
- text = text.replace(/\{--(?:\{--)+/g, '');
330
- text = text.replace(/\{\+\+(?:\{\+\+)+/g, '');
331
- text = text.replace(/\{~~(?:\{~~)+/g, '');
332
- text = text.replace(/\{--/g, '');
333
- text = text.replace(/\{\+\+/g, '');
334
- text = text.replace(/\{~~/g, '');
335
- text = text.replace(/~>/g, '');
336
-
337
- // Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
338
- // was inside a comment. A [ is orphan if no `]` follows before end of line.
339
- // We deliberately allow other `[` between the candidate and the matching `]`
340
- // — otherwise nested forms like `[[0..9]]{.mark}` would have their outer
341
- // `[` stripped because the lookahead saw the inner `[` as a barrier.
342
- text = text.replace(/\[(?![^\]\n]*\])/g, '');
343
-
344
- return text;
345
- }
346
-
347
- /**
348
- * Collapse multiple spaces to single space, preserving table formatting
349
- * Useful for cleaning up messy Word imports
350
- * @param text - Text to normalize
351
- * @returns Text with multiple spaces collapsed to single spaces
352
- * @throws TypeError If text is not a string
353
- */
354
- export function stripToSingleSpace(text: string): string {
355
- if (typeof text !== 'string') {
356
- throw new TypeError(`text must be a string, got ${typeof text}`);
357
- }
358
-
359
- const lines = text.split('\n');
360
- let inTable = false;
361
-
362
- // Helper to check if a line looks like table content
363
- const looksLikeTableRow = (ln: string): boolean => {
364
- const trimmed = ln.trim();
365
- if (!trimmed) return false;
366
- // Has multiple consecutive spaces (column spacing)
367
- // OR italicized category header with trailing spaces
368
- return /\S\s{2,}\S/.test(trimmed) || (/^\*[^*]+\*\s*$/.test(trimmed) && /\s{2,}$/.test(ln));
369
- };
370
-
371
- for (let i = 0; i < lines.length; i++) {
372
- const line = lines[i];
373
- if (!line) continue;
374
-
375
- // Detect table separator line
376
- const isTableSeparator = /^\|?[\s-]*[-]{3,}[\s|:-]+[-]{3,}/.test(line) ||
377
- /^[-]{3,}\s{2,}[-]{3,}/.test(line);
378
-
379
- if (isTableSeparator) {
380
- inTable = true;
381
- continue;
382
- }
383
-
384
- // Check if we're exiting the table
385
- if (inTable && line.trim() === '') {
386
- let nextContentLine = '';
387
- for (let j = i + 1; j < lines.length && j < i + 5; j++) {
388
- const nextLine = lines[j];
389
- if (nextLine && nextLine.trim() !== '') {
390
- nextContentLine = nextLine;
391
- break;
392
- }
393
- }
394
- if (!looksLikeTableRow(nextContentLine) && !/^[-]{3,}/.test(nextContentLine.trim())) {
395
- inTable = false;
396
- }
397
- continue;
398
- }
399
-
400
- // Only collapse spaces outside tables
401
- if (!inTable) {
402
- lines[i] = line.replace(/ +/g, ' ');
403
- }
404
- }
405
-
406
- return lines.join('\n');
407
- }
408
-
409
- /**
410
- * Check if text contains any CriticMarkup annotations
411
- * @param text - Text to check
412
- * @returns True if text contains any annotations
413
- * @throws TypeError If text is not a string
414
- */
415
- export function hasAnnotations(text: string): boolean {
416
- if (typeof text !== 'string') {
417
- throw new TypeError(`text must be a string, got ${typeof text}`);
418
- }
419
-
420
- return PATTERNS.insert.test(text) ||
421
- PATTERNS.delete.test(text) ||
422
- PATTERNS.substitute.test(text) ||
423
- PATTERNS.comment.test(text) ||
424
- PATTERNS.highlight.test(text);
425
- }
426
-
427
- /**
428
- * Apply a decision to a single annotation (accept or reject)
429
- * @param text - Document text containing the annotation
430
- * @param annotation - Annotation object from parseAnnotations()
431
- * @param accept - True to accept the change, false to reject
432
- * @returns Updated text with the decision applied
433
- * @throws TypeError If text is not a string or annotation is invalid
434
- */
435
- export function applyDecision(text: string, annotation: Annotation, accept: boolean): string {
436
- if (typeof text !== 'string') {
437
- throw new TypeError(`text must be a string, got ${typeof text}`);
438
- }
439
- if (!annotation || typeof annotation.type !== 'string' || typeof annotation.match !== 'string') {
440
- throw new TypeError('annotation must have type and match properties');
441
- }
442
- let replacement: string;
443
-
444
- // Extract any comments embedded in the annotation content
445
- // These should be preserved when accepting deletions or rejecting insertions
446
- const commentPattern = /\{>>[\s\S]*?<<\}/g;
447
- const embeddedComments = (annotation.match || '').match(commentPattern) || [];
448
-
449
- switch (annotation.type) {
450
- case 'insert':
451
- if (accept) {
452
- replacement = annotation.content;
453
- } else {
454
- // Rejecting insertion - preserve any comments that were inside
455
- replacement = embeddedComments.length > 0 ? embeddedComments.join('') : '';
456
- }
457
- break;
458
- case 'delete':
459
- if (accept) {
460
- // Accepting deletion - preserve any comments by placing them before
461
- replacement = embeddedComments.length > 0 ? embeddedComments.join('') : '';
462
- } else {
463
- replacement = annotation.content;
464
- }
465
- break;
466
- case 'substitute':
467
- if (accept) {
468
- // For substitutions, check if comments are in the old text being replaced
469
- const oldTextComments = (annotation.content || '').match(commentPattern) || [];
470
- replacement = annotation.replacement || '';
471
- if (oldTextComments.length > 0) {
472
- // Prepend comments that were in the old text
473
- replacement = oldTextComments.join('') + replacement;
474
- }
475
- } else {
476
- replacement = annotation.content;
477
- }
478
- break;
479
- default:
480
- return text;
481
- }
482
-
483
- return text.replace(annotation.match, replacement);
484
- }
485
-
486
- /**
487
- * Get track changes only (no comments)
488
- * @param text - Markdown text with CriticMarkup annotations
489
- * @returns Array of insert/delete/substitute annotations
490
- * @throws TypeError If text is not a string
491
- */
492
- export function getTrackChanges(text: string): Annotation[] {
493
- // Input validation delegated to parseAnnotations
494
- return parseAnnotations(text).filter((a) => a.type !== 'comment');
495
- }
496
-
497
- /**
498
- * Get comments only
499
- * @param text - Markdown text with CriticMarkup annotations
500
- * @param options - Filter options
501
- * @returns Array of comment annotations
502
- * @throws TypeError If text is not a string
503
- */
504
- export function getComments(text: string, options: CommentFilterOptions = {}): Annotation[] {
505
- // Input validation delegated to parseAnnotations
506
- const { pendingOnly = false, resolvedOnly = false } = options;
507
- let comments = parseAnnotations(text).filter((a) => a.type === 'comment');
508
-
509
- // Check for resolved status marker at end of comment
510
- comments = comments.map((c) => {
511
- const resolved = c.content.endsWith('[RESOLVED]') || c.content.endsWith('[✓]');
512
- return {
513
- ...c,
514
- resolved,
515
- content: resolved
516
- ? c.content.replace(/\s*\[(RESOLVED|✓)\]$/, '').trim()
517
- : c.content,
518
- };
519
- });
520
-
521
- if (pendingOnly) {
522
- comments = comments.filter((c) => !c.resolved);
523
- }
524
- if (resolvedOnly) {
525
- comments = comments.filter((c) => c.resolved);
526
- }
527
-
528
- return comments;
529
- }
530
-
531
- /**
532
- * Mark a comment as resolved or pending
533
- * @param text - Document text containing the comment
534
- * @param comment - Comment annotation object from getComments()
535
- * @param resolved - True to mark resolved, false to mark pending
536
- * @returns Updated text with status marker applied
537
- * @throws TypeError If text is not a string or comment is invalid
538
- */
539
- export function setCommentStatus(text: string, comment: Annotation, resolved: boolean): string {
540
- if (typeof text !== 'string') {
541
- throw new TypeError(`text must be a string, got ${typeof text}`);
542
- }
543
- if (!comment || typeof comment.match !== 'string') {
544
- throw new TypeError('comment must have a match property');
545
- }
546
- // Find the comment in the text
547
- const originalMatch = comment.match;
548
-
549
- if (resolved) {
550
- // Add [RESOLVED] marker before the closing <<
551
- const newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
552
- return text.replace(originalMatch, newMatch);
553
- } else {
554
- // Remove resolved markers
555
- const newMatch = originalMatch.replace(/\s*\[(RESOLVED|✓)\]<<\}$/, '<<}');
556
- return text.replace(originalMatch, newMatch);
557
- }
558
- }
559
-
560
- /**
561
- * Count annotations by type
562
- * @param text - Markdown text with CriticMarkup annotations
563
- * @returns Counts by annotation type
564
- * @throws TypeError If text is not a string
565
- */
566
- export function countAnnotations(text: string): AnnotationCounts {
567
- // Input validation delegated to parseAnnotations
568
- const annotations = parseAnnotations(text);
569
- const counts: AnnotationCounts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };
570
-
571
- for (const a of annotations) {
572
- counts.total++;
573
- switch (a.type) {
574
- case 'insert':
575
- counts.inserts++;
576
- break;
577
- case 'delete':
578
- counts.deletes++;
579
- break;
580
- case 'substitute':
581
- counts.substitutes++;
582
- break;
583
- case 'comment':
584
- counts.comments++;
585
- break;
586
- }
587
- }
588
-
589
- return counts;
590
- }
591
-
592
- /**
593
- * Clean up orphaned/malformed CriticMarkup markers
594
- * This can happen when track changes span across comment boundaries
595
- * @param text - Document text with potentially malformed markers
596
- * @returns Cleaned text with orphaned markers removed
597
- * @throws TypeError If text is not a string
598
- */
599
- export function cleanupOrphanedMarkers(text: string): string {
600
- if (typeof text !== 'string') {
601
- throw new TypeError(`text must be a string, got ${typeof text}`);
602
- }
603
- let result = text;
604
-
605
- // Remove orphaned insertion end markers (++} not preceded by {++)
606
- // These occur when an insertion's start was inside something that got deleted/replaced
607
- result = result.replace(/(?<!\{\+\+[^}]*)\+\+\}/g, '');
608
-
609
- // Remove orphaned deletion end markers (--} not preceded by {--)
610
- result = result.replace(/(?<!\{--[^}]*)--\}/g, '');
611
-
612
- // Remove orphaned substitution end markers (~~} not preceded by {~~)
613
- result = result.replace(/(?<!\{~~[^}]*)~~\}/g, '');
614
-
615
- // Fix unclosed insertions: {++ without matching ++}
616
- // Find {++ and check if there's a matching ++} before the next { marker
617
- result = result.replace(/\{\+\+([^+]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, content) => {
618
- // If content has no ++}, it's unclosed - just keep the content
619
- if (!content.includes('++}')) {
620
- return content;
621
- }
622
- return match;
623
- });
624
-
625
- // Fix unclosed deletions: {-- without matching --}
626
- result = result.replace(/\{--([^-]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, content) => {
627
- if (!content.includes('--}')) {
628
- return content;
629
- }
630
- return match;
631
- });
632
-
633
- // Fix unclosed substitutions: {~~ without matching ~~}
634
- // This is trickier because we need both ~> and ~~}
635
- result = result.replace(/\{~~([^~]*?)~>([^~]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, old, newText) => {
636
- if (!match.includes('~~}')) {
637
- // Unclosed substitution - keep the new text
638
- return newText;
639
- }
640
- return match;
641
- });
642
-
643
- return result;
644
- }
1
+ /**
2
+ * CriticMarkup annotation parsing and manipulation
3
+ *
4
+ * Syntax:
5
+ * {++inserted text++} - Insertions
6
+ * {--deleted text--} - Deletions
7
+ * {~~old~>new~~} - Substitutions
8
+ * {>>Author: comment<<} - Comments
9
+ * {==text==} - Highlights
10
+ */
11
+
12
+ import type { Annotation, AnnotationCounts, StripOptions, CommentFilterOptions } from './types.js';
13
+
14
+ // =============================================================================
15
+ // Constants
16
+ // =============================================================================
17
+
18
/** Window size for context lookup (characters before/after position) */
const CONTEXT_WINDOW_SIZE = 2000;

/** Characters of surrounding text captured on each side of an annotation (`before` / `after`) */
const CONTEXT_SNIPPET_SIZE = 50;

/** Maximum passes of the strip loop over nested annotations (safety limit) */
const MAX_STRIP_ITERATIONS = 20;

/**
 * Maximum author name length in comments: the first ':' must occur before
 * this index for the text preceding it to be treated as the author.
 */
const MAX_AUTHOR_LENGTH = 30;

/**
 * Maximum content length before the heuristic assumes it's not a comment.
 * Only applied when the content has neither an author prefix nor a resolved mark.
 */
const MAX_COMMENT_CONTENT_LENGTH = 200;

// =============================================================================
// Patterns
// =============================================================================

// Patterns for each annotation type.
// Every pattern is global (`g`) and dot-all (`s`): annotation content may span
// lines, and each regex object keeps `lastIndex` state between `test()` /
// `exec()` calls, so callers using those methods must reset it.
const PATTERNS = {
  insert: /\{\+\+(.+?)\+\+\}/gs,
  delete: /\{--(.+?)--\}/gs,
  substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
  comment: /\{>>(.+?)<<\}/gs,
  highlight: /\{==(.+?)==\}/gs,
};
45
+
46
/**
 * Check if a potential comment is actually a false positive
 * (e.g., figure caption, nested inside other annotation, code block, etc.)
 *
 * The checks run in order: code context (fenced block, indented line, inline
 * backticks), nesting inside an unclosed insertion/deletion, then content
 * heuristics (image paths, figure syntax, URLs, code, captions, LaTeX, BibTeX).
 *
 * @param commentContent - The content inside {>>...<<}
 * @param fullText - The full document text
 * @param position - Position of the comment in the text
 * @returns true if this is a false positive (not a real comment)
 */
function isCommentFalsePositive(commentContent: string, fullText: string, position: number): boolean {
  // Inside a fenced code block (``` or ~~~)? Fence lines toggle the state, so
  // an odd number of them before `position` means a block is still open. The
  // whole prefix is scanned: a truncated window could cut off an opening fence
  // and flip the parity.
  const fenceLines = fullText.slice(0, position).match(/^(?:```|~~~)/gm) || [];
  if (fenceLines.length % 2 === 1) return true; // Inside code block

  // Start of the line containing `position`, as an index into fullText.
  const lineStart = position > 0 ? fullText.lastIndexOf('\n', position - 1) + 1 : 0;
  const linePrefix = fullText.slice(lineStart, position);

  // Check if on an indented line (4+ spaces or tab at line start = code)
  if (/^(\t| {4})/.test(linePrefix)) return true; // Indented code

  // Check if inside inline code backticks (odd count before us on this line)
  const backticksBefore = (linePrefix.match(/`/g) || []).length;
  if (backticksBefore % 2 === 1) return true; // Inside inline code

  // Check if nested inside a deletion or insertion block
  const nearTextBefore = fullText.slice(Math.max(0, position - 500), position);

  // Count unclosed deletion markers
  const delOpens = (nearTextBefore.match(/\{--/g) || []).length;
  const delCloses = (nearTextBefore.match(/--\}/g) || []).length;
  if (delOpens > delCloses) return true; // Nested inside deletion

  // Count unclosed insertion markers
  const insOpens = (nearTextBefore.match(/\{\+\+/g) || []).length;
  const insCloses = (nearTextBefore.match(/\+\+\}/g) || []).length;
  if (insOpens > insCloses) return true; // Nested inside insertion

  // Heuristics for figure captions and other false positives:

  // Contains image/figure path patterns
  if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.pdf/i.test(commentContent)) return true;

  // Contains markdown figure reference syntax
  if (/\{#fig:|!\[/.test(commentContent)) return true;

  // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
  // periods, and Unicode letters so names like "Jens-Christian Svenning" or
  // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
  const trimmedContent = commentContent.trim();
  const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(trimmedContent);
  const hasResolvedMark = /^[✓✔]\s/.test(trimmedContent);

  // Contains URL patterns (likely a link, not a comment) — only filter when
  // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
  if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;

  // Looks like code (contains programming patterns)
  if (/function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) return true;

  // Very long without clear author pattern (likely caption, not comment)
  if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;

  // Looks like a figure caption (starts with "Fig" or contains typical caption words)
  if (/^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution|Map|Chart|Graph|Plot|Panel)/i.test(trimmedContent)) {
    return true;
  }

  // Contains LaTeX-like patterns (likely equation, not comment)
  if (/\\[a-z]+\{|\\frac|\\sum|\\int|\\begin\{/.test(commentContent)) return true;

  // Looks like BibTeX entry (not a comment)
  if (/@article\{|@book\{|@inproceedings\{/i.test(commentContent)) return true;

  return false;
}
123
+
124
// Combined pattern for any track change (not comments): matches an insertion,
// a deletion, or a substitution as a single capture group. Global and dot-all,
// so matches may span lines and the regex keeps `lastIndex` state between
// `test()` / `exec()` calls.
const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
126
+
127
+ // =============================================================================
128
+ // Public API
129
+ // =============================================================================
130
+
131
/**
 * Extract every CriticMarkup insertion, deletion, substitution and comment
 * from a document.
 *
 * Each result records the raw match, its content, its character offset, its
 * 1-based line number and a short same-line snippet of text on either side.
 * Comment candidates rejected by `isCommentFalsePositive` are left out.
 * Highlights are not reported.
 *
 * @param text - Markdown text containing CriticMarkup annotations
 * @returns Parsed annotations ordered by ascending position
 * @throws TypeError If text is not a string
 */
export function parseAnnotations(text: string): Annotation[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  // Character offset at which each line begins.
  const lineStarts: number[] = [];
  let cursor = 0;
  for (const line of text.split('\n')) {
    lineStarts.push(cursor);
    cursor += line.length + 1;
  }

  // 1-based line number: index of the first line starting after `offset`.
  const lineAt = (offset: number): number => {
    const next = lineStarts.findIndex((start) => start > offset);
    return next === -1 ? lineStarts.length : next;
  };

  // Same-line text immediately before and after the span [offset, offset + length).
  const surroundings = (offset: number, length: number): { before: string; after: string } => {
    const tail = offset + length;
    const lead = text.slice(Math.max(0, offset - CONTEXT_SNIPPET_SIZE), offset);
    const trail = text.slice(tail, Math.min(text.length, tail + CONTEXT_SNIPPET_SIZE));
    return {
      before: lead.split('\n').pop() || '',
      after: trail.split('\n')[0] || '',
    };
  };

  const found: Annotation[] = [];

  // Insertions, then deletions: both carry a single content group.
  for (const kind of ['insert', 'delete'] as const) {
    for (const hit of text.matchAll(PATTERNS[kind])) {
      const at = hit.index;
      if (at === undefined) continue;
      const around = surroundings(at, hit[0].length);
      found.push({
        type: kind,
        match: hit[0],
        content: hit[1] || '',
        position: at,
        line: lineAt(at),
        ...around,
      });
    }
  }

  // Substitutions: old text in group 1, new text in group 2.
  for (const hit of text.matchAll(PATTERNS.substitute)) {
    const at = hit.index;
    if (at === undefined) continue;
    const around = surroundings(at, hit[0].length);
    found.push({
      type: 'substitute',
      match: hit[0],
      content: hit[1] || '',
      replacement: hit[2] || '',
      position: at,
      line: lineAt(at),
      ...around,
    });
  }

  // Comments, minus anything the false-positive filter rejects.
  for (const hit of text.matchAll(PATTERNS.comment)) {
    const at = hit.index;
    if (at === undefined) continue;

    const raw = hit[1] || '';
    if (isCommentFalsePositive(raw, text, at)) continue;

    const around = surroundings(at, hit[0].length);

    // "Author: comment" — the colon must come early enough to be a name.
    const colonAt = raw.indexOf(':');
    const hasAuthor = colonAt > 0 && colonAt < MAX_AUTHOR_LENGTH;

    found.push({
      type: 'comment',
      match: hit[0],
      content: hasAuthor ? raw.slice(colonAt + 1).trim() : raw,
      author: hasAuthor ? raw.slice(0, colonAt).trim() : '',
      position: at,
      line: lineAt(at),
      ...around,
    });
  }

  found.sort((left, right) => left.position - right.position);
  return found;
}
247
+
248
/**
 * Strip annotations from text, applying changes
 * Handles nested annotations by iterating until stable
 *
 * Substitutions resolve to their new text, insertions to their content,
 * deletions to nothing, highlights to their content. Comments are removed
 * unless `options.keepComments` is true. Pandoc `[text]{.mark}` spans are
 * unwrapped, and leftover marker fragments are removed afterwards.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Strip options (`keepComments`, default false)
 * @returns Clean text with annotations applied/removed
 * @throws TypeError If text is not a string
 */
export function stripAnnotations(text: string, options: StripOptions = {}): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const { keepComments = false } = options;

  // Iterate until no more changes (handles nested annotations)
  let prev: string;
  let iterations = 0;

  do {
    prev = text;

    // Apply substitutions: {~~old~>new~~} → new
    text = text.replace(PATTERNS.substitute, '$2');

    // Apply insertions: {++text++} → text
    text = text.replace(PATTERNS.insert, '$1');

    // Apply deletions: {--text--} → nothing
    // Don't touch surrounding whitespace - just remove the annotation
    text = text.replace(PATTERNS.delete, '');

    // Remove highlights: {==text==} → text
    text = text.replace(PATTERNS.highlight, '$1');

    // Remove comments unless keeping
    if (!keepComments) {
      text = text.replace(PATTERNS.comment, '');
    }

    // Strip pandoc highlight spans: [text]{.mark} → text.
    // When `keepComments=true`, preserve `[anchor]{.mark}` that is the
    // anchor of a kept `{>>...<<}` comment. The dual-build flow runs
    // stripAnnotations() before prepareMarkdownWithMarkers(), and stripping
    // the anchor span here would leave the marker generator with no anchor
    // text — collapsing every multi-word anchor to a single fallback word
    // in the rebuilt docx.
    // The lookbehind skips a span that directly follows `<<}` with at most
    // three whitespace characters in between.
    text = keepComments
      ? text.replace(/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1')
      : text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');

    // Clean up partial/orphaned markers within the loop
    // This handles cases where nested annotations leave behind fragments

    // Empty annotations (from nested stripping)
    text = text.replace(/\{----\}/g, '');
    text = text.replace(/\{\+\+\+\+\}/g, '');
    text = text.replace(/\{--\s*--\}/g, '');
    text = text.replace(/\{\+\+\s*\+\+\}/g, '');

    // Orphaned substitution fragments: ~>text~~} or {~~text (no proper pairs)
    text = text.replace(/~>[^{]*?~~\}/g, '');
    // With the `m` flag, `$` is end of line: an opener with no `~` or `}`
    // before the line ends is dropped together with the rest of that line.
    text = text.replace(/\{~~[^~}]*$/gm, '');

    // Handle malformed substitution from nested: {~~{~~old → just strip the {~~
    text = text.replace(/\{~~\{~~/g, '{~~');
    text = text.replace(/~~\}~~\}/g, '~~}');

    iterations++;
    // Stop when a pass changes nothing, or at the safety limit.
  } while (text !== prev && iterations < MAX_STRIP_ITERATIONS);

  // Final cleanup of any remaining orphaned markers
  // Orphaned closing markers
  text = text.replace(/--\}(?:--\})+/g, '');
  text = text.replace(/\+\+\}(?:\+\+\})+/g, '');
  text = text.replace(/~~\}(?:~~\})+/g, '');
  text = text.replace(/--\}/g, '');
  text = text.replace(/\+\+\}/g, '');
  text = text.replace(/~~\}/g, '');

  // Orphaned opening markers
  text = text.replace(/\{--(?:\{--)+/g, '');
  text = text.replace(/\{\+\+(?:\{\+\+)+/g, '');
  text = text.replace(/\{~~(?:\{~~)+/g, '');
  text = text.replace(/\{--/g, '');
  text = text.replace(/\{\+\+/g, '');
  text = text.replace(/\{~~/g, '');
  // NOTE(review): this removes every remaining `~>` in the text, including
  // any that is ordinary prose rather than a substitution separator.
  text = text.replace(/~>/g, '');

  // Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
  // was inside a comment. A [ is orphan if no `]` follows before end of line.
  // We deliberately allow other `[` between the candidate and the matching `]`
  // — otherwise nested forms like `[[0..9]]{.mark}` would have their outer
  // `[` stripped because the lookahead saw the inner `[` as a barrier.
  text = text.replace(/\[(?![^\]\n]*\])/g, '');

  return text;
}
346
+
347
/**
 * Collapse multiple spaces to single space, preserving table formatting
 * Useful for cleaning up messy Word imports
 *
 * A table starts at a separator line (pipe-style or a simple dashed column
 * rule). It ends at a blank line, unless the next non-blank line within the
 * following four lines still looks like a table row or a dashed rule. Lines
 * inside a table are left untouched.
 *
 * @param text - Text to normalize
 * @returns Text with multiple spaces collapsed to single spaces
 * @throws TypeError If text is not a string
 */
export function stripToSingleSpace(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const lines = text.split('\n');
  let inTable = false;

  // Helper to check if a line looks like table content
  const looksLikeTableRow = (ln: string): boolean => {
    const trimmed = ln.trim();
    if (!trimmed) return false;
    // Has multiple consecutive spaces (column spacing)
    // OR italicized category header with trailing spaces
    return /\S\s{2,}\S/.test(trimmed) || (/^\*[^*]+\*\s*$/.test(trimmed) && /\s{2,}$/.test(ln));
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // Guard only against a missing element. An empty string is a real blank
    // line and must reach the table-exit check below; skipping it would leave
    // `inTable` stuck on for the rest of the document.
    if (line === undefined) continue;

    // Detect table separator line
    const isTableSeparator = /^\|?[\s-]*[-]{3,}[\s|:-]+[-]{3,}/.test(line) ||
      /^[-]{3,}\s{2,}[-]{3,}/.test(line);

    if (isTableSeparator) {
      inTable = true;
      continue;
    }

    // Check if we're exiting the table
    if (inTable && line.trim() === '') {
      // Peek at the next non-blank line (at most four lines ahead).
      let nextContentLine = '';
      for (let j = i + 1; j < lines.length && j < i + 5; j++) {
        const nextLine = lines[j];
        if (nextLine && nextLine.trim() !== '') {
          nextContentLine = nextLine;
          break;
        }
      }
      if (!looksLikeTableRow(nextContentLine) && !/^[-]{3,}/.test(nextContentLine.trim())) {
        inTable = false;
      }
      continue;
    }

    // Only collapse spaces outside tables
    if (!inTable) {
      lines[i] = line.replace(/ {2,}/g, ' ');
    }
  }

  return lines.join('\n');
}
408
+
409
/**
 * Check if text contains any CriticMarkup annotations
 * (insertion, deletion, substitution, comment or highlight).
 *
 * The shared patterns carry the `g` flag, so `RegExp.prototype.test` advances
 * their `lastIndex` on a hit. It is reset before and after every probe so that
 * repeated calls give the same answer and no stale offset leaks to other users
 * of the same pattern objects.
 *
 * @param text - Text to check
 * @returns True if text contains any annotations
 * @throws TypeError If text is not a string
 */
export function hasAnnotations(text: string): boolean {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  return Object.values(PATTERNS).some((pattern) => {
    pattern.lastIndex = 0;
    const found = pattern.test(text);
    pattern.lastIndex = 0;
    return found;
  });
}
426
+
427
/**
 * Apply a decision to a single annotation (accept or reject)
 *
 * Accepting keeps the inserted/new text or drops the deleted text; rejecting
 * does the opposite. Comments embedded in text that is about to disappear are
 * kept and placed where the annotation was.
 *
 * The annotation is rewritten at `annotation.position` when the text still
 * holds `annotation.match` there. Otherwise (stale or missing position) the
 * first textual occurrence is rewritten. The replacement is spliced in
 * literally, so `$` sequences in the content are never interpreted.
 *
 * @param text - Document text containing the annotation
 * @param annotation - Annotation object from parseAnnotations()
 * @param accept - True to accept the change, false to reject
 * @returns Updated text with the decision applied (unchanged for comment or
 *          unknown annotation types, or when the match is not found)
 * @throws TypeError If text is not a string or annotation is invalid
 */
export function applyDecision(text: string, annotation: Annotation, accept: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!annotation || typeof annotation.type !== 'string' || typeof annotation.match !== 'string') {
    throw new TypeError('annotation must have type and match properties');
  }
  let replacement: string;

  // Extract any comments embedded in the annotation content
  // These should be preserved when accepting deletions or rejecting insertions
  const commentPattern = /\{>>[\s\S]*?<<\}/g;
  const embeddedComments = annotation.match.match(commentPattern) || [];

  switch (annotation.type) {
    case 'insert':
      // Rejecting an insertion drops its text but keeps comments that were inside.
      replacement = accept ? annotation.content ?? '' : embeddedComments.join('');
      break;
    case 'delete':
      // Accepting a deletion drops its text but keeps comments that were inside.
      replacement = accept ? embeddedComments.join('') : annotation.content ?? '';
      break;
    case 'substitute':
      if (accept) {
        // Comments in the old text would vanish with it: prepend them to the new text.
        const oldTextComments = (annotation.content || '').match(commentPattern) || [];
        replacement = oldTextComments.join('') + (annotation.replacement || '');
      } else {
        replacement = annotation.content ?? '';
      }
      break;
    default:
      return text;
  }

  // Prefer the recorded offset so the right one of several identical
  // annotations is rewritten; fall back to the first occurrence.
  const { match, position } = annotation;
  const positionIsCurrent =
    typeof position === 'number' && position >= 0 && text.startsWith(match, position);
  const start = positionIsCurrent ? position : text.indexOf(match);
  if (start === -1) return text;

  return text.slice(0, start) + replacement + text.slice(start + match.length);
}
485
+
486
/**
 * Collect the track changes of a document, leaving comments out.
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Every parsed annotation whose type is not 'comment'
 * @throws TypeError If text is not a string (raised by parseAnnotations)
 */
export function getTrackChanges(text: string): Annotation[] {
  const changes: Annotation[] = [];
  for (const annotation of parseAnnotations(text)) {
    if (annotation.type === 'comment') continue;
    changes.push(annotation);
  }
  return changes;
}
496
+
497
/**
 * Collect the comments of a document and flag each as resolved or pending.
 *
 * A comment counts as resolved when its content ends with `[RESOLVED]` or
 * `[✓]`; that marker (and whitespace before it) is removed from the returned
 * content. Setting both filters yields an empty list.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Filter options (`pendingOnly`, `resolvedOnly`; both default to false)
 * @returns Array of comment annotations, each carrying a `resolved` flag
 * @throws TypeError If text is not a string (raised by parseAnnotations)
 */
export function getComments(text: string, options: CommentFilterOptions = {}): Annotation[] {
  const { pendingOnly = false, resolvedOnly = false } = options;
  const resolvedSuffix = /\s*\[(RESOLVED|✓)\]$/;
  const selected: Annotation[] = [];

  for (const annotation of parseAnnotations(text)) {
    if (annotation.type !== 'comment') continue;

    const body = annotation.content;
    const resolved = body.endsWith('[RESOLVED]') || body.endsWith('[✓]');

    // Apply the status filters before building the result entry.
    if (pendingOnly && resolved) continue;
    if (resolvedOnly && !resolved) continue;

    selected.push({
      ...annotation,
      resolved,
      content: resolved ? body.replace(resolvedSuffix, '').trim() : body,
    });
  }

  return selected;
}
530
+
531
/**
 * Mark a comment as resolved or pending
 *
 * Resolving appends ` [RESOLVED]` just before the closing `<<}`; a comment
 * that already ends with `[RESOLVED]` or `[✓]` is left unchanged. Marking as
 * pending removes a trailing `[RESOLVED]` or `[✓]` marker. The first
 * occurrence of `comment.match` in the text is rewritten, and the new comment
 * is inserted literally, so `$` sequences in the comment body are never
 * interpreted as replacement patterns.
 *
 * @param text - Document text containing the comment
 * @param comment - Comment annotation object from getComments()
 * @param resolved - True to mark resolved, false to mark pending
 * @returns Updated text with status marker applied
 * @throws TypeError If text is not a string or comment is invalid
 */
export function setCommentStatus(text: string, comment: Annotation, resolved: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!comment || typeof comment.match !== 'string') {
    throw new TypeError('comment must have a match property');
  }

  const originalMatch = comment.match;
  // A resolved marker sitting directly before the closing `<<}`.
  const resolvedTail = /\s*\[(RESOLVED|✓)\]<<\}$/;

  let newMatch: string;
  if (resolved) {
    // Already resolved: adding a second marker would need two passes to undo.
    if (resolvedTail.test(originalMatch)) return text;
    // Add [RESOLVED] marker before the closing <<
    newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
  } else {
    // Remove resolved markers
    newMatch = originalMatch.replace(resolvedTail, '<<}');
  }

  // Replacer function: the new text is used verbatim.
  return text.replace(originalMatch, () => newMatch);
}
559
+
560
/**
 * Tally the annotations of a document per type.
 *
 * `total` counts every annotation returned by parseAnnotations; the per-type
 * fields count insertions, deletions, substitutions and comments.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Counts by annotation type
 * @throws TypeError If text is not a string (raised by parseAnnotations)
 */
export function countAnnotations(text: string): AnnotationCounts {
  const tally: AnnotationCounts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };

  parseAnnotations(text).forEach(({ type }) => {
    tally.total += 1;
    if (type === 'insert') {
      tally.inserts += 1;
    } else if (type === 'delete') {
      tally.deletes += 1;
    } else if (type === 'substitute') {
      tally.substitutes += 1;
    } else if (type === 'comment') {
      tally.comments += 1;
    }
  });

  return tally;
}
591
+
592
/**
 * Clean up orphaned/malformed CriticMarkup markers
 * This can happen when track changes span across comment boundaries
 *
 * Two passes: first, closing markers with no opener before them are removed;
 * then openers that are never closed are removed while their text is kept
 * (for an unclosed substitution, only the new text is kept).
 *
 * @param text - Document text with potentially malformed markers
 * @returns Cleaned text with orphaned markers removed
 * @throws TypeError If text is not a string
 */
export function cleanupOrphanedMarkers(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  let result = text;

  // Remove orphaned insertion end markers (++} not preceded by {++)
  // These occur when an insertion's start was inside something that got deleted/replaced
  // The lookbehind only accepts an opener when no `}` lies between it and the
  // closing marker, so any `}` inside the span makes the closer count as orphaned.
  result = result.replace(/(?<!\{\+\+[^}]*)\+\+\}/g, '');

  // Remove orphaned deletion end markers (--} not preceded by {--)
  result = result.replace(/(?<!\{--[^}]*)--\}/g, '');

  // Remove orphaned substitution end markers (~~} not preceded by {~~)
  result = result.replace(/(?<!\{~~[^}]*)~~\}/g, '');

  // Fix unclosed insertions: {++ without matching ++}
  // Find {++ and check if there's a matching ++} before the next { marker
  // The captured content cannot contain `+`, and `$` (no `m` flag) is the end
  // of the whole string, so a match runs from `{++` to the next `{+`, `{-`,
  // `{~`, `{>` or to the end of the text.
  result = result.replace(/\{\+\+([^+]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, content) => {
    // If content has no ++}, it's unclosed - just keep the content
    // (content never contains `+`, so this condition holds for every match)
    if (!content.includes('++}')) {
      return content;
    }
    return match;
  });

  // Fix unclosed deletions: {-- without matching --}
  // The captured content cannot contain `-`, so an unclosed deletion whose
  // text has a hyphen before the next marker is not matched here.
  result = result.replace(/\{--([^-]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, content) => {
    if (!content.includes('--}')) {
      return content;
    }
    return match;
  });

  // Fix unclosed substitutions: {~~ without matching ~~}
  // This is trickier because we need both ~> and ~~}
  // Neither the old nor the new text may contain `~`. The whole match is
  // checked for `~~}`; the captured `old` text is not used.
  result = result.replace(/\{~~([^~]*?)~>([^~]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, old, newText) => {
    if (!match.includes('~~}')) {
      // Unclosed substitution - keep the new text
      return newText;
    }
    return match;
  });

  return result;
}