docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
@@ -1,644 +1,644 @@
1
- /**
2
- * CriticMarkup annotation parsing and manipulation
3
- *
4
- * Syntax:
5
- * {++inserted text++} - Insertions
6
- * {--deleted text--} - Deletions
7
- * {~~old~>new~~} - Substitutions
8
- * {>>Author: comment<<} - Comments
9
- * {==text==} - Highlights
10
- */
11
-
12
- import type { Annotation, AnnotationCounts, StripOptions, CommentFilterOptions } from './types.js';
13
-
14
- // =============================================================================
15
- // Constants
16
- // =============================================================================
17
-
18
- /** Window size for context lookup (characters before/after position) */
19
- const CONTEXT_WINDOW_SIZE = 2000;
20
-
21
- /** Characters of context to include in annotation results */
22
- const CONTEXT_SNIPPET_SIZE = 50;
23
-
24
- /** Maximum iterations for nested annotation stripping (safety limit) */
25
- const MAX_STRIP_ITERATIONS = 20;
26
-
27
- /** Maximum author name length in comments */
28
- const MAX_AUTHOR_LENGTH = 30;
29
-
30
- /** Maximum content length before heuristic assumes it's not a comment */
31
- const MAX_COMMENT_CONTENT_LENGTH = 200;
32
-
33
- // =============================================================================
34
- // Patterns
35
- // =============================================================================
36
-
37
- // Patterns for each annotation type
38
- const PATTERNS = {
39
- insert: /\{\+\+(.+?)\+\+\}/gs,
40
- delete: /\{--(.+?)--\}/gs,
41
- substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
42
- comment: /\{>>(.+?)<<\}/gs,
43
- highlight: /\{==(.+?)==\}/gs,
44
- };
45
-
46
- /**
47
- * Check if a potential comment is actually a false positive
48
- * (e.g., figure caption, nested inside other annotation, code block, etc.)
49
- * @param commentContent - The content inside {>>...<<}
50
- * @param fullText - The full document text
51
- * @param position - Position of the comment in the text
52
- * @returns true if this is a false positive (not a real comment)
53
- */
54
- function isCommentFalsePositive(commentContent: string, fullText: string, position: number): boolean {
55
- // Check if inside a code block (fenced or indented)
56
- const textBefore = fullText.slice(Math.max(0, position - CONTEXT_WINDOW_SIZE), position);
57
- const textAfter = fullText.slice(position, Math.min(fullText.length, position + CONTEXT_WINDOW_SIZE));
58
-
59
- // Count unclosed fenced code blocks (``` or ~~~)
60
- const fenceOpens = (textBefore.match(/^```|^~~~/gm) || []).length;
61
- const fenceCloses = (textBefore.match(/```$|~~~$/gm) || []).length;
62
- if (fenceOpens > fenceCloses) return true; // Inside code block
63
-
64
- // Check if on an indented line (4+ spaces or tab at line start = code)
65
- const lineStart = textBefore.lastIndexOf('\n') + 1;
66
- const linePrefix = fullText.slice(lineStart, position);
67
- if (/^(\t| )/.test(linePrefix)) return true; // Indented code
68
-
69
- // Check if inside inline code backticks
70
- const backticksBefore = (linePrefix.match(/`/g) || []).length;
71
- if (backticksBefore % 2 === 1) return true; // Inside inline code
72
-
73
- // Check if nested inside a deletion or insertion block
74
- const nearTextBefore = fullText.slice(Math.max(0, position - 500), position);
75
-
76
- // Count unclosed deletion markers
77
- const delOpens = (nearTextBefore.match(/\{--/g) || []).length;
78
- const delCloses = (nearTextBefore.match(/--\}/g) || []).length;
79
- if (delOpens > delCloses) return true; // Nested inside deletion
80
-
81
- // Count unclosed insertion markers
82
- const insOpens = (nearTextBefore.match(/\{\+\+/g) || []).length;
83
- const insCloses = (nearTextBefore.match(/\+\+\}/g) || []).length;
84
- if (insOpens > insCloses) return true; // Nested inside insertion
85
-
86
- // Heuristics for figure captions and other false positives:
87
-
88
- // Contains image/figure path patterns
89
- if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.pdf/i.test(commentContent)) return true;
90
-
91
- // Contains markdown figure reference syntax
92
- if (/\{#fig:|!\[/.test(commentContent)) return true;
93
-
94
- // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
95
- // periods, and Unicode letters so names like "Jens-Christian Svenning" or
96
- // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
97
- const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(commentContent.trim());
98
- const hasResolvedMark = /^[✓✔]\s/.test(commentContent.trim());
99
-
100
- // Contains URL patterns (likely a link, not a comment) — only filter when
101
- // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
102
- if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;
103
-
104
- // Looks like code (contains programming patterns)
105
- if (/function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) return true;
106
-
107
- // Very long without clear author pattern (likely caption, not comment)
108
- if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;
109
-
110
- // Looks like a figure caption (starts with "Fig" or contains typical caption words)
111
- if (/^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution|Map|Chart|Graph|Plot|Panel)/i.test(commentContent.trim())) {
112
- return true;
113
- }
114
-
115
- // Contains LaTeX-like patterns (likely equation, not comment)
116
- if (/\\[a-z]+\{|\\frac|\\sum|\\int|\\begin\{/.test(commentContent)) return true;
117
-
118
- // Looks like BibTeX entry (not a comment)
119
- if (/@article\{|@book\{|@inproceedings\{/i.test(commentContent)) return true;
120
-
121
- return false;
122
- }
123
-
124
- // Combined pattern for any track change (not comments)
125
- const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
126
-
127
- // =============================================================================
128
- // Public API
129
- // =============================================================================
130
-
131
- /**
132
- * Parse all annotations from text
133
- * @param text - Markdown text containing CriticMarkup annotations
134
- * @returns Array of parsed annotations sorted by position
135
- * @throws TypeError If text is not a string
136
- */
137
- export function parseAnnotations(text: string): Annotation[] {
138
- if (typeof text !== 'string') {
139
- throw new TypeError(`text must be a string, got ${typeof text}`);
140
- }
141
-
142
- const annotations: Annotation[] = [];
143
-
144
- // Build line number lookup
145
- const lines = text.split('\n');
146
- let pos = 0;
147
- const lineStarts = lines.map((line) => {
148
- const start = pos;
149
- pos += line.length + 1;
150
- return start;
151
- });
152
-
153
- function getLine(position: number): number {
154
- for (let i = 0; i < lineStarts.length; i++) {
155
- const start = lineStarts[i];
156
- if (start !== undefined && start > position) return i;
157
- }
158
- return lineStarts.length;
159
- }
160
-
161
- function getContext(position: number, length: number): { before: string; after: string } {
162
- const start = Math.max(0, position - CONTEXT_SNIPPET_SIZE);
163
- const end = Math.min(text.length, position + length + CONTEXT_SNIPPET_SIZE);
164
- const before = text.slice(start, position).split('\n').pop() || '';
165
- const after = text.slice(position + length, end).split('\n')[0] || '';
166
- return { before, after };
167
- }
168
-
169
- // Parse insertions
170
- for (const match of text.matchAll(PATTERNS.insert)) {
171
- if (match.index === undefined) continue;
172
- const ctx = getContext(match.index, match[0].length);
173
- annotations.push({
174
- type: 'insert',
175
- match: match[0],
176
- content: match[1] || '',
177
- position: match.index,
178
- line: getLine(match.index),
179
- ...ctx,
180
- });
181
- }
182
-
183
- // Parse deletions
184
- for (const match of text.matchAll(PATTERNS.delete)) {
185
- if (match.index === undefined) continue;
186
- const ctx = getContext(match.index, match[0].length);
187
- annotations.push({
188
- type: 'delete',
189
- match: match[0],
190
- content: match[1] || '',
191
- position: match.index,
192
- line: getLine(match.index),
193
- ...ctx,
194
- });
195
- }
196
-
197
- // Parse substitutions
198
- for (const match of text.matchAll(PATTERNS.substitute)) {
199
- if (match.index === undefined) continue;
200
- const ctx = getContext(match.index, match[0].length);
201
- annotations.push({
202
- type: 'substitute',
203
- match: match[0],
204
- content: match[1] || '',
205
- replacement: match[2] || '',
206
- position: match.index,
207
- line: getLine(match.index),
208
- ...ctx,
209
- });
210
- }
211
-
212
- // Parse comments (with false positive filtering)
213
- for (const match of text.matchAll(PATTERNS.comment)) {
214
- if (match.index === undefined) continue;
215
- // Skip false positives (figure captions, nested annotations, etc.)
216
- const commentContent = match[1] || '';
217
- if (isCommentFalsePositive(commentContent, text, match.index)) {
218
- continue;
219
- }
220
-
221
- const ctx = getContext(match.index, match[0].length);
222
- let commentText = commentContent;
223
- let author = '';
224
-
225
- // Extract author if present (format: "Author: comment")
226
- const colonIdx = commentText.indexOf(':');
227
- if (colonIdx > 0 && colonIdx < MAX_AUTHOR_LENGTH) {
228
- author = commentText.slice(0, colonIdx).trim();
229
- commentText = commentText.slice(colonIdx + 1).trim();
230
- }
231
-
232
- annotations.push({
233
- type: 'comment',
234
- match: match[0],
235
- content: commentText,
236
- author,
237
- position: match.index,
238
- line: getLine(match.index),
239
- ...ctx,
240
- });
241
- }
242
-
243
- // Sort by position
244
- annotations.sort((a, b) => a.position - b.position);
245
- return annotations;
246
- }
247
-
248
- /**
249
- * Strip annotations from text, applying changes
250
- * Handles nested annotations by iterating until stable
251
- * @param text - Markdown text with CriticMarkup annotations
252
- * @param options - Strip options
253
- * @returns Clean text with annotations applied/removed
254
- * @throws TypeError If text is not a string
255
- */
256
- export function stripAnnotations(text: string, options: StripOptions = {}): string {
257
- if (typeof text !== 'string') {
258
- throw new TypeError(`text must be a string, got ${typeof text}`);
259
- }
260
-
261
- const { keepComments = false } = options;
262
-
263
- // Iterate until no more changes (handles nested annotations)
264
- let prev: string;
265
- let iterations = 0;
266
-
267
- do {
268
- prev = text;
269
-
270
- // Apply substitutions: {~~old~>new~~} → new
271
- text = text.replace(PATTERNS.substitute, '$2');
272
-
273
- // Apply insertions: {++text++} → text
274
- text = text.replace(PATTERNS.insert, '$1');
275
-
276
- // Apply deletions: {--text--} → nothing
277
- // Don't touch surrounding whitespace - just remove the annotation
278
- text = text.replace(PATTERNS.delete, '');
279
-
280
- // Remove highlights: {==text==} → text
281
- text = text.replace(PATTERNS.highlight, '$1');
282
-
283
- // Remove comments unless keeping
284
- if (!keepComments) {
285
- text = text.replace(PATTERNS.comment, '');
286
- }
287
-
288
- // Strip pandoc highlight spans: [text]{.mark} → text.
289
- // When `keepComments=true`, preserve `[anchor]{.mark}` that is the
290
- // anchor of a kept `{>>...<<}` comment. The dual-build flow runs
291
- // stripAnnotations() before prepareMarkdownWithMarkers(), and stripping
292
- // the anchor span here would leave the marker generator with no anchor
293
- // text — collapsing every multi-word anchor to a single fallback word
294
- // in the rebuilt docx.
295
- text = keepComments
296
- ? text.replace(/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1')
297
- : text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');
298
-
299
- // Clean up partial/orphaned markers within the loop
300
- // This handles cases where nested annotations leave behind fragments
301
-
302
- // Empty annotations (from nested stripping)
303
- text = text.replace(/\{----\}/g, '');
304
- text = text.replace(/\{\+\+\+\+\}/g, '');
305
- text = text.replace(/\{--\s*--\}/g, '');
306
- text = text.replace(/\{\+\+\s*\+\+\}/g, '');
307
-
308
- // Orphaned substitution fragments: ~>text~~} or {~~text (no proper pairs)
309
- text = text.replace(/~>[^{]*?~~\}/g, '');
310
- text = text.replace(/\{~~[^~}]*$/gm, '');
311
-
312
- // Handle malformed substitution from nested: {~~{~~old → just strip the {~~
313
- text = text.replace(/\{~~\{~~/g, '{~~');
314
- text = text.replace(/~~\}~~\}/g, '~~}');
315
-
316
- iterations++;
317
- } while (text !== prev && iterations < MAX_STRIP_ITERATIONS);
318
-
319
- // Final cleanup of any remaining orphaned markers
320
- // Orphaned closing markers
321
- text = text.replace(/--\}(?:--\})+/g, '');
322
- text = text.replace(/\+\+\}(?:\+\+\})+/g, '');
323
- text = text.replace(/~~\}(?:~~\})+/g, '');
324
- text = text.replace(/--\}/g, '');
325
- text = text.replace(/\+\+\}/g, '');
326
- text = text.replace(/~~\}/g, '');
327
-
328
- // Orphaned opening markers
329
- text = text.replace(/\{--(?:\{--)+/g, '');
330
- text = text.replace(/\{\+\+(?:\{\+\+)+/g, '');
331
- text = text.replace(/\{~~(?:\{~~)+/g, '');
332
- text = text.replace(/\{--/g, '');
333
- text = text.replace(/\{\+\+/g, '');
334
- text = text.replace(/\{~~/g, '');
335
- text = text.replace(/~>/g, '');
336
-
337
- // Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
338
- // was inside a comment. A [ is orphan if no `]` follows before end of line.
339
- // We deliberately allow other `[` between the candidate and the matching `]`
340
- // — otherwise nested forms like `[[0..9]]{.mark}` would have their outer
341
- // `[` stripped because the lookahead saw the inner `[` as a barrier.
342
- text = text.replace(/\[(?![^\]\n]*\])/g, '');
343
-
344
- return text;
345
- }
346
-
347
- /**
348
- * Collapse multiple spaces to single space, preserving table formatting
349
- * Useful for cleaning up messy Word imports
350
- * @param text - Text to normalize
351
- * @returns Text with multiple spaces collapsed to single spaces
352
- * @throws TypeError If text is not a string
353
- */
354
- export function stripToSingleSpace(text: string): string {
355
- if (typeof text !== 'string') {
356
- throw new TypeError(`text must be a string, got ${typeof text}`);
357
- }
358
-
359
- const lines = text.split('\n');
360
- let inTable = false;
361
-
362
- // Helper to check if a line looks like table content
363
- const looksLikeTableRow = (ln: string): boolean => {
364
- const trimmed = ln.trim();
365
- if (!trimmed) return false;
366
- // Has multiple consecutive spaces (column spacing)
367
- // OR italicized category header with trailing spaces
368
- return /\S\s{2,}\S/.test(trimmed) || (/^\*[^*]+\*\s*$/.test(trimmed) && /\s{2,}$/.test(ln));
369
- };
370
-
371
- for (let i = 0; i < lines.length; i++) {
372
- const line = lines[i];
373
- if (!line) continue;
374
-
375
- // Detect table separator line
376
- const isTableSeparator = /^\|?[\s-]*[-]{3,}[\s|:-]+[-]{3,}/.test(line) ||
377
- /^[-]{3,}\s{2,}[-]{3,}/.test(line);
378
-
379
- if (isTableSeparator) {
380
- inTable = true;
381
- continue;
382
- }
383
-
384
- // Check if we're exiting the table
385
- if (inTable && line.trim() === '') {
386
- let nextContentLine = '';
387
- for (let j = i + 1; j < lines.length && j < i + 5; j++) {
388
- const nextLine = lines[j];
389
- if (nextLine && nextLine.trim() !== '') {
390
- nextContentLine = nextLine;
391
- break;
392
- }
393
- }
394
- if (!looksLikeTableRow(nextContentLine) && !/^[-]{3,}/.test(nextContentLine.trim())) {
395
- inTable = false;
396
- }
397
- continue;
398
- }
399
-
400
- // Only collapse spaces outside tables
401
- if (!inTable) {
402
- lines[i] = line.replace(/ +/g, ' ');
403
- }
404
- }
405
-
406
- return lines.join('\n');
407
- }
408
-
409
- /**
410
- * Check if text contains any CriticMarkup annotations
411
- * @param text - Text to check
412
- * @returns True if text contains any annotations
413
- * @throws TypeError If text is not a string
414
- */
415
- export function hasAnnotations(text: string): boolean {
416
- if (typeof text !== 'string') {
417
- throw new TypeError(`text must be a string, got ${typeof text}`);
418
- }
419
-
420
- return PATTERNS.insert.test(text) ||
421
- PATTERNS.delete.test(text) ||
422
- PATTERNS.substitute.test(text) ||
423
- PATTERNS.comment.test(text) ||
424
- PATTERNS.highlight.test(text);
425
- }
426
-
427
- /**
428
- * Apply a decision to a single annotation (accept or reject)
429
- * @param text - Document text containing the annotation
430
- * @param annotation - Annotation object from parseAnnotations()
431
- * @param accept - True to accept the change, false to reject
432
- * @returns Updated text with the decision applied
433
- * @throws TypeError If text is not a string or annotation is invalid
434
- */
435
- export function applyDecision(text: string, annotation: Annotation, accept: boolean): string {
436
- if (typeof text !== 'string') {
437
- throw new TypeError(`text must be a string, got ${typeof text}`);
438
- }
439
- if (!annotation || typeof annotation.type !== 'string' || typeof annotation.match !== 'string') {
440
- throw new TypeError('annotation must have type and match properties');
441
- }
442
- let replacement: string;
443
-
444
- // Extract any comments embedded in the annotation content
445
- // These should be preserved when accepting deletions or rejecting insertions
446
- const commentPattern = /\{>>[\s\S]*?<<\}/g;
447
- const embeddedComments = (annotation.match || '').match(commentPattern) || [];
448
-
449
- switch (annotation.type) {
450
- case 'insert':
451
- if (accept) {
452
- replacement = annotation.content;
453
- } else {
454
- // Rejecting insertion - preserve any comments that were inside
455
- replacement = embeddedComments.length > 0 ? embeddedComments.join('') : '';
456
- }
457
- break;
458
- case 'delete':
459
- if (accept) {
460
- // Accepting deletion - preserve any comments by placing them before
461
- replacement = embeddedComments.length > 0 ? embeddedComments.join('') : '';
462
- } else {
463
- replacement = annotation.content;
464
- }
465
- break;
466
- case 'substitute':
467
- if (accept) {
468
- // For substitutions, check if comments are in the old text being replaced
469
- const oldTextComments = (annotation.content || '').match(commentPattern) || [];
470
- replacement = annotation.replacement || '';
471
- if (oldTextComments.length > 0) {
472
- // Prepend comments that were in the old text
473
- replacement = oldTextComments.join('') + replacement;
474
- }
475
- } else {
476
- replacement = annotation.content;
477
- }
478
- break;
479
- default:
480
- return text;
481
- }
482
-
483
- return text.replace(annotation.match, replacement);
484
- }
485
-
486
- /**
487
- * Get track changes only (no comments)
488
- * @param text - Markdown text with CriticMarkup annotations
489
- * @returns Array of insert/delete/substitute annotations
490
- * @throws TypeError If text is not a string
491
- */
492
- export function getTrackChanges(text: string): Annotation[] {
493
- // Input validation delegated to parseAnnotations
494
- return parseAnnotations(text).filter((a) => a.type !== 'comment');
495
- }
496
-
497
- /**
498
- * Get comments only
499
- * @param text - Markdown text with CriticMarkup annotations
500
- * @param options - Filter options
501
- * @returns Array of comment annotations
502
- * @throws TypeError If text is not a string
503
- */
504
- export function getComments(text: string, options: CommentFilterOptions = {}): Annotation[] {
505
- // Input validation delegated to parseAnnotations
506
- const { pendingOnly = false, resolvedOnly = false } = options;
507
- let comments = parseAnnotations(text).filter((a) => a.type === 'comment');
508
-
509
- // Check for resolved status marker at end of comment
510
- comments = comments.map((c) => {
511
- const resolved = c.content.endsWith('[RESOLVED]') || c.content.endsWith('[✓]');
512
- return {
513
- ...c,
514
- resolved,
515
- content: resolved
516
- ? c.content.replace(/\s*\[(RESOLVED|✓)\]$/, '').trim()
517
- : c.content,
518
- };
519
- });
520
-
521
- if (pendingOnly) {
522
- comments = comments.filter((c) => !c.resolved);
523
- }
524
- if (resolvedOnly) {
525
- comments = comments.filter((c) => c.resolved);
526
- }
527
-
528
- return comments;
529
- }
530
-
531
- /**
532
- * Mark a comment as resolved or pending
533
- * @param text - Document text containing the comment
534
- * @param comment - Comment annotation object from getComments()
535
- * @param resolved - True to mark resolved, false to mark pending
536
- * @returns Updated text with status marker applied
537
- * @throws TypeError If text is not a string or comment is invalid
538
- */
539
- export function setCommentStatus(text: string, comment: Annotation, resolved: boolean): string {
540
- if (typeof text !== 'string') {
541
- throw new TypeError(`text must be a string, got ${typeof text}`);
542
- }
543
- if (!comment || typeof comment.match !== 'string') {
544
- throw new TypeError('comment must have a match property');
545
- }
546
- // Find the comment in the text
547
- const originalMatch = comment.match;
548
-
549
- if (resolved) {
550
- // Add [RESOLVED] marker before the closing <<
551
- const newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
552
- return text.replace(originalMatch, newMatch);
553
- } else {
554
- // Remove resolved markers
555
- const newMatch = originalMatch.replace(/\s*\[(RESOLVED|✓)\]<<\}$/, '<<}');
556
- return text.replace(originalMatch, newMatch);
557
- }
558
- }
559
-
560
- /**
561
- * Count annotations by type
562
- * @param text - Markdown text with CriticMarkup annotations
563
- * @returns Counts by annotation type
564
- * @throws TypeError If text is not a string
565
- */
566
- export function countAnnotations(text: string): AnnotationCounts {
567
- // Input validation delegated to parseAnnotations
568
- const annotations = parseAnnotations(text);
569
- const counts: AnnotationCounts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };
570
-
571
- for (const a of annotations) {
572
- counts.total++;
573
- switch (a.type) {
574
- case 'insert':
575
- counts.inserts++;
576
- break;
577
- case 'delete':
578
- counts.deletes++;
579
- break;
580
- case 'substitute':
581
- counts.substitutes++;
582
- break;
583
- case 'comment':
584
- counts.comments++;
585
- break;
586
- }
587
- }
588
-
589
- return counts;
590
- }
591
-
592
- /**
593
- * Clean up orphaned/malformed CriticMarkup markers
594
- * This can happen when track changes span across comment boundaries
595
- * @param text - Document text with potentially malformed markers
596
- * @returns Cleaned text with orphaned markers removed
597
- * @throws TypeError If text is not a string
598
- */
599
- export function cleanupOrphanedMarkers(text: string): string {
600
- if (typeof text !== 'string') {
601
- throw new TypeError(`text must be a string, got ${typeof text}`);
602
- }
603
- let result = text;
604
-
605
- // Remove orphaned insertion end markers (++} not preceded by {++)
606
- // These occur when an insertion's start was inside something that got deleted/replaced
607
- result = result.replace(/(?<!\{\+\+[^}]*)\+\+\}/g, '');
608
-
609
- // Remove orphaned deletion end markers (--} not preceded by {--)
610
- result = result.replace(/(?<!\{--[^}]*)--\}/g, '');
611
-
612
- // Remove orphaned substitution end markers (~~} not preceded by {~~)
613
- result = result.replace(/(?<!\{~~[^}]*)~~\}/g, '');
614
-
615
- // Fix unclosed insertions: {++ without matching ++}
616
- // Find {++ and check if there's a matching ++} before the next { marker
617
- result = result.replace(/\{\+\+([^+]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, content) => {
618
- // If content has no ++}, it's unclosed - just keep the content
619
- if (!content.includes('++}')) {
620
- return content;
621
- }
622
- return match;
623
- });
624
-
625
- // Fix unclosed deletions: {-- without matching --}
626
- result = result.replace(/\{--([^-]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, content) => {
627
- if (!content.includes('--}')) {
628
- return content;
629
- }
630
- return match;
631
- });
632
-
633
- // Fix unclosed substitutions: {~~ without matching ~~}
634
- // This is trickier because we need both ~> and ~~}
635
- result = result.replace(/\{~~([^~]*?)~>([^~]*?)(?=\{[+\-~>]|\{>>|$)/g, (match, old, newText) => {
636
- if (!match.includes('~~}')) {
637
- // Unclosed substitution - keep the new text
638
- return newText;
639
- }
640
- return match;
641
- });
642
-
643
- return result;
644
- }
1
+ /**
2
+ * CriticMarkup annotation parsing and manipulation
3
+ *
4
+ * Syntax:
5
+ * {++inserted text++} - Insertions
6
+ * {--deleted text--} - Deletions
7
+ * {~~old~>new~~} - Substitutions
8
+ * {>>Author: comment<<} - Comments
9
+ * {==text==} - Highlights
10
+ */
11
+
12
+ import type { Annotation, AnnotationCounts, StripOptions, CommentFilterOptions } from './types.js';
13
+
14
+ // =============================================================================
15
+ // Constants
16
+ // =============================================================================
17
+
18
/**
 * Window size for context lookup (characters before/after position).
 * Bounds how much surrounding document text is sliced out when deciding
 * whether a comment match sits inside a code block.
 */
const CONTEXT_WINDOW_SIZE = 2000;

/**
 * Characters of context to include in annotation results.
 * Applied on each side of a match; the snippet is further cut at the nearest
 * line break by the parser.
 */
const CONTEXT_SNIPPET_SIZE = 50;

/**
 * Maximum iterations for nested annotation stripping (safety limit).
 * The strip loop stops earlier as soon as a pass leaves the text unchanged.
 */
const MAX_STRIP_ITERATIONS = 20;

/**
 * Maximum author name length in comments.
 * A colon must appear before this offset for the text preceding it to be
 * treated as the comment's author.
 */
const MAX_AUTHOR_LENGTH = 30;

/**
 * Maximum content length before heuristic assumes it's not a comment.
 * Only applies to content that has neither an author prefix nor a
 * resolved mark.
 */
const MAX_COMMENT_CONTENT_LENGTH = 200;
32
+
33
+ // =============================================================================
34
+ // Patterns
35
+ // =============================================================================
36
+
37
// Patterns for each annotation type.
// Every pattern is lazy (`.+?`) so adjacent annotations are matched
// separately, and uses the `s` flag so annotation content may span lines.
//
// NOTE: these are shared *global* (`g`) regex objects and therefore carry
// mutable `lastIndex` state. `replace()` resets it, but `test()` / `exec()`
// advance it and `matchAll()` starts from whatever value is currently stored,
// so callers should not rely on `lastIndex` being 0.
const PATTERNS = {
  // {++inserted text++}
  insert: /\{\+\+(.+?)\+\+\}/gs,
  // {--deleted text--}
  delete: /\{--(.+?)--\}/gs,
  // {~~old~>new~~} (group 1 = old text, group 2 = new text)
  substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
  // {>>Author: comment<<}
  comment: /\{>>(.+?)<<\}/gs,
  // {==highlighted text==}
  highlight: /\{==(.+?)==\}/gs,
};
45
+
46
/**
 * Check if a potential comment is actually a false positive
 * (e.g., figure caption, nested inside other annotation, code block, etc.)
 *
 * The checks run in this order: fenced code, indented code, inline code,
 * nesting inside an open deletion/insertion, then content heuristics
 * (image paths, figure syntax, bare URLs, code, over-long text, captions,
 * LaTeX, BibTeX).
 *
 * @param commentContent - The content inside {>>...<<}
 * @param fullText - The full document text
 * @param position - Position of the comment in the text
 * @returns true if this is a false positive (not a real comment)
 */
function isCommentFalsePositive(commentContent: string, fullText: string, position: number): boolean {
  // Fenced code blocks (``` or ~~~): walk every fence line that precedes the
  // comment and track whether a fence is still open. A fence only closes a
  // block opened with the same character. The whole prefix is scanned because
  // open/closed state cannot be determined reliably from a truncated window.
  let openFence = '';
  for (const fence of fullText.slice(0, position).matchAll(/^(?:`{3,}|~{3,})/gm)) {
    const marker = fence[0].charAt(0);
    if (openFence === '') {
      openFence = marker;
    } else if (marker === openFence) {
      openFence = '';
    }
  }
  if (openFence !== '') return true; // Inside code block

  // Text between the start of the current line and the comment. The line
  // start is located in absolute document coordinates so it stays correct
  // however far into the document the comment sits.
  const lineStart = position > 0 ? fullText.lastIndexOf('\n', position - 1) + 1 : 0;
  const linePrefix = fullText.slice(lineStart, position);

  // Indented line (4+ spaces or tab at line start = code)
  if (/^(\t| {4})/.test(linePrefix)) return true; // Indented code

  // An odd number of backticks earlier on the line means we are inside inline code
  const backticksBefore = (linePrefix.match(/`/g) || []).length;
  if (backticksBefore % 2 === 1) return true; // Inside inline code

  // Check if nested inside a deletion or insertion block
  const nearTextBefore = fullText.slice(Math.max(0, position - 500), position);

  // Count unclosed deletion markers
  const delOpens = (nearTextBefore.match(/\{--/g) || []).length;
  const delCloses = (nearTextBefore.match(/--\}/g) || []).length;
  if (delOpens > delCloses) return true; // Nested inside deletion

  // Count unclosed insertion markers
  const insOpens = (nearTextBefore.match(/\{\+\+/g) || []).length;
  const insCloses = (nearTextBefore.match(/\+\+\}/g) || []).length;
  if (insOpens > insCloses) return true; // Nested inside insertion

  // Heuristics for figure captions and other false positives:
  const trimmed = commentContent.trim();

  // Contains image/figure path patterns
  if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.pdf/i.test(commentContent)) return true;

  // Contains markdown figure reference syntax
  if (/\{#fig:|!\[/.test(commentContent)) return true;

  // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
  // periods, and Unicode letters so names like "Jens-Christian Svenning" or
  // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
  const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(trimmed);
  const hasResolvedMark = /^[✓✔]\s/.test(trimmed);

  // Contains URL patterns (likely a link, not a comment) — only filter when
  // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
  if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;

  // Looks like code (contains programming patterns)
  if (/function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) return true;

  // Very long without clear author pattern (likely caption, not comment)
  if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;

  // Looks like a figure caption (starts with "Fig" or contains typical caption words)
  if (/^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution|Map|Chart|Graph|Plot|Panel)/i.test(trimmed)) {
    return true;
  }

  // Contains LaTeX-like patterns (likely equation, not comment)
  if (/\\[a-z]+\{|\\frac|\\sum|\\int|\\begin\{/.test(commentContent)) return true;

  // Looks like BibTeX entry (not a comment)
  if (/@article\{|@book\{|@inproceedings\{/i.test(commentContent)) return true;

  return false;
}
123
+
124
// Combined pattern for any track change (not comments): matches an insertion,
// a deletion, or a substitution as one alternation, with the `s` flag so the
// content may span lines.
// NOTE(review): not referenced anywhere else in the visible part of this
// module — confirm whether it is still needed.
const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
126
+
127
+ // =============================================================================
128
+ // Public API
129
+ // =============================================================================
130
+
131
/**
 * Parse all annotations from text
 *
 * Insertions, deletions, substitutions and comments are collected (highlights
 * are not reported). Comment matches are passed through
 * isCommentFalsePositive() first, and an "Author:" prefix is split off into
 * the `author` field when the colon appears early enough.
 *
 * @param text - Markdown text containing CriticMarkup annotations
 * @returns Array of parsed annotations sorted by position
 * @throws TypeError If text is not a string
 */
export function parseAnnotations(text: string): Annotation[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const annotations: Annotation[] = [];

  // Character offset at which each line starts, used for line-number lookup
  const lineStarts: number[] = [];
  let offset = 0;
  for (const line of text.split('\n')) {
    lineStarts.push(offset);
    offset += line.length + 1;
  }

  // 1-based line number containing `position`
  const lineOf = (position: number): number => {
    const next = lineStarts.findIndex((start) => start > position);
    return next === -1 ? lineStarts.length : next;
  };

  // Same-line text immediately before and after the match
  const contextAround = (position: number, length: number): { before: string; after: string } => {
    const tail = position + length;
    const before = text.slice(Math.max(0, position - CONTEXT_SNIPPET_SIZE), position).split('\n').pop() || '';
    const after = text.slice(tail, Math.min(text.length, tail + CONTEXT_SNIPPET_SIZE)).split('\n')[0] || '';
    return { before, after };
  };

  // The shared patterns are global regexes, and matchAll() starts scanning at
  // the pattern's current lastIndex. Scan with a fresh copy so stale state
  // left behind by an earlier test()/exec() cannot hide leading annotations.
  const scan = (pattern: RegExp) => text.matchAll(new RegExp(pattern.source, pattern.flags));

  // Insertions and deletions share the same shape
  for (const type of ['insert', 'delete'] as const) {
    for (const match of scan(PATTERNS[type])) {
      if (match.index === undefined) continue;
      annotations.push({
        type,
        match: match[0],
        content: match[1] || '',
        position: match.index,
        line: lineOf(match.index),
        ...contextAround(match.index, match[0].length),
      });
    }
  }

  // Substitutions carry the new text as `replacement`
  for (const match of scan(PATTERNS.substitute)) {
    if (match.index === undefined) continue;
    annotations.push({
      type: 'substitute',
      match: match[0],
      content: match[1] || '',
      replacement: match[2] || '',
      position: match.index,
      line: lineOf(match.index),
      ...contextAround(match.index, match[0].length),
    });
  }

  // Comments (with false positive filtering)
  for (const match of scan(PATTERNS.comment)) {
    if (match.index === undefined) continue;
    const rawContent = match[1] || '';
    // Skip false positives (figure captions, nested annotations, etc.)
    if (isCommentFalsePositive(rawContent, text, match.index)) continue;

    // Extract author if present (format: "Author: comment")
    let commentText = rawContent;
    let author = '';
    const colonIdx = commentText.indexOf(':');
    if (colonIdx > 0 && colonIdx < MAX_AUTHOR_LENGTH) {
      author = commentText.slice(0, colonIdx).trim();
      commentText = commentText.slice(colonIdx + 1).trim();
    }

    annotations.push({
      type: 'comment',
      match: match[0],
      content: commentText,
      author,
      position: match.index,
      line: lineOf(match.index),
      ...contextAround(match.index, match[0].length),
    });
  }

  // Sort by position
  annotations.sort((a, b) => a.position - b.position);
  return annotations;
}
247
+
248
/**
 * Produce clean text by applying every tracked change and dropping the markup.
 *
 * Substitutions resolve to their new text, insertions to their content,
 * deletions vanish, highlights are unwrapped, and comments are removed unless
 * `keepComments` is set. The rules are re-run until a pass changes nothing
 * (bounded by MAX_STRIP_ITERATIONS) so nested annotations unwind fully, after
 * which any leftover orphaned markers are swept away once.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Strip options
 * @returns Clean text with annotations applied/removed
 * @throws TypeError If text is not a string
 */
export function stripAnnotations(text: string, options: StripOptions = {}): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const { keepComments: preserveComments = false } = options;

  type Rule = readonly [RegExp, string];
  const applyRules = (input: string, rules: readonly Rule[]): string =>
    rules.reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), input);

  // Tracked changes, in application order.
  const changeRules: readonly Rule[] = [
    [PATTERNS.substitute, '$2'], // {~~old~>new~~} → new
    [PATTERNS.insert, '$1'], // {++text++} → text
    [PATTERNS.delete, ''], // {--text--} → nothing (surrounding whitespace untouched)
    [PATTERNS.highlight, '$1'], // {==text==} → text
  ];

  // Pandoc highlight spans: [text]{.mark} → text.
  // With comments kept, a span directly following a `{>>...<<}` comment is
  // that comment's anchor and is left in place: the dual-build flow strips
  // annotations before generating markers, and removing the anchor here would
  // leave the marker generator with no anchor text — collapsing every
  // multi-word anchor to a single fallback word in the rebuilt docx.
  const markSpanRule: Rule = preserveComments
    ? [/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1']
    : [/\[([^\]]*)\]\{\.mark\}/g, '$1'];

  // Fragments that nested stripping can leave behind; cleaned on every pass.
  const fragmentRules: readonly Rule[] = [
    markSpanRule,
    // Empty annotations
    [/\{----\}/g, ''],
    [/\{\+\+\+\+\}/g, ''],
    [/\{--\s*--\}/g, ''],
    [/\{\+\+\s*\+\+\}/g, ''],
    // Orphaned substitution halves: ~>text~~} or {~~text with no partner
    [/~>[^{]*?~~\}/g, ''],
    [/\{~~[^~}]*$/gm, ''],
    // Doubled substitution markers from nesting collapse to a single one
    [/\{~~\{~~/g, '{~~'],
    [/~~\}~~\}/g, '~~}'],
  ];

  // One-off sweep once the loop has settled.
  const finalRules: readonly Rule[] = [
    // Orphaned closing markers (runs first, then singles)
    [/--\}(?:--\})+/g, ''],
    [/\+\+\}(?:\+\+\})+/g, ''],
    [/~~\}(?:~~\})+/g, ''],
    [/--\}/g, ''],
    [/\+\+\}/g, ''],
    [/~~\}/g, ''],
    // Orphaned opening markers (runs first, then singles)
    [/\{--(?:\{--)+/g, ''],
    [/\{\+\+(?:\{\+\+)+/g, ''],
    [/\{~~(?:\{~~)+/g, ''],
    [/\{--/g, ''],
    [/\{\+\+/g, ''],
    [/\{~~/g, ''],
    [/~>/g, ''],
    // Orphan `[` left by a {.mark} span whose closing `]{.mark}` sat inside a
    // comment: a `[` is orphan when no `]` follows on the same line. Other `[`
    // are allowed in between so nested forms like `[[0..9]]{.mark}` keep
    // their outer bracket.
    [/\[(?![^\]\n]*\])/g, ''],
  ];

  let rounds = 0;
  let snapshot: string;
  do {
    snapshot = text;
    text = applyRules(text, changeRules);
    if (!preserveComments) {
      text = text.replace(PATTERNS.comment, '');
    }
    text = applyRules(text, fragmentRules);
    rounds += 1;
  } while (text !== snapshot && rounds < MAX_STRIP_ITERATIONS);

  return applyRules(text, finalRules);
}
346
+
347
/**
 * Collapse multiple spaces to single space, preserving table formatting
 * Useful for cleaning up messy Word imports
 *
 * A table starts at a separator line (pipe- or space-delimited dashes) and
 * ends at a blank line whose next non-blank line (within the following four
 * lines) neither looks like a table row nor starts with a dash rule. Lines
 * inside a table are left untouched.
 *
 * @param text - Text to normalize
 * @returns Text with multiple spaces collapsed to single spaces
 * @throws TypeError If text is not a string
 */
export function stripToSingleSpace(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const lines = text.split('\n');
  let inTable = false;

  // Helper to check if a line looks like table content
  const looksLikeTableRow = (ln: string): boolean => {
    const trimmed = ln.trim();
    if (!trimmed) return false;
    // Has multiple consecutive spaces (column spacing)
    // OR italicized category header with trailing spaces
    return /\S\s{2,}\S/.test(trimmed) || (/^\*[^*]+\*\s*$/.test(trimmed) && /\s{2,}$/.test(ln));
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // Only guard against a missing element. An empty string must fall through,
    // because an ordinary blank line is what ends a table below.
    if (line === undefined) continue;

    // Detect table separator line
    const isTableSeparator = /^\|?[\s-]*[-]{3,}[\s|:-]+[-]{3,}/.test(line) ||
      /^[-]{3,}\s{2,}[-]{3,}/.test(line);

    if (isTableSeparator) {
      inTable = true;
      continue;
    }

    // Check if we're exiting the table
    if (inTable && line.trim() === '') {
      let nextContentLine = '';
      for (let j = i + 1; j < lines.length && j < i + 5; j++) {
        const nextLine = lines[j];
        if (nextLine && nextLine.trim() !== '') {
          nextContentLine = nextLine;
          break;
        }
      }
      if (!looksLikeTableRow(nextContentLine) && !/^[-]{3,}/.test(nextContentLine.trim())) {
        inTable = false;
      }
      continue;
    }

    // Only collapse spaces outside tables
    if (!inTable) {
      lines[i] = line.replace(/ {2,}/g, ' ');
    }
  }

  return lines.join('\n');
}
408
+
409
/**
 * Check if text contains any CriticMarkup annotations
 *
 * Looks for insertions, deletions, substitutions, comments and highlights.
 * The check is stateless: the shared patterns are global regexes, so it uses
 * `String.prototype.search`, which ignores and preserves `lastIndex`, rather
 * than `RegExp.prototype.test`, which would make repeated calls alternate
 * between true and false and leave stale state for other users.
 *
 * @param text - Text to check
 * @returns True if text contains any annotations
 * @throws TypeError If text is not a string
 */
export function hasAnnotations(text: string): boolean {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const patterns: RegExp[] = [
    PATTERNS.insert,
    PATTERNS.delete,
    PATTERNS.substitute,
    PATTERNS.comment,
    PATTERNS.highlight,
  ];
  return patterns.some((pattern) => text.search(pattern) !== -1);
}
426
+
427
/**
 * Apply a decision to a single annotation (accept or reject)
 *
 * - insert: accept keeps the content; reject removes it.
 * - delete: accept removes the text; reject restores it.
 * - substitute: accept keeps the new text; reject restores the old text.
 * - any other type: the text is returned unchanged.
 *
 * `{>>...<<}` comments embedded in text that is being discarded are preserved
 * in its place.
 *
 * The annotation is rewritten at `annotation.position` when the text still
 * holds the match there; otherwise the first occurrence of the match is
 * rewritten. The replacement is always inserted literally, so `$`-sequences
 * in the content (e.g. `$$math$$`) are never interpreted as replacement
 * patterns.
 *
 * @param text - Document text containing the annotation
 * @param annotation - Annotation object from parseAnnotations()
 * @param accept - True to accept the change, false to reject
 * @returns Updated text with the decision applied
 * @throws TypeError If text is not a string or annotation is invalid
 */
export function applyDecision(text: string, annotation: Annotation, accept: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!annotation || typeof annotation.type !== 'string' || typeof annotation.match !== 'string') {
    throw new TypeError('annotation must have type and match properties');
  }

  // Concatenation of every {>>...<<} comment found in `source`
  const commentsIn = (source: string | undefined): string =>
    ((source || '').match(/\{>>[\s\S]*?<<\}/g) || []).join('');

  const content = annotation.content ?? '';
  let replacement: string;

  switch (annotation.type) {
    case 'insert':
      // Rejecting an insertion keeps any comments that were inside it
      replacement = accept ? content : commentsIn(annotation.match);
      break;
    case 'delete':
      // Accepting a deletion keeps any comments that were inside it
      replacement = accept ? commentsIn(annotation.match) : content;
      break;
    case 'substitute':
      // Accepting keeps the new text, prefixed by comments from the old text
      replacement = accept ? commentsIn(annotation.content) + (annotation.replacement || '') : content;
      break;
    default:
      return text;
  }

  const target = annotation.match;
  const at = annotation.position;
  if (typeof at === 'number' && at >= 0 && text.startsWith(target, at)) {
    // Rewrite exactly the annotation that was parsed, even when an identical
    // one appears earlier in the document.
    return text.slice(0, at) + replacement + text.slice(at + target.length);
  }

  // Position missing or stale: fall back to the first occurrence. A replacer
  // function is used so the replacement is inserted verbatim.
  return text.replace(target, () => replacement);
}
485
+
486
/**
 * Collect the tracked changes in a document, leaving comments out.
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Every parsed annotation whose type is not 'comment', in document order
 * @throws TypeError If text is not a string
 */
export function getTrackChanges(text: string): Annotation[] {
  // parseAnnotations performs the input validation
  const changes: Annotation[] = [];
  for (const annotation of parseAnnotations(text)) {
    if (annotation.type === 'comment') continue;
    changes.push(annotation);
  }
  return changes;
}
496
+
497
/**
 * Collect the comments in a document, tagging each with its resolved status.
 *
 * A comment counts as resolved when its content ends with `[RESOLVED]` or
 * `[✓]`; that trailing marker is removed from the returned `content`.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Filter options (`pendingOnly` keeps unresolved comments,
 *   `resolvedOnly` keeps resolved ones; both filters are applied when both are set)
 * @returns Array of comment annotations
 * @throws TypeError If text is not a string
 */
export function getComments(text: string, options: CommentFilterOptions = {}): Annotation[] {
  // parseAnnotations performs the input validation
  const { pendingOnly = false, resolvedOnly = false } = options;
  const selected: Annotation[] = [];

  for (const annotation of parseAnnotations(text)) {
    if (annotation.type !== 'comment') continue;

    // Status marker sits at the very end of the comment body
    const resolved = annotation.content.endsWith('[RESOLVED]') || annotation.content.endsWith('[✓]');
    if (pendingOnly && resolved) continue;
    if (resolvedOnly && !resolved) continue;

    let content = annotation.content;
    if (resolved) {
      content = content.replace(/\s*\[(RESOLVED|✓)\]$/, '').trim();
    }
    selected.push({ ...annotation, resolved, content });
  }

  return selected;
}
530
+
531
/**
 * Mark a comment as resolved or pending
 *
 * Resolving appends ` [RESOLVED]` just before the closing `<<}`; a comment
 * that already ends with `[RESOLVED]` or `[✓]` is left as it is, so repeated
 * calls do not stack markers. Marking as pending removes a trailing
 * `[RESOLVED]` / `[✓]` marker.
 *
 * The comment is rewritten at `comment.position` when the text still holds
 * the match there; otherwise the first occurrence of the match is rewritten.
 * The new comment text is inserted literally, so `$`-sequences inside the
 * comment are never interpreted as replacement patterns.
 *
 * @param text - Document text containing the comment
 * @param comment - Comment annotation object from getComments()
 * @param resolved - True to mark resolved, false to mark pending
 * @returns Updated text with status marker applied
 * @throws TypeError If text is not a string or comment is invalid
 */
export function setCommentStatus(text: string, comment: Annotation, resolved: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!comment || typeof comment.match !== 'string') {
    throw new TypeError('comment must have a match property');
  }

  const originalMatch = comment.match;
  // Trailing status marker, tolerating whitespace on either side of it
  const statusSuffix = /\s*\[(RESOLVED|✓)\]\s*<<\}$/;

  let newMatch: string;
  if (resolved) {
    // Already carries a marker: nothing to do
    if (statusSuffix.test(originalMatch)) return text;
    // Add [RESOLVED] marker before the closing <<
    newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
  } else {
    // Remove resolved markers
    newMatch = originalMatch.replace(statusSuffix, '<<}');
  }
  if (newMatch === originalMatch) return text;

  const at = comment.position;
  if (typeof at === 'number' && at >= 0 && text.startsWith(originalMatch, at)) {
    // Rewrite exactly the comment that was parsed, even when an identical
    // one appears earlier in the document.
    return text.slice(0, at) + newMatch + text.slice(at + originalMatch.length);
  }

  // Position missing or stale: fall back to the first occurrence. A replacer
  // function is used so the new text is inserted verbatim.
  return text.replace(originalMatch, () => newMatch);
}
559
+
560
/**
 * Tally the annotations in a document by type.
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Per-type counts plus `total`, the number of parsed annotations
 * @throws TypeError If text is not a string
 */
export function countAnnotations(text: string): AnnotationCounts {
  // parseAnnotations performs the input validation
  const tally: AnnotationCounts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };

  for (const { type } of parseAnnotations(text)) {
    tally.total += 1;
    if (type === 'insert') {
      tally.inserts += 1;
    } else if (type === 'delete') {
      tally.deletes += 1;
    } else if (type === 'substitute') {
      tally.substitutes += 1;
    } else if (type === 'comment') {
      tally.comments += 1;
    }
  }

  return tally;
}
591
+
592
/**
 * Repair CriticMarkup whose opening and closing markers no longer pair up,
 * as can happen when a tracked change spans a comment boundary.
 *
 * Two sweeps are made:
 *  1. closing markers (`++}`, `--}`, `~~}`) that have no opener before them
 *     (with no `}` in between) are removed;
 *  2. openers that reach the next annotation opener or the end of the text
 *     without being closed are dropped while their text is kept — for a
 *     substitution only the new text is kept.
 *
 * @param text - Document text with potentially malformed markers
 * @returns Cleaned text with orphaned markers removed
 * @throws TypeError If text is not a string
 */
export function cleanupOrphanedMarkers(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  // Sweep 1: closers whose opener was swallowed by a deletion/replacement.
  const orphanClosers: readonly RegExp[] = [
    /(?<!\{\+\+[^}]*)\+\+\}/g, // ++} not preceded by {++
    /(?<!\{--[^}]*)--\}/g, // --} not preceded by {--
    /(?<!\{~~[^}]*)~~\}/g, // ~~} not preceded by {~~
  ];
  let cleaned = orphanClosers.reduce((acc, closer) => acc.replace(closer, ''), text);

  // Sweep 2a: unclosed insertions — keep the inserted text.
  cleaned = cleaned.replace(
    /\{\+\+([^+]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole: string, inner: string) => (inner.includes('++}') ? whole : inner),
  );

  // Sweep 2b: unclosed deletions — keep the text that was to be deleted.
  cleaned = cleaned.replace(
    /\{--([^-]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole: string, inner: string) => (inner.includes('--}') ? whole : inner),
  );

  // Sweep 2c: unclosed substitutions need both ~> and ~~}; when the closer is
  // missing, keep only the new text.
  cleaned = cleaned.replace(
    /\{~~([^~]*?)~>([^~]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole: string, _oldText: string, updatedText: string) => (whole.includes('~~}') ? whole : updatedText),
  );

  return cleaned;
}