docrev 0.9.13 → 0.9.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -9
- package/.gitattributes +1 -1
- package/CHANGELOG.md +149 -149
- package/PLAN-tables-and-postprocess.md +850 -850
- package/README.md +411 -391
- package/bin/rev.js +11 -11
- package/bin/rev.ts +145 -145
- package/completions/rev.bash +127 -127
- package/completions/rev.ps1 +210 -210
- package/completions/rev.zsh +207 -207
- package/dev_notes/stress2/build_adversarial.ts +186 -186
- package/dev_notes/stress2/drift_matcher.ts +62 -62
- package/dev_notes/stress2/probe_anchors.ts +35 -35
- package/dev_notes/stress2/project/discussion.before.md +3 -3
- package/dev_notes/stress2/project/discussion.md +3 -3
- package/dev_notes/stress2/project/methods.before.md +20 -20
- package/dev_notes/stress2/project/methods.md +20 -20
- package/dev_notes/stress2/project/rev.yaml +5 -5
- package/dev_notes/stress2/project/sections.yaml +4 -4
- package/dev_notes/stress2/sections.yaml +5 -5
- package/dev_notes/stress2/trace_placement.ts +50 -50
- package/dev_notes/stresstest_boundaries.ts +27 -27
- package/dev_notes/stresstest_drift_apply.ts +43 -43
- package/dev_notes/stresstest_drift_compare.ts +43 -43
- package/dev_notes/stresstest_drift_v2.ts +54 -54
- package/dev_notes/stresstest_inspect.ts +54 -54
- package/dev_notes/stresstest_pstyle.ts +55 -55
- package/dev_notes/stresstest_section_debug.ts +23 -23
- package/dev_notes/stresstest_split.ts +70 -70
- package/dev_notes/stresstest_trace.ts +19 -19
- package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
- package/dist/lib/build.d.ts +38 -1
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +68 -30
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/build.d.ts.map +1 -1
- package/dist/lib/commands/build.js +38 -5
- package/dist/lib/commands/build.js.map +1 -1
- package/dist/lib/commands/utilities.js +164 -164
- package/dist/lib/commands/word-tools.js +8 -8
- package/dist/lib/grammar.js +3 -3
- package/dist/lib/pdf-comments.js +44 -44
- package/dist/lib/plugins.js +57 -57
- package/dist/lib/pptx-themes.js +115 -115
- package/dist/lib/spelling.js +2 -2
- package/dist/lib/templates.js +387 -387
- package/dist/lib/themes.js +51 -51
- package/eslint.config.js +27 -27
- package/lib/anchor-match.ts +276 -276
- package/lib/annotations.ts +644 -644
- package/lib/build.ts +1300 -1251
- package/lib/citations.ts +160 -160
- package/lib/commands/build.ts +833 -801
- package/lib/commands/citations.ts +515 -515
- package/lib/commands/comments.ts +1050 -1050
- package/lib/commands/context.ts +174 -174
- package/lib/commands/core.ts +309 -309
- package/lib/commands/doi.ts +435 -435
- package/lib/commands/file-ops.ts +372 -372
- package/lib/commands/history.ts +320 -320
- package/lib/commands/index.ts +87 -87
- package/lib/commands/init.ts +259 -259
- package/lib/commands/merge-resolve.ts +378 -378
- package/lib/commands/preview.ts +178 -178
- package/lib/commands/project-info.ts +244 -244
- package/lib/commands/quality.ts +517 -517
- package/lib/commands/response.ts +454 -454
- package/lib/commands/section-boundaries.ts +82 -82
- package/lib/commands/sections.ts +451 -451
- package/lib/commands/sync.ts +706 -706
- package/lib/commands/text-ops.ts +449 -449
- package/lib/commands/utilities.ts +448 -448
- package/lib/commands/verify-anchors.ts +272 -272
- package/lib/commands/word-tools.ts +340 -340
- package/lib/comment-realign.ts +517 -517
- package/lib/config.ts +84 -84
- package/lib/crossref.ts +781 -781
- package/lib/csl.ts +191 -191
- package/lib/dependencies.ts +98 -98
- package/lib/diff-engine.ts +465 -465
- package/lib/doi-cache.ts +115 -115
- package/lib/doi.ts +897 -897
- package/lib/equations.ts +506 -506
- package/lib/errors.ts +346 -346
- package/lib/format.ts +541 -541
- package/lib/git.ts +326 -326
- package/lib/grammar.ts +303 -303
- package/lib/image-registry.ts +180 -180
- package/lib/import.ts +911 -911
- package/lib/journals.ts +543 -543
- package/lib/merge.ts +633 -633
- package/lib/orcid.ts +144 -144
- package/lib/pdf-comments.ts +263 -263
- package/lib/pdf-import.ts +524 -524
- package/lib/plugins.ts +362 -362
- package/lib/postprocess.ts +188 -188
- package/lib/pptx-color-filter.lua +37 -37
- package/lib/pptx-template.ts +469 -469
- package/lib/pptx-themes.ts +483 -483
- package/lib/protect-restore.ts +520 -520
- package/lib/rate-limiter.ts +94 -94
- package/lib/response.ts +197 -197
- package/lib/restore-references.ts +240 -240
- package/lib/review.ts +327 -327
- package/lib/schema.ts +417 -417
- package/lib/scientific-words.ts +73 -73
- package/lib/sections.ts +335 -335
- package/lib/slides.ts +756 -756
- package/lib/spelling.ts +334 -334
- package/lib/templates.ts +526 -526
- package/lib/themes.ts +742 -742
- package/lib/trackchanges.ts +247 -247
- package/lib/tui.ts +450 -450
- package/lib/types.ts +550 -550
- package/lib/undo.ts +250 -250
- package/lib/utils.ts +69 -69
- package/lib/variables.ts +179 -179
- package/lib/word-extraction.ts +806 -806
- package/lib/word.ts +643 -643
- package/lib/wordcomments.ts +817 -817
- package/package.json +137 -137
- package/scripts/postbuild.js +28 -28
- package/skill/REFERENCE.md +473 -431
- package/skill/SKILL.md +274 -258
- package/tsconfig.json +26 -26
- package/types/index.d.ts +525 -525
package/lib/annotations.ts
CHANGED
|
@@ -1,644 +1,644 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* CriticMarkup annotation parsing and manipulation
|
|
3
|
-
*
|
|
4
|
-
* Syntax:
|
|
5
|
-
* {++inserted text++} - Insertions
|
|
6
|
-
* {--deleted text--} - Deletions
|
|
7
|
-
* {~~old~>new~~} - Substitutions
|
|
8
|
-
* {>>Author: comment<<} - Comments
|
|
9
|
-
* {==text==} - Highlights
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import type { Annotation, AnnotationCounts, StripOptions, CommentFilterOptions } from './types.js';
|
|
13
|
-
|
|
14
|
-
// =============================================================================
|
|
15
|
-
// Constants
|
|
16
|
-
// =============================================================================
|
|
17
|
-
|
|
18
|
-
/**
 * Number of characters before the candidate position that
 * isCommentFalsePositive() inspects when looking for an enclosing code fence.
 */
const CONTEXT_WINDOW_SIZE = 2000;

/**
 * Number of characters on each side of an annotation that parseAnnotations()
 * considers when building the `before` / `after` context strings (the result
 * is further cut at the nearest line break).
 */
const CONTEXT_SNIPPET_SIZE = 50;

/**
 * Upper bound on the number of strip passes stripAnnotations() performs while
 * unwrapping nested annotations (safety limit against non-converging input).
 */
const MAX_STRIP_ITERATIONS = 20;

/**
 * parseAnnotations() treats the text before the first ':' as the author name
 * only when that ':' sits at an index greater than 0 and below this value.
 */
const MAX_AUTHOR_LENGTH = 30;

/**
 * Comments longer than this, with neither an "Author:" prefix nor a resolved
 * mark, are assumed by isCommentFalsePositive() not to be real comments.
 */
const MAX_COMMENT_CONTENT_LENGTH = 200;
|
|
32
|
-
|
|
33
|
-
// =============================================================================
|
|
34
|
-
// Patterns
|
|
35
|
-
// =============================================================================
|
|
36
|
-
|
|
37
|
-
// One pattern per CriticMarkup annotation type. Capture group 1 is the
// annotation content; for `substitute`, group 1 is the old text and group 2
// the new text. The `s` flag lets `.` span line breaks.
//
// NOTE: every pattern carries the `g` flag, so the RegExp objects are stateful:
// `.test()` / `.exec()` advance `lastIndex` between calls. `matchAll()`,
// `replace()` and `search()` do not depend on a previous call's `lastIndex`.
const PATTERNS = {
  insert: /\{\+\+(.+?)\+\+\}/gs,
  delete: /\{--(.+?)--\}/gs,
  substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
  comment: /\{>>(.+?)<<\}/gs,
  highlight: /\{==(.+?)==\}/gs,
};
|
|
45
|
-
|
|
46
|
-
/**
 * Decide whether a `{>>...<<}` match is a false positive rather than a real
 * reviewer comment (e.g. it sits inside code, is nested in another annotation,
 * or its content looks like a caption, equation, or BibTeX entry).
 *
 * Only the CONTEXT_WINDOW_SIZE characters preceding `position` are examined
 * for code fences, so a fence opened further back than that is not seen.
 *
 * @param commentContent - The content inside {>>...<<}
 * @param fullText - The full document text
 * @param position - Offset of the `{>>` match within fullText
 * @returns true if this is a false positive (not a real comment)
 */
function isCommentFalsePositive(commentContent: string, fullText: string, position: number): boolean {
  const textBefore = fullText.slice(Math.max(0, position - CONTEXT_WINDOW_SIZE), position);

  // Fenced code blocks: walk the fence lines in order and toggle. Counting
  // "lines starting with a fence" against "lines ending with a fence" is not
  // reliable: an opening "```js" line only matches the former, while a bare
  // "```" line matches both.
  let openFence = '';
  for (const fence of textBefore.matchAll(/^(```|~~~)/gm)) {
    const marker = fence[1] || '';
    if (openFence === '') {
      openFence = marker;
    } else if (marker === openFence) {
      openFence = '';
    }
  }
  if (openFence !== '') return true; // Inside code block

  // Text between the start of the current line and the match. Sliced from
  // textBefore because the newline offset is relative to that window, not to
  // fullText.
  const linePrefix = textBefore.slice(textBefore.lastIndexOf('\n') + 1);

  // Indented line (4+ spaces or a tab at line start = code)
  if (/^(\t| {4})/.test(linePrefix)) return true;

  // Odd number of backticks earlier on the line = inside inline code
  const backticksBefore = (linePrefix.match(/`/g) || []).length;
  if (backticksBefore % 2 === 1) return true;

  // Nested inside an unclosed deletion or insertion block
  const nearTextBefore = fullText.slice(Math.max(0, position - 500), position);

  const delOpens = (nearTextBefore.match(/\{--/g) || []).length;
  const delCloses = (nearTextBefore.match(/--\}/g) || []).length;
  if (delOpens > delCloses) return true; // Nested inside deletion

  const insOpens = (nearTextBefore.match(/\{\+\+/g) || []).length;
  const insCloses = (nearTextBefore.match(/\+\+\}/g) || []).length;
  if (insOpens > insCloses) return true; // Nested inside insertion

  // Heuristics for figure captions and other false positives:

  // Contains image/figure path patterns
  if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.pdf/i.test(commentContent)) return true;

  // Contains markdown figure reference syntax
  if (/\{#fig:|!\[/.test(commentContent)) return true;

  const trimmed = commentContent.trim();

  // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
  // periods, and Unicode letters so names like "Jens-Christian Svenning" or
  // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
  const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(trimmed);
  const hasResolvedMark = /^[✓✔]\s/.test(trimmed);

  // Contains URL patterns (likely a link, not a comment) — only filter when
  // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
  if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;

  // Looks like code (contains programming patterns). Skipped for author-prefixed
  // comments for the same reason as URLs: the keywords are also plain English
  // ("let me know", "export the table"), and reviewers quote code.
  if (!hasAuthorPrefix && /function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) {
    return true;
  }

  // Very long without clear author pattern (likely caption, not comment)
  if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;

  // Looks like a figure caption (starts with "Fig" or contains typical caption words)
  if (/^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution|Map|Chart|Graph|Plot|Panel)/i.test(trimmed)) {
    return true;
  }

  // Contains LaTeX-like patterns (likely equation, not comment)
  if (/\\[a-z]+\{|\\frac|\\sum|\\int|\\begin\{/.test(commentContent)) return true;

  // Looks like BibTeX entry (not a comment)
  if (/@article\{|@book\{|@inproceedings\{/i.test(commentContent)) return true;

  return false;
}
|
|
123
|
-
|
|
124
|
-
// Combined pattern matching any single track change — an insertion, a
// deletion, or a substitution — as one capture group. Comments and highlights
// are deliberately not included. Global (`g`) and dot-all (`s`).
const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
|
|
126
|
-
|
|
127
|
-
// =============================================================================
|
|
128
|
-
// Public API
|
|
129
|
-
// =============================================================================
|
|
130
|
-
|
|
131
|
-
/**
 * Parse all insertions, deletions, substitutions and comments from text.
 *
 * Each result carries the raw match, its content, its character offset, its
 * 1-based line number, and the same-line context immediately before and after
 * it. Comments that isCommentFalsePositive() rejects are omitted, and an
 * "Author:" prefix is split off into `author`. Highlights are not returned.
 *
 * @param text - Markdown text containing CriticMarkup annotations
 * @returns Array of parsed annotations sorted by position
 * @throws TypeError If text is not a string
 */
export function parseAnnotations(text: string): Annotation[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const annotations: Annotation[] = [];

  // Offset at which every line starts, in ascending order.
  const lineStarts: number[] = [0];
  for (let nl = text.indexOf('\n'); nl !== -1; nl = text.indexOf('\n', nl + 1)) {
    lineStarts.push(nl + 1);
  }

  // 1-based line number of an offset = number of line starts at or before it.
  // Binary search keeps this O(log lines) per annotation instead of a scan.
  function getLine(position: number): number {
    let lo = 0;
    let hi = lineStarts.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      const start = lineStarts[mid];
      if (start !== undefined && start > position) {
        hi = mid;
      } else {
        lo = mid + 1;
      }
    }
    return lo;
  }

  // Same-line text just before and just after the annotation, capped at
  // CONTEXT_SNIPPET_SIZE characters on each side.
  function getContext(position: number, length: number): { before: string; after: string } {
    const start = Math.max(0, position - CONTEXT_SNIPPET_SIZE);
    const end = Math.min(text.length, position + length + CONTEXT_SNIPPET_SIZE);
    const before = text.slice(start, position).split('\n').pop() || '';
    const after = text.slice(position + length, end).split('\n')[0] || '';
    return { before, after };
  }

  // Location fields shared by every annotation type.
  function locate(position: number, length: number) {
    return { position, line: getLine(position), ...getContext(position, length) };
  }

  // Insertions and deletions share the same shape.
  for (const kind of ['insert', 'delete'] as const) {
    for (const match of text.matchAll(PATTERNS[kind])) {
      if (match.index === undefined) continue;
      annotations.push({
        type: kind,
        match: match[0],
        content: match[1] || '',
        ...locate(match.index, match[0].length),
      });
    }
  }

  // Substitutions additionally carry the replacement text.
  for (const match of text.matchAll(PATTERNS.substitute)) {
    if (match.index === undefined) continue;
    annotations.push({
      type: 'substitute',
      match: match[0],
      content: match[1] || '',
      replacement: match[2] || '',
      ...locate(match.index, match[0].length),
    });
  }

  // Comments (with false positive filtering)
  for (const match of text.matchAll(PATTERNS.comment)) {
    if (match.index === undefined) continue;

    const commentContent = match[1] || '';
    // Skip false positives (figure captions, nested annotations, etc.)
    if (isCommentFalsePositive(commentContent, text, match.index)) continue;

    let commentText = commentContent;
    let author = '';

    // Extract author if present (format: "Author: comment")
    const colonIdx = commentText.indexOf(':');
    if (colonIdx > 0 && colonIdx < MAX_AUTHOR_LENGTH) {
      author = commentText.slice(0, colonIdx).trim();
      commentText = commentText.slice(colonIdx + 1).trim();
    }

    annotations.push({
      type: 'comment',
      match: match[0],
      content: commentText,
      author,
      ...locate(match.index, match[0].length),
    });
  }

  annotations.sort((a, b) => a.position - b.position);
  return annotations;
}
|
|
247
|
-
|
|
248
|
-
/**
 * Produce clean text from annotated Markdown by applying every track change
 * and dropping the annotation syntax.
 *
 * Substitutions resolve to their new text, insertions to their content,
 * deletions to nothing, highlights to their content. Comments are removed
 * unless `options.keepComments` is set. The passes repeat until the text stops
 * changing (or MAX_STRIP_ITERATIONS is reached) so nested annotations unwrap,
 * then leftover orphaned markers are removed.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Strip options
 * @returns Clean text with annotations applied/removed
 * @throws TypeError If text is not a string
 */
export function stripAnnotations(text: string, options: StripOptions = {}): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const { keepComments = false } = options;

  // Ordered passes applied on every round. Order matters.
  const roundPasses: Array<[RegExp, string]> = [
    // {~~old~>new~~} → new
    [PATTERNS.substitute, '$2'],
    // {++text++} → text
    [PATTERNS.insert, '$1'],
    // {--text--} → nothing (surrounding whitespace is left untouched)
    [PATTERNS.delete, ''],
    // {==text==} → text
    [PATTERNS.highlight, '$1'],
  ];

  if (!keepComments) {
    // {>>comment<<} → nothing
    roundPasses.push([PATTERNS.comment, '']);
  }

  // Pandoc highlight spans: [text]{.mark} → text.
  // With keepComments, a span that directly follows a kept `{>>...<<}` is that
  // comment's anchor and must survive: the dual-build flow runs
  // stripAnnotations() before prepareMarkdownWithMarkers(), and without the
  // span the marker generator has no anchor text, collapsing every multi-word
  // anchor to a single fallback word in the rebuilt docx.
  roundPasses.push(
    keepComments
      ? [/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1']
      : [/\[([^\]]*)\]\{\.mark\}/g, '$1'],
  );

  roundPasses.push(
    // Empty annotations left behind by nested stripping
    [/\{----\}/g, ''],
    [/\{\+\+\+\+\}/g, ''],
    [/\{--\s*--\}/g, ''],
    [/\{\+\+\s*\+\+\}/g, ''],
    // Orphaned substitution fragments: ~>text~~} or {~~text (no proper pairs)
    [/~>[^{]*?~~\}/g, ''],
    [/\{~~[^~}]*$/gm, ''],
    // Doubled substitution markers from nesting collapse to a single marker
    [/\{~~\{~~/g, '{~~'],
    [/~~\}~~\}/g, '~~}'],
  );

  // Repeat until a round changes nothing or the safety limit is hit.
  let rounds = 0;
  for (;;) {
    const before = text;
    for (const [pattern, replacement] of roundPasses) {
      text = text.replace(pattern, replacement);
    }
    rounds += 1;
    if (text === before || rounds >= MAX_STRIP_ITERATIONS) break;
  }

  // Final sweep: whatever marker fragments remain are orphans.
  const orphanPatterns: RegExp[] = [
    // Closing markers (runs first, then singles)
    /--\}(?:--\})+/g,
    /\+\+\}(?:\+\+\})+/g,
    /~~\}(?:~~\})+/g,
    /--\}/g,
    /\+\+\}/g,
    /~~\}/g,
    // Opening markers (runs first, then singles)
    /\{--(?:\{--)+/g,
    /\{\+\+(?:\{\+\+)+/g,
    /\{~~(?:\{~~)+/g,
    /\{--/g,
    /\{\+\+/g,
    /\{~~/g,
    /~>/g,
    // Orphan `[` from a stripped {.mark} span whose closing `]{.mark}` was
    // inside a comment: a `[` with no `]` later on the same line. Other `[`
    // are allowed in between so nested forms like `[[0..9]]{.mark}` keep
    // their outer bracket.
    /\[(?![^\]\n]*\])/g,
  ];

  return orphanPatterns.reduce((acc, pattern) => acc.replace(pattern, ''), text);
}
|
|
346
|
-
|
|
347
|
-
/**
 * Collapse runs of spaces to a single space, leaving table rows untouched.
 * Useful for cleaning up messy Word imports.
 *
 * A table starts at a separator line (e.g. `|---|---|` or `---  ---`). It ends
 * at a blank line, unless the next non-blank line within the following four
 * lines still looks like a table row or a separator.
 *
 * @param text - Text to normalize
 * @returns Text with multiple spaces collapsed to single spaces
 * @throws TypeError If text is not a string
 */
export function stripToSingleSpace(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const lines = text.split('\n');
  let inTable = false;

  // A line looks like table content when it has column spacing (2+ spaces
  // between non-space characters) or is an italicized category header
  // followed by trailing spaces.
  const looksLikeTableRow = (ln: string): boolean => {
    const trimmed = ln.trim();
    if (!trimmed) return false;
    return /\S\s{2,}\S/.test(trimmed) || (/^\*[^*]+\*\s*$/.test(trimmed) && /\s{2,}$/.test(ln));
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // Skip only missing entries. An empty string is a real blank line and has
    // to reach the table-exit check below; a plain falsy test would skip it
    // and leave `inTable` set for the rest of the document.
    if (line === undefined) continue;

    // Detect table separator line
    const isTableSeparator = /^\|?[\s-]*[-]{3,}[\s|:-]+[-]{3,}/.test(line) ||
      /^[-]{3,}\s{2,}[-]{3,}/.test(line);

    if (isTableSeparator) {
      inTable = true;
      continue;
    }

    // A blank line ends the table unless more table content follows shortly
    if (inTable && line.trim() === '') {
      let nextContentLine = '';
      for (let j = i + 1; j < lines.length && j < i + 5; j++) {
        const nextLine = lines[j];
        if (nextLine && nextLine.trim() !== '') {
          nextContentLine = nextLine;
          break;
        }
      }
      if (!looksLikeTableRow(nextContentLine) && !/^[-]{3,}/.test(nextContentLine.trim())) {
        inTable = false;
      }
      continue;
    }

    // Only collapse spaces outside tables
    if (!inTable) {
      lines[i] = line.replace(/ {2,}/g, ' ');
    }
  }

  return lines.join('\n');
}
|
|
408
|
-
|
|
409
|
-
/**
 * Check if text contains any CriticMarkup annotation (insertion, deletion,
 * substitution, comment, or highlight).
 *
 * The result depends only on `text`: calling this repeatedly with the same
 * input always gives the same answer.
 *
 * @param text - Text to check
 * @returns True if text contains any annotations
 * @throws TypeError If text is not a string
 */
export function hasAnnotations(text: string): boolean {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  // The shared PATTERNS are global (/g) regexes, so `.test()` would resume from
  // the `lastIndex` left by a previous call and report false on the next one.
  // `search()` always scans from the start and leaves `lastIndex` untouched.
  return Object.values(PATTERNS).some((pattern) => text.search(pattern) !== -1);
}
|
|
426
|
-
|
|
427
|
-
/**
 * Apply a decision to a single annotation (accept or reject).
 *
 * - insert: accept keeps the inserted text; reject removes it.
 * - delete: accept removes the text; reject keeps it.
 * - substitute: accept keeps the new text; reject keeps the old text.
 * - any other type: text is returned unchanged.
 *
 * Comments embedded in text that gets removed are preserved in its place.
 *
 * The annotation is located at `annotation.position` when the text at that
 * offset still matches; otherwise the first occurrence of `annotation.match`
 * is used. If it cannot be found, text is returned unchanged. The replacement
 * is inserted literally, so `$` sequences in the content are not interpreted.
 *
 * @param text - Document text containing the annotation
 * @param annotation - Annotation object from parseAnnotations()
 * @param accept - True to accept the change, false to reject
 * @returns Updated text with the decision applied
 * @throws TypeError If text is not a string or annotation is invalid
 */
export function applyDecision(text: string, annotation: Annotation, accept: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!annotation || typeof annotation.type !== 'string' || typeof annotation.match !== 'string') {
    throw new TypeError('annotation must have type and match properties');
  }
  let replacement: string;

  // Comments embedded in the annotation; kept when the surrounding text is
  // dropped (accepting a deletion or rejecting an insertion).
  const commentPattern = /\{>>[\s\S]*?<<\}/g;
  const embeddedComments = (annotation.match || '').match(commentPattern) || [];

  switch (annotation.type) {
    case 'insert':
      replacement = accept ? annotation.content : embeddedComments.join('');
      break;
    case 'delete':
      replacement = accept ? embeddedComments.join('') : annotation.content;
      break;
    case 'substitute':
      if (accept) {
        // Comments in the old text are prepended to the new text
        const oldTextComments = (annotation.content || '').match(commentPattern) || [];
        replacement = oldTextComments.join('') + (annotation.replacement || '');
      } else {
        replacement = annotation.content;
      }
      break;
    default:
      return text;
  }

  // Splice by offset instead of String.replace(): replace() would interpret
  // `$&`, `$1`, `$$` in the replacement and always hits the first textual
  // occurrence, which is the wrong one when identical annotations repeat.
  const { match, position } = annotation;
  const at = typeof position === 'number' && text.startsWith(match, position)
    ? position
    : text.indexOf(match);
  if (at === -1) return text;

  return text.slice(0, at) + replacement + text.slice(at + match.length);
}
|
|
485
|
-
|
|
486
|
-
/**
 * Return every parsed annotation that is not a comment, i.e. the track
 * changes (insertions, deletions, substitutions).
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Array of insert/delete/substitute annotations
 * @throws TypeError If text is not a string
 */
export function getTrackChanges(text: string): Annotation[] {
  // parseAnnotations() performs the input validation.
  const all = parseAnnotations(text);
  const isTrackChange = (entry: Annotation): boolean => entry.type !== 'comment';
  return all.filter(isTrackChange);
}
|
|
496
|
-
|
|
497
|
-
/**
 * Return the comment annotations found in text, each tagged with a `resolved`
 * flag.
 *
 * A comment counts as resolved when its content ends with `[RESOLVED]` or
 * `[✓]`; that trailing marker is removed from the returned `content`.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Filter options (`pendingOnly`, `resolvedOnly`)
 * @returns Array of comment annotations
 * @throws TypeError If text is not a string
 */
export function getComments(text: string, options: CommentFilterOptions = {}): Annotation[] {
  // parseAnnotations() performs the input validation.
  const { pendingOnly = false, resolvedOnly = false } = options;

  const tagged = parseAnnotations(text)
    .filter((entry) => entry.type === 'comment')
    .map((entry) => {
      // Status marker sits at the very end of the comment content
      const isResolved = entry.content.endsWith('[RESOLVED]') || entry.content.endsWith('[✓]');
      const content = isResolved
        ? entry.content.replace(/\s*\[(RESOLVED|✓)\]$/, '').trim()
        : entry.content;
      return { ...entry, resolved: isResolved, content };
    });

  // Each active filter removes the comments of the opposite status.
  return tagged.filter((entry) => {
    if (pendingOnly && entry.resolved) return false;
    if (resolvedOnly && !entry.resolved) return false;
    return true;
  });
}
|
|
530
|
-
|
|
531
|
-
/**
 * Mark a comment as resolved or pending.
 *
 * Resolving appends ` [RESOLVED]` just before the closing `<<}`; a comment
 * that already ends in `[RESOLVED]` or `[✓]` is left as is. Marking pending
 * removes a trailing `[RESOLVED]` / `[✓]` marker.
 *
 * The comment is located at `comment.position` when the text at that offset
 * still matches; otherwise the first occurrence of `comment.match` is used.
 * If it cannot be found, text is returned unchanged.
 *
 * @param text - Document text containing the comment
 * @param comment - Comment annotation object from getComments()
 * @param resolved - True to mark resolved, false to mark pending
 * @returns Updated text with status marker applied
 * @throws TypeError If text is not a string or comment is invalid
 */
export function setCommentStatus(text: string, comment: Annotation, resolved: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!comment || typeof comment.match !== 'string') {
    throw new TypeError('comment must have a match property');
  }

  const originalMatch = comment.match;
  const resolvedSuffix = /\s*\[(RESOLVED|✓)\]<<\}$/;

  let newMatch: string;
  if (resolved) {
    // Already resolved: do not stack a second marker
    if (resolvedSuffix.test(originalMatch)) return text;
    newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
  } else {
    newMatch = originalMatch.replace(resolvedSuffix, '<<}');
  }

  // Splice by offset instead of text.replace(originalMatch, newMatch): the
  // comment body is user text and may contain `$&` / `$1` / `$$`, which
  // replace() would expand, and identical comments may occur more than once.
  const { position } = comment;
  const at = typeof position === 'number' && text.startsWith(originalMatch, position)
    ? position
    : text.indexOf(originalMatch);
  if (at === -1) return text;

  return text.slice(0, at) + newMatch + text.slice(at + originalMatch.length);
}
|
|
559
|
-
|
|
560
|
-
/**
 * Tally the annotations in text by type.
 *
 * `total` counts every annotation returned by parseAnnotations(); the
 * per-type fields count insertions, deletions, substitutions and comments.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Counts by annotation type
 * @throws TypeError If text is not a string
 */
export function countAnnotations(text: string): AnnotationCounts {
  // parseAnnotations() performs the input validation.
  const counts: AnnotationCounts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };

  for (const { type } of parseAnnotations(text)) {
    counts.total += 1;
    if (type === 'insert') {
      counts.inserts += 1;
    } else if (type === 'delete') {
      counts.deletes += 1;
    } else if (type === 'substitute') {
      counts.substitutes += 1;
    } else if (type === 'comment') {
      counts.comments += 1;
    }
  }

  return counts;
}
|
|
591
|
-
|
|
592
|
-
/**
 * Remove stray or half-open CriticMarkup markers from a document.
 * Such fragments can appear when track changes span across comment boundaries.
 * @param text - Document text with potentially malformed markers
 * @returns Cleaned text with orphaned markers removed
 * @throws TypeError If text is not a string
 */
export function cleanupOrphanedMarkers(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  // Closing markers (++}, --}, ~~}) with no opener of the same kind before them
  // (an opener counts only if no `}` sits between it and the closer).
  // Applied in this order: insertion, deletion, substitution.
  const orphanedClosers = [
    /(?<!\{\+\+[^}]*)\+\+\}/g,
    /(?<!\{--[^}]*)--\}/g,
    /(?<!\{~~[^}]*)~~\}/g,
  ];
  let cleaned = orphanedClosers.reduce((acc, closer) => acc.replace(closer, ''), text);

  // Unclosed insertion: `{++` running up to the next `{` marker or the end of
  // the text. The opener is dropped and the inserted content kept.
  cleaned = cleaned.replace(
    /\{\+\+([^+]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole, inner) => (inner.includes('++}') ? whole : inner),
  );

  // Unclosed deletion: same treatment for `{--`.
  cleaned = cleaned.replace(
    /\{--([^-]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole, inner) => (inner.includes('--}') ? whole : inner),
  );

  // Unclosed substitution: needs both `{~~` and `~>`; only the new text survives.
  cleaned = cleaned.replace(
    /\{~~([^~]*?)~>([^~]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole, _oldText, newText) => (whole.includes('~~}') ? whole : newText),
  );

  return cleaned;
}
|
|
1
|
+
/**
|
|
2
|
+
* CriticMarkup annotation parsing and manipulation
|
|
3
|
+
*
|
|
4
|
+
* Syntax:
|
|
5
|
+
* {++inserted text++} - Insertions
|
|
6
|
+
* {--deleted text--} - Deletions
|
|
7
|
+
* {~~old~>new~~} - Substitutions
|
|
8
|
+
* {>>Author: comment<<} - Comments
|
|
9
|
+
* {==text==} - Highlights
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Annotation, AnnotationCounts, StripOptions, CommentFilterOptions } from './types.js';
|
|
13
|
+
|
|
14
|
+
// =============================================================================
|
|
15
|
+
// Constants
|
|
16
|
+
// =============================================================================
|
|
17
|
+
|
|
18
|
+
/** Number of characters before a position that are scanned when classifying a comment. */
const CONTEXT_WINDOW_SIZE = 2000;

/** Maximum length of the before/after snippet attached to each parsed annotation. */
const CONTEXT_SNIPPET_SIZE = 50;

/** Upper bound on stripping passes, so nested markup can never loop forever. */
const MAX_STRIP_ITERATIONS = 20;

/** A colon must appear before this index for the text ahead of it to count as an author. */
const MAX_AUTHOR_LENGTH = 30;

/** Comment bodies longer than this, with no author prefix, are assumed not to be comments. */
const MAX_COMMENT_CONTENT_LENGTH = 200;

// =============================================================================
// Patterns
// =============================================================================

// One regular expression per CriticMarkup annotation kind. All are global and
// dot-all, so content may span newlines. Because they are shared and global,
// `.test()`/`.exec()` on them advance `lastIndex`.
const PATTERNS = {
  insert: /\{\+\+(.+?)\+\+\}/gs,
  delete: /\{--(.+?)--\}/gs,
  substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
  comment: /\{>>(.+?)<<\}/gs,
  highlight: /\{==(.+?)==\}/gs,
};
|
|
45
|
+
|
|
46
|
+
/**
 * Check if a potential comment is actually a false positive
 * (e.g., figure caption, nested inside other annotation, code block, etc.)
 *
 * Only a window of CONTEXT_WINDOW_SIZE characters before the match is examined
 * for code fences, and 500 characters for enclosing tracked changes, so state
 * opened further back than that is not seen.
 *
 * @param commentContent - The content inside {>>...<<}
 * @param fullText - The full document text
 * @param position - Position of the comment in the text
 * @returns true if this is a false positive (not a real comment)
 */
function isCommentFalsePositive(commentContent: string, fullText: string, position: number): boolean {
  const textBefore = fullText.slice(Math.max(0, position - CONTEXT_WINDOW_SIZE), position);

  // Fenced code (``` or ~~~): every fence line toggles the "inside a block"
  // state, so an odd number of fence lines before the match means a block is
  // still open. Comparing line-start fences against line-end fences instead
  // would report a closed "```js ... ```" block as open and miss a bare "```".
  const fenceLines = (textBefore.match(/^```|^~~~/gm) || []).length;
  if (fenceLines % 2 === 1) return true; // Inside code block

  // Text between the start of the current line and the match. `lineStart` is
  // an index into `textBefore`, so the prefix must be sliced from `textBefore`
  // (slicing `fullText` with it is wrong once position exceeds the window).
  const lineStart = textBefore.lastIndexOf('\n') + 1;
  const linePrefix = textBefore.slice(lineStart);

  // Indented code: the line starts with a tab or at least four spaces.
  if (/^(?:\t| {4})/.test(linePrefix)) return true;

  // Inline code: an odd number of backticks earlier on the same line.
  const backticksBefore = (linePrefix.match(/`/g) || []).length;
  if (backticksBefore % 2 === 1) return true;

  // Nested inside a deletion or insertion that has not been closed yet.
  const nearTextBefore = fullText.slice(Math.max(0, position - 500), position);

  const delOpens = (nearTextBefore.match(/\{--/g) || []).length;
  const delCloses = (nearTextBefore.match(/--\}/g) || []).length;
  if (delOpens > delCloses) return true; // Nested inside deletion

  const insOpens = (nearTextBefore.match(/\{\+\+/g) || []).length;
  const insCloses = (nearTextBefore.match(/\+\+\}/g) || []).length;
  if (insOpens > insCloses) return true; // Nested inside insertion

  // Heuristics for figure captions and other false positives:

  // Contains image/figure path patterns
  if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.pdf/i.test(commentContent)) return true;

  // Contains markdown figure reference syntax
  if (/\{#fig:|!\[/.test(commentContent)) return true;

  // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
  // periods, and Unicode letters so names like "Jens-Christian Svenning" or
  // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
  const trimmedContent = commentContent.trim();
  const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(trimmedContent);
  const hasResolvedMark = /^[✓✔]\s/.test(trimmedContent);

  // Contains URL patterns (likely a link, not a comment) — only filter when
  // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
  if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;

  // Looks like code (contains programming patterns)
  if (/function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) return true;

  // Very long without clear author pattern (likely caption, not comment)
  if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;

  // Looks like a figure caption (starts with "Fig" or contains typical caption words)
  if (/^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution|Map|Chart|Graph|Plot|Panel)/i.test(trimmedContent)) {
    return true;
  }

  // Contains LaTeX-like patterns (likely equation, not comment)
  if (/\\[a-z]+\{|\\frac|\\sum|\\int|\\begin\{/.test(commentContent)) return true;

  // Looks like BibTeX entry (not a comment)
  if (/@article\{|@book\{|@inproceedings\{/i.test(commentContent)) return true;

  return false;
}
|
|
123
|
+
|
|
124
|
+
// Combined pattern for any track change — insertion, deletion, or substitution
// (not comments). Global and dot-all, so it finds every occurrence and lets
// annotation content span newlines; the whole annotation is capture group 1.
const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
|
|
126
|
+
|
|
127
|
+
// =============================================================================
|
|
128
|
+
// Public API
|
|
129
|
+
// =============================================================================
|
|
130
|
+
|
|
131
|
+
/**
 * Parse all annotations from text
 *
 * Insertions, deletions, substitutions and comments are collected (highlights
 * are not). Comment candidates rejected by isCommentFalsePositive are skipped.
 * Every annotation carries its raw match, content, character position, 1-based
 * line number and a short same-line context snippet before and after it.
 *
 * @param text - Markdown text containing CriticMarkup annotations
 * @returns Array of parsed annotations sorted by position
 * @throws TypeError If text is not a string
 */
export function parseAnnotations(text: string): Annotation[] {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const annotations: Annotation[] = [];

  // Character offset at which each line starts, in ascending order.
  const lineStarts: number[] = [];
  let offset = 0;
  for (const line of text.split('\n')) {
    lineStarts.push(offset);
    offset += line.length + 1;
  }

  // 1-based line number: the count of line starts at or before `position`,
  // found by binary search (index of the first line start beyond it).
  function getLine(position: number): number {
    let lo = 0;
    let hi = lineStarts.length;
    while (lo < hi) {
      const mid = (lo + hi) >>> 1;
      const start = lineStarts[mid];
      if (start !== undefined && start > position) {
        hi = mid;
      } else {
        lo = mid + 1;
      }
    }
    return lo;
  }

  // Up to CONTEXT_SNIPPET_SIZE characters around the match, clipped to its own line.
  function getContext(position: number, length: number): { before: string; after: string } {
    const start = Math.max(0, position - CONTEXT_SNIPPET_SIZE);
    const end = Math.min(text.length, position + length + CONTEXT_SNIPPET_SIZE);
    const before = text.slice(start, position).split('\n').pop() || '';
    const after = text.slice(position + length, end).split('\n')[0] || '';
    return { before, after };
  }

  // matchAll() starts from the regex's current lastIndex. The PATTERNS regexes
  // are shared and global, so a stale lastIndex left by an earlier .test() or
  // .exec() would silently skip annotations. Iterating a fresh copy avoids that.
  function matchesOf(pattern: RegExp) {
    return text.matchAll(new RegExp(pattern));
  }

  // Location fields shared by every annotation kind.
  function locate(index: number, length: number) {
    return { position: index, line: getLine(index), ...getContext(index, length) };
  }

  // Parse insertions
  for (const match of matchesOf(PATTERNS.insert)) {
    if (match.index === undefined) continue;
    annotations.push({
      type: 'insert',
      match: match[0],
      content: match[1] || '',
      ...locate(match.index, match[0].length),
    });
  }

  // Parse deletions
  for (const match of matchesOf(PATTERNS.delete)) {
    if (match.index === undefined) continue;
    annotations.push({
      type: 'delete',
      match: match[0],
      content: match[1] || '',
      ...locate(match.index, match[0].length),
    });
  }

  // Parse substitutions
  for (const match of matchesOf(PATTERNS.substitute)) {
    if (match.index === undefined) continue;
    annotations.push({
      type: 'substitute',
      match: match[0],
      content: match[1] || '',
      replacement: match[2] || '',
      ...locate(match.index, match[0].length),
    });
  }

  // Parse comments (with false positive filtering)
  for (const match of matchesOf(PATTERNS.comment)) {
    if (match.index === undefined) continue;

    // Skip false positives (figure captions, nested annotations, etc.)
    const commentContent = match[1] || '';
    if (isCommentFalsePositive(commentContent, text, match.index)) continue;

    // Extract author if present (format: "Author: comment"); the colon must
    // come early enough for the text before it to plausibly be a name.
    let commentText = commentContent;
    let author = '';
    const colonIdx = commentText.indexOf(':');
    if (colonIdx > 0 && colonIdx < MAX_AUTHOR_LENGTH) {
      author = commentText.slice(0, colonIdx).trim();
      commentText = commentText.slice(colonIdx + 1).trim();
    }

    annotations.push({
      type: 'comment',
      match: match[0],
      content: commentText,
      author,
      ...locate(match.index, match[0].length),
    });
  }

  // Sort by position
  annotations.sort((a, b) => a.position - b.position);
  return annotations;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Strip annotations from text, applying changes
 *
 * Substitutions resolve to their new text, insertions to their content,
 * deletions to nothing, and highlights to their content. Comments are removed
 * unless `keepComments` is set. The passes repeat until the text stops
 * changing (or MAX_STRIP_ITERATIONS is reached) so nested annotations are
 * handled; a final cleanup then removes leftover marker fragments.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Strip options (`keepComments`, default false)
 * @returns Clean text with annotations applied/removed
 * @throws TypeError If text is not a string
 */
export function stripAnnotations(text: string, options: StripOptions = {}): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const { keepComments = false } = options;

  // Iterate until no more changes (handles nested annotations)
  let prev: string;
  let iterations = 0;

  do {
    prev = text;

    // Apply substitutions: {~~old~>new~~} → new
    text = text.replace(PATTERNS.substitute, '$2');

    // Apply insertions: {++text++} → text
    text = text.replace(PATTERNS.insert, '$1');

    // Apply deletions: {--text--} → nothing
    // Don't touch surrounding whitespace - just remove the annotation
    text = text.replace(PATTERNS.delete, '');

    // Remove highlights: {==text==} → text
    text = text.replace(PATTERNS.highlight, '$1');

    // Remove comments unless keeping
    if (!keepComments) {
      text = text.replace(PATTERNS.comment, '');
    }

    // Strip pandoc highlight spans: [text]{.mark} → text.
    // When `keepComments=true`, preserve `[anchor]{.mark}` that is the
    // anchor of a kept `{>>...<<}` comment. The dual-build flow runs
    // stripAnnotations() before prepareMarkdownWithMarkers(), and stripping
    // the anchor span here would leave the marker generator with no anchor
    // text — collapsing every multi-word anchor to a single fallback word
    // in the rebuilt docx.
    // The lookbehind treats a span as an anchor when `<<}` precedes it with at
    // most three whitespace characters in between.
    text = keepComments
      ? text.replace(/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1')
      : text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');

    // Clean up partial/orphaned markers within the loop
    // This handles cases where nested annotations leave behind fragments

    // Empty annotations (from nested stripping)
    text = text.replace(/\{----\}/g, '');
    text = text.replace(/\{\+\+\+\+\}/g, '');
    text = text.replace(/\{--\s*--\}/g, '');
    text = text.replace(/\{\+\+\s*\+\+\}/g, '');

    // Orphaned substitution fragments: ~>text~~} or {~~text (no proper pairs)
    text = text.replace(/~>[^{]*?~~\}/g, '');
    // With the `m` flag, `$` is end of line: an opener with no `~` or `}` after
    // it on the same line is dropped together with the rest of that line.
    text = text.replace(/\{~~[^~}]*$/gm, '');

    // Handle malformed substitution from nested: {~~{~~old → just strip the {~~
    text = text.replace(/\{~~\{~~/g, '{~~');
    text = text.replace(/~~\}~~\}/g, '~~}');

    iterations++;
  } while (text !== prev && iterations < MAX_STRIP_ITERATIONS);

  // Final cleanup of any remaining orphaned markers
  // Orphaned closing markers (runs of repeated closers first, then single ones)
  text = text.replace(/--\}(?:--\})+/g, '');
  text = text.replace(/\+\+\}(?:\+\+\})+/g, '');
  text = text.replace(/~~\}(?:~~\})+/g, '');
  text = text.replace(/--\}/g, '');
  text = text.replace(/\+\+\}/g, '');
  text = text.replace(/~~\}/g, '');

  // Orphaned opening markers (runs of repeated openers first, then single ones)
  text = text.replace(/\{--(?:\{--)+/g, '');
  text = text.replace(/\{\+\+(?:\{\+\+)+/g, '');
  text = text.replace(/\{~~(?:\{~~)+/g, '');
  text = text.replace(/\{--/g, '');
  text = text.replace(/\{\+\+/g, '');
  text = text.replace(/\{~~/g, '');
  // NOTE(review): this removes every remaining `~>` in the document, not only
  // those left over from substitutions — confirm that is intended for prose
  // that uses `~>` literally.
  text = text.replace(/~>/g, '');

  // Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
  // was inside a comment. A [ is orphan if no `]` follows before end of line.
  // We deliberately allow other `[` between the candidate and the matching `]`
  // — otherwise nested forms like `[[0..9]]{.mark}` would have their outer
  // `[` stripped because the lookahead saw the inner `[` as a barrier.
  text = text.replace(/\[(?![^\]\n]*\])/g, '');

  return text;
}
|
|
346
|
+
|
|
347
|
+
/**
 * Collapse multiple spaces to single space, preserving table formatting
 * Useful for cleaning up messy Word imports
 *
 * A table starts at a separator line (dashes, optionally with pipes/colons)
 * and ends at a blank line whose next non-blank line (within the following
 * four lines) neither looks like a table row nor starts with dashes. Lines
 * inside a table, and separator lines themselves, are left untouched.
 *
 * @param text - Text to normalize
 * @returns Text with multiple spaces collapsed to single spaces
 * @throws TypeError If text is not a string
 */
export function stripToSingleSpace(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  const lines = text.split('\n');
  let inTable = false;

  // Helper to check if a line looks like table content
  const looksLikeTableRow = (ln: string): boolean => {
    const trimmed = ln.trim();
    if (!trimmed) return false;
    // Has multiple consecutive spaces (column spacing)
    // OR italicized category header with trailing spaces
    return /\S\s{2,}\S/.test(trimmed) || (/^\*[^*]+\*\s*$/.test(trimmed) && /\s{2,}$/.test(ln));
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    // Skip only missing entries. An empty string is a real blank line and must
    // reach the table-exit check below; skipping it would leave `inTable` set
    // for the rest of the document after any table.
    if (line === undefined) continue;

    // Detect table separator line
    const isTableSeparator = /^\|?[\s-]*[-]{3,}[\s|:-]+[-]{3,}/.test(line) ||
      /^[-]{3,}\s{2,}[-]{3,}/.test(line);

    if (isTableSeparator) {
      inTable = true;
      continue;
    }

    // Check if we're exiting the table
    if (inTable && line.trim() === '') {
      // Peek at the next non-blank line, looking at most four lines ahead.
      let nextContentLine = '';
      for (let j = i + 1; j < lines.length && j < i + 5; j++) {
        const nextLine = lines[j];
        if (nextLine && nextLine.trim() !== '') {
          nextContentLine = nextLine;
          break;
        }
      }
      if (!looksLikeTableRow(nextContentLine) && !/^[-]{3,}/.test(nextContentLine.trim())) {
        inTable = false;
      }
      continue;
    }

    // Only collapse spaces outside tables
    if (!inTable) {
      lines[i] = line.replace(/ {2,}/g, ' ');
    }
  }

  return lines.join('\n');
}
|
|
408
|
+
|
|
409
|
+
/**
 * Check if text contains any CriticMarkup annotations
 * (insertion, deletion, substitution, comment or highlight).
 * @param text - Text to check
 * @returns True if text contains any annotations
 * @throws TypeError If text is not a string
 */
export function hasAnnotations(text: string): boolean {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  // The PATTERNS regexes are global and shared. RegExp.prototype.test() on a
  // global regex starts at lastIndex and moves it, so repeated calls could
  // return different answers for the same text and leave the shared regexes in
  // a dirty state. String.prototype.search() always scans from the start and
  // restores lastIndex afterwards.
  const patterns = [
    PATTERNS.insert,
    PATTERNS.delete,
    PATTERNS.substitute,
    PATTERNS.comment,
    PATTERNS.highlight,
  ];
  return patterns.some((pattern) => text.search(pattern) !== -1);
}
|
|
426
|
+
|
|
427
|
+
/**
 * Apply a decision to a single annotation (accept or reject)
 *
 * Comments embedded in text that the decision removes (a rejected insertion,
 * an accepted deletion, the old side of an accepted substitution) are kept and
 * placed where the annotation was. Annotations of any other type leave the
 * text unchanged.
 *
 * The annotation is replaced at `annotation.position` when the text still has
 * `annotation.match` there; otherwise the first occurrence of the match is
 * replaced. If the match is not found at all, the text is returned unchanged.
 *
 * @param text - Document text containing the annotation
 * @param annotation - Annotation object from parseAnnotations()
 * @param accept - True to accept the change, false to reject
 * @returns Updated text with the decision applied
 * @throws TypeError If text is not a string or annotation is invalid
 */
export function applyDecision(text: string, annotation: Annotation, accept: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!annotation || typeof annotation.type !== 'string' || typeof annotation.match !== 'string') {
    throw new TypeError('annotation must have type and match properties');
  }

  // Comments embedded anywhere in the annotation, concatenated in order.
  const commentPattern = /\{>>[\s\S]*?<<\}/g;
  const keptComments = (annotation.match.match(commentPattern) || []).join('');

  let replacement: string;
  switch (annotation.type) {
    case 'insert':
      // Rejecting an insertion drops its text but preserves embedded comments.
      replacement = accept ? annotation.content : keptComments;
      break;
    case 'delete':
      // Accepting a deletion drops its text but preserves embedded comments.
      replacement = accept ? keptComments : annotation.content;
      break;
    case 'substitute':
      if (accept) {
        // Only comments in the old text are lost, so only those are prepended.
        const oldTextComments = (annotation.content || '').match(commentPattern) || [];
        replacement = oldTextComments.join('') + (annotation.replacement || '');
      } else {
        replacement = annotation.content;
      }
      break;
    default:
      return text;
  }

  // Splice by index rather than String.prototype.replace(str, str): replace()
  // expands `$$`, `$&` and similar sequences in the replacement (corrupting
  // content that contains `$`) and always hits the first occurrence, even when
  // the annotation is a later, textually identical one.
  const { match, position } = annotation;
  const at =
    typeof position === 'number' && position >= 0 && text.startsWith(match, position)
      ? position
      : text.indexOf(match);
  if (at === -1) return text;
  return text.slice(0, at) + replacement + text.slice(at + match.length);
}
|
|
485
|
+
|
|
486
|
+
/**
 * Return every parsed annotation that is not a comment.
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Array of insert/delete/substitute annotations
 * @throws TypeError If text is not a string
 */
export function getTrackChanges(text: string): Annotation[] {
  // parseAnnotations performs the string check on `text`.
  const trackChanges: Annotation[] = [];
  for (const annotation of parseAnnotations(text)) {
    if (annotation.type !== 'comment') {
      trackChanges.push(annotation);
    }
  }
  return trackChanges;
}
|
|
496
|
+
|
|
497
|
+
/**
 * Return the comment annotations of a document, each flagged as resolved or not.
 *
 * A comment is resolved when its content ends with `[RESOLVED]` or `[✓]`; that
 * trailing marker is removed from the returned `content`.
 *
 * @param text - Markdown text with CriticMarkup annotations
 * @param options - Filter options (`pendingOnly`, `resolvedOnly`)
 * @returns Array of comment annotations
 * @throws TypeError If text is not a string
 */
export function getComments(text: string, options: CommentFilterOptions = {}): Annotation[] {
  const { pendingOnly = false, resolvedOnly = false } = options;
  const selected: Annotation[] = [];

  // parseAnnotations performs the string check on `text`.
  for (const annotation of parseAnnotations(text)) {
    if (annotation.type !== 'comment') continue;

    const body = annotation.content;
    const resolved = body.endsWith('[RESOLVED]') || body.endsWith('[✓]');

    // Both filters apply when both are set, which leaves nothing.
    if (pendingOnly && resolved) continue;
    if (resolvedOnly && !resolved) continue;

    selected.push({
      ...annotation,
      resolved,
      content: resolved ? body.replace(/\s*\[(RESOLVED|✓)\]$/, '').trim() : body,
    });
  }

  return selected;
}
|
|
530
|
+
|
|
531
|
+
/**
 * Mark a comment as resolved or pending
 *
 * Resolving appends ` [RESOLVED]` just before the closing `<<}`; a comment that
 * already ends with a `[RESOLVED]` or `[✓]` marker is left unchanged. Marking
 * pending removes a trailing `[RESOLVED]` or `[✓]` marker.
 *
 * The comment is rewritten at `comment.position` when the text still has
 * `comment.match` there; otherwise the first occurrence of the match is
 * rewritten. If the match is not found, the text is returned unchanged.
 *
 * @param text - Document text containing the comment
 * @param comment - Comment annotation object from getComments()
 * @param resolved - True to mark resolved, false to mark pending
 * @returns Updated text with status marker applied
 * @throws TypeError If text is not a string or comment is invalid
 */
export function setCommentStatus(text: string, comment: Annotation, resolved: boolean): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }
  if (!comment || typeof comment.match !== 'string') {
    throw new TypeError('comment must have a match property');
  }

  const originalMatch = comment.match;
  const resolvedSuffix = /\s*\[(RESOLVED|✓)\]<<\}$/;

  let newMatch: string;
  if (resolved) {
    // Already marked: adding a second marker would leave a stray "[RESOLVED]"
    // in the comment content once the last marker is stripped.
    if (resolvedSuffix.test(originalMatch)) return text;
    newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
  } else {
    newMatch = originalMatch.replace(resolvedSuffix, '<<}');
  }

  // Splice by index rather than String.prototype.replace(str, str): replace()
  // expands `$$`, `$&` and similar sequences found in the comment text and
  // always hits the first occurrence, even for a later identical comment.
  const { position } = comment;
  const at =
    typeof position === 'number' && position >= 0 && text.startsWith(originalMatch, position)
      ? position
      : text.indexOf(originalMatch);
  if (at === -1) return text;
  return text.slice(0, at) + newMatch + text.slice(at + originalMatch.length);
}
|
|
559
|
+
|
|
560
|
+
/**
 * Tally the annotations found in a document, per kind and overall.
 * @param text - Markdown text with CriticMarkup annotations
 * @returns Counts by annotation type; `total` includes every parsed annotation
 * @throws TypeError If text is not a string
 */
export function countAnnotations(text: string): AnnotationCounts {
  // parseAnnotations performs the string check on `text`.
  const parsed = parseAnnotations(text);
  const counts: AnnotationCounts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };

  for (const { type } of parsed) {
    counts.total += 1;
    if (type === 'insert') {
      counts.inserts += 1;
    } else if (type === 'delete') {
      counts.deletes += 1;
    } else if (type === 'substitute') {
      counts.substitutes += 1;
    } else if (type === 'comment') {
      counts.comments += 1;
    }
  }

  return counts;
}
|
|
591
|
+
|
|
592
|
+
/**
 * Remove stray or half-open CriticMarkup markers from a document.
 * Such fragments can appear when track changes span across comment boundaries.
 * @param text - Document text with potentially malformed markers
 * @returns Cleaned text with orphaned markers removed
 * @throws TypeError If text is not a string
 */
export function cleanupOrphanedMarkers(text: string): string {
  if (typeof text !== 'string') {
    throw new TypeError(`text must be a string, got ${typeof text}`);
  }

  // Closing markers (++}, --}, ~~}) with no opener of the same kind before them
  // (an opener counts only if no `}` sits between it and the closer).
  // Applied in this order: insertion, deletion, substitution.
  const orphanedClosers = [
    /(?<!\{\+\+[^}]*)\+\+\}/g,
    /(?<!\{--[^}]*)--\}/g,
    /(?<!\{~~[^}]*)~~\}/g,
  ];
  let cleaned = orphanedClosers.reduce((acc, closer) => acc.replace(closer, ''), text);

  // Unclosed insertion: `{++` running up to the next `{` marker or the end of
  // the text. The opener is dropped and the inserted content kept.
  cleaned = cleaned.replace(
    /\{\+\+([^+]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole, inner) => (inner.includes('++}') ? whole : inner),
  );

  // Unclosed deletion: same treatment for `{--`.
  cleaned = cleaned.replace(
    /\{--([^-]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole, inner) => (inner.includes('--}') ? whole : inner),
  );

  // Unclosed substitution: needs both `{~~` and `~>`; only the new text survives.
  cleaned = cleaned.replace(
    /\{~~([^~]*?)~>([^~]*?)(?=\{[+\-~>]|\{>>|$)/g,
    (whole, _oldText, newText) => (whole.includes('~~}') ? whole : newText),
  );

  return cleaned;
}
|