docrev 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,321 @@
1
+ /**
2
+ * CriticMarkup annotation parsing and manipulation
3
+ *
4
+ * Syntax:
5
+ * {++inserted text++} - Insertions
6
+ * {--deleted text--} - Deletions
7
+ * {~~old~>new~~} - Substitutions
8
+ * {>>Author: comment<<} - Comments
9
+ * {==text==} - Highlights
10
+ */
11
+
12
// Regular expressions for each CriticMarkup annotation type.
// Every pattern carries the `g` flag (all occurrences) and the `s` flag
// (`.` also matches newlines, so annotations may span several lines).
// Bodies are matched lazily (`.+?`) and must contain at least one character.
// The table is frozen so that no caller can swap or remove a pattern.
const PATTERNS = Object.freeze({
  // {++inserted text++} - group 1 is the inserted text
  insert: /\{\+\+(.+?)\+\+\}/gs,
  // {--deleted text--} - group 1 is the deleted text
  delete: /\{--(.+?)--\}/gs,
  // {~~old~>new~~} - group 1 is the old text, group 2 the new text
  substitute: /\{~~(.+?)~>(.+?)~~\}/gs,
  // {>>comment<<} - group 1 is the raw comment body
  comment: /\{>>(.+?)<<\}/gs,
  // {==text==} - group 1 is the highlighted text
  highlight: /\{==(.+?)==\}/gs,
});
20
+
21
/**
 * Decide whether a `{>>...<<}` match is a false positive rather than a real
 * reviewer comment (e.g. a figure caption, or text nested inside a deletion
 * or insertion block).
 *
 * @param {string} commentContent - The content inside {>>...<<}
 * @param {string} fullText - The full document text
 * @param {number} position - Position of the comment in the text
 * @returns {boolean} true if this is a false positive (not a real comment)
 */
function isCommentFalsePositive(commentContent, fullText, position) {
  // Only the 500 characters preceding the match are inspected.
  const textBefore = fullText.slice(Math.max(0, position - 500), position);

  // The comment is nested when the nearest marker before it is an opener.
  // Comparing the last opener with the last closer (rather than counting
  // both) stays correct when the window begins in the middle of an earlier
  // annotation, whose stray closer would otherwise cancel a real opener.
  const isInsideOpenBlock = (opener, closer) =>
    textBefore.lastIndexOf(opener) > textBefore.lastIndexOf(closer);

  if (isInsideOpenBlock('{--', '--}')) return true; // Nested inside deletion
  if (isInsideOpenBlock('{++', '++}')) return true; // Nested inside insertion

  // Heuristics for figure captions and other false positives:

  // Contains image/figure path patterns
  if (/\(figures?\/|\(images?\/|\.png|\.jpg|\.pdf/i.test(commentContent)) return true;

  // Contains markdown figure reference syntax
  if (/\{#fig:|!\[/.test(commentContent)) return true;

  const trimmed = commentContent.trim();

  // Long text without a short "Author:" prefix is treated as a caption.
  const hasAuthorPrefix = /^[A-Za-z][A-Za-z\s]{0,20}:/.test(trimmed);
  if (!hasAuthorPrefix && commentContent.length > 200) return true;

  // Starts with a typical caption word ("Fig", "Figure", "Table", ...)
  return /^(Fig\.?|Figure|Table|Sankey|Diagram|Proportion|Distribution)/i.test(trimmed);
}
64
+
65
// Combined pattern for any track change: an insertion {++...++}, a deletion
// {--...--} or a substitution {~~...~>...~~}. Comments and highlights are
// not included. The whole annotation is captured as group 1; the `g` and `s`
// flags match every occurrence and let `.` span newlines.
const TRACK_CHANGE_PATTERN = /(\{\+\+.+?\+\+\}|\{--.+?--\}|\{~~.+?~>.+?~~\})/gs;
67
+
68
/**
 * Collect the insertions, deletions, substitutions and comments found in
 * `text`, ordered by their position in the document. Comment matches that
 * `isCommentFalsePositive` rejects are left out. Highlights are not reported.
 *
 * Each record also carries `before` / `after`: up to 50 characters of
 * surrounding text on the same line as the start / end of the annotation.
 *
 * @param {string} text
 * @returns {Array<{type: string, match: string, content: string, replacement?: string, author?: string, position: number, line: number, before: string, after: string}>}
 */
export function parseAnnotations(text) {
  // Offset at which every line of the document begins
  const lineStarts = [];
  let cursor = 0;
  for (const line of text.split('\n')) {
    lineStarts.push(cursor);
    cursor += line.length + 1;
  }

  // 1-based line number: index of the first line that starts after `position`
  const lineOf = (position) => {
    const nextLine = lineStarts.findIndex((start) => start > position);
    return nextLine === -1 ? lineStarts.length : nextLine;
  };

  // Same-line text immediately around the span [position, position + length)
  const contextOf = (position, length) => {
    const tail = position + length;
    const leading = text.slice(Math.max(0, position - 50), position);
    const trailing = text.slice(tail, Math.min(text.length, tail + 50));
    const newlineAt = trailing.indexOf('\n');
    return {
      before: leading.slice(leading.lastIndexOf('\n') + 1),
      after: newlineAt === -1 ? trailing : trailing.slice(0, newlineAt),
    };
  };

  // Shared record builder; `extra` holds the type-specific fields
  const toAnnotation = (type, hit, content, extra = {}) => ({
    type,
    match: hit[0],
    content,
    ...extra,
    position: hit.index,
    line: lineOf(hit.index),
    ...contextOf(hit.index, hit[0].length),
  });

  const found = [];

  // Insertions, then deletions: both keep the captured body as content
  for (const kind of ['insert', 'delete']) {
    for (const hit of text.matchAll(PATTERNS[kind])) {
      found.push(toAnnotation(kind, hit, hit[1]));
    }
  }

  // Substitutions: old text is the content, new text the replacement
  for (const hit of text.matchAll(PATTERNS.substitute)) {
    found.push(toAnnotation('substitute', hit, hit[1], { replacement: hit[2] }));
  }

  // Comments, skipping figure captions, nested annotations, etc.
  for (const hit of text.matchAll(PATTERNS.comment)) {
    const raw = hit[1];
    if (isCommentFalsePositive(raw, text, hit.index)) {
      continue;
    }

    // "Author: comment" - a colon within the first 30 characters splits them
    const colonAt = raw.indexOf(':');
    const hasAuthor = colonAt > 0 && colonAt < 30;
    const author = hasAuthor ? raw.slice(0, colonAt).trim() : '';
    const body = hasAuthor ? raw.slice(colonAt + 1).trim() : raw;

    found.push(toAnnotation('comment', hit, body, { author }));
  }

  return found.sort((left, right) => left.position - right.position);
}
173
+
174
/**
 * Produce the text with all annotations resolved as if every change had been
 * accepted: substitutions become their new text, insertions keep their text,
 * deletions disappear and highlights are unwrapped. Comments are removed
 * unless `keepComments` is set.
 *
 * @param {string} text
 * @param {{keepComments?: boolean}} options
 * @returns {string}
 */
export function stripAnnotations(text, options = {}) {
  const { keepComments = false } = options;

  // Replacements run strictly in this order, each on the previous result.
  const steps = [
    [PATTERNS.substitute, '$2'], // {~~old~>new~~} → new
    [PATTERNS.insert, '$1'], // {++text++} → text
    [PATTERNS.delete, ''], // {--text--} → nothing
    [PATTERNS.highlight, '$1'], // {==text==} → text
  ];
  if (!keepComments) {
    steps.push([PATTERNS.comment, '']); // {>>comment<<} → nothing
  }

  return steps.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text,
  );
}
202
+
203
/**
 * Apply an accept/reject decision to a single annotation.
 *
 * The first occurrence of `annotation.match` in `text` is replaced:
 * - insert: accepted → the inserted text, rejected → nothing
 * - delete: accepted → nothing, rejected → the original text
 * - substitute: accepted → the new text, rejected → the old text
 * Any other annotation type leaves `text` unchanged.
 *
 * @param {string} text
 * @param {{type: string, match: string, content: string, replacement?: string}} annotation
 * @param {boolean} accept
 * @returns {string}
 */
export function applyDecision(text, annotation, accept) {
  let replacement;

  switch (annotation.type) {
    case 'insert':
      replacement = accept ? annotation.content : '';
      break;
    case 'delete':
      replacement = accept ? '' : annotation.content;
      break;
    case 'substitute':
      replacement = accept ? annotation.replacement : annotation.content;
      break;
    default:
      return text;
  }

  // A function replacer inserts the text verbatim. A plain replacement string
  // would have `$$`, `$&`, "$`" and `$'` expanded by String.prototype.replace,
  // corrupting content such as `$$...$$` math.
  return text.replace(annotation.match, () => replacement);
}
229
+
230
/**
 * Parse `text` and return every annotation except comments.
 *
 * @param {string} text
 * @returns {Array} Annotation records as produced by `parseAnnotations`
 */
export function getTrackChanges(text) {
  const isTrackChange = ({ type }) => type !== 'comment';
  const everything = parseAnnotations(text);
  return everything.filter(isTrackChange);
}
238
+
239
/**
 * Parse `text` and return its comments, each with a `resolved` flag.
 *
 * A comment counts as resolved when its content ends with `[RESOLVED]` or
 * `[✓]`; that marker is stripped from the returned `content`.
 *
 * @param {string} text
 * @param {object} options
 * @param {boolean} [options.pendingOnly=false] - Keep only unresolved comments
 * @param {boolean} [options.resolvedOnly=false] - Keep only resolved comments
 * @returns {Array}
 */
export function getComments(text, options = {}) {
  const { pendingOnly = false, resolvedOnly = false } = options;
  const selected = [];

  for (const annotation of parseAnnotations(text)) {
    if (annotation.type !== 'comment') {
      continue;
    }

    const body = annotation.content;
    const resolved = body.endsWith('[RESOLVED]') || body.endsWith('[✓]');

    // Setting both filters excludes every comment.
    if (pendingOnly && resolved) {
      continue;
    }
    if (resolvedOnly && !resolved) {
      continue;
    }

    selected.push({
      ...annotation,
      resolved,
      content: resolved ? body.replace(/\s*\[(RESOLVED|✓)\]$/, '').trim() : body,
    });
  }

  return selected;
}
270
+
271
/**
 * Mark a comment as resolved or pending.
 *
 * Resolving appends ` [RESOLVED]` just before the closing `<<}`; a comment
 * that already ends with a `[RESOLVED]` or `[✓]` marker is left untouched.
 * Un-resolving removes such a trailing marker. The first occurrence of
 * `comment.match` in `text` is the one rewritten.
 *
 * @param {string} text - Document text
 * @param {object} comment - Comment object; its `match` is the full `{>>...<<}` text
 * @param {boolean} resolved - Whether to mark as resolved
 * @returns {string} Updated text
 */
export function setCommentStatus(text, comment, resolved) {
  const originalMatch = comment.match;
  // Trailing resolved marker, tolerating whitespace on either side of it
  const resolvedMarker = /\s*\[(RESOLVED|✓)\]\s*<<\}$/;

  let newMatch;
  if (resolved) {
    // Already resolved: adding a second marker would leave one behind later.
    if (resolvedMarker.test(originalMatch)) {
      return text;
    }
    newMatch = originalMatch.replace(/<<\}$/, ' [RESOLVED]<<}');
  } else {
    newMatch = originalMatch.replace(resolvedMarker, '<<}');
  }

  // A function replacer inserts the comment verbatim; a replacement string
  // would have `$$`, `$&`, "$`" and `$'` in the comment body expanded.
  return text.replace(originalMatch, () => newMatch);
}
292
+
293
/**
 * Tally the annotations in `text` by type.
 *
 * `total` counts every parsed annotation; the per-type counters cover
 * insertions, deletions, substitutions and comments.
 *
 * @param {string} text
 * @returns {{inserts: number, deletes: number, substitutes: number, comments: number, total: number}}
 */
export function countAnnotations(text) {
  // Annotation type → name of the counter it increments
  const counterFor = new Map([
    ['insert', 'inserts'],
    ['delete', 'deletes'],
    ['substitute', 'substitutes'],
    ['comment', 'comments'],
  ]);

  const counts = { inserts: 0, deletes: 0, substitutes: 0, comments: 0, total: 0 };

  parseAnnotations(text).forEach(({ type }) => {
    counts.total += 1;
    const counter = counterFor.get(type);
    if (counter !== undefined) {
      counts[counter] += 1;
    }
  });

  return counts;
}