docrev 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/merge.js ADDED
@@ -0,0 +1,365 @@
1
+ /**
2
+ * Multi-reviewer merge utilities
3
+ * Combine feedback from multiple Word documents with conflict detection
4
+ */
5
+
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+ import { diffWords } from 'diff';
9
+ import { extractTextFromWord, extractCommentsFromWord } from './import.js';
10
+
11
+ /**
12
+ * Represents a change from a reviewer
13
+ * @typedef {Object} ReviewerChange
14
+ * @property {string} reviewer - Reviewer name/identifier
15
+ * @property {string} type - 'insert' | 'delete' | 'replace'
16
+ * @property {number} start - Start position in original text
17
+ * @property {number} end - End position in original text
18
+ * @property {string} oldText - Original text (for delete/replace)
19
+ * @property {string} newText - New text (for insert/replace)
20
+ */
21
+
22
+ /**
23
+ * Represents a conflict between reviewers
24
+ * @typedef {Object} Conflict
25
+ * @property {number} start - Start position in original
26
+ * @property {number} end - End position in original
27
+ * @property {string} original - Original text
28
+ * @property {ReviewerChange[]} changes - Conflicting changes from different reviewers
29
+ */
30
+
31
/**
 * Diff a reviewer's text against the original and describe each edit.
 *
 * `start`/`end` are character offsets into `originalText`. A removed chunk
 * that is immediately followed by an added chunk is reported as one
 * 'replace'; a removed chunk on its own is a 'delete'; an added chunk on its
 * own is an 'insert' anchored at the current offset (`start === end`).
 *
 * @param {string} originalText - Original markdown text
 * @param {string} wordText - Text extracted from Word
 * @param {string} reviewer - Reviewer identifier stamped on every change
 * @returns {ReviewerChange[]} Changes in document order
 */
export function extractChanges(originalText, wordText, reviewer) {
  const parts = diffWords(originalText, wordText);
  const found = [];
  let cursor = 0; // offset into originalText consumed so far

  for (let idx = 0; idx < parts.length; idx += 1) {
    const { value, added, removed } = parts[idx];

    if (removed) {
      const following = parts[idx + 1];
      const isReplacement = Boolean(following?.added);

      found.push({
        reviewer,
        type: isReplacement ? 'replace' : 'delete',
        start: cursor,
        end: cursor + value.length,
        oldText: value,
        newText: isReplacement ? following.value : '',
      });
      cursor += value.length;

      // The added half of a replacement has been consumed together with
      // the removed half, so step over it.
      if (isReplacement) {
        idx += 1;
      }
    } else if (added) {
      // Added text occupies no space in the original, so the cursor stays.
      found.push({
        reviewer,
        type: 'insert',
        start: cursor,
        end: cursor,
        oldText: '',
        newText: value,
      });
    } else {
      cursor += value.length;
    }
  }

  return found;
}
92
+
93
/**
 * Decide whether two changes conflict with each other.
 *
 * - Two insertions at the same offset conflict unless they insert the same
 *   text.
 * - Otherwise the changes conflict when their ranges in the original text
 *   overlap (an insertion counts as the empty range at its `start`), unless
 *   both describe exactly the same edit: same type, same range, same old and
 *   new text. The range must match as well, because equal text at different
 *   offsets (e.g. deleting "aa" at 0-2 vs 1-3 of "aaaa") is not the same
 *   edit.
 *
 * @param {ReviewerChange} a
 * @param {ReviewerChange} b
 * @returns {boolean} true when the two changes cannot both be applied as-is
 */
function changesOverlap(a, b) {
  if (a.type === 'insert' && b.type === 'insert' && a.start === b.start) {
    return a.newText !== b.newText;
  }

  // Insertions occupy no characters of the original.
  const aEnd = a.type === 'insert' ? a.start : a.end;
  const bEnd = b.type === 'insert' ? b.start : b.end;

  // Touching ranges (one ends where the other starts) do not overlap.
  if (aEnd <= b.start || bEnd <= a.start) {
    return false;
  }

  const isSameEdit =
    a.type === b.type &&
    a.start === b.start &&
    aEnd === bEnd &&
    a.oldText === b.oldText &&
    a.newText === b.newText;

  return !isSameEdit;
}
124
+
125
/**
 * Split the changes of all reviewers into conflict groups and changes that
 * can be applied without review.
 *
 * Changes are sorted by position and every pair that `changesOverlap`
 * reports as conflicting is linked. Linked changes are grouped
 * transitively: if A conflicts with B and B conflicts with C, all three end
 * up in one conflict even when A and C do not touch. A change that conflicts
 * with nothing is returned as non-conflicting; identical non-conflicting
 * changes (same range, type and new text) are reported once.
 *
 * @param {ReviewerChange[][]} allChanges - Array of change arrays, one per reviewer
 * @returns {{conflicts: Conflict[], nonConflicting: ReviewerChange[]}}
 */
export function detectConflicts(allChanges) {
  const flat = allChanges.flat().sort((a, b) => a.start - b.start || a.end - b.end);

  // Disjoint-set forest over indices into `flat`. The smaller root always
  // wins when two sets are linked, so a set's root is its earliest change.
  const parent = flat.map((_, idx) => idx);
  const findRoot = (idx) => {
    let root = idx;
    while (parent[root] !== root) {
      root = parent[root];
    }
    return root;
  };
  const link = (i, j) => {
    const rootI = findRoot(i);
    const rootJ = findRoot(j);
    if (rootI !== rootJ) {
      parent[Math.max(rootI, rootJ)] = Math.min(rootI, rootJ);
    }
  };

  for (let i = 0; i < flat.length; i++) {
    for (let j = i + 1; j < flat.length; j++) {
      // Sorted by start: once a later change begins past the end of this
      // one, neither it nor anything after it can overlap.
      if (flat[j].start > flat[i].end) break;

      if (changesOverlap(flat[i], flat[j])) {
        link(i, j);
      }
    }
  }

  // Collect members per root; Map keeps groups in document order.
  const groups = new Map();
  flat.forEach((change, idx) => {
    const root = findRoot(idx);
    if (!groups.has(root)) {
      groups.set(root, []);
    }
    groups.get(root).push(change);
  });

  const conflicts = [];
  const nonConflicting = [];
  const seenKeys = new Set();

  for (const members of groups.values()) {
    if (members.length > 1) {
      conflicts.push({
        start: Math.min(...members.map((c) => c.start)),
        end: Math.max(...members.map((c) => c.end)),
        original: members[0].oldText || '',
        changes: members,
      });
      continue;
    }

    // Several reviewers making the very same edit should yield one change.
    const [change] = members;
    const key = `${change.start}:${change.end}:${change.type}:${change.newText}`;
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      nonConflicting.push(change);
    }
  }

  return { conflicts, nonConflicting };
}
192
+
193
/**
 * Apply changes to the original text and return the edited text.
 *
 * The result is built in one left-to-right pass over the original, so all
 * offsets are read against unmodified text. This matters when an insertion
 * and a replace/delete start at the same offset: the inserted text is
 * emitted first and the replaced range is still taken from the original.
 * Insertions that share an offset keep their input order.
 *
 * Changes are expected not to overlap (conflicting ones should have been
 * separated out beforehand); if they do, the overlapping part of the
 * original is consumed once and every change's text is still emitted.
 * Changes with an unknown `type` are ignored.
 *
 * @param {string} originalText
 * @param {ReviewerChange[]} changes - Any order; the array is not modified
 * @returns {string}
 */
export function applyChanges(originalText, changes) {
  const edits = changes
    .filter(({ type }) => type === 'insert' || type === 'delete' || type === 'replace')
    .map((change) => ({
      start: change.start,
      // An insertion consumes nothing from the original.
      end: change.type === 'insert' ? change.start : change.end,
      text: change.type === 'delete' ? '' : change.newText,
    }))
    // Stable sort: zero-width insertions precede a range at the same start.
    .sort((a, b) => a.start - b.start || a.end - b.end);

  const pieces = [];
  let cursor = 0; // offset into originalText already emitted or consumed

  for (const { start, end, text } of edits) {
    pieces.push(originalText.slice(cursor, Math.max(cursor, start)), text);
    cursor = Math.max(cursor, end);
  }
  pieces.push(originalText.slice(cursor));

  return pieces.join('');
}
217
+
218
/**
 * Rewrite the original text with each change expressed as CriticMarkup.
 *
 * - insert  → `{++new++}` placed at `start`
 * - delete  → `{--old--}` replacing `start..end`
 * - replace → `{~~old~>new~~}` replacing `start..end`
 *
 * The result is built in one left-to-right pass over the original, so all
 * offsets are read against unmodified text. An insertion that shares its
 * offset with a replace/delete is emitted directly before that annotation
 * instead of being clobbered by it. Changes are expected not to overlap;
 * changes with an unknown `type` are ignored.
 *
 * @param {string} originalText
 * @param {ReviewerChange[]} changes - Any order; the array is not modified
 * @returns {string}
 */
export function applyChangesAsAnnotations(originalText, changes) {
  const toAnnotation = (change) => {
    switch (change.type) {
      case 'insert':
        return `{++${change.newText}++}`;
      case 'delete':
        return `{--${change.oldText}--}`;
      case 'replace':
        return `{~~${change.oldText}~>${change.newText}~~}`;
      default:
        return null;
    }
  };

  const edits = changes
    .map((change) => ({
      start: change.start,
      // An insertion consumes nothing from the original.
      end: change.type === 'insert' ? change.start : change.end,
      text: toAnnotation(change),
    }))
    .filter(({ text }) => text !== null)
    // Stable sort: zero-width insertions precede a range at the same start.
    .sort((a, b) => a.start - b.start || a.end - b.end);

  const pieces = [];
  let cursor = 0; // offset into originalText already emitted or consumed

  for (const { start, end, text } of edits) {
    pieces.push(originalText.slice(cursor, Math.max(cursor, start)), text);
    cursor = Math.max(cursor, end);
  }
  pieces.push(originalText.slice(cursor));

  return pieces.join('');
}
246
+
247
/**
 * Render a conflict as a multi-line, human-readable summary.
 *
 * The output shows up to 30 characters of original text on either side of
 * the conflicting range, the disputed original text (or a placeholder when
 * the range is empty), and one numbered option per competing change.
 *
 * @param {Conflict} conflict
 * @param {string} originalText - Text the conflict offsets refer to
 * @returns {string}
 */
export function formatConflict(conflict, originalText) {
  const CONTEXT_CHARS = 30;
  const { start, end, changes } = conflict;

  const leading = originalText.slice(Math.max(0, start - CONTEXT_CHARS), start);
  const disputed = originalText.slice(start, end);
  const trailing = originalText.slice(end, Math.min(originalText.length, end + CONTEXT_CHARS));

  const describe = (change) => {
    switch (change.type) {
      case 'insert':
        return `Insert: "${change.newText}"`;
      case 'delete':
        return `Delete: "${change.oldText}"`;
      default:
        return `Replace "${change.oldText}" → "${change.newText}"`;
    }
  };

  const optionLines = [];
  changes.forEach((change, idx) => {
    optionLines.push(` ${idx + 1}. [${change.reviewer}] ${describe(change)}`);
  });

  return [
    `Context: ...${leading}[CONFLICT]${trailing}...`,
    `Original: "${disputed || '(insertion point)'}"`,
    '',
    'Options:',
    ...optionLines,
  ].join('\n');
}
281
+
282
/**
 * Merge multiple reviewer Word documents against an original.
 *
 * For each reviewer document the text is extracted and diffed against the
 * original; the changes of all reviewers are then split into conflicts and
 * non-conflicting changes. Non-conflicting changes are written into the
 * original as CriticMarkup annotations; conflicts are returned untouched for
 * the caller to resolve. Reviewer comments are appended to the end of the
 * merged text as `{>>reviewer: text<<}` lines.
 *
 * Comment extraction is best-effort: a failure for one document does not
 * abort the merge, but it is recorded in `stats.commentErrors` so the caller
 * can report it.
 *
 * @param {string} originalPath - Path to original markdown
 * @param {Array<{path: string, name: string}>} reviewerDocs - Reviewer Word docs
 * @param {Object} [options] - Accepted for API compatibility; no option is
 *   currently acted upon by this function
 * @returns {Promise<{merged: string, conflicts: Conflict[], stats: Object, originalText: string}>}
 * @throws {Error} If the original or any reviewer file does not exist
 */
export async function mergeReviewerDocs(originalPath, reviewerDocs, options = {}) {
  if (!fs.existsSync(originalPath)) {
    throw new Error(`Original file not found: ${originalPath}`);
  }

  const originalText = fs.readFileSync(originalPath, 'utf-8');

  const allChanges = [];
  const allComments = [];
  const commentErrors = [];

  for (const doc of reviewerDocs) {
    if (!fs.existsSync(doc.path)) {
      throw new Error(`Reviewer file not found: ${doc.path}`);
    }

    const wordText = await extractTextFromWord(doc.path);
    allChanges.push(extractChanges(originalText, wordText, doc.name));

    try {
      const comments = await extractCommentsFromWord(doc.path);
      allComments.push(...comments.map((c) => ({ ...c, reviewer: doc.name })));
    } catch (err) {
      // Keep merging without this reviewer's comments, but leave a trace.
      commentErrors.push({
        reviewer: doc.name,
        message: err instanceof Error ? err.message : String(err),
      });
    }
  }

  const { conflicts, nonConflicting } = detectConflicts(allChanges);

  let merged = applyChangesAsAnnotations(originalText, nonConflicting);

  // Comments are appended at the end; their position in the document is
  // not tracked.
  for (const comment of allComments) {
    merged += `\n{>>${comment.reviewer}: ${comment.text}<<}`;
  }

  const stats = {
    reviewers: reviewerDocs.length,
    totalChanges: allChanges.flat().length,
    nonConflicting: nonConflicting.length,
    conflicts: conflicts.length,
    comments: allComments.length,
    commentErrors,
  };

  return { merged, conflicts, stats, originalText };
}
342
+
343
/**
 * Record the resolution of a conflict by choosing one of its options.
 *
 * This is a simplified implementation: it does not locate the conflict in
 * `text`. The chosen change is rendered as CriticMarkup and appended to the
 * end of `text` inside an HTML comment (`<!-- Resolved: … -->`).
 *
 * @param {string} text - Current merged text
 * @param {Conflict} conflict
 * @param {number} choice - Index of chosen change (0-based)
 * @param {string} originalText - Original text for position reference
 *   (currently unused)
 * @returns {string} `text` with the resolution note appended
 * @throws {RangeError} If `choice` is not a valid index into `conflict.changes`
 */
export function resolveConflict(text, conflict, choice, originalText) {
  const candidates = conflict.changes;

  if (!Number.isInteger(choice) || choice < 0 || choice >= candidates.length) {
    throw new RangeError(
      `Invalid conflict choice ${choice}: expected a 0-based index below ${candidates.length}`,
    );
  }

  const chosen = candidates[choice];

  let annotation;
  if (chosen.type === 'insert') {
    annotation = `{++${chosen.newText}++}`;
  } else if (chosen.type === 'delete') {
    annotation = `{--${chosen.oldText}--}`;
  } else {
    annotation = `{~~${chosen.oldText}~>${chosen.newText}~~}`;
  }

  return text + `\n<!-- Resolved: ${annotation} -->`;
}
@@ -0,0 +1,273 @@
1
+ /**
2
+ * Track Changes export utilities
3
+ * Convert CriticMarkup annotations to Word track changes format
4
+ */
5
+
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+ import AdmZip from 'adm-zip';
9
+ import { parseAnnotations } from './annotations.js';
10
+
11
/**
 * Module-wide counter backing the revision ids; starts at 0 when the module
 * is loaded and is never reset.
 */
let revisionId = 0;

/**
 * Hand out the next revision id. Ids are consecutive integers starting at 0.
 * @returns {number}
 */
function getNextRevId() {
  const issued = revisionId;
  revisionId += 1;
  return issued;
}
19
+
20
/**
 * Timestamp used for Word revision attributes: the current instant in ISO
 * 8601 form with the trailing "Z" removed (e.g. 2024-01-31T12:00:00.000).
 * @returns {string}
 */
function getRevisionDate() {
  const isoUtc = new Date().toISOString();
  // toISOString() always ends with the single character "Z".
  return isoUtc.slice(0, -1);
}
27
+
28
/**
 * Replace the five XML special characters (& < > " ') with their entities.
 * @param {string} text
 * @returns {string}
 */
function escapeXml(text) {
  const entityFor = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };

  // Single pass, so the "&" of an emitted entity is never escaped again.
  return text.replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
41
+
42
/**
 * Create Word insertion markup (`<w:ins>`) wrapping a single text run.
 *
 * The text element carries `xml:space="preserve"` so that leading and
 * trailing whitespace of the inserted text is kept instead of being
 * stripped when the document is read.
 *
 * @param {string} text - Text to insert (escaped for XML here)
 * @param {string} author - Author name (escaped for XML here)
 * @returns {string}
 */
function createInsertionXml(text, author = 'Author') {
  const id = getNextRevId();
  const date = getRevisionDate();

  return `<w:ins w:id="${id}" w:author="${escapeXml(author)}" w:date="${date}"><w:r><w:t xml:space="preserve">${escapeXml(text)}</w:t></w:r></w:ins>`;
}
54
+
55
/**
 * Create Word deletion markup (`<w:del>`) wrapping a single deleted-text run.
 *
 * The text element carries `xml:space="preserve"` so that leading and
 * trailing whitespace of the deleted text is kept instead of being stripped
 * when the document is read.
 *
 * @param {string} text - Text to delete (escaped for XML here)
 * @param {string} author - Author name (escaped for XML here)
 * @returns {string}
 */
function createDeletionXml(text, author = 'Author') {
  const id = getNextRevId();
  const date = getRevisionDate();

  return `<w:del w:id="${id}" w:author="${escapeXml(author)}" w:date="${date}"><w:r><w:delText xml:space="preserve">${escapeXml(text)}</w:delText></w:r></w:del>`;
}
67
+
68
/**
 * Swap every CriticMarkup annotation in the text for a `{{TC_n}}`
 * placeholder and return the placeholder table.
 *
 * Annotations are numbered from the end of the document towards the start
 * (the last annotation becomes `{{TC_0}}`), because replacement runs
 * back-to-front so that earlier positions stay valid. Each marker records
 * the annotation's type, content, replacement and author; a missing author
 * defaults to 'Reviewer'.
 *
 * @param {string} text - Markdown with CriticMarkup
 * @returns {{text: string, markers: Array<{id: number, type: string, content: string, replacement: string, author: string}>}}
 */
export function prepareForTrackChanges(text) {
  const lastToFirst = [...parseAnnotations(text)].sort((a, b) => b.position - a.position);

  const markers = lastToFirst.map((ann, id) => ({
    id,
    type: ann.type,
    content: ann.content,
    replacement: ann.replacement,
    author: ann.author || 'Reviewer',
  }));

  const withPlaceholders = lastToFirst.reduce((current, ann, id) => {
    const head = current.slice(0, ann.position);
    const tail = current.slice(ann.position + ann.match.length);
    return `${head}{{TC_${id}}}${tail}`;
  }, text);

  return { text: withPlaceholders, markers };
}
101
+
102
/**
 * Build the track-changes XML for one marker.
 *
 * @param {{type: string, content: string, replacement: string, author: string}} marker
 * @returns {string} Revision XML, or '' for comments and unknown types
 *   (those markers are simply removed from the document)
 */
function buildRevisionXml(marker) {
  switch (marker.type) {
    case 'insert':
      return createInsertionXml(marker.content, marker.author);
    case 'delete':
      return createDeletionXml(marker.content, marker.author);
    case 'substitute':
      // Substitution = deletion followed by insertion.
      return (
        createDeletionXml(marker.content, marker.author) +
        createInsertionXml(marker.replacement, marker.author)
      );
    default:
      return '';
  }
}

/**
 * Post-process a DOCX file to convert `{{TC_n}}` markers to track changes.
 *
 * Reads `word/document.xml` from the archive, replaces each marker with the
 * matching `<w:ins>`/`<w:del>` markup and writes the archive to
 * `outputPath`. If `word/settings.xml` exists and has no `w:trackRevisions`
 * element, one is added.
 *
 * Revision markup is a sibling of text runs, not text content, so the run
 * that contains a marker is closed before the revision and a fresh run is
 * opened after it. The reopened run does not repeat the run properties of
 * the run that was split.
 *
 * Errors while processing are caught and reported through the return value.
 *
 * @param {string} docxPath - Path to DOCX file
 * @param {Array} markers - Markers from prepareForTrackChanges
 * @param {string} outputPath - Output path
 * @returns {Promise<{success: boolean, message: string}>}
 */
export async function applyTrackChangesToDocx(docxPath, markers, outputPath) {
  const RUN_CLOSE = '</w:t></w:r>';
  const RUN_OPEN = '<w:r><w:t xml:space="preserve">';
  // Run boundary that may interrupt a marker split across two plain runs.
  const SPLIT_BOUNDARY = '(?:</w:t></w:r><w:r><w:t>)?';

  const escapeRegExp = (ch) => ch.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const wrapInOwnRuns = (revisionXml) =>
    (revisionXml === '' ? '' : `${RUN_CLOSE}${revisionXml}${RUN_OPEN}`);

  if (!fs.existsSync(docxPath)) {
    return { success: false, message: `File not found: ${docxPath}` };
  }

  try {
    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');

    if (!documentEntry) {
      return { success: false, message: 'Invalid DOCX: no document.xml' };
    }

    let documentXml = zip.readAsText(documentEntry);

    // Enable track changes in settings
    const settingsEntry = zip.getEntry('word/settings.xml');
    if (settingsEntry) {
      const settingsXml = zip.readAsText(settingsEntry);
      if (!settingsXml.includes('w:trackRevisions')) {
        const updatedSettings = settingsXml.replace(
          '</w:settings>',
          '<w:trackRevisions/></w:settings>'
        );
        zip.updateFile('word/settings.xml', Buffer.from(updatedSettings, 'utf-8'));
      }
    }

    for (const marker of markers) {
      const markerText = `{{TC_${marker.id}}}`;

      if (documentXml.includes(markerText)) {
        const replacement = wrapInOwnRuns(buildRevisionXml(marker));
        // Function replacer: the revision text may contain "$&", "$1" or
        // "$$", which a string replacement would expand.
        documentXml = documentXml.replace(markerText, () => replacement);
        continue;
      }

      // The marker may be split across adjacent runs; allow a run boundary
      // between any two of its characters.
      const splitPattern = [...markerText].map(escapeRegExp).join(SPLIT_BOUNDARY);
      const splitRegex = new RegExp(splitPattern, 'g');

      if (documentXml.search(splitRegex) !== -1) {
        const replacement = wrapInOwnRuns(buildRevisionXml(marker));
        documentXml = documentXml.replace(splitRegex, () => replacement);
      }
    }

    // Clean up empty runs created by replacements
    documentXml = documentXml.replace(
      /<w:r><w:t(?: xml:space="preserve")?><\/w:t><\/w:r>/g,
      ''
    );

    zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
    zip.writeZip(outputPath);

    return { success: true, message: `Created ${outputPath} with track changes` };
  } catch (err) {
    return { success: false, message: err.message };
  }
}
211
+
212
/**
 * Build DOCX with track changes visible.
 * This is the main entry point for the audit export feature.
 *
 * Steps: replace the CriticMarkup annotations in the markdown with
 * placeholders, convert that markdown to DOCX with pandoc, then turn the
 * placeholders into track-changes markup while writing `outputPath`.
 *
 * Two temporary files are created next to the output: `<base>.tmp.md` and
 * `<base>.tmp.docx`, where `<base>` is `outputPath` without a trailing
 * `.docx` (any letter case). They never coincide with `outputPath` and are
 * removed on every exit path.
 *
 * @param {string} markdownPath - Path to markdown with annotations
 * @param {string} outputPath - Output DOCX path
 * @param {Object} options
 * @param {string} [options.author='Reviewer'] - Author assigned to markers
 *   that have no author or carry the default 'Reviewer'
 * @returns {Promise<{success: boolean, message: string, stats: Object}>}
 *   `stats` is null when the input is missing or pandoc fails
 */
export async function buildWithTrackChanges(markdownPath, outputPath, options = {}) {
  const { author = 'Reviewer' } = options;

  if (!fs.existsSync(markdownPath)) {
    return { success: false, message: `File not found: ${markdownPath}`, stats: null };
  }

  const text = fs.readFileSync(markdownPath, 'utf-8');
  const { text: preparedText, markers } = prepareForTrackChanges(text);

  // Assign author to markers that don't have one
  for (const marker of markers) {
    if (!marker.author || marker.author === 'Reviewer') {
      marker.author = author;
    }
  }

  // Strip only a trailing extension so the temp names always differ from
  // outputPath, even when it has no ".docx" suffix.
  const basePath = outputPath.replace(/\.docx$/i, '');
  const tempMd = `${basePath}.tmp.md`;
  const tempDocx = `${basePath}.tmp.docx`;

  const removeQuietly = (filePath) => {
    try {
      fs.unlinkSync(filePath);
    } catch {
      // Best-effort cleanup: the file may never have been created.
    }
  };

  const { execFileSync } = await import('child_process');

  let result;
  try {
    fs.writeFileSync(tempMd, preparedText, 'utf-8');

    try {
      // Argument array, no shell: paths with quotes, spaces or "$" are
      // passed to pandoc verbatim.
      execFileSync('pandoc', [tempMd, '-o', tempDocx], { stdio: 'pipe' });
    } catch (err) {
      return { success: false, message: `Pandoc failed: ${err.message}`, stats: null };
    }

    result = await applyTrackChangesToDocx(tempDocx, markers, outputPath);
  } finally {
    removeQuietly(tempMd);
    removeQuietly(tempDocx);
  }

  const stats = {
    insertions: markers.filter((m) => m.type === 'insert').length,
    deletions: markers.filter((m) => m.type === 'delete').length,
    substitutions: markers.filter((m) => m.type === 'substitute').length,
    total: markers.length,
  };

  return { ...result, stats };
}