docrev 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/merge.js ADDED
@@ -0,0 +1,365 @@
1
+ /**
2
+ * Multi-reviewer merge utilities
3
+ * Combine feedback from multiple Word documents with conflict detection
4
+ */
5
+
6
+ import * as fs from 'fs';
7
+ import * as path from 'path';
8
+ import { diffWords } from 'diff';
9
+ import { extractFromWord, extractWordComments } from './import.js';
10
+
11
+ /**
12
+ * Represents a change from a reviewer
13
+ * @typedef {Object} ReviewerChange
14
+ * @property {string} reviewer - Reviewer name/identifier
15
+ * @property {string} type - 'insert' | 'delete' | 'replace'
16
+ * @property {number} start - Start position in original text
17
+ * @property {number} end - End position in original text
18
+ * @property {string} oldText - Original text (for delete/replace)
19
+ * @property {string} newText - New text (for insert/replace)
20
+ */
21
+
22
+ /**
23
+ * Represents a conflict between reviewers
24
+ * @typedef {Object} Conflict
25
+ * @property {number} start - Start position in original
26
+ * @property {number} end - End position in original
27
+ * @property {string} original - Original text
28
+ * @property {ReviewerChange[]} changes - Conflicting changes from different reviewers
29
+ */
30
+
31
/**
 * Describe how a reviewer's text differs from the original.
 *
 * The two texts are compared with `diffWords`. Each differing part becomes a
 * change record whose `start`/`end` are character offsets into
 * `originalText`. A removed part immediately followed by an added part is
 * reported as a single `replace`.
 *
 * @param {string} originalText - Original markdown text
 * @param {string} wordText - Text extracted from Word
 * @param {string} reviewer - Reviewer identifier stamped on every change
 * @returns {ReviewerChange[]} Changes in the order the diff produced them
 */
export function extractChanges(originalText, wordText, reviewer) {
  const parts = diffWords(originalText, wordText);
  const found = [];

  // Offset into originalText; only unchanged and removed parts advance it,
  // because added text does not exist in the original.
  let cursor = 0;

  for (let idx = 0; idx < parts.length; idx += 1) {
    const { added, removed, value } = parts[idx];

    if (removed) {
      const follower = parts[idx + 1];
      const isReplacement = Boolean(follower?.added);

      found.push({
        reviewer,
        type: isReplacement ? 'replace' : 'delete',
        start: cursor,
        end: cursor + value.length,
        oldText: value,
        newText: isReplacement ? follower.value : '',
      });

      cursor += value.length;
      if (isReplacement) {
        // The added half of the pair has been consumed as well.
        idx += 1;
      }
    } else if (added) {
      found.push({
        reviewer,
        type: 'insert',
        start: cursor,
        end: cursor,
        oldText: '',
        newText: value,
      });
    } else {
      cursor += value.length;
    }
  }

  return found;
}
92
+
93
/**
 * Report whether two changes touch the same part of the original text,
 * regardless of what each of them changes it to.
 *
 * An insertion occupies the empty range at its `start`, so it only shares a
 * region with a range edit when it falls strictly inside that range, and with
 * another insertion when both sit at the same offset.
 *
 * @param {ReviewerChange} a
 * @param {ReviewerChange} b
 * @returns {boolean}
 */
function sharesRegion(a, b) {
  const aIsInsert = a.type === 'insert';
  const bIsInsert = b.type === 'insert';

  if (aIsInsert && bIsInsert) {
    return a.start === b.start;
  }

  const aEnd = aIsInsert ? a.start : a.end;
  const bEnd = bIsInsert ? b.start : b.end;

  // Half-open ranges overlap when neither ends before the other starts.
  return aEnd > b.start && bEnd > a.start;
}

/**
 * Check if two changes overlap in a way that constitutes a conflict
 *
 * Changes that touch the same region are not a conflict when they are the
 * same edit (same type, old text and new text), e.g. two reviewers making an
 * identical correction.
 *
 * @param {ReviewerChange} a
 * @param {ReviewerChange} b
 * @returns {boolean}
 */
function changesOverlap(a, b) {
  if (!sharesRegion(a, b)) {
    return false;
  }

  if (a.type === 'insert' && b.type === 'insert') {
    return a.newText !== b.newText;
  }

  const sameEdit =
    a.type === b.type && a.oldText === b.oldText && a.newText === b.newText;
  return !sameEdit;
}

/**
 * Detect conflicts between changes from multiple reviewers
 *
 * Changes are sorted by position and swept into groups of changes that touch
 * a common region (directly or through a chain of overlapping changes). A
 * group in which at least one pair disagrees becomes a single conflict that
 * lists every member of the group; all other changes are non-conflicting and
 * exact duplicates among them are collapsed.
 *
 * Keeping the whole group together matters: a change that merely duplicates
 * one side of a dispute, or that overlaps only a later member of the dispute,
 * must not be reported as non-conflicting, because applying it would edit a
 * region that is still unresolved.
 *
 * @param {ReviewerChange[][]} allChanges - Array of change arrays, one per reviewer
 * @returns {{conflicts: Conflict[], nonConflicting: ReviewerChange[]}}
 */
export function detectConflicts(allChanges) {
  // Flatten and sort all changes by position
  const flat = allChanges.flat().sort((a, b) => a.start - b.start || a.end - b.end);

  const conflicts = [];
  const nonConflicting = [];
  const claimed = new Set();

  for (let i = 0; i < flat.length; i++) {
    if (claimed.has(i)) continue;
    claimed.add(i);

    const group = [flat[i]];
    let groupEnd = flat[i].end;
    let disputed = false;

    for (let j = i + 1; j < flat.length; j++) {
      if (claimed.has(j)) continue;

      const other = flat[j];

      // Sorted by start: nothing from here on can reach back into the group.
      if (other.start > groupEnd) break;

      if (!group.some((member) => sharesRegion(member, other))) continue;

      if (!disputed) {
        disputed = group.some((member) => changesOverlap(member, other));
      }
      group.push(other);
      claimed.add(j);
      groupEnd = Math.max(groupEnd, other.end);
    }

    if (disputed) {
      conflicts.push({
        // The group is in ascending start order, so its first member starts first.
        start: group[0].start,
        end: groupEnd,
        original: group[0].oldText || '',
        changes: group,
      });
    } else {
      nonConflicting.push(...group);
    }
  }

  // Deduplicate identical non-conflicting changes (same edit from several reviewers)
  const seen = new Set();
  const dedupedNonConflicting = nonConflicting.filter((change) => {
    const key = `${change.start}:${change.end}:${change.type}:${change.newText}`;
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });

  return { conflicts, nonConflicting: dedupedNonConflicting };
}
192
+
193
/**
 * Apply non-conflicting changes to text
 *
 * Changes are applied from the end of the text towards the start so that the
 * offsets of the changes still to be applied remain valid. The input array is
 * not modified and does not need to be pre-sorted.
 *
 * @param {string} originalText
 * @param {ReviewerChange[]} changes - Non-overlapping changes with offsets into originalText
 * @returns {string} Text with every change applied
 */
export function applyChanges(originalText, changes) {
  // Descending by start; on a tie the longer range goes first. That way a
  // replace/delete starting at offset p is applied before an insertion at p,
  // so the range edit cannot swallow the freshly inserted text.
  const ordered = [...changes].sort((a, b) => b.start - a.start || b.end - a.end);

  let result = originalText;

  for (const change of ordered) {
    const head = result.slice(0, change.start);

    if (change.type === 'insert') {
      result = head + change.newText + result.slice(change.start);
    } else if (change.type === 'delete') {
      result = head + result.slice(change.end);
    } else if (change.type === 'replace') {
      result = head + change.newText + result.slice(change.end);
    }
  }

  return result;
}
217
+
218
/**
 * Apply changes as CriticMarkup annotations
 *
 * Insertions become `{++new++}`, deletions `{--old--}` and replacements
 * `{~~old~>new~~}`. Changes are written from the end of the text towards the
 * start so that the offsets of the remaining changes stay valid. The input
 * array is not modified.
 *
 * @param {string} originalText
 * @param {ReviewerChange[]} changes - Non-overlapping changes with offsets into originalText
 * @returns {string} Text with every change embedded as an annotation
 */
export function applyChangesAsAnnotations(originalText, changes) {
  // Descending by start; on a tie the longer range goes first, so a
  // replace/delete at offset p is annotated before an insertion at p and
  // cannot overwrite the insertion's markup.
  const ordered = [...changes].sort((a, b) => b.start - a.start || b.end - a.end);

  let result = originalText;

  for (const change of ordered) {
    const head = result.slice(0, change.start);

    if (change.type === 'insert') {
      result = `${head}{++${change.newText}++}${result.slice(change.start)}`;
    } else if (change.type === 'delete') {
      result = `${head}{--${change.oldText}--}${result.slice(change.end)}`;
    } else if (change.type === 'replace') {
      result = `${head}{~~${change.oldText}~>${change.newText}~~}${result.slice(change.end)}`;
    }
  }

  return result;
}
246
+
247
/**
 * Render a conflict as a short, human-readable report.
 *
 * The report shows up to 30 characters of the original text on either side
 * of the conflicting region, the disputed original text (or a placeholder
 * when the region is empty), and one numbered line per competing change.
 *
 * @param {Conflict} conflict
 * @param {string} originalText - Text the conflict offsets refer to
 * @returns {string} Multi-line description
 */
export function formatConflict(conflict, originalText) {
  const CONTEXT_CHARS = 30;
  const { start, end, changes } = conflict;

  const leading = originalText.slice(Math.max(0, start - CONTEXT_CHARS), start);
  const disputed = originalText.slice(start, end);
  const trailing = originalText.slice(
    end,
    Math.min(originalText.length, end + CONTEXT_CHARS),
  );

  const describe = (change) => {
    switch (change.type) {
      case 'insert':
        return `Insert: "${change.newText}"`;
      case 'delete':
        return `Delete: "${change.oldText}"`;
      default:
        return `Replace "${change.oldText}" → "${change.newText}"`;
    }
  };

  const optionLines = changes.map(
    (change, idx) => ` ${idx + 1}. [${change.reviewer}] ${describe(change)}`,
  );

  return [
    `Context: ...${leading}[CONFLICT]${trailing}...`,
    `Original: "${disputed || '(insertion point)'}"`,
    '',
    'Options:',
    ...optionLines,
  ].join('\n');
}
281
+
282
/**
 * Merge multiple Word documents against an original
 *
 * Reads the original file, diffs the text extracted from each reviewer's
 * document against it, and splits the resulting changes into conflicting and
 * non-conflicting ones. Non-conflicting changes are written into the text as
 * CriticMarkup annotations; conflicts are returned unapplied. Every comment
 * that could be extracted is appended at the end of the merged text.
 *
 * Comment extraction is best effort: a failure for one document does not
 * abort the merge, but it is reported in `warnings` instead of being dropped
 * silently.
 *
 * @param {string} originalPath - Path to original markdown
 * @param {Array<{path: string, name: string}>} reviewerDocs - Reviewer Word docs
 * @param {Object} [options] - Accepted for compatibility; no option is read at present
 * @returns {Promise<{merged: string, conflicts: Conflict[], stats: Object, originalText: string, warnings: string[]}>}
 * @throws {Error} If the original file or any reviewer file does not exist
 */
export async function mergeReviewerDocs(originalPath, reviewerDocs, options = {}) {
  if (!fs.existsSync(originalPath)) {
    throw new Error(`Original file not found: ${originalPath}`);
  }

  const originalText = fs.readFileSync(originalPath, 'utf-8');

  // Extract changes from each reviewer
  const allChanges = [];
  const allComments = [];
  const warnings = [];

  for (const doc of reviewerDocs) {
    if (!fs.existsSync(doc.path)) {
      throw new Error(`Reviewer file not found: ${doc.path}`);
    }

    const { text: wordText } = await extractFromWord(doc.path);
    allChanges.push(extractChanges(originalText, wordText, doc.name));

    // Also extract comments; tag each with the reviewer it came from
    try {
      const comments = await extractWordComments(doc.path);
      allComments.push(...comments.map((c) => ({ ...c, reviewer: doc.name })));
    } catch (err) {
      // Keep merging without this document's comments, but leave a trace.
      const reason = err instanceof Error ? err.message : String(err);
      warnings.push(`Could not extract comments from ${doc.path} (${doc.name}): ${reason}`);
    }
  }

  // Detect conflicts
  const { conflicts, nonConflicting } = detectConflicts(allChanges);

  // Apply non-conflicting changes as annotations
  let merged = applyChangesAsAnnotations(originalText, nonConflicting);

  // Append comments at the end for now (position tracking is complex)
  for (const comment of allComments) {
    merged += `\n{>>${comment.reviewer}: ${comment.text}<<}`;
  }

  const stats = {
    reviewers: reviewerDocs.length,
    totalChanges: allChanges.flat().length,
    nonConflicting: nonConflicting.length,
    conflicts: conflicts.length,
    comments: allComments.length,
  };

  return { merged, conflicts, stats, originalText, warnings };
}
342
+
343
/**
 * Record the chosen resolution of a conflict in the merged text.
 *
 * The selected change is rendered as a CriticMarkup annotation and appended
 * to the text inside an HTML comment. The annotation is not placed at the
 * conflict's position, and `originalText` is accepted but not consulted.
 *
 * @param {string} text - Current merged text
 * @param {Conflict} conflict
 * @param {number} choice - Index of chosen change (0-based)
 * @param {string} originalText - Original text for position reference
 * @returns {string} `text` followed by a line noting the resolution
 */
export function resolveConflict(text, conflict, choice, originalText) {
  const picked = conflict.changes[choice];

  let markup;
  switch (picked.type) {
    case 'insert':
      markup = `{++${picked.newText}++}`;
      break;
    case 'delete':
      markup = `{--${picked.oldText}--}`;
      break;
    default:
      markup = `{~~${picked.oldText}~>${picked.newText}~~}`;
  }

  const note = `\n<!-- Resolved: ${markup} -->`;
  return text + note;
}
@@ -0,0 +1,73 @@
1
/**
 * Scientific and academic vocabulary that standard dictionaries tend to miss.
 * The list is kept as lower-case words grouped by subject area and exposed
 * as a single Set for membership checks.
 */

const biologyAndEcology = [
  'abiotic', 'biotic', 'biogeographic', 'biogeography', 'phenotypic', 'phenotype',
  'anthropogenic', 'propagule', 'propagules', 'herbivory', 'herbivore', 'herbivores',
  'ruderal', 'ruderals', 'refugia', 'refugium', 'hotspot', 'hotspots',
  'biodiversity', 'ecosystem', 'ecosystems', 'ecotype', 'ecotypes',
  'taxonomic', 'phylogenetic', 'phylogeny', 'morphological', 'morphology',
  'allometric', 'allometry', 'biomass', 'biome', 'biomes',
  'invasibility', 'invasive', 'invasives', 'neophyte', 'neophytes',
  'archaeophyte', 'archaeophytes', 'naturalisation', 'naturalization',
  'colonisation', 'colonization', 'dispersal', 'fecundity',
  'phenology', 'phenological', 'ontogeny', 'ontogenetic',
  'mesic', 'xeric', 'hydric', 'riparian', 'riverine',
  'subalpine', 'alpine', 'boreal', 'temperate', 'tropical',
  'heathland', 'heathlands', 'scrubland', 'scrublands', 'grassland', 'grasslands',
  'broadleaf', 'broadleaved', 'coniferous', 'deciduous', 'evergreen',
  'autochory', 'autochorous', 'zoochory', 'zoochorous',
  'anemochory', 'anemochorous', 'hydrochory', 'hydrochorous',
  'anthropochory', 'anthropochorous', 'hemerochor', 'hemerochorist',
  'helophyte', 'helophytes', 'hydrophyte', 'hydrophytes',
  'therophyte', 'therophytes', 'geophyte', 'geophytes',
  'chamaephyte', 'chamaephytes', 'phanerophyte', 'phanerophytes',
];

const statistics = [
  'logit', 'logistic', 'probit', 'frequentist', 'bayesian',
  'overdispersion', 'underdispersion', 'heteroscedasticity', 'homoscedasticity',
  'multicollinearity', 'autocorrelation', 'covariate', 'covariates',
  'parameterization', 'parameterisation', 'reparameterization',
  'bootstrapping', 'resampling', 'imputation', 'interpolation',
  'standardized', 'standardised', 'normalized', 'normalised',
  'discretized', 'discretised', 'categorized', 'categorised',
];

const compoundWords = [
  'overrepresentation', 'underrepresentation', 'overrepresented', 'underrepresented',
  'outcompete', 'outcompetes', 'outcompeted', 'outcompeting',
  'subdataset', 'subgroup', 'subgroups', 'subtype', 'subtypes',
  'dataset', 'datasets', 'datapoint', 'datapoints',
  'spatiotemporal', 'spatio', 'geospatial',
  'timestep', 'timesteps', 'timeframe', 'timeframes',
  'warmup', 'backend', 'frontend', 'workflow', 'workflows',
  'fallback', 'fallbacks', 'tradeoff', 'tradeoffs',
];

const academicWriting = [
  'interpretability', 'reproducibility', 'replicability',
  'hypothesise', 'hypothesised', 'hypothesize', 'hypothesized',
  'analyse', 'analysed', 'analyze', 'analyzed',
  'prioritise', 'prioritised', 'prioritize', 'prioritized',
  'characterise', 'characterised', 'characterize', 'characterized',
  'generalise', 'generalised', 'generalize', 'generalized',
  'parameterise', 'parameterised', 'parameterize', 'parameterized',
  'visualise', 'visualised', 'visualize', 'visualized',
  'modelling', 'modeling', 'modelled', 'modeled',
];

const geography = [
  'unvegetated', 'landform', 'landforms', 'topographic', 'topography',
  'elevational', 'latitudinal', 'longitudinal', 'altitudinal',
];

const technical = [
  'doi', 'dois', 'pdf', 'pdfs', 'csv', 'xlsx',
  'pandoc', 'markdown', 'bibtex', 'crossref',
];

const rPackagesAndTools = [
  'brms', 'cmdstanr', 'rstanarm', 'lme', 'glmm', 'glmer', 'lmer',
  'ggplot', 'dplyr', 'tidyr', 'tidyverse', 'rmarkdown',
];

const commonInPapers = [
  'foci', 'et', 'al', 'cf', 'eg', 'ie', 'vs',
];

export const scientificWords = new Set([
  ...biologyAndEcology,
  ...statistics,
  ...compoundWords,
  ...academicWriting,
  ...geography,
  ...technical,
  ...rPackagesAndTools,
  ...commonInPapers,
]);