docrev 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js ADDED
@@ -0,0 +1,290 @@
1
+ /**
2
+ * Grammar checker module with custom dictionary support
3
+ *
4
+ * Features:
5
+ * - Common grammar/style issues detection
6
+ * - Custom dictionary for project-specific terms
7
+ * - Learn mode to add words to dictionary
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+
13
// Name of the per-project custom dictionary file, resolved relative to the
// directory passed to the dictionary helpers below.
const DEFAULT_DICT_NAME = '.rev-dictionary';
15
+
16
/**
 * Common grammar/style rules.
 *
 * Each rule has:
 * - id: stable identifier reported in issues
 * - pattern: RegExp applied per line by checkGrammar (which creates a fresh
 *   copy, so the shared /g lastIndex state is not reused across lines)
 * - message: human-readable description of the issue
 * - severity: 'error' | 'warning' | 'info'
 * - check (optional): predicate over the matched text; a match is discarded
 *   when it returns false
 */
const GRAMMAR_RULES = [
  {
    // "to be" verb + word ending in -ed; heuristic, misses irregular participles
    id: 'passive-voice',
    pattern: /\b(is|are|was|were|be|been|being)\s+(being\s+)?\w+ed\b/gi,
    message: 'Possible passive voice',
    severity: 'info',
  },
  {
    // Intensifier/filler words that usually weaken prose
    id: 'weasel-words',
    pattern: /\b(very|really|quite|extremely|fairly|rather|somewhat|just)\b/gi,
    message: 'Weasel word - consider removing or being more specific',
    severity: 'warning',
  },
  {
    // Expletive constructions at line start ("There is...", "It is...")
    id: 'weak-start',
    pattern: /^\s*(There (is|are|was|were)|It is)\b/gmi,
    message: 'Weak sentence start - consider restructuring',
    severity: 'info',
  },
  {
    // Same word repeated back-to-back ("the the"); backreference is
    // case-insensitive because of the /i flag
    id: 'duplicate-words',
    pattern: /\b(\w+)\s+\1\b/gi,
    message: 'Duplicate word',
    severity: 'error',
  },
  {
    // "to" + -ly adverb + verb ("to quickly run")
    id: 'split-infinitive',
    pattern: /\bto\s+(\w+ly)\s+\w+\b/gi,
    message: 'Split infinitive',
    severity: 'info',
  },
  {
    // Matches whole sentences; `check` keeps only those over 40 words.
    // NOTE(review): checkGrammar runs per line, so a sentence spanning
    // multiple lines is measured per fragment — confirm this is acceptable.
    id: 'sentence-length',
    pattern: /[^.!?]*[.!?]/g,
    check: (match) => {
      const words = match.trim().split(/\s+/).length;
      return words > 40;
    },
    message: 'Long sentence (>40 words) - consider breaking up',
    severity: 'warning',
  },
  {
    // Fixed list of common cliche phrases
    id: 'cliches',
    pattern: /\b(at the end of the day|in terms of|it goes without saying|needless to say|as a matter of fact|first and foremost|last but not least)\b/gi,
    message: 'Cliche - consider rephrasing',
    severity: 'warning',
  },
  {
    // Tentative/indirect phrasing
    id: 'hedging',
    pattern: /\b(seems to|appears to|tends to|might|may|could possibly|would seem)\b/gi,
    message: 'Hedging language - be more direct if appropriate',
    severity: 'info',
  },
  {
    // Fixed list of common redundant pairs ("end result", "free gift", ...)
    id: 'redundancy',
    pattern: /\b(basic fundamentals|end result|free gift|future plans|past history|completely unique|absolutely essential|close proximity|each and every|first began|true fact|advance planning|final outcome)\b/gi,
    message: 'Redundant phrase',
    severity: 'warning',
  },
];
79
+
80
/**
 * Scientific writing specific rules.
 * Appended to GRAMMAR_RULES when checkGrammar's `scientific` option is true.
 * Same shape as GRAMMAR_RULES entries.
 */
const SCIENTIFIC_RULES = [
  {
    id: 'first-person',
    pattern: /\b(I|we|my|our)\b/gi,
    message: 'First person pronoun - check if appropriate for your journal',
    severity: 'info',
  },
  {
    // "significant(ly)" NOT followed by a statistical qualifier (P, p, α,
    // difference, effect, ...) — negative lookahead
    id: 'significant',
    pattern: /\bsignificant(ly)?\b(?!\s+(P|p|α|difference|effect|increase|decrease|correlation))/gi,
    message: '"Significant" without statistical context - clarify or use different word',
    severity: 'warning',
  },
  {
    id: 'prove',
    pattern: /\b(prove[ds]?|proof)\b/gi,
    message: 'Avoid "prove" in science - use "demonstrate", "show", "suggest"',
    severity: 'warning',
  },
  {
    id: 'obviously',
    pattern: /\b(obviously|clearly|of course)\b/gi,
    message: 'If obvious, no need to say so; if not obvious, this doesn\'t help',
    severity: 'warning',
  },
];
109
+
110
/**
 * Load the custom dictionary from a directory.
 *
 * Reads the `.rev-dictionary` file (one word per line; lines starting with
 * `#` are comments), trimming and lowercasing each entry. Returns an empty
 * set when no dictionary file exists.
 *
 * @param {string} directory - Directory to search for dictionary
 * @returns {Set<string>} Set of custom words
 */
export function loadDictionary(directory = '.') {
  const dictPath = path.join(directory, DEFAULT_DICT_NAME);

  // No dictionary file is a normal state — just report an empty dictionary.
  if (!fs.existsSync(dictPath)) {
    return new Set();
  }

  const entries = fs
    .readFileSync(dictPath, 'utf-8')
    .split('\n')
    .map((raw) => raw.trim().toLowerCase())
    .filter((word) => word !== '' && !word.startsWith('#'));

  return new Set(entries);
}
133
+
134
/**
 * Persist the custom dictionary to `.rev-dictionary` in a directory.
 *
 * Words are written sorted, one per line, below a short comment header;
 * the file always ends with a trailing newline.
 *
 * @param {Set<string>} words - Set of words
 * @param {string} directory - Directory to save dictionary
 */
export function saveDictionary(words, directory = '.') {
  const dictPath = path.join(directory, DEFAULT_DICT_NAME);

  const headerLines = [
    '# Custom dictionary for docrev',
    '# Add one word per line',
    '# Lines starting with # are comments',
  ];
  const body = [...words].sort().join('\n');

  fs.writeFileSync(dictPath, `${headerLines.join('\n')}\n${body}\n`, 'utf-8');
}
150
+
151
/**
 * Add a word to the custom dictionary (load → modify → save).
 *
 * @param {string} word - Word to add (trimmed and lowercased before storing)
 * @param {string} directory - Directory containing dictionary
 * @returns {boolean} True if word was added (not already present)
 */
export function addToDictionary(word, directory = '.') {
  const normalized = word.trim().toLowerCase();
  const words = loadDictionary(directory);

  // Already known — nothing to write.
  if (words.has(normalized)) {
    return false;
  }

  // Set.add returns the set, so the updated dictionary is saved directly.
  saveDictionary(words.add(normalized), directory);
  return true;
}
169
+
170
/**
 * Remove a word from the custom dictionary (load → modify → save).
 *
 * @param {string} word - Word to remove (trimmed and lowercased first)
 * @param {string} directory - Directory containing dictionary
 * @returns {boolean} True if word was removed
 */
export function removeFromDictionary(word, directory = '.') {
  const normalized = word.trim().toLowerCase();
  const words = loadDictionary(directory);

  // Set.delete reports whether the entry was present; only persist on change.
  if (!words.delete(normalized)) {
    return false;
  }

  saveDictionary(words, directory);
  return true;
}
188
+
189
/**
 * Check text for grammar/style issues.
 *
 * Markdown-aware skipping: fenced code blocks (``` ... ```) are excluded
 * entirely, `---` delimiter lines (YAML frontmatter fences / horizontal
 * rules) are skipped, and very short pure-syntax lines are ignored.
 * Matches whose full text appears in the custom dictionary are suppressed.
 *
 * @param {string} text - Text to check
 * @param {object} options - Options
 * @param {boolean} options.scientific - Include scientific writing rules (default true)
 * @param {string} options.directory - Directory for custom dictionary (default '.')
 * @returns {Array<{rule: string, message: string, severity: string, line: number, column: number, match: string, context: string}>}
 */
export function checkGrammar(text, options = {}) {
  const { scientific = true, directory = '.' } = options;
  const customDict = loadDictionary(directory);
  const issues = [];

  // Get all rules
  const rules = scientific ? [...GRAMMAR_RULES, ...SCIENTIFIC_RULES] : GRAMMAR_RULES;

  // Split into lines for line number tracking
  const lines = text.split('\n');

  // FIX: previously only the ``` fence lines themselves were skipped, so
  // content *inside* fenced code blocks was still checked. Track fence
  // state so the whole block is excluded, as the docs promise.
  let inCodeBlock = false;

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    const trimmed = line.trim();

    // Toggle fenced-code-block state; the fence line itself is never checked
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip YAML frontmatter delimiters / horizontal rules
    if (trimmed.startsWith('---')) {
      continue;
    }

    // Skip lines that are just markdown syntax
    if (/^[#\-*>|]/.test(trimmed) && trimmed.length < 5) {
      continue;
    }

    for (const rule of rules) {
      // Fresh RegExp per line: /g patterns carry lastIndex state, so reusing
      // the shared rule.pattern object would silently skip matches.
      const pattern = new RegExp(rule.pattern.source, rule.pattern.flags);
      let match;

      while ((match = pattern.exec(line)) !== null) {
        // Guard against zero-width matches, which would otherwise make
        // exec() loop forever on the same position.
        if (match.index === pattern.lastIndex) {
          pattern.lastIndex += 1;
        }

        // Rules may carry an extra predicate (e.g. sentence-length)
        if (rule.check && !rule.check(match[0])) {
          continue;
        }

        // Skip if the matched text is in the custom dictionary
        if (customDict.has(match[0].toLowerCase())) {
          continue;
        }

        issues.push({
          rule: rule.id,
          message: rule.message,
          severity: rule.severity,
          line: lineNum + 1,       // 1-based line number
          column: match.index + 1, // 1-based column number
          match: match[0],
          context: trimmed,
        });
      }
    }
  }

  return issues;
}
252
+
253
/**
 * Summarize a list of grammar issues.
 *
 * Counts issues by severity ('error' / 'warning'; anything else counts as
 * info) and tallies occurrences per rule id.
 *
 * @param {Array} issues - List of issues from checkGrammar
 * @returns {object} Summary stats: { total, errors, warnings, info, byRule }
 */
export function getGrammarSummary(issues) {
  let errors = 0;
  let warnings = 0;
  let info = 0;
  const byRule = {};

  for (const { severity, rule } of issues) {
    if (severity === 'error') {
      errors += 1;
    } else if (severity === 'warning') {
      warnings += 1;
    } else {
      info += 1;
    }
    byRule[rule] = (byRule[rule] || 0) + 1;
  }

  return { total: issues.length, errors, warnings, info, byRule };
}
277
+
278
/**
 * List available grammar rules.
 *
 * @param {boolean} scientific - Include scientific rules
 * @returns {Array<{id: string, message: string, severity: string}>}
 */
export function listRules(scientific = true) {
  const active = scientific ? GRAMMAR_RULES.concat(SCIENTIFIC_RULES) : GRAMMAR_RULES;
  // Expose only the public fields; patterns and check functions stay internal.
  return active.map(({ id, message, severity }) => ({ id, message, severity }));
}
package/lib/journals.js CHANGED
@@ -130,6 +130,191 @@ export const JOURNAL_PROFILES = {
130
130
  impactStatement: true,
131
131
  },
132
132
  },
133
+
134
+ 'cell': {
135
+ name: 'Cell',
136
+ url: 'https://www.cell.com/cell/authors',
137
+ requirements: {
138
+ wordLimit: { main: 7000, abstract: 150, title: null },
139
+ references: { max: 100, doiRequired: true },
140
+ figures: { max: 7, combinedWithTables: true },
141
+ sections: {
142
+ required: ['Abstract', 'Introduction', 'Results', 'Discussion'],
143
+ graphicalAbstract: true,
144
+ highlights: true,
145
+ },
146
+ authors: { maxInitial: null, correspondingRequired: true },
147
+ },
148
+ },
149
+
150
+ 'current-biology': {
151
+ name: 'Current Biology',
152
+ url: 'https://www.cell.com/current-biology/authors',
153
+ requirements: {
154
+ wordLimit: { main: 5000, abstract: 150, title: 150 },
155
+ references: { max: 60, doiRequired: true },
156
+ figures: { max: 4, combinedWithTables: true },
157
+ sections: {
158
+ required: ['Summary', 'Results', 'Discussion'],
159
+ },
160
+ authors: { maxInitial: null, correspondingRequired: true },
161
+ },
162
+ },
163
+
164
+ 'conservation-biology': {
165
+ name: 'Conservation Biology',
166
+ url: 'https://conbio.onlinelibrary.wiley.com/hub/journal/15231739/homepage/forauthors.html',
167
+ requirements: {
168
+ wordLimit: { main: 7000, abstract: 300, title: null },
169
+ references: { max: null, doiRequired: true },
170
+ figures: { max: 6, combinedWithTables: true },
171
+ sections: {
172
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
173
+ },
174
+ keywords: { min: 5, max: 10 },
175
+ },
176
+ },
177
+
178
+ 'biological-conservation': {
179
+ name: 'Biological Conservation',
180
+ url: 'https://www.elsevier.com/journals/biological-conservation/0006-3207/guide-for-authors',
181
+ requirements: {
182
+ wordLimit: { main: 8000, abstract: 400, title: null },
183
+ references: { max: null, doiRequired: true },
184
+ figures: { max: null, combinedWithTables: false },
185
+ sections: {
186
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
187
+ },
188
+ highlights: true,
189
+ keywords: { min: 4, max: 6 },
190
+ },
191
+ },
192
+
193
+ 'journal-of-ecology': {
194
+ name: 'Journal of Ecology',
195
+ url: 'https://besjournals.onlinelibrary.wiley.com/hub/journal/13652745/author-guidelines',
196
+ requirements: {
197
+ wordLimit: { main: 7000, abstract: 350, title: null },
198
+ references: { max: null, doiRequired: true },
199
+ figures: { max: null, combinedWithTables: false },
200
+ sections: {
201
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
202
+ },
203
+ keywords: { min: 4, max: 8 },
204
+ },
205
+ },
206
+
207
+ 'functional-ecology': {
208
+ name: 'Functional Ecology',
209
+ url: 'https://besjournals.onlinelibrary.wiley.com/hub/journal/13652435/author-guidelines',
210
+ requirements: {
211
+ wordLimit: { main: 7000, abstract: 350, title: null },
212
+ references: { max: null, doiRequired: true },
213
+ figures: { max: null, combinedWithTables: false },
214
+ sections: {
215
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
216
+ },
217
+ keywords: { min: 4, max: 8 },
218
+ },
219
+ },
220
+
221
+ 'global-change-biology': {
222
+ name: 'Global Change Biology',
223
+ url: 'https://onlinelibrary.wiley.com/page/journal/13652486/homepage/forauthors.html',
224
+ requirements: {
225
+ wordLimit: { main: 7000, abstract: 300, title: null },
226
+ references: { max: null, doiRequired: true },
227
+ figures: { max: 8, combinedWithTables: false },
228
+ sections: {
229
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
230
+ },
231
+ keywords: { min: 4, max: 8 },
232
+ },
233
+ },
234
+
235
+ 'oikos': {
236
+ name: 'Oikos',
237
+ url: 'https://nsojournals.onlinelibrary.wiley.com/hub/journal/16000706/author-guidelines',
238
+ requirements: {
239
+ wordLimit: { main: 8000, abstract: 350, title: null },
240
+ references: { max: null, doiRequired: true },
241
+ figures: { max: null, combinedWithTables: false },
242
+ sections: {
243
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
244
+ },
245
+ keywords: { min: 4, max: 10 },
246
+ },
247
+ },
248
+
249
+ 'oecologia': {
250
+ name: 'Oecologia',
251
+ url: 'https://www.springer.com/journal/442/submission-guidelines',
252
+ requirements: {
253
+ wordLimit: { main: 8000, abstract: 250, title: null },
254
+ references: { max: null, doiRequired: true },
255
+ figures: { max: null, combinedWithTables: false },
256
+ sections: {
257
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
258
+ },
259
+ keywords: { min: 4, max: 6 },
260
+ },
261
+ },
262
+
263
+ 'biological-invasions': {
264
+ name: 'Biological Invasions',
265
+ url: 'https://www.springer.com/journal/10530/submission-guidelines',
266
+ requirements: {
267
+ wordLimit: { main: null, abstract: 250, title: null },
268
+ references: { max: null, doiRequired: true },
269
+ figures: { max: null, combinedWithTables: false },
270
+ sections: {
271
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
272
+ },
273
+ keywords: { min: 4, max: 6 },
274
+ },
275
+ },
276
+
277
+ 'diversity-distributions': {
278
+ name: 'Diversity and Distributions',
279
+ url: 'https://onlinelibrary.wiley.com/page/journal/14724642/homepage/forauthors.html',
280
+ requirements: {
281
+ wordLimit: { main: 6000, abstract: 300, title: null },
282
+ references: { max: null, doiRequired: true },
283
+ figures: { max: 6, combinedWithTables: true },
284
+ sections: {
285
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
286
+ },
287
+ keywords: { min: 4, max: 8 },
288
+ biosketch: true,
289
+ },
290
+ },
291
+
292
+ 'neobiota': {
293
+ name: 'NeoBiota',
294
+ url: 'https://neobiota.pensoft.net/about#Author_Guidelines',
295
+ requirements: {
296
+ wordLimit: { main: null, abstract: 350, title: null },
297
+ references: { max: null, doiRequired: true },
298
+ figures: { max: null, combinedWithTables: false },
299
+ sections: {
300
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
301
+ },
302
+ keywords: { min: 6, max: 12 },
303
+ },
304
+ },
305
+
306
+ 'peerj': {
307
+ name: 'PeerJ',
308
+ url: 'https://peerj.com/about/author-instructions/',
309
+ requirements: {
310
+ wordLimit: { main: null, abstract: 500, title: null },
311
+ references: { max: null, doiRequired: false },
312
+ figures: { max: null, combinedWithTables: false },
313
+ sections: {
314
+ required: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
315
+ },
316
+ },
317
+ },
133
318
  };
134
319
 
135
320
  /**
package/lib/merge.js CHANGED
@@ -6,7 +6,7 @@
6
6
  import * as fs from 'fs';
7
7
  import * as path from 'path';
8
8
  import { diffWords } from 'diff';
9
- import { extractTextFromWord, extractCommentsFromWord } from './import.js';
9
+ import { extractFromWord, extractWordComments } from './import.js';
10
10
 
11
11
  /**
12
12
  * Represents a change from a reviewer
@@ -304,13 +304,13 @@ export async function mergeReviewerDocs(originalPath, reviewerDocs, options = {}
304
304
  throw new Error(`Reviewer file not found: ${doc.path}`);
305
305
  }
306
306
 
307
- const wordText = await extractTextFromWord(doc.path);
307
+ const { text: wordText } = await extractFromWord(doc.path);
308
308
  const changes = extractChanges(originalText, wordText, doc.name);
309
309
  allChanges.push(changes);
310
310
 
311
311
  // Also extract comments
312
312
  try {
313
- const comments = await extractCommentsFromWord(doc.path);
313
+ const comments = await extractWordComments(doc.path);
314
314
  allComments.push(...comments.map(c => ({ ...c, reviewer: doc.name })));
315
315
  } catch {
316
316
  // Comments extraction failed, continue without
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Common scientific and academic words not in standard dictionaries
3
+ * These words are accepted by Word's spellchecker
4
+ */
5
+
6
+ export const scientificWords = new Set([
7
+ // Biology/Ecology
8
+ 'abiotic', 'biotic', 'biogeographic', 'biogeography', 'phenotypic', 'phenotype',
9
+ 'anthropogenic', 'propagule', 'propagules', 'herbivory', 'herbivore', 'herbivores',
10
+ 'ruderal', 'ruderals', 'refugia', 'refugium', 'hotspot', 'hotspots',
11
+ 'biodiversity', 'ecosystem', 'ecosystems', 'ecotype', 'ecotypes',
12
+ 'taxonomic', 'phylogenetic', 'phylogeny', 'morphological', 'morphology',
13
+ 'allometric', 'allometry', 'biomass', 'biome', 'biomes',
14
+ 'invasibility', 'invasive', 'invasives', 'neophyte', 'neophytes',
15
+ 'archaeophyte', 'archaeophytes', 'naturalisation', 'naturalization',
16
+ 'colonisation', 'colonization', 'dispersal', 'fecundity',
17
+ 'phenology', 'phenological', 'ontogeny', 'ontogenetic',
18
+ 'mesic', 'xeric', 'hydric', 'riparian', 'riverine',
19
+ 'subalpine', 'alpine', 'boreal', 'temperate', 'tropical',
20
+ 'heathland', 'heathlands', 'scrubland', 'scrublands', 'grassland', 'grasslands',
21
+ 'broadleaf', 'broadleaved', 'coniferous', 'deciduous', 'evergreen',
22
+ 'autochory', 'autochorous', 'zoochory', 'zoochorous',
23
+ 'anemochory', 'anemochorous', 'hydrochory', 'hydrochorous',
24
+ 'anthropochory', 'anthropochorous', 'hemerochor', 'hemerochorist',
25
+ 'helophyte', 'helophytes', 'hydrophyte', 'hydrophytes',
26
+ 'therophyte', 'therophytes', 'geophyte', 'geophytes',
27
+ 'chamaephyte', 'chamaephytes', 'phanerophyte', 'phanerophytes',
28
+
29
+ // Statistics
30
+ 'logit', 'logistic', 'probit', 'frequentist', 'bayesian',
31
+ 'overdispersion', 'underdispersion', 'heteroscedasticity', 'homoscedasticity',
32
+ 'multicollinearity', 'autocorrelation', 'covariate', 'covariates',
33
+ 'parameterization', 'parameterisation', 'reparameterization',
34
+ 'bootstrapping', 'resampling', 'imputation', 'interpolation',
35
+ 'standardized', 'standardised', 'normalized', 'normalised',
36
+ 'discretized', 'discretised', 'categorized', 'categorised',
37
+
38
+ // Compound words
39
+ 'overrepresentation', 'underrepresentation', 'overrepresented', 'underrepresented',
40
+ 'outcompete', 'outcompetes', 'outcompeted', 'outcompeting',
41
+ 'subdataset', 'subgroup', 'subgroups', 'subtype', 'subtypes',
42
+ 'dataset', 'datasets', 'datapoint', 'datapoints',
43
+ 'spatiotemporal', 'spatio', 'geospatial',
44
+ 'timestep', 'timesteps', 'timeframe', 'timeframes',
45
+ 'warmup', 'backend', 'frontend', 'workflow', 'workflows',
46
+ 'fallback', 'fallbacks', 'tradeoff', 'tradeoffs',
47
+
48
+ // Academic writing
49
+ 'interpretability', 'reproducibility', 'replicability',
50
+ 'hypothesise', 'hypothesised', 'hypothesize', 'hypothesized',
51
+ 'analyse', 'analysed', 'analyze', 'analyzed',
52
+ 'prioritise', 'prioritised', 'prioritize', 'prioritized',
53
+ 'characterise', 'characterised', 'characterize', 'characterized',
54
+ 'generalise', 'generalised', 'generalize', 'generalized',
55
+ 'parameterise', 'parameterised', 'parameterize', 'parameterized',
56
+ 'visualise', 'visualised', 'visualize', 'visualized',
57
+ 'modelling', 'modeling', 'modelled', 'modeled',
58
+
59
+ // Geography
60
+ 'unvegetated', 'landform', 'landforms', 'topographic', 'topography',
61
+ 'elevational', 'latitudinal', 'longitudinal', 'altitudinal',
62
+
63
+ // Technical
64
+ 'doi', 'dois', 'pdf', 'pdfs', 'csv', 'xlsx',
65
+ 'pandoc', 'markdown', 'bibtex', 'crossref',
66
+
67
+ // R packages and tools
68
+ 'brms', 'cmdstanr', 'rstanarm', 'lme', 'glmm', 'glmer', 'lmer',
69
+ 'ggplot', 'dplyr', 'tidyr', 'tidyverse', 'rmarkdown',
70
+
71
+ // Common in papers
72
+ 'foci', 'et', 'al', 'cf', 'eg', 'ie', 'vs',
73
+ ]);