docrev 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js ADDED
@@ -0,0 +1,290 @@
1
+ /**
2
+ * Grammar checker module with custom dictionary support
3
+ *
4
+ * Features:
5
+ * - Common grammar/style issues detection
6
+ * - Custom dictionary for project-specific terms
7
+ * - Learn mode to add words to dictionary
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+
13
+ // File name of the custom dictionary; joined onto a directory by loadDictionary/saveDictionary
14
+ const DEFAULT_DICT_NAME = '.rev-dictionary';
15
+
/**
 * Common grammar/style rules.
 *
 * Each rule carries:
 * - `id`: stable identifier reported with every issue
 * - `pattern`: global (`g`) regular expression, run against one line at a time
 * - `message`: human-readable explanation
 * - `severity`: 'error' | 'warning' | 'info'
 * - `check` (optional): predicate on the matched text; the match is only
 *   reported when it returns true
 */
const GRAMMAR_RULES = [
  {
    id: 'passive-voice',
    pattern: /\b(is|are|was|were|be|been|being)\s+(being\s+)?\w+ed\b/gi,
    message: 'Possible passive voice',
    severity: 'info',
  },
  {
    id: 'weasel-words',
    pattern: /\b(very|really|quite|extremely|fairly|rather|somewhat|just)\b/gi,
    message: 'Weasel word - consider removing or being more specific',
    severity: 'warning',
  },
  {
    id: 'weak-start',
    pattern: /^\s*(There (is|are|was|were)|It is)\b/gmi,
    message: 'Weak sentence start - consider restructuring',
    severity: 'info',
  },
  {
    id: 'duplicate-words',
    pattern: /\b(\w+)\s+\1\b/gi,
    message: 'Duplicate word',
    severity: 'error',
  },
  {
    id: 'split-infinitive',
    // "to <adverb> <verb>". The negative lookahead excludes common words that
    // end in "ly" but are not adverbs ("to apply pressure", "to supply water",
    // "to family members"), which would otherwise be reported as split
    // infinitives. Capture group 1 is still the "-ly" word.
    pattern: /\bto\s+(?!(?:apply|reapply|reply|supply|resupply|comply|rely|imply|multiply|fly|ally|rally|tally|bully|family|early|assembly)\s)(\w+ly)\s+\w+\b/gi,
    message: 'Split infinitive',
    severity: 'info',
  },
  {
    id: 'sentence-length',
    // Matches every run of text up to and including a sentence terminator;
    // `check` then keeps only the runs with more than 40 words.
    pattern: /[^.!?]*[.!?]/g,
    check: (match) => {
      const words = match.trim().split(/\s+/).length;
      return words > 40;
    },
    message: 'Long sentence (>40 words) - consider breaking up',
    severity: 'warning',
  },
  {
    id: 'cliches',
    pattern: /\b(at the end of the day|in terms of|it goes without saying|needless to say|as a matter of fact|first and foremost|last but not least)\b/gi,
    message: 'Cliche - consider rephrasing',
    severity: 'warning',
  },
  {
    id: 'hedging',
    pattern: /\b(seems to|appears to|tends to|might|may|could possibly|would seem)\b/gi,
    message: 'Hedging language - be more direct if appropriate',
    severity: 'info',
  },
  {
    id: 'redundancy',
    pattern: /\b(basic fundamentals|end result|free gift|future plans|past history|completely unique|absolutely essential|close proximity|each and every|first began|true fact|advance planning|final outcome)\b/gi,
    message: 'Redundant phrase',
    severity: 'warning',
  },
];
79
+
/**
 * Scientific writing specific rules.
 *
 * Same shape as the general rules: `id`, a global (`g`) `pattern`,
 * a `message` and a `severity`.
 */
const SCIENTIFIC_RULES = [
  {
    id: 'first-person',
    // "I" is matched case-sensitively: with the `i` flag the lowercase "i" in
    // "i.e." or in "(i)" list markers was reported as a pronoun. The other
    // pronouns stay case-insensitive via explicit character classes.
    pattern: /\b(I|[Ww][Ee]|[Mm][Yy]|[Oo][Uu][Rr])\b/g,
    message: 'First person pronoun - check if appropriate for your journal',
    severity: 'info',
  },
  {
    id: 'significant',
    // The lookahead lists statistical contexts that make "significant" fine.
    // `p\b` requires a standalone "p" ("p < 0.05", "p-value"); without the
    // boundary every following word starting with "p" ("significant problem")
    // silently suppressed the warning. The remaining alternatives are
    // deliberately prefixes so plurals and inflections also count.
    pattern: /\bsignificant(ly)?\b(?!\s+(p\b|α|difference|effect|increase|decrease|correlation))/gi,
    message: '"Significant" without statistical context - clarify or use different word',
    severity: 'warning',
  },
  {
    id: 'prove',
    pattern: /\b(prove[ds]?|proof)\b/gi,
    message: 'Avoid "prove" in science - use "demonstrate", "show", "suggest"',
    severity: 'warning',
  },
  {
    id: 'obviously',
    pattern: /\b(obviously|clearly|of course)\b/gi,
    message: 'If obvious, no need to say so; if not obvious, this doesn\'t help',
    severity: 'warning',
  },
];
109
+
/**
 * Read the custom dictionary stored in a directory.
 *
 * Entries are trimmed and lower-cased; blank lines and lines starting
 * with `#` are ignored. A missing dictionary file yields an empty set.
 *
 * @param {string} directory - Directory to search for dictionary
 * @returns {Set<string>} Set of custom words
 */
export function loadDictionary(directory = '.') {
  const dictionaryFile = path.join(directory, DEFAULT_DICT_NAME);

  if (!fs.existsSync(dictionaryFile)) {
    return new Set();
  }

  const entries = fs
    .readFileSync(dictionaryFile, 'utf-8')
    .split('\n')
    .map((rawLine) => rawLine.trim().toLowerCase())
    .filter((entry) => entry !== '' && !entry.startsWith('#'));

  return new Set(entries);
}
133
+
/**
 * Write the custom dictionary file for a directory.
 *
 * The file starts with a three-line `#` comment header, followed by the
 * words in default sort order, one per line, and ends with a newline.
 *
 * @param {Set<string>} words - Set of words
 * @param {string} directory - Directory to save dictionary
 */
export function saveDictionary(words, directory = '.') {
  const headerLines = [
    '# Custom dictionary for docrev',
    '# Add one word per line',
    '# Lines starting with # are comments',
  ];
  const sortedWords = Array.from(words).sort();
  const fileText = `${headerLines.join('\n')}\n${sortedWords.join('\n')}\n`;

  fs.writeFileSync(path.join(directory, DEFAULT_DICT_NAME), fileText, 'utf-8');
}
150
+
/**
 * Add word to custom dictionary.
 *
 * The word is trimmed and lower-cased before it is stored. Words that the
 * dictionary file format cannot hold are rejected without touching the file.
 *
 * @param {string} word - Word to add
 * @param {string} directory - Directory containing dictionary
 * @returns {boolean} True if word was added; false if it was already present
 *   or cannot be stored (empty, starts with `#`, or contains a line break)
 */
export function addToDictionary(word, directory = '.') {
  const normalizedWord = word.trim().toLowerCase();

  // loadDictionary skips blank lines and lines starting with '#', and reads
  // one entry per line. Such a word would be written and reported as added,
  // yet never come back on the next load.
  const isStorable =
    normalizedWord !== '' &&
    !normalizedWord.startsWith('#') &&
    !/[\r\n]/.test(normalizedWord);
  if (!isStorable) {
    return false;
  }

  const words = loadDictionary(directory);
  if (words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionary(words, directory);
  return true;
}
169
+
/**
 * Delete a word from the custom dictionary.
 *
 * The word is trimmed and lower-cased before lookup. The dictionary file
 * is only rewritten when the word was actually present.
 *
 * @param {string} word - Word to remove
 * @param {string} directory - Directory containing dictionary
 * @returns {boolean} True if word was removed
 */
export function removeFromDictionary(word, directory = '.') {
  const dictionary = loadDictionary(directory);
  const target = word.trim().toLowerCase();

  // Set#delete returns true only when the entry existed.
  const wasPresent = dictionary.delete(target);
  if (wasPresent) {
    saveDictionary(dictionary, directory);
  }
  return wasPresent;
}
188
+
/**
 * Find the closing line of a leading YAML frontmatter block.
 * @param {string[]} lines - Document split into lines
 * @returns {number} Index of the closing `---`/`...` line, or -1 when the
 *   document does not start with a terminated frontmatter block
 */
function findFrontmatterEnd(lines) {
  if (lines[0].trim() !== '---') {
    return -1;
  }
  for (let i = 1; i < lines.length; i++) {
    const trimmed = lines[i].trim();
    if (trimmed === '---' || trimmed === '...') {
      return i;
    }
  }
  // No closing delimiter: treat the leading '---' as an ordinary line so the
  // rest of the document is still checked.
  return -1;
}

/**
 * Check text for grammar/style issues.
 *
 * The text is checked line by line. Leading YAML frontmatter, fenced code
 * blocks (delimiters and content), lines starting with `---`, and very short
 * lines that begin with a markdown marker are skipped. Matches whose
 * lower-cased text is in the custom dictionary are not reported. The
 * dictionary is loaded from `directory` on every call.
 *
 * @param {string} text - Text to check
 * @param {object} options - Options
 * @param {boolean} options.scientific - Include scientific writing rules (default true)
 * @param {string} options.directory - Directory for custom dictionary (default '.')
 * @returns {Array<{rule: string, message: string, severity: string, line: number, column: number, match: string, context: string}>}
 *   Issues in document order; `line` and `column` are 1-based, `context` is
 *   the trimmed source line.
 */
export function checkGrammar(text, options = {}) {
  const { scientific = true, directory = '.' } = options;
  const customDict = loadDictionary(directory);
  const issues = [];

  // Get all rules
  const rules = scientific ? [...GRAMMAR_RULES, ...SCIENTIFIC_RULES] : GRAMMAR_RULES;

  // Split into lines for line number tracking
  const lines = text.split('\n');
  const frontmatterEnd = findFrontmatterEnd(lines);
  let inCodeFence = false;

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    const trimmed = line.trim();

    // Skip the whole YAML frontmatter block, not just its delimiters
    if (lineNum <= frontmatterEnd) {
      continue;
    }

    // A ``` line opens or closes a fenced code block; the fence line and
    // everything inside the fence are skipped.
    if (trimmed.startsWith('```')) {
      inCodeFence = !inCodeFence;
      continue;
    }
    if (inCodeFence) {
      continue;
    }

    // Skip remaining '---' lines (horizontal rules, stray delimiters)
    if (trimmed.startsWith('---')) {
      continue;
    }

    // Skip lines that are just markdown syntax
    if (/^[#\-*>|]/.test(trimmed) && trimmed.length < 5) {
      continue;
    }

    for (const rule of rules) {
      // matchAll iterates over an internal copy of the pattern, so the shared
      // rule regex keeps lastIndex 0 and no per-line RegExp is needed.
      for (const match of line.matchAll(rule.pattern)) {
        const matchedText = match[0];

        // Check if rule has additional check function
        if (rule.check && !rule.check(matchedText)) {
          continue;
        }

        // Skip if word is in custom dictionary
        if (customDict.has(matchedText.toLowerCase())) {
          continue;
        }

        issues.push({
          rule: rule.id,
          message: rule.message,
          severity: rule.severity,
          line: lineNum + 1,
          column: match.index + 1,
          match: matchedText,
          context: trimmed,
        });
      }
    }
  }

  return issues;
}
252
+
/**
 * Aggregate a list of issues into counts.
 *
 * Issues are tallied per severity ('error', 'warning', everything else as
 * info) and per rule id.
 *
 * @param {Array} issues - List of issues from checkGrammar
 * @returns {object} Summary stats: `total`, `errors`, `warnings`, `info`, `byRule`
 */
export function getGrammarSummary(issues) {
  let errors = 0;
  let warnings = 0;
  let info = 0;
  const byRule = {};

  for (const { severity, rule } of issues) {
    switch (severity) {
      case 'error':
        errors += 1;
        break;
      case 'warning':
        warnings += 1;
        break;
      default:
        info += 1;
    }

    byRule[rule] = (byRule[rule] || 0) + 1;
  }

  return { total: issues.length, errors, warnings, info, byRule };
}
277
+
/**
 * Describe the rules that a grammar check would apply.
 *
 * Only the descriptive fields are exposed; patterns and check functions
 * are left out.
 *
 * @param {boolean} scientific - Include scientific rules
 * @returns {Array<{id: string, message: string, severity: string}>}
 */
export function listRules(scientific = true) {
  const activeRules = scientific
    ? GRAMMAR_RULES.concat(SCIENTIFIC_RULES)
    : GRAMMAR_RULES;

  return activeRules.map(({ id, message, severity }) => ({ id, message, severity }));
}