docrev 0.6.13 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.js CHANGED
@@ -58,3 +58,22 @@ export function setUserName(name) {
58
58
  export function getConfigPath() {
59
59
  return CONFIG_PATH;
60
60
  }
61
+
62
/**
 * Look up the section list that newly created projects should start with.
 *
 * The value is read from the user configuration returned by loadUserConfig().
 *
 * @returns {string[]|null} The stored `defaultSections` value, or null when
 *   the setting is absent or falsy.
 */
export function getDefaultSections() {
  const { defaultSections } = loadUserConfig();
  if (!defaultSections) {
    return null;
  }
  return defaultSections;
}
70
+
71
/**
 * Store the section list to use for new projects in the user configuration.
 *
 * The current configuration is loaded, its `defaultSections` property is
 * overwritten, and the result is written back with saveUserConfig().
 *
 * @param {string[]} sections - Section names, given without the .md extension
 */
export function setDefaultSections(sections) {
  const updatedConfig = Object.assign(loadUserConfig(), { defaultSections: sections });
  saveUserConfig(updatedConfig);
}
package/lib/crossref.js CHANGED
@@ -9,6 +9,7 @@
9
9
 
10
10
  import * as fs from 'fs';
11
11
  import * as path from 'path';
12
+ import YAML from 'yaml';
12
13
 
13
14
  /**
14
15
  * Patterns for detecting hardcoded references
@@ -27,6 +28,8 @@ const DETECTION_PATTERNS = {
27
28
  // Matches: "1", "1a", "1-3", "1a-c", "1, 2, 3", "1 and 2", "1, 2 and 3", "1, 2, and 3"
28
29
  // Separator: comma/dash/ampersand, optionally followed by "and"
29
30
  // Standalone letters must be followed by separator, punctuation, or word boundary
31
+ // Also handles: "see Figure 1", "(Fig. 1)", "in Figures 1–3"
32
+ // Note: 'gi' flag makes these case-insensitive, so "figure 1" is also matched
30
33
  figure: /\b(Figures?|Figs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
31
34
 
32
35
  table: /\b(Tables?|Tabs?\.?)\s+((?:\d+|S\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+|S\d+)[a-z]?|[a-z]\b))*)/gi,
@@ -34,6 +37,19 @@ const DETECTION_PATTERNS = {
34
37
  equation: /\b(Equations?|Eqs?\.?)\s+((?:\d+)[a-z]?(?:(?:\s*[-–—,&]\s*(?:and\s+)?|\s+and\s+)(?:(?:\d+)[a-z]?|[a-z]\b))*)/gi,
35
38
  };
36
39
 
40
/**
 * Phrases that begin with "Table", "Figure" or "Equation" but are ordinary
 * English rather than references to a numbered item (for example
 * "Table of Contents" or "Figure skating"). Text matching any of these is a
 * known false positive for hardcoded-reference detection.
 *
 * Every pattern carries the `g` and `i` flags, so matching is case-insensitive
 * and each regex object tracks `lastIndex` between `test()`/`exec()` calls.
 */
const EXCLUSION_PATTERNS = [
  // "Table ..." phrases
  /\bTable\s+of\s+Contents?\b/gi,
  // "Figure ..." used as a verb or in a compound noun
  /\bFigure\s+skating\b/gi,
  /\bFigure\s+out\b/gi,
  /\bFigure\s+it\b/gi,
  // No trailing \b here, so "Table settings" is matched as well
  /\bTable\s+setting/gi,
  // "Equation ..." phrases
  /\bEquation\s+editor\b/gi,
];
52
+
37
53
  /**
38
54
  * Pattern for extracting anchors from markdown: {#fig:label}, {#tbl:label}
39
55
  */
@@ -225,8 +241,7 @@ export function buildRegistry(directory, excludeFiles = ['paper.md', 'README.md'
225
241
  const sectionsPath = path.join(directory, 'sections.yaml');
226
242
  if (fs.existsSync(sectionsPath)) {
227
243
  try {
228
- const yaml = require('js-yaml');
229
- const config = yaml.load(fs.readFileSync(sectionsPath, 'utf-8'));
244
+ const config = YAML.parse(fs.readFileSync(sectionsPath, 'utf-8'));
230
245
  if (config.sections) {
231
246
  const sectionOrder = Object.entries(config.sections)
232
247
  .sort((a, b) => (a[1].order ?? 999) - (b[1].order ?? 999))
package/lib/doi.js CHANGED
@@ -4,6 +4,201 @@
4
4
  */
5
5
 
6
6
  import * as fs from 'fs';
7
+ import * as path from 'path';
8
+ import * as os from 'os';
9
+
10
+ // ============================================================================
11
+ // Rate Limiter - Prevents API abuse with exponential backoff
12
+ // ============================================================================
13
+
14
/**
 * Spaces out HTTP requests and retries transient failures with exponential
 * backoff.
 *
 * State is kept per instance, so every request issued through the same
 * limiter shares one delay. The delay grows after a 429 or 5xx response and
 * shrinks again after each successful response.
 */
class RateLimiter {
  /**
   * @param {object} [options]
   * @param {number} [options.minDelay=100] - Minimum gap between requests (ms)
   * @param {number} [options.maxDelay=30000] - Upper bound for the backoff delay (ms)
   * @param {number} [options.maxRetries=3] - Retries after the first attempt; 0 disables retrying
   * @param {number} [options.backoffFactor=2] - Multiplier applied to the delay on each backoff
   */
  constructor(options = {}) {
    // These three keep `||` on purpose: a value of 0 would freeze the backoff
    // (0 * factor stays 0) or divide by zero in onSuccess().
    this.minDelay = options.minDelay || 100;
    this.maxDelay = options.maxDelay || 30000;
    this.backoffFactor = options.backoffFactor || 2;
    // `??` so that an explicit `maxRetries: 0` means "never retry".
    this.maxRetries = options.maxRetries ?? 3;
    this.lastRequestTime = 0;
    this.currentDelay = this.minDelay;
    this.consecutiveErrors = 0;
  }

  /**
   * Convert a Retry-After header value into milliseconds.
   *
   * The header may hold either a number of seconds or an HTTP-date.
   *
   * @param {string|null|undefined} value - Raw header value
   * @returns {number|null} Non-negative delay in ms, or null when the value is
   *   missing or cannot be interpreted
   */
  static parseRetryAfter(value) {
    if (typeof value !== 'string') return null;
    const trimmed = value.trim();
    if (trimmed === '') return null;

    const seconds = Number(trimmed);
    if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000);

    const retryAt = Date.parse(trimmed);
    if (Number.isNaN(retryAt)) return null;
    // A date in the past means "retry now".
    return Math.max(0, retryAt - Date.now());
  }

  /**
   * Sleep until at least `currentDelay` ms have passed since the previous
   * request was started, then record the start time of the next one.
   * @returns {Promise<void>}
   */
  async wait() {
    const now = Date.now();
    const elapsed = now - this.lastRequestTime;
    if (elapsed < this.currentDelay) {
      await new Promise(r => setTimeout(r, this.currentDelay - elapsed));
    }
    this.lastRequestTime = Date.now();
  }

  /**
   * Record a successful response: clear the error streak and shrink the delay
   * by one backoff step, never going below `minDelay`.
   */
  onSuccess() {
    this.consecutiveErrors = 0;
    this.currentDelay = Math.max(this.minDelay, this.currentDelay / this.backoffFactor);
  }

  /**
   * Record a failed request and grow the delay for 429 and 5xx responses.
   *
   * @param {number} statusCode - HTTP status, or 0 for a network-level failure
   * @returns {boolean} true while the error streak is within `maxRetries`
   */
  onError(statusCode) {
    this.consecutiveErrors++;
    if (statusCode === 429 || statusCode >= 500) {
      this.currentDelay = Math.min(this.maxDelay, this.currentDelay * this.backoffFactor);
    }
    return this.consecutiveErrors <= this.maxRetries;
  }

  /**
   * Fetch a URL through the limiter, retrying 429 responses, 5xx responses
   * and network errors up to `maxRetries` times.
   *
   * Every call gets its own retry budget; the error streak carried over from
   * earlier calls only influences the delay, not how often this call retries.
   *
   * @param {string} url
   * @param {object} [options] - Passed straight through to fetch()
   * @returns {Promise<Response>} The first response that is not retried. A 5xx
   *   response is returned as-is once the retries are used up.
   * @throws {Error} The last network error, or 'Max retries exceeded' when
   *   every attempt was answered with 429
   */
  async fetchWithRetry(url, options = {}) {
    let lastError;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      await this.wait();

      let response;
      try {
        response = await fetch(url, options);
      } catch (err) {
        lastError = err;
        this.onError(0);
        continue;
      }

      if (response.status === 429) {
        // onError applies exactly one backoff step ...
        this.onError(429);
        // ... unless the server says how long to wait, in which case its
        // value wins (kept within [minDelay, maxDelay] so the delay always
        // stays a finite number).
        const retryAfterMs = RateLimiter.parseRetryAfter(response.headers.get('Retry-After'));
        if (retryAfterMs !== null) {
          this.currentDelay = Math.min(this.maxDelay, Math.max(this.minDelay, retryAfterMs));
        }
        continue;
      }

      if (response.status >= 500) {
        // Always counted as an error, also on the final attempt, so a failing
        // server never shortens the delay.
        this.onError(response.status);
        if (attempt < this.maxRetries) continue;
        return response;
      }

      this.onSuccess();
      return response;
    }

    throw lastError || new Error('Max retries exceeded');
  }
}

// Shared rate limiters for different APIs
const crossrefLimiter = new RateLimiter({ minDelay: 100, maxDelay: 10000 });
const dataciteLimiter = new RateLimiter({ minDelay: 100, maxDelay: 10000 });
const doiOrgLimiter = new RateLimiter({ minDelay: 200, maxDelay: 15000 });
89
+
90
+ // ============================================================================
91
+ // DOI Cache - Reduces API calls for repeated lookups
92
+ // ============================================================================
93
+
94
// On-disk location of the DOI cache: a JSON file in the user's home directory.
const CACHE_FILE = path.join(os.homedir(), '.rev-doi-cache.json');

// Lifetime of a cache entry in milliseconds (7 days).
const CACHE_TTL = 1000 * 60 * 60 * 24 * 7;

// In-memory copy of the cache; null until it is first loaded or cleared.
let doiCache = null;
98
+
99
/**
 * Return the DOI cache, reading it from CACHE_FILE on first use.
 *
 * The parsed file is only accepted when it has a plain-object `entries`
 * property. Anything else (a missing file, invalid JSON, or JSON of the wrong
 * shape such as `null`, `[]` or `{}`) yields a fresh empty cache, so callers
 * can always index into `cache.entries`.
 *
 * The result is memoised in the module-level `doiCache` variable.
 *
 * @returns {object} Cache object holding an `entries` map
 */
function loadCache() {
  if (doiCache !== null) return doiCache;

  try {
    if (fs.existsSync(CACHE_FILE)) {
      const data = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf-8'));
      const entries = data?.entries;
      const hasEntriesMap =
        entries !== null && typeof entries === 'object' && !Array.isArray(entries);
      if (hasEntriesMap) {
        doiCache = data;
        return doiCache;
      }
    }
  } catch {
    // Best effort: an unreadable or corrupt cache file is treated as empty.
  }

  doiCache = { entries: {}, version: 1 };
  return doiCache;
}
119
+
120
/**
 * Write the in-memory DOI cache to CACHE_FILE as pretty-printed JSON.
 *
 * Does nothing while no cache object exists. Serialisation and write failures
 * are swallowed, so a failed save never interrupts the caller.
 */
function saveCache() {
  if (doiCache) {
    try {
      const serialized = JSON.stringify(doiCache, null, 2);
      fs.writeFileSync(CACHE_FILE, serialized, 'utf-8');
    } catch {
      // Ignore cache write errors
    }
  }
}
132
+
133
/**
 * Look up a previously cached DOI check result.
 *
 * An entry is served only when it is an own property of the cache and its age
 * is at most CACHE_TTL. Entries that are too old, or whose timestamp is
 * missing or not a number, are removed from the in-memory cache and treated
 * as a miss.
 *
 * @param {string} doi
 * @returns {object|null} The cached result, or null on a miss
 */
function getCachedDoi(doi) {
  const cache = loadCache();

  // Own-property check: keys inherited from Object.prototype (e.g.
  // "constructor") must never be mistaken for cache entries.
  if (!Object.hasOwn(cache.entries, doi)) return null;

  const entry = cache.entries[doi];
  if (!entry) return null;

  // Written as `!(age <= TTL)` so that a NaN age (missing or invalid
  // timestamp) counts as expired instead of living forever.
  const age = Date.now() - entry.timestamp;
  if (!(age <= CACHE_TTL)) {
    delete cache.entries[doi];
    return null;
  }

  return entry.result ?? null;
}
152
+
153
/**
 * Store a DOI check result in the cache and write the cache to disk.
 *
 * The entry is stamped with the current time. Once the cache holds more than
 * 1000 entries, the oldest ones are dropped until 800 remain.
 *
 * @param {string} doi
 * @param {object} result
 */
function cacheDoi(doi, result) {
  const { entries: store } = loadCache();
  store[doi] = {
    result,
    timestamp: Date.now(),
  };

  // Limit cache size - remove oldest entries if over 1000
  const byAge = Object.entries(store);
  if (byAge.length > 1000) {
    byAge.sort(([, left], [, right]) => left.timestamp - right.timestamp);
    const surplus = byAge.length - 800;
    for (let i = 0; i < surplus; i++) {
      const [staleKey] = byAge[i];
      delete store[staleKey];
    }
  }

  saveCache();
}
176
+
177
/**
 * Empty the DOI cache: reset the in-memory copy and delete the cache file.
 *
 * Failures while checking for or removing the file are ignored.
 */
export function clearDoiCache() {
  doiCache = { entries: {}, version: 1 };

  try {
    const cacheFileExists = fs.existsSync(CACHE_FILE);
    if (!cacheFileExists) return;
    fs.unlinkSync(CACHE_FILE);
  } catch {
    // Ignore
  }
}
190
+
191
/**
 * Report how many entries the DOI cache holds and where it is stored.
 *
 * @returns {{ size: number, path: string }} Entry count and cache file path
 */
export function getDoiCacheStats() {
  const { entries } = loadCache();
  const size = Object.keys(entries).length;
  return { size, path: CACHE_FILE };
}
7
202
 
8
203
  // Entry types that typically don't have DOIs
9
204
  const NO_DOI_TYPES = new Set([
@@ -150,12 +345,15 @@ export function isValidDoiFormat(doi) {
150
345
  */
151
346
  async function checkDoiDataCite(doi) {
152
347
  try {
153
- const response = await fetch(`https://api.datacite.org/dois/${encodeURIComponent(doi)}`, {
154
- headers: {
155
- 'Accept': 'application/vnd.api+json',
156
- 'User-Agent': 'rev-cli/0.2.0',
157
- },
158
- });
348
+ const response = await dataciteLimiter.fetchWithRetry(
349
+ `https://api.datacite.org/dois/${encodeURIComponent(doi)}`,
350
+ {
351
+ headers: {
352
+ 'Accept': 'application/vnd.api+json',
353
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
354
+ },
355
+ }
356
+ );
159
357
 
160
358
  if (response.status === 404) {
161
359
  return { valid: false, error: 'DOI not found in DataCite' };
@@ -190,14 +388,25 @@ async function checkDoiDataCite(doi) {
190
388
 
191
389
  /**
192
390
  * Check if DOI resolves (exists) - tries Crossref first, then DataCite
391
+ * Results are cached for 7 days to reduce API calls.
193
392
  * @param {string} doi
194
- * @returns {Promise<{valid: boolean, source?: string, metadata?: object, error?: string}>}
393
+ * @param {object} options
394
+ * @param {boolean} options.skipCache - Skip cache lookup
395
+ * @returns {Promise<{valid: boolean, source?: string, metadata?: object, error?: string, cached?: boolean}>}
195
396
  */
196
- export async function checkDoi(doi) {
397
+ export async function checkDoi(doi, options = {}) {
197
398
  if (!isValidDoiFormat(doi)) {
198
399
  return { valid: false, error: 'Invalid DOI format' };
199
400
  }
200
401
 
402
+ // Check cache first (unless skipped)
403
+ if (!options.skipCache) {
404
+ const cached = getCachedDoi(doi);
405
+ if (cached) {
406
+ return { ...cached, cached: true };
407
+ }
408
+ }
409
+
201
410
  // Zenodo DOIs start with 10.5281 - check DataCite first
202
411
  const isZenodo = doi.startsWith('10.5281/');
203
412
  const isFigshare = doi.startsWith('10.6084/');
@@ -206,37 +415,45 @@ export async function checkDoi(doi) {
206
415
  if (isDataCiteLikely) {
207
416
  const dataciteResult = await checkDoiDataCite(doi);
208
417
  if (dataciteResult.valid) {
418
+ cacheDoi(doi, dataciteResult);
209
419
  return dataciteResult;
210
420
  }
211
421
  }
212
422
 
213
423
  try {
214
424
  // Use Crossref API to check DOI
215
- const response = await fetch(`https://api.crossref.org/works/${encodeURIComponent(doi)}`, {
216
- headers: {
217
- 'User-Agent': 'rev-cli/0.2.0 (mailto:dev@example.com)',
218
- },
219
- });
425
+ const response = await crossrefLimiter.fetchWithRetry(
426
+ `https://api.crossref.org/works/${encodeURIComponent(doi)}`,
427
+ {
428
+ headers: {
429
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
430
+ },
431
+ }
432
+ );
220
433
 
221
434
  if (response.status === 404) {
222
435
  // Try DataCite as fallback (if not already tried)
223
436
  if (!isDataCiteLikely) {
224
437
  const dataciteResult = await checkDoiDataCite(doi);
225
438
  if (dataciteResult.valid) {
439
+ cacheDoi(doi, dataciteResult);
226
440
  return dataciteResult;
227
441
  }
228
442
  }
229
- return { valid: false, error: 'DOI not found' };
443
+ const result = { valid: false, error: 'DOI not found' };
444
+ cacheDoi(doi, result);
445
+ return result;
230
446
  }
231
447
 
232
448
  if (!response.ok) {
449
+ // Don't cache transient errors
233
450
  return { valid: false, error: `HTTP ${response.status}` };
234
451
  }
235
452
 
236
453
  const data = await response.json();
237
454
  const work = data.message;
238
455
 
239
- return {
456
+ const result = {
240
457
  valid: true,
241
458
  source: 'crossref',
242
459
  metadata: {
@@ -247,7 +464,11 @@ export async function checkDoi(doi) {
247
464
  type: work.type,
248
465
  },
249
466
  };
467
+
468
+ cacheDoi(doi, result);
469
+ return result;
250
470
  } catch (err) {
471
+ // Don't cache network errors
251
472
  return { valid: false, error: err.message };
252
473
  }
253
474
  }
@@ -266,13 +487,16 @@ export async function fetchBibtex(doi) {
266
487
  }
267
488
 
268
489
  try {
269
- const response = await fetch(`https://doi.org/${encodeURIComponent(doi)}`, {
270
- headers: {
271
- 'Accept': 'application/x-bibtex',
272
- 'User-Agent': 'rev-cli/0.2.0',
273
- },
274
- redirect: 'follow',
275
- });
490
+ const response = await doiOrgLimiter.fetchWithRetry(
491
+ `https://doi.org/${encodeURIComponent(doi)}`,
492
+ {
493
+ headers: {
494
+ 'Accept': 'application/x-bibtex',
495
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
496
+ },
497
+ redirect: 'follow',
498
+ }
499
+ );
276
500
 
277
501
  if (!response.ok) {
278
502
  return { success: false, error: `HTTP ${response.status}` };
@@ -387,12 +611,15 @@ async function searchDataCite(title, author = '', year = null) {
387
611
  'page[size]': '5',
388
612
  });
389
613
 
390
- const response = await fetch(`https://api.datacite.org/dois?${params}`, {
391
- headers: {
392
- 'Accept': 'application/vnd.api+json',
393
- 'User-Agent': 'rev-cli/0.2.0',
394
- },
395
- });
614
+ const response = await dataciteLimiter.fetchWithRetry(
615
+ `https://api.datacite.org/dois?${params}`,
616
+ {
617
+ headers: {
618
+ 'Accept': 'application/vnd.api+json',
619
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
620
+ },
621
+ }
622
+ );
396
623
 
397
624
  if (!response.ok) return [];
398
625
 
@@ -502,11 +729,14 @@ export async function lookupDoi(title, author = '', year = null, journal = '') {
502
729
  structuredParams.set('query.container-title', journal);
503
730
  }
504
731
 
505
- let response = await fetch(`https://api.crossref.org/works?${structuredParams}`, {
506
- headers: {
507
- 'User-Agent': 'rev-cli/0.2.0 (mailto:dev@example.com)',
508
- },
509
- });
732
+ let response = await crossrefLimiter.fetchWithRetry(
733
+ `https://api.crossref.org/works?${structuredParams}`,
734
+ {
735
+ headers: {
736
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
737
+ },
738
+ }
739
+ );
510
740
 
511
741
  if (response.ok) {
512
742
  const data = await response.json();
@@ -521,11 +751,14 @@ export async function lookupDoi(title, author = '', year = null, journal = '') {
521
751
  });
522
752
  titleParams.set('query.title', title);
523
753
 
524
- const response2 = await fetch(`https://api.crossref.org/works?${titleParams}`, {
525
- headers: {
526
- 'User-Agent': 'rev-cli/0.2.0 (mailto:dev@example.com)',
527
- },
528
- });
754
+ const response2 = await crossrefLimiter.fetchWithRetry(
755
+ `https://api.crossref.org/works?${titleParams}`,
756
+ {
757
+ headers: {
758
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
759
+ },
760
+ }
761
+ );
529
762
 
530
763
  if (response2.ok) {
531
764
  const data = await response2.json();
@@ -548,11 +781,14 @@ export async function lookupDoi(title, author = '', year = null, journal = '') {
548
781
  select: 'DOI,title,author,published-print,published-online,container-title,score,type',
549
782
  });
550
783
 
551
- response = await fetch(`https://api.crossref.org/works?${basicParams}`, {
552
- headers: {
553
- 'User-Agent': 'rev-cli/0.2.0 (mailto:dev@example.com)',
554
- },
555
- });
784
+ response = await crossrefLimiter.fetchWithRetry(
785
+ `https://api.crossref.org/works?${basicParams}`,
786
+ {
787
+ headers: {
788
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
789
+ },
790
+ }
791
+ );
556
792
 
557
793
  if (response.ok) {
558
794
  const data = await response.json();