docrev 0.8.1 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/PLAN-tables-and-postprocess.md +850 -0
  3. package/README.md +33 -0
  4. package/bin/rev.js +12 -131
  5. package/bin/rev.ts +145 -0
  6. package/dist/bin/rev.d.ts +9 -0
  7. package/dist/bin/rev.d.ts.map +1 -0
  8. package/dist/bin/rev.js +118 -0
  9. package/dist/bin/rev.js.map +1 -0
  10. package/dist/lib/annotations.d.ts +91 -0
  11. package/dist/lib/annotations.d.ts.map +1 -0
  12. package/dist/lib/annotations.js +554 -0
  13. package/dist/lib/annotations.js.map +1 -0
  14. package/dist/lib/build.d.ts +171 -0
  15. package/dist/lib/build.d.ts.map +1 -0
  16. package/dist/lib/build.js +755 -0
  17. package/dist/lib/build.js.map +1 -0
  18. package/dist/lib/citations.d.ts +34 -0
  19. package/dist/lib/citations.d.ts.map +1 -0
  20. package/dist/lib/citations.js +140 -0
  21. package/dist/lib/citations.js.map +1 -0
  22. package/dist/lib/commands/build.d.ts +13 -0
  23. package/dist/lib/commands/build.d.ts.map +1 -0
  24. package/dist/lib/commands/build.js +678 -0
  25. package/dist/lib/commands/build.js.map +1 -0
  26. package/dist/lib/commands/citations.d.ts +11 -0
  27. package/dist/lib/commands/citations.d.ts.map +1 -0
  28. package/dist/lib/commands/citations.js +428 -0
  29. package/dist/lib/commands/citations.js.map +1 -0
  30. package/dist/lib/commands/comments.d.ts +11 -0
  31. package/dist/lib/commands/comments.d.ts.map +1 -0
  32. package/dist/lib/commands/comments.js +883 -0
  33. package/dist/lib/commands/comments.js.map +1 -0
  34. package/dist/lib/commands/context.d.ts +35 -0
  35. package/dist/lib/commands/context.d.ts.map +1 -0
  36. package/dist/lib/commands/context.js +59 -0
  37. package/dist/lib/commands/context.js.map +1 -0
  38. package/dist/lib/commands/core.d.ts +11 -0
  39. package/dist/lib/commands/core.d.ts.map +1 -0
  40. package/dist/lib/commands/core.js +246 -0
  41. package/dist/lib/commands/core.js.map +1 -0
  42. package/dist/lib/commands/doi.d.ts +11 -0
  43. package/dist/lib/commands/doi.d.ts.map +1 -0
  44. package/dist/lib/commands/doi.js +373 -0
  45. package/dist/lib/commands/doi.js.map +1 -0
  46. package/dist/lib/commands/history.d.ts +11 -0
  47. package/dist/lib/commands/history.d.ts.map +1 -0
  48. package/dist/lib/commands/history.js +245 -0
  49. package/dist/lib/commands/history.js.map +1 -0
  50. package/dist/lib/commands/index.d.ts +28 -0
  51. package/dist/lib/commands/index.d.ts.map +1 -0
  52. package/dist/lib/commands/index.js +35 -0
  53. package/dist/lib/commands/index.js.map +1 -0
  54. package/dist/lib/commands/init.d.ts +11 -0
  55. package/dist/lib/commands/init.d.ts.map +1 -0
  56. package/dist/lib/commands/init.js +209 -0
  57. package/dist/lib/commands/init.js.map +1 -0
  58. package/dist/lib/commands/response.d.ts +11 -0
  59. package/dist/lib/commands/response.d.ts.map +1 -0
  60. package/dist/lib/commands/response.js +317 -0
  61. package/dist/lib/commands/response.js.map +1 -0
  62. package/dist/lib/commands/sections.d.ts +11 -0
  63. package/dist/lib/commands/sections.d.ts.map +1 -0
  64. package/dist/lib/commands/sections.js +1071 -0
  65. package/dist/lib/commands/sections.js.map +1 -0
  66. package/dist/lib/commands/utilities.d.ts +19 -0
  67. package/dist/lib/commands/utilities.d.ts.map +1 -0
  68. package/dist/lib/commands/utilities.js +2009 -0
  69. package/dist/lib/commands/utilities.js.map +1 -0
  70. package/dist/lib/comment-realign.d.ts +50 -0
  71. package/dist/lib/comment-realign.d.ts.map +1 -0
  72. package/dist/lib/comment-realign.js +372 -0
  73. package/dist/lib/comment-realign.js.map +1 -0
  74. package/dist/lib/config.d.ts +41 -0
  75. package/dist/lib/config.d.ts.map +1 -0
  76. package/dist/lib/config.js +76 -0
  77. package/dist/lib/config.js.map +1 -0
  78. package/dist/lib/crossref.d.ts +108 -0
  79. package/dist/lib/crossref.d.ts.map +1 -0
  80. package/dist/lib/crossref.js +597 -0
  81. package/dist/lib/crossref.js.map +1 -0
  82. package/dist/lib/dependencies.d.ts +30 -0
  83. package/dist/lib/dependencies.d.ts.map +1 -0
  84. package/dist/lib/dependencies.js +95 -0
  85. package/dist/lib/dependencies.js.map +1 -0
  86. package/dist/lib/doi-cache.d.ts +29 -0
  87. package/dist/lib/doi-cache.d.ts.map +1 -0
  88. package/dist/lib/doi-cache.js +104 -0
  89. package/dist/lib/doi-cache.js.map +1 -0
  90. package/dist/lib/doi.d.ts +65 -0
  91. package/dist/lib/doi.d.ts.map +1 -0
  92. package/dist/lib/doi.js +710 -0
  93. package/dist/lib/doi.js.map +1 -0
  94. package/dist/lib/equations.d.ts +61 -0
  95. package/dist/lib/equations.d.ts.map +1 -0
  96. package/dist/lib/equations.js +445 -0
  97. package/dist/lib/equations.js.map +1 -0
  98. package/dist/lib/errors.d.ts +60 -0
  99. package/dist/lib/errors.d.ts.map +1 -0
  100. package/dist/lib/errors.js +303 -0
  101. package/dist/lib/errors.js.map +1 -0
  102. package/dist/lib/format.d.ts +104 -0
  103. package/dist/lib/format.d.ts.map +1 -0
  104. package/dist/lib/format.js +416 -0
  105. package/dist/lib/format.js.map +1 -0
  106. package/dist/lib/git.d.ts +88 -0
  107. package/dist/lib/git.d.ts.map +1 -0
  108. package/dist/lib/git.js +304 -0
  109. package/dist/lib/git.js.map +1 -0
  110. package/dist/lib/grammar.d.ts +62 -0
  111. package/dist/lib/grammar.d.ts.map +1 -0
  112. package/dist/lib/grammar.js +244 -0
  113. package/dist/lib/grammar.js.map +1 -0
  114. package/dist/lib/image-registry.d.ts +68 -0
  115. package/dist/lib/image-registry.d.ts.map +1 -0
  116. package/dist/lib/image-registry.js +112 -0
  117. package/dist/lib/image-registry.js.map +1 -0
  118. package/dist/lib/import.d.ts +184 -0
  119. package/dist/lib/import.d.ts.map +1 -0
  120. package/dist/lib/import.js +1581 -0
  121. package/dist/lib/import.js.map +1 -0
  122. package/dist/lib/journals.d.ts +55 -0
  123. package/dist/lib/journals.d.ts.map +1 -0
  124. package/dist/lib/journals.js +417 -0
  125. package/dist/lib/journals.js.map +1 -0
  126. package/dist/lib/merge.d.ts +138 -0
  127. package/dist/lib/merge.d.ts.map +1 -0
  128. package/dist/lib/merge.js +603 -0
  129. package/dist/lib/merge.js.map +1 -0
  130. package/dist/lib/orcid.d.ts +36 -0
  131. package/dist/lib/orcid.d.ts.map +1 -0
  132. package/dist/lib/orcid.js +117 -0
  133. package/dist/lib/orcid.js.map +1 -0
  134. package/dist/lib/pdf-comments.d.ts +95 -0
  135. package/dist/lib/pdf-comments.d.ts.map +1 -0
  136. package/dist/lib/pdf-comments.js +192 -0
  137. package/dist/lib/pdf-comments.js.map +1 -0
  138. package/dist/lib/pdf-import.d.ts +118 -0
  139. package/dist/lib/pdf-import.d.ts.map +1 -0
  140. package/dist/lib/pdf-import.js +397 -0
  141. package/dist/lib/pdf-import.js.map +1 -0
  142. package/dist/lib/plugins.d.ts +76 -0
  143. package/dist/lib/plugins.d.ts.map +1 -0
  144. package/dist/lib/plugins.js +235 -0
  145. package/dist/lib/plugins.js.map +1 -0
  146. package/dist/lib/postprocess.d.ts +42 -0
  147. package/dist/lib/postprocess.d.ts.map +1 -0
  148. package/dist/lib/postprocess.js +138 -0
  149. package/dist/lib/postprocess.js.map +1 -0
  150. package/dist/lib/pptx-template.d.ts +59 -0
  151. package/dist/lib/pptx-template.d.ts.map +1 -0
  152. package/dist/lib/pptx-template.js +613 -0
  153. package/dist/lib/pptx-template.js.map +1 -0
  154. package/dist/lib/pptx-themes.d.ts +80 -0
  155. package/dist/lib/pptx-themes.d.ts.map +1 -0
  156. package/dist/lib/pptx-themes.js +818 -0
  157. package/dist/lib/pptx-themes.js.map +1 -0
  158. package/dist/lib/protect-restore.d.ts +137 -0
  159. package/dist/lib/protect-restore.d.ts.map +1 -0
  160. package/dist/lib/protect-restore.js +394 -0
  161. package/dist/lib/protect-restore.js.map +1 -0
  162. package/dist/lib/rate-limiter.d.ts +27 -0
  163. package/dist/lib/rate-limiter.d.ts.map +1 -0
  164. package/dist/lib/rate-limiter.js +79 -0
  165. package/dist/lib/rate-limiter.js.map +1 -0
  166. package/dist/lib/response.d.ts +41 -0
  167. package/dist/lib/response.d.ts.map +1 -0
  168. package/dist/lib/response.js +150 -0
  169. package/dist/lib/response.js.map +1 -0
  170. package/dist/lib/review.d.ts +35 -0
  171. package/dist/lib/review.d.ts.map +1 -0
  172. package/dist/lib/review.js +263 -0
  173. package/dist/lib/review.js.map +1 -0
  174. package/dist/lib/schema.d.ts +66 -0
  175. package/dist/lib/schema.d.ts.map +1 -0
  176. package/dist/lib/schema.js +339 -0
  177. package/dist/lib/schema.js.map +1 -0
  178. package/dist/lib/scientific-words.d.ts +6 -0
  179. package/dist/lib/scientific-words.d.ts.map +1 -0
  180. package/dist/lib/scientific-words.js +66 -0
  181. package/dist/lib/scientific-words.js.map +1 -0
  182. package/dist/lib/sections.d.ts +40 -0
  183. package/dist/lib/sections.d.ts.map +1 -0
  184. package/dist/lib/sections.js +288 -0
  185. package/dist/lib/sections.js.map +1 -0
  186. package/dist/lib/slides.d.ts +86 -0
  187. package/dist/lib/slides.d.ts.map +1 -0
  188. package/dist/lib/slides.js +676 -0
  189. package/dist/lib/slides.js.map +1 -0
  190. package/dist/lib/spelling.d.ts +76 -0
  191. package/dist/lib/spelling.d.ts.map +1 -0
  192. package/dist/lib/spelling.js +272 -0
  193. package/dist/lib/spelling.js.map +1 -0
  194. package/dist/lib/templates.d.ts +30 -0
  195. package/dist/lib/templates.d.ts.map +1 -0
  196. package/dist/lib/templates.js +504 -0
  197. package/dist/lib/templates.js.map +1 -0
  198. package/dist/lib/themes.d.ts +85 -0
  199. package/dist/lib/themes.d.ts.map +1 -0
  200. package/dist/lib/themes.js +652 -0
  201. package/dist/lib/themes.js.map +1 -0
  202. package/dist/lib/trackchanges.d.ts +51 -0
  203. package/dist/lib/trackchanges.d.ts.map +1 -0
  204. package/dist/lib/trackchanges.js +202 -0
  205. package/dist/lib/trackchanges.js.map +1 -0
  206. package/dist/lib/tui.d.ts +76 -0
  207. package/dist/lib/tui.d.ts.map +1 -0
  208. package/dist/lib/tui.js +377 -0
  209. package/dist/lib/tui.js.map +1 -0
  210. package/dist/lib/types.d.ts +447 -0
  211. package/dist/lib/types.d.ts.map +1 -0
  212. package/dist/lib/types.js +6 -0
  213. package/dist/lib/types.js.map +1 -0
  214. package/dist/lib/undo.d.ts +57 -0
  215. package/dist/lib/undo.d.ts.map +1 -0
  216. package/dist/lib/undo.js +185 -0
  217. package/dist/lib/undo.js.map +1 -0
  218. package/dist/lib/utils.d.ts +16 -0
  219. package/dist/lib/utils.d.ts.map +1 -0
  220. package/dist/lib/utils.js +40 -0
  221. package/dist/lib/utils.js.map +1 -0
  222. package/dist/lib/variables.d.ts +42 -0
  223. package/dist/lib/variables.d.ts.map +1 -0
  224. package/dist/lib/variables.js +141 -0
  225. package/dist/lib/variables.js.map +1 -0
  226. package/dist/lib/word.d.ts +80 -0
  227. package/dist/lib/word.d.ts.map +1 -0
  228. package/dist/lib/word.js +360 -0
  229. package/dist/lib/word.js.map +1 -0
  230. package/dist/lib/wordcomments.d.ts +51 -0
  231. package/dist/lib/wordcomments.d.ts.map +1 -0
  232. package/dist/lib/wordcomments.js +587 -0
  233. package/dist/lib/wordcomments.js.map +1 -0
  234. package/eslint.config.js +27 -0
  235. package/lib/annotations.ts +622 -0
  236. package/lib/apply-buildup-colors.py +88 -0
  237. package/lib/build.ts +1013 -0
  238. package/lib/{citations.js → citations.ts} +38 -27
  239. package/lib/commands/{build.js → build.ts} +80 -27
  240. package/lib/commands/{citations.js → citations.ts} +36 -18
  241. package/lib/commands/{comments.js → comments.ts} +187 -54
  242. package/lib/commands/{context.js → context.ts} +18 -8
  243. package/lib/commands/{core.js → core.ts} +34 -20
  244. package/lib/commands/{doi.js → doi.ts} +32 -16
  245. package/lib/commands/{history.js → history.ts} +25 -12
  246. package/lib/commands/{index.js → index.ts} +9 -5
  247. package/lib/commands/{init.js → init.ts} +20 -8
  248. package/lib/commands/{response.js → response.ts} +47 -20
  249. package/lib/commands/{sections.js → sections.ts} +273 -68
  250. package/lib/commands/{utilities.js → utilities.ts} +338 -158
  251. package/lib/{comment-realign.js → comment-realign.ts} +117 -45
  252. package/lib/config.ts +84 -0
  253. package/lib/{crossref.js → crossref.ts} +213 -138
  254. package/lib/dependencies.ts +106 -0
  255. package/lib/doi-cache.ts +115 -0
  256. package/lib/{doi.js → doi.ts} +115 -281
  257. package/lib/{equations.js → equations.ts} +60 -64
  258. package/lib/{errors.js → errors.ts} +56 -48
  259. package/lib/{format.js → format.ts} +137 -63
  260. package/lib/{git.js → git.ts} +66 -63
  261. package/lib/{grammar.js → grammar.ts} +45 -32
  262. package/lib/image-registry.ts +180 -0
  263. package/lib/import.ts +2060 -0
  264. package/lib/journals.ts +505 -0
  265. package/lib/{merge.js → merge.ts} +185 -135
  266. package/lib/{orcid.js → orcid.ts} +17 -22
  267. package/lib/{pdf-comments.js → pdf-comments.ts} +76 -18
  268. package/lib/{pdf-import.js → pdf-import.ts} +148 -70
  269. package/lib/{plugins.js → plugins.ts} +82 -39
  270. package/lib/postprocess.ts +188 -0
  271. package/lib/pptx-color-filter.lua +37 -0
  272. package/lib/pptx-template.ts +625 -0
  273. package/lib/pptx-themes/academic.pptx +0 -0
  274. package/lib/pptx-themes/corporate.pptx +0 -0
  275. package/lib/pptx-themes/dark.pptx +0 -0
  276. package/lib/pptx-themes/default.pptx +0 -0
  277. package/lib/pptx-themes/minimal.pptx +0 -0
  278. package/lib/pptx-themes/plant.pptx +0 -0
  279. package/lib/pptx-themes.ts +896 -0
  280. package/lib/protect-restore.ts +516 -0
  281. package/lib/rate-limiter.ts +94 -0
  282. package/lib/{response.js → response.ts} +36 -21
  283. package/lib/{review.js → review.ts} +53 -43
  284. package/lib/{schema.js → schema.ts} +70 -25
  285. package/lib/{sections.js → sections.ts} +71 -76
  286. package/lib/slides.ts +793 -0
  287. package/lib/{spelling.js → spelling.ts} +43 -59
  288. package/lib/{templates.js → templates.ts} +20 -17
  289. package/lib/themes.ts +742 -0
  290. package/lib/{trackchanges.js → trackchanges.ts} +52 -23
  291. package/lib/types.ts +509 -0
  292. package/lib/{undo.js → undo.ts} +75 -52
  293. package/lib/utils.ts +41 -0
  294. package/lib/{variables.js → variables.ts} +60 -54
  295. package/lib/word.ts +428 -0
  296. package/lib/{wordcomments.js → wordcomments.ts} +94 -40
  297. package/package.json +15 -5
  298. package/skill/REFERENCE.md +67 -0
  299. package/tsconfig.json +26 -0
  300. package/lib/annotations.js +0 -414
  301. package/lib/build.js +0 -639
  302. package/lib/config.js +0 -79
  303. package/lib/import.js +0 -1145
  304. package/lib/journals.js +0 -629
  305. package/lib/word.js +0 -225
  306. /package/lib/{scientific-words.js → scientific-words.ts} +0 -0
@@ -0,0 +1,710 @@
1
+ /**
2
+ * DOI validation and fetching utilities
3
+ * Check DOIs in .bib files, fetch BibTeX from DOIs
4
+ */
5
+ import * as fs from 'fs';
6
+ import { crossrefLimiter, dataciteLimiter, doiOrgLimiter } from './rate-limiter.js';
7
+ import { getCachedDoi, cacheDoi } from './doi-cache.js';
// Entry types that typically don't have DOIs
const NO_DOI_TYPES = new Set([
    'book', // Books often don't have DOIs (chapters might)
    'inbook', // Book chapters - variable
    'thesis', // Theses rarely have DOIs
    'mastersthesis',
    'phdthesis',
    'misc', // Catch-all, often no DOI
    'unpublished', // By definition
    'manual', // Software manuals
    'techreport', // Some do, many don't
    'booklet',
]);
// Entry types that should have DOIs
const EXPECT_DOI_TYPES = new Set([
    'article', // Journal articles should have DOIs
    'inproceedings', // Conference papers usually do
    'proceedings',
    'incollection', // Book chapters in collections
]);
/**
 * Read the raw value of a `{...}`- or `"..."`-delimited BibTeX field.
 *
 * Braces are counted so that nested groups such as `{The {DNA} helix}` or
 * `"M{\"u}ller"` are returned whole instead of being cut at the first
 * inner `}` or `"`.
 *
 * @param {string} entryContent - Text of a single bib entry.
 * @param {string} fieldName - Field name (plain word, matched case-insensitively).
 * @returns {string|null} The text between the delimiters (untrimmed), or
 *   null when the field is absent, not brace/quote delimited, or unterminated.
 */
function readDelimitedBibField(entryContent, fieldName) {
    const header = new RegExp(`\\b${fieldName}\\s*=\\s*([{"])`, 'i').exec(entryContent);
    if (header === null) {
        return null;
    }
    const opener = header[1];
    const valueStart = header.index + header[0].length;
    let depth = 0;
    for (let i = valueStart; i < entryContent.length; i++) {
        const ch = entryContent[i];
        if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            if (depth === 0) {
                // Closes a brace-delimited value. Inside a quoted value an
                // unmatched brace means the closing quote is missing.
                return opener === '{' ? entryContent.slice(valueStart, i) : null;
            }
            depth--;
        }
        else if (ch === '"' && opener === '"' && depth === 0) {
            // Quotes nested inside braces do not terminate a quoted value
            return entryContent.slice(valueStart, i);
        }
    }
    return null;
}
/**
 * Parse .bib file and extract entries with DOI info
 *
 * @param {string} bibPath - Path to the .bib file.
 * @returns {Array<object>} One record per `@type{key,` entry, in file order,
 *   with `key`, `type` (lowercased), `doi` (URL prefix stripped, or null),
 *   `title` (first 60 characters), `authorRaw`, `year` (number or null),
 *   `journal`, `skip`, `expectDoi`, `noDoi` and the 1-based `line` of the
 *   entry. Returns an empty array when the file does not exist.
 */
export function parseBibEntries(bibPath) {
    if (!fs.existsSync(bibPath)) {
        return [];
    }
    const content = fs.readFileSync(bibPath, 'utf-8');
    const entries = [];
    // Pattern for bib entries: @type{key,
    const entryPattern = /@(\w+)\s*\{\s*([^,\s]+)\s*,/g;
    // Matches arrive in increasing offset order, so newlines are counted
    // incrementally instead of rescanning from the start for every entry.
    let line = 1;
    let lineScanPos = 0;
    let match;
    while ((match = entryPattern.exec(content)) !== null) {
        const type = match[1].toLowerCase();
        const key = match[2];
        const startPos = match.index;
        for (; lineScanPos < startPos; lineScanPos++) {
            if (content[lineScanPos] === '\n') {
                line++;
            }
        }
        // Find the end of this entry (matching closing brace)
        let braceCount = 0;
        let entryEnd = startPos;
        let inEntry = false;
        for (let i = startPos; i < content.length; i++) {
            if (content[i] === '{') {
                braceCount++;
                inEntry = true;
            }
            else if (content[i] === '}') {
                braceCount--;
                if (inEntry && braceCount === 0) {
                    entryEnd = i + 1;
                    break;
                }
            }
        }
        // An unterminated entry leaves entryEnd === startPos, i.e. empty content
        const entryContent = content.slice(startPos, entryEnd);
        // Extract DOI field and drop a resolver URL prefix if present
        const rawDoi = readDelimitedBibField(entryContent, 'doi');
        let doi = rawDoi ? rawDoi.trim() : null;
        if (doi) {
            doi = doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, '');
        }
        // Extract title for display
        const rawTitle = readDelimitedBibField(entryContent, 'title');
        const title = rawTitle ? rawTitle.trim().slice(0, 60) : '';
        // Extract author for lookup
        const rawAuthor = readDelimitedBibField(entryContent, 'author');
        const authorRaw = rawAuthor ? rawAuthor.trim() : '';
        // Extract year (delimiters are optional: year = 2020 is accepted)
        const yearMatch = entryContent.match(/\byear\s*=\s*[{"]?(\d{4})[}"]?/i);
        const year = yearMatch ? Number.parseInt(yearMatch[1], 10) : null;
        // Extract journal
        const rawJournal = readDelimitedBibField(entryContent, 'journal');
        const journal = rawJournal ? rawJournal.trim() : '';
        // Check for skip marker: nodoi = {true} or nodoi = true
        const skip = /\bnodoi\s*=\s*[{"]?(true|yes|1)[}"]?/i.test(entryContent);
        // Check for comment marker immediately before entry: % no-doi
        // Only the text after the previous closing brace (within the last
        // 200 characters) is inspected, so a marker belonging to an earlier
        // entry is not picked up.
        const windowBefore = content.slice(Math.max(0, startPos - 200), startPos);
        const lastEntryEnd = windowBefore.lastIndexOf('}');
        const relevantBefore = lastEntryEnd >= 0 ? windowBefore.slice(lastEntryEnd + 1) : windowBefore;
        const commentSkip = /% *no-?doi/i.test(relevantBefore);
        entries.push({
            key,
            type,
            doi: doi || null,
            title,
            authorRaw,
            year,
            journal,
            skip: skip || commentSkip,
            expectDoi: EXPECT_DOI_TYPES.has(type),
            noDoi: NO_DOI_TYPES.has(type),
            line,
        });
    }
    return entries;
}
/**
 * Validate DOI format
 *
 * A DOI name is `10.<registrant code>/<suffix>`. The registrant code is
 * four or more digits and may be followed by dot-separated numeric
 * subdivisions (for example `10.1000.10/123456`); the suffix is any run of
 * non-whitespace characters. Only the syntax is checked here, not whether
 * the DOI resolves.
 *
 * @param {*} doi - Candidate DOI without a resolver URL prefix.
 * @returns {boolean} True when `doi` is a non-empty string in DOI syntax.
 */
export function isValidDoiFormat(doi) {
    if (typeof doi !== 'string' || doi === '') {
        return false;
    }
    return /^10\.\d{4,}(?:\.\d+)*\/\S+$/.test(doi);
}
/**
 * Check if DOI resolves via DataCite (for Zenodo, Figshare, etc.)
 *
 * Never throws: request and parsing failures are reported through the
 * returned object.
 *
 * @param {string} doi - DOI to look up (without URL prefix).
 * @returns {Promise<object>} `{ valid: true, source: 'datacite', metadata }`
 *   on success, otherwise `{ valid: false, error }`. A 404 is reported with
 *   the error text `'DOI not found in DataCite'`.
 */
async function checkDoiDataCite(doi) {
    try {
        const response = await dataciteLimiter.fetchWithRetry(`https://api.datacite.org/dois/${encodeURIComponent(doi)}`, {
            headers: {
                'Accept': 'application/vnd.api+json',
                'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
            },
        });
        if (response.status === 404) {
            return { valid: false, error: 'DOI not found in DataCite' };
        }
        if (!response.ok) {
            return { valid: false, error: `HTTP ${response.status}` };
        }
        const data = await response.json();
        const attrs = data.data?.attributes;
        if (!attrs) {
            return { valid: false, error: 'Invalid DataCite response' };
        }
        // Organisational creators (and many repository records) carry only
        // `name`, so fall back to it when the given/family parts are absent
        // instead of emitting an empty author string.
        const authors = (attrs.creators ?? []).map((creator) => {
            const fullName = `${creator.givenName || ''} ${creator.familyName || ''}`.trim();
            return fullName || creator.name || '';
        });
        return {
            valid: true,
            source: 'datacite',
            metadata: {
                title: attrs.titles?.[0]?.title || '',
                authors,
                year: attrs.publicationYear,
                journal: attrs.publisher || '',
                type: attrs.types?.resourceTypeGeneral || '',
            },
        };
    }
    catch (err) {
        // Anything can be thrown; only Error instances have a usable message
        return { valid: false, error: err instanceof Error ? err.message : String(err) };
    }
}
/**
 * Check if DOI resolves (exists) - tries Crossref first, then DataCite
 *
 * Zenodo (`10.5281/`) and Figshare (`10.6084/`) DOIs are tried against
 * DataCite before Crossref. Successful lookups are stored through
 * `cacheDoi`; a negative result is stored only when both registries
 * definitively answered "not found". HTTP errors and network failures are
 * never cached, so a temporary outage cannot pin a DOI as invalid.
 *
 * @param {string} doi - DOI without URL prefix.
 * @param {object} [options]
 * @param {boolean} [options.skipCache] - Bypass the cache read (results are
 *   still written).
 * @returns {Promise<object>} `{ valid: true, source, metadata }` or
 *   `{ valid: false, error }`; cache hits additionally carry `cached: true`.
 */
export async function checkDoi(doi, options = {}) {
    // Error text checkDoiDataCite uses for a definitive 404
    const DATACITE_NOT_FOUND = 'DOI not found in DataCite';
    if (!isValidDoiFormat(doi)) {
        return { valid: false, error: 'Invalid DOI format' };
    }
    // Check cache first (unless skipped)
    if (!options.skipCache) {
        const cached = getCachedDoi(doi);
        if (cached) {
            return { ...cached, cached: true };
        }
    }
    // Zenodo DOIs start with 10.5281, Figshare with 10.6084 - check DataCite first
    const isDataCiteLikely = doi.startsWith('10.5281/') || doi.startsWith('10.6084/');
    // Outcome of the DataCite lookup, once one has been made
    let dataciteResult = null;
    if (isDataCiteLikely) {
        dataciteResult = await checkDoiDataCite(doi);
        if (dataciteResult.valid) {
            cacheDoi(doi, dataciteResult);
            return dataciteResult;
        }
    }
    try {
        // Use Crossref API to check DOI
        const response = await crossrefLimiter.fetchWithRetry(`https://api.crossref.org/works/${encodeURIComponent(doi)}`, {
            headers: {
                'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
            },
        });
        if (response.status === 404) {
            // Try DataCite as fallback (if not already tried)
            if (dataciteResult === null) {
                dataciteResult = await checkDoiDataCite(doi);
                if (dataciteResult.valid) {
                    cacheDoi(doi, dataciteResult);
                    return dataciteResult;
                }
            }
            const result = { valid: false, error: 'DOI not found' };
            // Crossref always answers 404 for DataCite-registered DOIs, so
            // the negative result is only trustworthy (and cacheable) when
            // DataCite also reported a definitive 404 rather than an HTTP
            // or network failure.
            if (dataciteResult.error === DATACITE_NOT_FOUND) {
                cacheDoi(doi, result);
            }
            return result;
        }
        if (!response.ok) {
            // Don't cache transient errors
            return { valid: false, error: `HTTP ${response.status}` };
        }
        const data = await response.json();
        const work = data.message;
        const result = {
            valid: true,
            source: 'crossref',
            metadata: {
                title: work.title?.[0] || '',
                authors: work.author?.map((a) => `${a.given || ''} ${a.family || ''}`.trim()) || [],
                year: work.published?.['date-parts']?.[0]?.[0] || work.created?.['date-parts']?.[0]?.[0],
                journal: work['container-title']?.[0] || '',
                type: work.type,
            },
        };
        cacheDoi(doi, result);
        return result;
    }
    catch (err) {
        // Don't cache network errors
        return { valid: false, error: err instanceof Error ? err.message : String(err) };
    }
}
/**
 * Fetch BibTeX from DOI using content negotiation
 *
 * Never throws: every failure, including a missing or non-string `doi`, is
 * reported through the returned object.
 *
 * @param {string} doi - DOI, optionally prefixed with
 *   `http(s)://doi.org/` or `http(s)://dx.doi.org/`.
 * @returns {Promise<object>} `{ success: true, bibtex }` with the trimmed
 *   BibTeX text, or `{ success: false, error }`.
 */
export async function fetchBibtex(doi) {
    // Clean DOI; coercing first keeps null/undefined from throwing here
    const cleanDoi = String(doi ?? '')
        .trim()
        .replace(/^https?:\/\/(dx\.)?doi\.org\//i, '');
    if (!isValidDoiFormat(cleanDoi)) {
        return { success: false, error: 'Invalid DOI format' };
    }
    try {
        const response = await doiOrgLimiter.fetchWithRetry(`https://doi.org/${encodeURIComponent(cleanDoi)}`, {
            headers: {
                'Accept': 'application/x-bibtex',
                'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
            },
            redirect: 'follow',
        });
        if (!response.ok) {
            return { success: false, error: `HTTP ${response.status}` };
        }
        const bibtex = (await response.text()).trim();
        // A resolver that ignores the Accept header answers 200 with an HTML
        // landing page, which can easily contain an "@" somewhere. Require
        // the body to actually open with an `@type{` entry header.
        if (!/^@\w+\s*\{/.test(bibtex)) {
            return { success: false, error: 'Invalid BibTeX response' };
        }
        return { success: true, bibtex };
    }
    catch (err) {
        return { success: false, error: err instanceof Error ? err.message : String(err) };
    }
}
/**
 * Check all DOIs in a .bib file
 *
 * Entries are classified in batches; within a batch the lookups run
 * concurrently, and batches are separated by a short pause.
 *
 * @param {string} bibPath - Path to the .bib file.
 * @param {object} [options]
 * @param {number} [options.parallel=5] - Entries per batch. Values that are
 *   not a positive number are treated as 1 so the loop always advances.
 * @returns {Promise<object>} `{ entries, valid, invalid, missing, skipped }`
 *   where `entries` holds each parsed entry plus a `status`
 *   (`'valid' | 'invalid' | 'missing' | 'skipped'`) and either `metadata`
 *   or `message`, and the four numbers count entries per status.
 */
export async function checkBibDois(bibPath, options = {}) {
    const { parallel = 5 } = options;
    // A batch size of 0, a negative number or NaN would never advance the
    // loop below, so clamp it to at least one entry per batch.
    const batchSize = Math.max(1, Math.floor(Number(parallel)) || 1);
    const entries = parseBibEntries(bibPath);
    const results = [];
    // Classify a single entry; performs a network lookup only when needed
    const classifyEntry = async (entry) => {
        // Skip if marked
        if (entry.skip) {
            return { ...entry, status: 'skipped', message: 'Marked as no-doi' };
        }
        // No DOI field
        if (!entry.doi) {
            if (entry.noDoi) {
                // Expected - books, theses, etc.
                return { ...entry, status: 'skipped', message: `${entry.type} typically has no DOI` };
            }
            if (entry.expectDoi) {
                // Should have DOI but doesn't
                return { ...entry, status: 'missing', message: 'Expected DOI for article/proceedings' };
            }
            return { ...entry, status: 'skipped', message: 'No DOI field' };
        }
        // Validate DOI format first
        if (!isValidDoiFormat(entry.doi)) {
            return { ...entry, status: 'invalid', message: 'Invalid DOI format' };
        }
        // Check if DOI resolves
        const check = await checkDoi(entry.doi);
        if (check.valid) {
            return { ...entry, status: 'valid', metadata: check.metadata };
        }
        return { ...entry, status: 'invalid', message: check.error };
    };
    // Process in batches to avoid rate limiting
    for (let i = 0; i < entries.length; i += batchSize) {
        const batch = entries.slice(i, i + batchSize);
        const batchResults = await Promise.all(batch.map(classifyEntry));
        results.push(...batchResults);
        // Small delay between batches to be nice to the API
        if (i + batchSize < entries.length) {
            await new Promise((resolve) => setTimeout(resolve, 200));
        }
    }
    // Derive the totals from the finished results rather than mutating
    // shared counters from inside concurrently running callbacks.
    const tally = { valid: 0, invalid: 0, missing: 0, skipped: 0 };
    for (const result of results) {
        tally[result.status]++;
    }
    return { entries: results, ...tally };
}
/**
 * Search DataCite API (for Zenodo, Figshare, etc.)
 *
 * Best-effort: any failure (HTTP error, network error, unexpected payload)
 * yields an empty array instead of an exception.
 *
 * @param {string} title - Title to search for.
 * @param {string} [author] - Author name to require in `creators.name`.
 * @param {number|null} [year] - Publication year to require.
 * @returns {Promise<Array<object>>} Up to five hits shaped like Crossref
 *   work items (`DOI`, `title`, `author`, `published-print`,
 *   `container-title`) plus `score: 50` and `source: 'datacite'`.
 */
async function searchDataCite(title, author = '', year = null) {
    // Prepare free text for use inside a field-scoped query: drop LaTeX
    // braces and range operators (which cannot be escaped), then
    // backslash-escape the remaining reserved query-string characters so
    // titles containing ":", "(", quotes or "/" cannot break the query.
    const escapeQueryText = (text) => String(text ?? '')
        .replace(/[{}<>]/g, '')
        .replace(/[+\-=&|!()[\]^"~*?:\\/]/g, '\\$&')
        .trim();
    try {
        const titleTerms = escapeQueryText(title);
        if (!titleTerms) {
            return [];
        }
        // DataCite query syntax. The parentheses scope every word to the
        // field; without them only the first word would be field-qualified.
        let query = `titles.title:(${titleTerms})`;
        const authorTerms = escapeQueryText(author);
        if (authorTerms) {
            query += ` AND creators.name:(${authorTerms})`;
        }
        if (year) {
            query += ` AND publicationYear:${year}`;
        }
        const params = new URLSearchParams({
            query: query,
            'page[size]': '5',
        });
        const response = await dataciteLimiter.fetchWithRetry(`https://api.datacite.org/dois?${params}`, {
            headers: {
                'Accept': 'application/vnd.api+json',
                'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev)',
            },
        });
        if (!response.ok) {
            return [];
        }
        const data = await response.json();
        const items = data.data || [];
        return items.map((item) => {
            const attrs = item.attributes;
            return {
                DOI: item.id,
                title: [attrs.titles?.[0]?.title || ''],
                // Creators without a split name (e.g. organisations) only
                // carry `name`; use it so `family` is not left undefined.
                author: attrs.creators?.map((c) => ({ family: c.familyName || c.name, given: c.givenName })) || [],
                'published-print': { 'date-parts': [[attrs.publicationYear]] },
                'container-title': [attrs.publisher || ''],
                score: 50, // Base score for DataCite results
                source: 'datacite',
            };
        });
    }
    catch {
        // Deliberate best-effort: callers treat DataCite as an optional source
        return [];
    }
}
/**
 * Normalize text for comparison (lowercase, remove special chars)
 *
 * Accented letters are folded to their base letter and LaTeX accent
 * commands such as `\"u` or `\'e` are reduced to the bare letter, so that
 * `Müller`, `M{\"u}ller` and `Muller` all normalize to `muller`. Every
 * other character outside `a-z`, `0-9` and whitespace becomes a space, and
 * runs of whitespace are collapsed.
 *
 * @param {string|null|undefined} text - Text to normalize.
 * @returns {string} Normalized text; empty string for empty or missing input.
 */
function normalizeForMatching(text) {
    return (text || '')
        .normalize('NFKD') // Split accented letters into base letter + combining mark
        .replace(/[\u0300-\u036f]/g, '') // Drop the combining marks
        .toLowerCase()
        .replace(/\\["'`^~=.]/g, '') // Drop LaTeX accent commands, keeping the letter
        .replace(/[{}\\]/g, '') // Remove LaTeX braces
        .replace(/[^a-z0-9\s]/g, ' ') // Replace special chars with space
        .replace(/\s+/g, ' ')
        .trim();
}
/**
 * Decide whether a search hit is a supplement, figure or post-publication
 * review rather than the main paper.
 *
 * @param {string} doi - DOI of the hit.
 * @param {string} [title] - Title of the hit.
 * @param {string} [journal] - Container/journal title of the hit.
 * @returns {boolean} True when the DOI, journal or title matches one of the
 *   supplement/review patterns below; false otherwise. Missing values are
 *   treated as empty strings.
 */
function isSupplementOrReview(doi, title = '', journal = '') {
    const lowered = (value) => (value || '').toLowerCase();
    const doiText = lowered(doi);
    const titleText = lowered(title);
    const journalText = lowered(journal);
    // DOI contains a supplement/figure segment or ends in an "s<number>" part
    const supplementDoi = /\.suppl|\/suppl|\.figure|\/figure|\.s\d+$|_s\d+$/i.test(doiText);
    // F1000/Faculty Opinions entries are reviews of a paper, not the paper
    const postPublicationReview = /10\.3410\/f\./i.test(doiText) || /faculty opinions/i.test(journalText);
    // Title opens with a supplementary-material heading
    const supplementTitle = /^supplementary|^supporting information|^appendix/i.test(titleText);
    return supplementDoi || postPublicationReview || supplementTitle;
}
400
+ /**
401
+ * Search for DOI by title and author using Crossref API (+ DataCite fallback)
402
+ */
403
+ export async function lookupDoi(title, author = '', year = null, journal = '') {
404
+ if (!title || title.length < 10) {
405
+ return { found: false, error: 'Title too short for reliable search' };
406
+ }
407
+ // Check for keywords that suggest Zenodo/DataCite sources
408
+ const likelyZenodo = /\b(IPBES|zenodo|assessment report|secretariat)\b/i.test(title);
409
+ try {
410
+ // Build query - title is most important, add author and journal if available
411
+ let query = title;
412
+ if (author) {
413
+ query = `${title} ${author}`;
414
+ }
415
+ // Add journal to query for better matching
416
+ if (journal) {
417
+ query = `${query} ${journal}`;
418
+ }
419
+ let items = [];
420
+ // Try structured bibliographic query first (more accurate)
421
+ const structuredParams = new URLSearchParams({
422
+ rows: '10',
423
+ select: 'DOI,title,author,published-print,published-online,container-title,score,type',
424
+ });
425
+ structuredParams.set('query.bibliographic', title);
426
+ if (author) {
427
+ structuredParams.set('query.author', author);
428
+ }
429
+ if (journal) {
430
+ structuredParams.set('query.container-title', journal);
431
+ }
432
+ let response = await crossrefLimiter.fetchWithRetry(`https://api.crossref.org/works?${structuredParams}`, {
433
+ headers: {
434
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
435
+ },
436
+ });
437
+ if (response.ok) {
438
+ const data = await response.json();
439
+ items = data.message?.items || [];
440
+ }
441
+ // If structured query found few results, also try query.title (often better for exact matches)
442
+ if (items.length < 5) {
443
+ const titleParams = new URLSearchParams({
444
+ rows: '10',
445
+ select: 'DOI,title,author,published-print,published-online,container-title,score,type',
446
+ });
447
+ titleParams.set('query.title', title);
448
+ const response2 = await crossrefLimiter.fetchWithRetry(`https://api.crossref.org/works?${titleParams}`, {
449
+ headers: {
450
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
451
+ },
452
+ });
453
+ if (response2.ok) {
454
+ const data = await response2.json();
455
+ const newItems = data.message?.items || [];
456
+ // Merge results, avoiding duplicates
457
+ const existingDois = new Set(items.map(i => i.DOI));
458
+ for (const item of newItems) {
459
+ if (!existingDois.has(item.DOI)) {
460
+ items.push(item);
461
+ }
462
+ }
463
+ }
464
+ }
465
+ // If still nothing, try basic query (most lenient)
466
+ if (items.length === 0) {
467
+ const basicParams = new URLSearchParams({
468
+ query: query,
469
+ rows: '10',
470
+ select: 'DOI,title,author,published-print,published-online,container-title,score,type',
471
+ });
472
+ response = await crossrefLimiter.fetchWithRetry(`https://api.crossref.org/works?${basicParams}`, {
473
+ headers: {
474
+ 'User-Agent': 'docrev/0.6.0 (https://github.com/gcol33/docrev; mailto:docrev@example.com)',
475
+ },
476
+ });
477
+ if (response.ok) {
478
+ const data = await response.json();
479
+ items = data.message?.items || [];
480
+ }
481
+ }
482
+ // Also search DataCite for Zenodo/institutional repos
483
+ if (likelyZenodo || items.length === 0) {
484
+ const dataciteItems = await searchDataCite(title, author, year);
485
+ items = [...items, ...dataciteItems];
486
+ }
487
+ if (items.length === 0) {
488
+ return { found: false, error: 'No results found' };
489
+ }
490
+ const normalizedSearchTitle = normalizeForMatching(title);
491
+ const normalizedJournal = normalizeForMatching(journal);
492
+ // Score the results
493
+ const scored = items.map(item => {
494
+ let score = 0;
495
+ const itemTitle = item.title?.[0] || '';
496
+ const itemJournal = item['container-title']?.[0] || '';
497
+ const normalizedItemTitle = normalizeForMatching(itemTitle);
498
+ const normalizedItemJournal = normalizeForMatching(itemJournal);
499
+ // === PENALTY: Supplement/figure/review DOIs ===
500
+ if (isSupplementOrReview(item.DOI, itemTitle, itemJournal)) {
501
+ score -= 100; // Heavy penalty - almost never want these
502
+ }
503
+ // === Title similarity (most important) ===
504
+ if (normalizedItemTitle === normalizedSearchTitle) {
505
+ score += 100; // Exact match
506
+ }
507
+ else if (normalizedItemTitle.includes(normalizedSearchTitle) ||
508
+ normalizedSearchTitle.includes(normalizedItemTitle)) {
509
+ score += 50;
510
+ }
511
+ else {
512
+ // Check word overlap
513
+ const searchWords = normalizedSearchTitle.split(/\s+/).filter(w => w.length > 3);
514
+ const itemWords = normalizedItemTitle.split(/\s+/).filter(w => w.length > 3);
515
+ const overlap = searchWords.filter(w => itemWords.some(iw => iw.includes(w) || w.includes(iw)));
516
+ score += (overlap.length / Math.max(searchWords.length, 1)) * 40;
517
+ }
518
+ // === Author match ===
519
+ if (author && item.author) {
520
+ const authorLower = author.toLowerCase();
521
+ const hasAuthor = item.author.some(a => (a.family || '').toLowerCase().includes(authorLower) ||
522
+ authorLower.includes((a.family || '').toLowerCase()));
523
+ if (hasAuthor)
524
+ score += 30;
525
+ }
526
+ // === Journal match (NEW) ===
527
+ if (normalizedJournal && normalizedItemJournal) {
528
+ // Check for journal name match (handles abbreviations)
529
+ const journalWords = normalizedJournal.split(/\s+/).filter(w => w.length > 2);
530
+ const itemJournalWords = normalizedItemJournal.split(/\s+/).filter(w => w.length > 2);
531
+ // Count matching words
532
+ const journalOverlap = journalWords.filter(w => itemJournalWords.some(iw => iw.includes(w) || w.includes(iw)));
533
+ if (journalOverlap.length >= Math.min(2, journalWords.length)) {
534
+ score += 40; // Good journal match
535
+ }
536
+ else if (journalOverlap.length >= 1) {
537
+ score += 15; // Partial match
538
+ }
539
+ // Bonus for exact journal match
540
+ if (normalizedItemJournal === normalizedJournal) {
541
+ score += 20;
542
+ }
543
+ }
544
+ // === Year match - CRITICAL for accuracy ===
545
+ const itemYear = item['published-print']?.['date-parts']?.[0]?.[0] ||
546
+ item['published-online']?.['date-parts']?.[0]?.[0];
547
+ if (year && itemYear) {
548
+ if (itemYear === year) {
549
+ score += 50; // Exact match - required for high confidence
550
+ }
551
+ else if (Math.abs(itemYear - year) === 1) {
552
+ score += 20; // Off by one (common for online-first)
553
+ }
554
+ else {
555
+ score -= 50; // Wrong year = likely wrong paper
556
+ }
557
+ }
558
+ else if (year && !itemYear) {
559
+ score -= 10; // Can't verify year
560
+ }
561
+ // Crossref's own relevance score (capped)
562
+ score += Math.min(item.score || 0, 10);
563
+ return {
564
+ doi: item.DOI,
565
+ title: itemTitle,
566
+ authors: item.author?.map(a => `${a.given || ''} ${a.family || ''}`.trim()) || [],
567
+ year: itemYear,
568
+ journal: itemJournal,
569
+ score,
570
+ crossrefScore: item.score,
571
+ isSupplement: isSupplementOrReview(item.DOI, itemTitle, itemJournal),
572
+ };
573
+ });
574
+ // Sort by our score
575
+ scored.sort((a, b) => b.score - a.score);
576
+ // Filter out supplements for the "best" pick (but keep in alternatives)
577
+ const mainPapers = scored.filter(s => !s.isSupplement);
578
+ const best = mainPapers.length > 0 ? mainPapers[0] : scored[0];
579
+ if (!best) {
580
+ return { found: false, error: 'No valid results found' };
581
+ }
582
+ // Confidence thresholds
583
+ let confidence = 'low';
584
+ if (best.score >= 120)
585
+ confidence = 'high';
586
+ else if (best.score >= 70)
587
+ confidence = 'medium';
588
+ // === NEW: Try DataCite if Crossref confidence is low ===
589
+ if (confidence === 'low' && !likelyZenodo) {
590
+ const dataciteItems = await searchDataCite(title, author, year);
591
+ if (dataciteItems.length > 0) {
592
+ // Score DataCite results with same logic
593
+ for (const dcItem of dataciteItems) {
594
+ const dcTitle = dcItem.title?.[0] || '';
595
+ const normalizedDcTitle = normalizeForMatching(dcTitle);
596
+ let dcScore = 0;
597
+ // Title match
598
+ if (normalizedDcTitle === normalizedSearchTitle) {
599
+ dcScore += 100;
600
+ }
601
+ else if (normalizedDcTitle.includes(normalizedSearchTitle) ||
602
+ normalizedSearchTitle.includes(normalizedDcTitle)) {
603
+ dcScore += 50;
604
+ }
605
+ // Year match
606
+ const dcYear = dcItem['published-print']?.['date-parts']?.[0]?.[0];
607
+ if (year && dcYear && dcYear === year) {
608
+ dcScore += 50;
609
+ }
610
+ if (dcScore > best.score) {
611
+ return {
612
+ found: true,
613
+ doi: dcItem.DOI,
614
+ confidence: dcScore >= 120 ? 'high' : dcScore >= 70 ? 'medium' : 'low',
615
+ score: dcScore,
616
+ metadata: {
617
+ title: dcTitle,
618
+ authors: dcItem.author?.map((a) => `${a.given || ''} ${a.family || ''}`.trim()) || [],
619
+ year: dcYear,
620
+ journal: dcItem['container-title']?.[0] || '',
621
+ },
622
+ alternatives: scored.slice(0, 2),
623
+ };
624
+ }
625
+ }
626
+ }
627
+ }
628
+ return {
629
+ found: true,
630
+ doi: best.doi,
631
+ confidence,
632
+ score: best.score,
633
+ metadata: {
634
+ title: best.title,
635
+ authors: best.authors,
636
+ year: best.year || 0,
637
+ journal: best.journal,
638
+ },
639
+ alternatives: scored.filter(s => s.doi !== best.doi).slice(0, 3),
640
+ };
641
+ }
642
+ catch (err) {
643
+ return { found: false, error: err.message };
644
+ }
645
+ }
646
/**
 * Look up DOIs for every entry in a .bib file that does not have one yet.
 *
 * Entries are ignored when they already carry a `doi`, are flagged `skip`,
 * or have a `type` listed in NO_DOI_TYPES. The remaining entries are resolved
 * with lookupDoi() in batches, with a 300 ms pause between batches.
 *
 * @param {string} bibPath - Path handed to parseBibEntries().
 * @param {object} [options]
 * @param {number} [options.parallel=3] - Number of entries looked up
 *   concurrently per batch. Fractions are floored; anything that is not at
 *   least 1 (0, negatives, NaN, null) falls back to 1 so the loop always
 *   advances.
 * @param {(done: number, total: number) => void} [options.onProgress] -
 *   Invoked after each batch with the processed count and the total.
 * @returns {Promise<Array<{key: *, title: *, type: *, journal: *, result: *}>>}
 *   One record per looked-up entry, in file order; `result` is whatever
 *   lookupDoi() resolved to.
 */
export async function lookupMissingDois(bibPath, options = {}) {
    const { parallel = 3, onProgress } = options;
    // A batch size below 1 would never advance `i` (endless loop) and NaN
    // would end the loop after an empty batch, so clamp to a positive integer.
    // Infinity is kept as-is and simply means "everything in one batch".
    const requested = Math.floor(Number(parallel));
    const batchSize = requested >= 1 ? requested : 1;
    const entries = parseBibEntries(bibPath);
    const missing = entries.filter(e => !e.doi &&
        !e.skip &&
        !NO_DOI_TYPES.has(e.type));
    const results = [];
    for (let i = 0; i < missing.length; i += batchSize) {
        const batch = missing.slice(i, i + batchSize);
        const batchResults = await Promise.all(batch.map(async (entry) => {
            // Take the text before the first comma of the first author. For
            // "Last, First" this is the surname; for "First Last" it is the
            // full name, which is passed through unchanged.
            let author = '';
            if (entry.authorRaw) {
                // Authors are separated by the word "and"; allow any
                // whitespace (including line breaks) around it.
                const firstAuthor = entry.authorRaw.split(/\s+and\s+/)[0];
                if (firstAuthor) {
                    const parts = firstAuthor.split(',');
                    author = parts[0]?.trim() || '';
                }
            }
            const result = await lookupDoi(entry.title, author, entry.year, entry.journal);
            return {
                key: entry.key,
                title: entry.title,
                type: entry.type,
                journal: entry.journal,
                result,
            };
        }));
        results.push(...batchResults);
        if (onProgress) {
            onProgress(Math.min(i + batchSize, missing.length), missing.length);
        }
        // Rate limiting: pause only when another batch follows.
        if (i + batchSize < missing.length) {
            await new Promise(r => setTimeout(r, 300));
        }
    }
    return results;
}
690
/**
 * Append a BibTeX entry to a .bib file unless its citation key is already
 * used by an entry in that file.
 *
 * @param {string} bibPath - Path of the .bib file; written even if it does
 *   not exist yet.
 * @param {string} bibtex - A BibTeX entry such as `@article{key, ...}`.
 * @returns {{success: true, key: string} | {success: false, error: string}}
 *   `success: false` when no citation key can be extracted or when the key
 *   is already present in the file.
 */
export function addToBib(bibPath, bibtex) {
    // Non-string input is treated like an entry without a key instead of throwing.
    const entry = typeof bibtex === 'string' ? bibtex.trim() : '';
    // Extract key from BibTeX
    const keyMatch = entry.match(/@\w+\s*\{\s*([^,\s]+)/);
    if (!keyMatch) {
        return { success: false, error: 'Could not extract citation key from BibTeX' };
    }
    const key = keyMatch[1];
    const existing = fs.existsSync(bibPath) ? fs.readFileSync(bibPath, 'utf-8') : '';
    // Match the key only in an entry header (`@type{key`), tolerating
    // whitespace around it and CRLF line endings. The lookahead keeps a key
    // from matching a longer key that merely starts with it.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const headerPattern = new RegExp(`@\\w+\\s*\\{\\s*${escapedKey}(?=[\\s,}]|$)`);
    if (headerPattern.test(existing)) {
        return { success: false, error: `Key "${key}" already exists in ${bibPath}` };
    }
    // Separate entries with one blank line, but do not start a new or empty
    // file with blank lines.
    const head = existing.trim();
    const newContent = head ? `${head}\n\n${entry}\n` : `${entry}\n`;
    fs.writeFileSync(bibPath, newContent, 'utf-8');
    return { success: true, key };
}
710
+ //# sourceMappingURL=doi.js.map