docrev 0.8.1 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/PLAN-tables-and-postprocess.md +850 -0
  3. package/README.md +33 -0
  4. package/bin/rev.js +12 -131
  5. package/bin/rev.ts +145 -0
  6. package/dist/bin/rev.d.ts +9 -0
  7. package/dist/bin/rev.d.ts.map +1 -0
  8. package/dist/bin/rev.js +118 -0
  9. package/dist/bin/rev.js.map +1 -0
  10. package/dist/lib/annotations.d.ts +91 -0
  11. package/dist/lib/annotations.d.ts.map +1 -0
  12. package/dist/lib/annotations.js +554 -0
  13. package/dist/lib/annotations.js.map +1 -0
  14. package/dist/lib/build.d.ts +171 -0
  15. package/dist/lib/build.d.ts.map +1 -0
  16. package/dist/lib/build.js +755 -0
  17. package/dist/lib/build.js.map +1 -0
  18. package/dist/lib/citations.d.ts +34 -0
  19. package/dist/lib/citations.d.ts.map +1 -0
  20. package/dist/lib/citations.js +140 -0
  21. package/dist/lib/citations.js.map +1 -0
  22. package/dist/lib/commands/build.d.ts +13 -0
  23. package/dist/lib/commands/build.d.ts.map +1 -0
  24. package/dist/lib/commands/build.js +678 -0
  25. package/dist/lib/commands/build.js.map +1 -0
  26. package/dist/lib/commands/citations.d.ts +11 -0
  27. package/dist/lib/commands/citations.d.ts.map +1 -0
  28. package/dist/lib/commands/citations.js +428 -0
  29. package/dist/lib/commands/citations.js.map +1 -0
  30. package/dist/lib/commands/comments.d.ts +11 -0
  31. package/dist/lib/commands/comments.d.ts.map +1 -0
  32. package/dist/lib/commands/comments.js +883 -0
  33. package/dist/lib/commands/comments.js.map +1 -0
  34. package/dist/lib/commands/context.d.ts +35 -0
  35. package/dist/lib/commands/context.d.ts.map +1 -0
  36. package/dist/lib/commands/context.js +59 -0
  37. package/dist/lib/commands/context.js.map +1 -0
  38. package/dist/lib/commands/core.d.ts +11 -0
  39. package/dist/lib/commands/core.d.ts.map +1 -0
  40. package/dist/lib/commands/core.js +246 -0
  41. package/dist/lib/commands/core.js.map +1 -0
  42. package/dist/lib/commands/doi.d.ts +11 -0
  43. package/dist/lib/commands/doi.d.ts.map +1 -0
  44. package/dist/lib/commands/doi.js +373 -0
  45. package/dist/lib/commands/doi.js.map +1 -0
  46. package/dist/lib/commands/history.d.ts +11 -0
  47. package/dist/lib/commands/history.d.ts.map +1 -0
  48. package/dist/lib/commands/history.js +245 -0
  49. package/dist/lib/commands/history.js.map +1 -0
  50. package/dist/lib/commands/index.d.ts +28 -0
  51. package/dist/lib/commands/index.d.ts.map +1 -0
  52. package/dist/lib/commands/index.js +35 -0
  53. package/dist/lib/commands/index.js.map +1 -0
  54. package/dist/lib/commands/init.d.ts +11 -0
  55. package/dist/lib/commands/init.d.ts.map +1 -0
  56. package/dist/lib/commands/init.js +209 -0
  57. package/dist/lib/commands/init.js.map +1 -0
  58. package/dist/lib/commands/response.d.ts +11 -0
  59. package/dist/lib/commands/response.d.ts.map +1 -0
  60. package/dist/lib/commands/response.js +317 -0
  61. package/dist/lib/commands/response.js.map +1 -0
  62. package/dist/lib/commands/sections.d.ts +11 -0
  63. package/dist/lib/commands/sections.d.ts.map +1 -0
  64. package/dist/lib/commands/sections.js +1071 -0
  65. package/dist/lib/commands/sections.js.map +1 -0
  66. package/dist/lib/commands/utilities.d.ts +19 -0
  67. package/dist/lib/commands/utilities.d.ts.map +1 -0
  68. package/dist/lib/commands/utilities.js +2009 -0
  69. package/dist/lib/commands/utilities.js.map +1 -0
  70. package/dist/lib/comment-realign.d.ts +50 -0
  71. package/dist/lib/comment-realign.d.ts.map +1 -0
  72. package/dist/lib/comment-realign.js +372 -0
  73. package/dist/lib/comment-realign.js.map +1 -0
  74. package/dist/lib/config.d.ts +41 -0
  75. package/dist/lib/config.d.ts.map +1 -0
  76. package/dist/lib/config.js +76 -0
  77. package/dist/lib/config.js.map +1 -0
  78. package/dist/lib/crossref.d.ts +108 -0
  79. package/dist/lib/crossref.d.ts.map +1 -0
  80. package/dist/lib/crossref.js +597 -0
  81. package/dist/lib/crossref.js.map +1 -0
  82. package/dist/lib/dependencies.d.ts +30 -0
  83. package/dist/lib/dependencies.d.ts.map +1 -0
  84. package/dist/lib/dependencies.js +95 -0
  85. package/dist/lib/dependencies.js.map +1 -0
  86. package/dist/lib/doi-cache.d.ts +29 -0
  87. package/dist/lib/doi-cache.d.ts.map +1 -0
  88. package/dist/lib/doi-cache.js +104 -0
  89. package/dist/lib/doi-cache.js.map +1 -0
  90. package/dist/lib/doi.d.ts +65 -0
  91. package/dist/lib/doi.d.ts.map +1 -0
  92. package/dist/lib/doi.js +710 -0
  93. package/dist/lib/doi.js.map +1 -0
  94. package/dist/lib/equations.d.ts +61 -0
  95. package/dist/lib/equations.d.ts.map +1 -0
  96. package/dist/lib/equations.js +445 -0
  97. package/dist/lib/equations.js.map +1 -0
  98. package/dist/lib/errors.d.ts +60 -0
  99. package/dist/lib/errors.d.ts.map +1 -0
  100. package/dist/lib/errors.js +303 -0
  101. package/dist/lib/errors.js.map +1 -0
  102. package/dist/lib/format.d.ts +104 -0
  103. package/dist/lib/format.d.ts.map +1 -0
  104. package/dist/lib/format.js +416 -0
  105. package/dist/lib/format.js.map +1 -0
  106. package/dist/lib/git.d.ts +88 -0
  107. package/dist/lib/git.d.ts.map +1 -0
  108. package/dist/lib/git.js +304 -0
  109. package/dist/lib/git.js.map +1 -0
  110. package/dist/lib/grammar.d.ts +62 -0
  111. package/dist/lib/grammar.d.ts.map +1 -0
  112. package/dist/lib/grammar.js +244 -0
  113. package/dist/lib/grammar.js.map +1 -0
  114. package/dist/lib/image-registry.d.ts +68 -0
  115. package/dist/lib/image-registry.d.ts.map +1 -0
  116. package/dist/lib/image-registry.js +112 -0
  117. package/dist/lib/image-registry.js.map +1 -0
  118. package/dist/lib/import.d.ts +184 -0
  119. package/dist/lib/import.d.ts.map +1 -0
  120. package/dist/lib/import.js +1581 -0
  121. package/dist/lib/import.js.map +1 -0
  122. package/dist/lib/journals.d.ts +55 -0
  123. package/dist/lib/journals.d.ts.map +1 -0
  124. package/dist/lib/journals.js +417 -0
  125. package/dist/lib/journals.js.map +1 -0
  126. package/dist/lib/merge.d.ts +138 -0
  127. package/dist/lib/merge.d.ts.map +1 -0
  128. package/dist/lib/merge.js +603 -0
  129. package/dist/lib/merge.js.map +1 -0
  130. package/dist/lib/orcid.d.ts +36 -0
  131. package/dist/lib/orcid.d.ts.map +1 -0
  132. package/dist/lib/orcid.js +117 -0
  133. package/dist/lib/orcid.js.map +1 -0
  134. package/dist/lib/pdf-comments.d.ts +95 -0
  135. package/dist/lib/pdf-comments.d.ts.map +1 -0
  136. package/dist/lib/pdf-comments.js +192 -0
  137. package/dist/lib/pdf-comments.js.map +1 -0
  138. package/dist/lib/pdf-import.d.ts +118 -0
  139. package/dist/lib/pdf-import.d.ts.map +1 -0
  140. package/dist/lib/pdf-import.js +397 -0
  141. package/dist/lib/pdf-import.js.map +1 -0
  142. package/dist/lib/plugins.d.ts +76 -0
  143. package/dist/lib/plugins.d.ts.map +1 -0
  144. package/dist/lib/plugins.js +235 -0
  145. package/dist/lib/plugins.js.map +1 -0
  146. package/dist/lib/postprocess.d.ts +42 -0
  147. package/dist/lib/postprocess.d.ts.map +1 -0
  148. package/dist/lib/postprocess.js +138 -0
  149. package/dist/lib/postprocess.js.map +1 -0
  150. package/dist/lib/pptx-template.d.ts +59 -0
  151. package/dist/lib/pptx-template.d.ts.map +1 -0
  152. package/dist/lib/pptx-template.js +613 -0
  153. package/dist/lib/pptx-template.js.map +1 -0
  154. package/dist/lib/pptx-themes.d.ts +80 -0
  155. package/dist/lib/pptx-themes.d.ts.map +1 -0
  156. package/dist/lib/pptx-themes.js +818 -0
  157. package/dist/lib/pptx-themes.js.map +1 -0
  158. package/dist/lib/protect-restore.d.ts +137 -0
  159. package/dist/lib/protect-restore.d.ts.map +1 -0
  160. package/dist/lib/protect-restore.js +394 -0
  161. package/dist/lib/protect-restore.js.map +1 -0
  162. package/dist/lib/rate-limiter.d.ts +27 -0
  163. package/dist/lib/rate-limiter.d.ts.map +1 -0
  164. package/dist/lib/rate-limiter.js +79 -0
  165. package/dist/lib/rate-limiter.js.map +1 -0
  166. package/dist/lib/response.d.ts +41 -0
  167. package/dist/lib/response.d.ts.map +1 -0
  168. package/dist/lib/response.js +150 -0
  169. package/dist/lib/response.js.map +1 -0
  170. package/dist/lib/review.d.ts +35 -0
  171. package/dist/lib/review.d.ts.map +1 -0
  172. package/dist/lib/review.js +263 -0
  173. package/dist/lib/review.js.map +1 -0
  174. package/dist/lib/schema.d.ts +66 -0
  175. package/dist/lib/schema.d.ts.map +1 -0
  176. package/dist/lib/schema.js +339 -0
  177. package/dist/lib/schema.js.map +1 -0
  178. package/dist/lib/scientific-words.d.ts +6 -0
  179. package/dist/lib/scientific-words.d.ts.map +1 -0
  180. package/dist/lib/scientific-words.js +66 -0
  181. package/dist/lib/scientific-words.js.map +1 -0
  182. package/dist/lib/sections.d.ts +40 -0
  183. package/dist/lib/sections.d.ts.map +1 -0
  184. package/dist/lib/sections.js +288 -0
  185. package/dist/lib/sections.js.map +1 -0
  186. package/dist/lib/slides.d.ts +86 -0
  187. package/dist/lib/slides.d.ts.map +1 -0
  188. package/dist/lib/slides.js +676 -0
  189. package/dist/lib/slides.js.map +1 -0
  190. package/dist/lib/spelling.d.ts +76 -0
  191. package/dist/lib/spelling.d.ts.map +1 -0
  192. package/dist/lib/spelling.js +272 -0
  193. package/dist/lib/spelling.js.map +1 -0
  194. package/dist/lib/templates.d.ts +30 -0
  195. package/dist/lib/templates.d.ts.map +1 -0
  196. package/dist/lib/templates.js +504 -0
  197. package/dist/lib/templates.js.map +1 -0
  198. package/dist/lib/themes.d.ts +85 -0
  199. package/dist/lib/themes.d.ts.map +1 -0
  200. package/dist/lib/themes.js +652 -0
  201. package/dist/lib/themes.js.map +1 -0
  202. package/dist/lib/trackchanges.d.ts +51 -0
  203. package/dist/lib/trackchanges.d.ts.map +1 -0
  204. package/dist/lib/trackchanges.js +202 -0
  205. package/dist/lib/trackchanges.js.map +1 -0
  206. package/dist/lib/tui.d.ts +76 -0
  207. package/dist/lib/tui.d.ts.map +1 -0
  208. package/dist/lib/tui.js +377 -0
  209. package/dist/lib/tui.js.map +1 -0
  210. package/dist/lib/types.d.ts +447 -0
  211. package/dist/lib/types.d.ts.map +1 -0
  212. package/dist/lib/types.js +6 -0
  213. package/dist/lib/types.js.map +1 -0
  214. package/dist/lib/undo.d.ts +57 -0
  215. package/dist/lib/undo.d.ts.map +1 -0
  216. package/dist/lib/undo.js +185 -0
  217. package/dist/lib/undo.js.map +1 -0
  218. package/dist/lib/utils.d.ts +16 -0
  219. package/dist/lib/utils.d.ts.map +1 -0
  220. package/dist/lib/utils.js +40 -0
  221. package/dist/lib/utils.js.map +1 -0
  222. package/dist/lib/variables.d.ts +42 -0
  223. package/dist/lib/variables.d.ts.map +1 -0
  224. package/dist/lib/variables.js +141 -0
  225. package/dist/lib/variables.js.map +1 -0
  226. package/dist/lib/word.d.ts +80 -0
  227. package/dist/lib/word.d.ts.map +1 -0
  228. package/dist/lib/word.js +360 -0
  229. package/dist/lib/word.js.map +1 -0
  230. package/dist/lib/wordcomments.d.ts +51 -0
  231. package/dist/lib/wordcomments.d.ts.map +1 -0
  232. package/dist/lib/wordcomments.js +587 -0
  233. package/dist/lib/wordcomments.js.map +1 -0
  234. package/eslint.config.js +27 -0
  235. package/lib/annotations.ts +622 -0
  236. package/lib/apply-buildup-colors.py +88 -0
  237. package/lib/build.ts +1013 -0
  238. package/lib/{citations.js → citations.ts} +38 -27
  239. package/lib/commands/{build.js → build.ts} +80 -27
  240. package/lib/commands/{citations.js → citations.ts} +36 -18
  241. package/lib/commands/{comments.js → comments.ts} +187 -54
  242. package/lib/commands/{context.js → context.ts} +18 -8
  243. package/lib/commands/{core.js → core.ts} +34 -20
  244. package/lib/commands/{doi.js → doi.ts} +32 -16
  245. package/lib/commands/{history.js → history.ts} +25 -12
  246. package/lib/commands/{index.js → index.ts} +9 -5
  247. package/lib/commands/{init.js → init.ts} +20 -8
  248. package/lib/commands/{response.js → response.ts} +47 -20
  249. package/lib/commands/{sections.js → sections.ts} +273 -68
  250. package/lib/commands/{utilities.js → utilities.ts} +338 -158
  251. package/lib/{comment-realign.js → comment-realign.ts} +117 -45
  252. package/lib/config.ts +84 -0
  253. package/lib/{crossref.js → crossref.ts} +213 -138
  254. package/lib/dependencies.ts +106 -0
  255. package/lib/doi-cache.ts +115 -0
  256. package/lib/{doi.js → doi.ts} +115 -281
  257. package/lib/{equations.js → equations.ts} +60 -64
  258. package/lib/{errors.js → errors.ts} +56 -48
  259. package/lib/{format.js → format.ts} +137 -63
  260. package/lib/{git.js → git.ts} +66 -63
  261. package/lib/{grammar.js → grammar.ts} +45 -32
  262. package/lib/image-registry.ts +180 -0
  263. package/lib/import.ts +2060 -0
  264. package/lib/journals.ts +505 -0
  265. package/lib/{merge.js → merge.ts} +185 -135
  266. package/lib/{orcid.js → orcid.ts} +17 -22
  267. package/lib/{pdf-comments.js → pdf-comments.ts} +76 -18
  268. package/lib/{pdf-import.js → pdf-import.ts} +148 -70
  269. package/lib/{plugins.js → plugins.ts} +82 -39
  270. package/lib/postprocess.ts +188 -0
  271. package/lib/pptx-color-filter.lua +37 -0
  272. package/lib/pptx-template.ts +625 -0
  273. package/lib/pptx-themes/academic.pptx +0 -0
  274. package/lib/pptx-themes/corporate.pptx +0 -0
  275. package/lib/pptx-themes/dark.pptx +0 -0
  276. package/lib/pptx-themes/default.pptx +0 -0
  277. package/lib/pptx-themes/minimal.pptx +0 -0
  278. package/lib/pptx-themes/plant.pptx +0 -0
  279. package/lib/pptx-themes.ts +896 -0
  280. package/lib/protect-restore.ts +516 -0
  281. package/lib/rate-limiter.ts +94 -0
  282. package/lib/{response.js → response.ts} +36 -21
  283. package/lib/{review.js → review.ts} +53 -43
  284. package/lib/{schema.js → schema.ts} +70 -25
  285. package/lib/{sections.js → sections.ts} +71 -76
  286. package/lib/slides.ts +793 -0
  287. package/lib/{spelling.js → spelling.ts} +43 -59
  288. package/lib/{templates.js → templates.ts} +20 -17
  289. package/lib/themes.ts +742 -0
  290. package/lib/{trackchanges.js → trackchanges.ts} +52 -23
  291. package/lib/types.ts +509 -0
  292. package/lib/{undo.js → undo.ts} +75 -52
  293. package/lib/utils.ts +41 -0
  294. package/lib/{variables.js → variables.ts} +60 -54
  295. package/lib/word.ts +428 -0
  296. package/lib/{wordcomments.js → wordcomments.ts} +94 -40
  297. package/package.json +15 -5
  298. package/skill/REFERENCE.md +67 -0
  299. package/tsconfig.json +26 -0
  300. package/lib/annotations.js +0 -414
  301. package/lib/build.js +0 -639
  302. package/lib/config.js +0 -79
  303. package/lib/import.js +0 -1145
  304. package/lib/journals.js +0 -629
  305. package/lib/word.js +0 -225
  306. /package/lib/{scientific-words.js → scientific-words.ts} +0 -0
@@ -0,0 +1,516 @@
1
+ /**
2
+ * Protection and restoration utilities for markdown elements during Word import
3
+ *
4
+ * These functions protect special markdown syntax (anchors, cross-refs, math, citations,
5
+ * images, tables) by replacing them with placeholders before diffing, then restore them after.
6
+ */
7
+
8
+ // =============================================================================
9
+ // Interfaces
10
+ // =============================================================================
11
+
12
/** A line split into its leading markdown marker and the text that follows it. */
interface MarkdownPrefix {
  prefix: string;
  content: string;
}

/** One protected span: the original markdown and the placeholder that stands in for it. */
interface ProtectedItem {
  original: string;
  placeholder: string;
}

/** A protected math span, tagged as inline or display, plus its simplified text form. */
interface ProtectedMath extends ProtectedItem {
  type: 'inline' | 'display';
  simplified: string;
}

/** A protected image together with the metadata parsed from its markdown. */
interface ProtectedImage extends ProtectedItem {
  label: string | null;
  caption: string;
  path: string;
  figureNumber: string | null;
}

/** A protected table and the number of pipe characters counted in it. */
interface ProtectedTable extends ProtectedItem {
  cellCount: number;
}

/** Result of `protectAnchors`: placeholder-bearing text plus the protected anchors. */
interface ProtectAnchorsResult {
  text: string;
  anchors: ProtectedItem[];
}

/** Result of `protectCrossrefs`: placeholder-bearing text plus the protected references. */
interface ProtectCrossrefsResult {
  text: string;
  crossrefs: ProtectedItem[];
}

/** Result of `protectMath`: placeholder-bearing text plus the protected math spans. */
interface ProtectMathResult {
  text: string;
  mathBlocks: ProtectedMath[];
}

/** Result of `protectCitations`: placeholder-bearing text plus the original citation strings. */
interface ProtectCitationsResult {
  text: string;
  citations: string[];
}

/** Result of `protectImages`: placeholder-bearing text plus the protected images. */
interface ProtectImagesResult {
  text: string;
  images: ProtectedImage[];
}

/** Result of `protectTables`: placeholder-bearing text plus the protected tables. */
interface ProtectTablesResult {
  text: string;
  tables: ProtectedTable[];
}

/** Minimal view of an image registry: an optional lookup from a number key to a label. */
interface ImageRegistry {
  byNumber?: Map<string, { label: string }>;
}
71
+
72
+ // =============================================================================
73
+ // Public Functions
74
+ // =============================================================================
75
+
76
/**
 * Split a line into its leading markdown marker and the remaining content.
 *
 * Markers are tried in order: ATX header (`#`..`######` plus whitespace),
 * list item (`-`, `*`, `+` or `N.` plus whitespace, optionally indented),
 * then blockquote (`>`). The first one that matches wins.
 *
 * @param line - A single line of markdown.
 * @returns The marker as `prefix` (empty when none matches) and the rest as `content`.
 */
export function extractMarkdownPrefix(line: string): MarkdownPrefix {
  const markerPatterns: RegExp[] = [
    /^(#{1,6}\s+)/, // headers
    /^(\s*[-*+]\s+|\s*\d+\.\s+)/, // list items
    /^(>\s*)/, // blockquotes
  ];

  for (const pattern of markerPatterns) {
    const marker = pattern.exec(line)?.[1];
    if (marker) {
      return { prefix: marker, content: line.slice(marker.length) };
    }
  }

  return { prefix: '', content: line };
}
100
+
101
/**
 * Swap figure/table/equation/section/listing anchors for placeholders before diffing.
 *
 * Matches `{#fig:...}`, `{#tbl:...}`, `{#eq:...}`, `{#sec:...}` and `{#lst:...}`,
 * including any extra attributes inside the braces (e.g. `{#fig:label width=50%}`).
 *
 * @param md - Markdown source.
 * @returns The text with `ANCHORBLOCK<n>ENDANCHOR` placeholders and the anchors, in order of appearance.
 */
export function protectAnchors(md: string): ProtectAnchorsResult {
  const anchorPattern = /\{#(fig|tbl|eq|sec|lst):[^}]+\}/g;
  const anchors: ProtectedItem[] = [];

  const stash = (original: string): string => {
    const placeholder = `ANCHORBLOCK${anchors.length}ENDANCHOR`;
    anchors.push({ original, placeholder });
    return placeholder;
  };

  const text = md.replace(anchorPattern, stash);
  return { text, anchors };
}
119
+
120
/**
 * Put the original anchors back in place of their placeholders.
 *
 * A placeholder caught inside a deletion (`{--...--}`) or the old side of a
 * substitution (`{~~old~>new~~}`) is lifted out of the annotation so the
 * anchor itself is never marked as changed.
 *
 * @param text - Text that still contains anchor placeholders.
 * @param anchors - Placeholder/original pairs to restore.
 * @returns The text with every placeholder replaced by its original anchor.
 */
export function restoreAnchors(text: string, anchors: ProtectedItem[]): string {
  return anchors.reduce((current, { original, placeholder }) => {
    // {--...ANCHORBLOCK0ENDANCHOR--} -> {--...--}{#fig:label}
    const insideDeletion = new RegExp(`\\{--([^}]*?)${placeholder}([^}]*?)--\\}`, 'g');
    const liftedFromDeletions = current.replace(
      insideDeletion,
      (_whole: string, before: string, after: string) => {
        const lead = before.trim();
        const tail = after.trim();
        const deletedLead = lead ? `{--${lead}--}` : '';
        const deletedTail = tail ? `{--${tail}--}` : '';
        return deletedLead + original + deletedTail;
      },
    );

    // {~~old ANCHORBLOCK0ENDANCHOR~>new~~} -> {~~old~>new~~}{#fig:label}
    const insideSubstitution = new RegExp(`\\{~~([^~]*?)${placeholder}([^~]*?)~>([^~]*)~~\\}`, 'g');
    const liftedFromSubstitutions = liftedFromDeletions.replace(
      insideSubstitution,
      (_whole: string, oldBefore: string, oldAfter: string, newText: string) => {
        const oldText = `${(oldBefore ?? '').trim()} ${(oldAfter ?? '').trim()}`.trim();
        const replacement = (newText ?? '').trim();
        // When removing the placeholder leaves both sides equal, no substitution remains.
        const annotation = oldText === replacement ? replacement : `{~~${oldText}~>${replacement}~~}`;
        return annotation + original;
      },
    );

    // Any placeholder left outside annotations is swapped back directly.
    return liftedFromSubstitutions.split(placeholder).join(original);
  }, text);
}
161
+
162
/**
 * Swap cross-references for placeholders before diffing.
 *
 * Matches `@fig:`, `@tbl:`, `@eq:`, `@sec:` and `@lst:` followed by a label of
 * letters, digits, `_` or `-`. Surrounding brackets or parentheses are not
 * part of the match and stay in the text.
 *
 * @param md - Markdown source.
 * @returns The text with `XREFBLOCK<n>ENDXREF` placeholders and the references, in order of appearance.
 */
export function protectCrossrefs(md: string): ProtectCrossrefsResult {
  const crossrefPattern = /@(fig|tbl|eq|sec|lst):[a-zA-Z0-9_-]+/g;
  const crossrefs: ProtectedItem[] = [];

  const stash = (original: string): string => {
    const placeholder = `XREFBLOCK${crossrefs.length}ENDXREF`;
    crossrefs.push({ original, placeholder });
    return placeholder;
  };

  const text = md.replace(crossrefPattern, stash);
  return { text, crossrefs };
}
180
+
181
/**
 * Put the original cross-references back in place of their placeholders.
 *
 * A reference caught inside a deletion (`{--...--}`) is lifted out and kept.
 * A substitution whose old side is exactly the placeholder (the rendered form,
 * e.g. "Figure 1", replaced the reference) collapses back to the reference.
 *
 * @param text - Text that still contains cross-reference placeholders.
 * @param crossrefs - Placeholder/original pairs to restore.
 * @returns The text with every placeholder replaced by its original reference.
 */
export function restoreCrossrefs(text: string, crossrefs: ProtectedItem[]): string {
  let restored = text;

  for (const { original, placeholder } of crossrefs) {
    // {--...XREFBLOCK0ENDXREF...--}: keep the reference, keep the rest deleted
    const insideDeletion = new RegExp(`\\{--([^}]*?)${placeholder}([^}]*?)--\\}`, 'g');
    restored = restored.replace(insideDeletion, (_whole: string, before: string, after: string) => {
      const pieces: string[] = [];
      const lead = before.trim();
      const tail = after.trim();
      if (lead) pieces.push(`{--${lead}--}`);
      pieces.push(original);
      if (tail) pieces.push(`{--${tail}--}`);
      return pieces.join('');
    });

    // {~~XREFBLOCK0ENDXREF~>Figure 1~~} -> @fig:label
    const renderedSubstitution = new RegExp(`\\{~~${placeholder}~>[^~]+~~\\}`, 'g');
    restored = restored.replace(renderedSubstitution, original);

    // Everything else: direct swap
    restored = restored.split(placeholder).join(original);
  }

  return restored;
}
208
+
209
/**
 * Reduce LaTeX math to a plain-text approximation for fuzzy matching against Word text.
 *
 * The rewrites run in a fixed order: unwrap `\text`, `\hat`, `\bar`; turn
 * `\frac{a}{b}` into `a/b`; map `\sum` and `\cdot` to symbols; replace spacing
 * commands with a space; unescape `\_`, `\{`, `\}`; then drop any remaining
 * backslashes and braces and collapse whitespace.
 *
 * @param latex - LaTeX math content (without the `$` delimiters).
 * @returns The simplified, trimmed text.
 */
export function simplifyMathForMatching(latex: string): string {
  // Order matters: specific commands first, blanket backslash/brace removal last.
  const rewrites: Array<[RegExp, string]> = [
    [/\\text\{([^}]+)\}/g, '$1'],
    [/\\hat\{([^}]+)\}/g, '$1'],
    [/\\bar\{([^}]+)\}/g, '$1'],
    [/\\frac\{([^}]+)\}\{([^}]+)\}/g, '$1/$2'],
    [/\\sum_([a-z])/g, 'Σ'],
    [/\\sum/g, 'Σ'],
    [/\\cdot/g, '·'],
    [/\\quad/g, ' '],
    [/\\,/g, ' '],
    [/\\_/g, '_'],
    [/\\{/g, '{'],
    [/\\}/g, '}'],
    [/\\/g, ''],
    [/[{}]/g, ''],
    [/\s+/g, ' '],
  ];

  let simplified = latex;
  for (const [pattern, replacement] of rewrites) {
    simplified = simplified.replace(pattern, replacement);
  }
  return simplified.trim();
}
233
+
234
/**
 * Swap math spans for placeholders before diffing.
 *
 * Display math (`$$...$$`) is replaced first so its delimiters are not picked
 * up by the inline (`$...$`) pass. Both passes share one counter, so
 * placeholders are numbered in the order they are created.
 *
 * @param md - Markdown source.
 * @returns The text with `MATHBLOCK<n>ENDMATH` placeholders and the protected
 *   math spans, each with its simplified form for matching against Word text.
 */
export function protectMath(md: string): ProtectMathResult {
  const mathBlocks: ProtectedMath[] = [];

  // Builds the replacer for one math flavour; `body` is the text between the delimiters.
  const stashAs = (type: ProtectedMath['type']) => (original: string, body: string): string => {
    const placeholder = `MATHBLOCK${mathBlocks.length}ENDMATH`;
    mathBlocks.push({ original, placeholder, type, simplified: simplifyMathForMatching(body) });
    return placeholder;
  };

  const withoutDisplay = md.replace(/\$\$([^$]+)\$\$/g, stashAs('display'));
  const text = withoutDisplay.replace(/\$([^$\n]+)\$/g, stashAs('inline'));

  return { text, mathBlocks };
}
262
+
263
/**
 * Put the original math back in place of its placeholders.
 *
 * @param text - Text that still contains math placeholders.
 * @param mathBlocks - Protected math spans to restore.
 * @returns The text with every placeholder replaced by its original math.
 */
export function restoreMath(text: string, mathBlocks: ProtectedMath[]): string {
  return mathBlocks.reduce(
    (current, { placeholder, original }) => current.split(placeholder).join(original),
    text,
  );
}
272
+
273
/**
 * Replace rendered math found in Word text with the matching placeholders.
 *
 * Heuristic: for each block whose simplified form is at least two characters
 * long, the first exact occurrence of that form in the text is replaced.
 *
 * @param wordText - Text extracted from the Word document.
 * @param mathBlocks - Protected math spans from the markdown side.
 * @returns The text with matched math replaced by placeholders.
 */
export function replaceRenderedMath(wordText: string, mathBlocks: ProtectedMath[]): string {
  return mathBlocks
    .filter(({ simplified }) => simplified.length >= 2)
    .reduce(
      (current, { simplified, placeholder }) =>
        current.includes(simplified) ? current.replace(simplified, placeholder) : current,
      wordText,
    );
}
292
+
293
/**
 * Swap bracketed citations (`[@key]`, `[@a; @b]`, ...) for placeholders before diffing.
 *
 * @param md - Markdown source.
 * @returns The text with `CITEREF<n>ENDCITE` placeholders and the original
 *   citation strings, indexed by placeholder number.
 */
export function protectCitations(md: string): ProtectCitationsResult {
  const citations: string[] = [];

  const stash = (citation: string): string => {
    const placeholder = `CITEREF${citations.length}ENDCITE`;
    citations.push(citation);
    return placeholder;
  };

  return { text: md.replace(/\[@[^\]]+\]/g, stash), citations };
}
305
+
306
/**
 * Put the original citations back in place of their placeholders.
 *
 * The placeholder for `citations[i]` is `CITEREF<i>ENDCITE`; every occurrence
 * is replaced, wherever it sits in the text.
 *
 * @param text - Text that still contains citation placeholders.
 * @param citations - Original citation strings, indexed by placeholder number.
 * @returns The text with every placeholder replaced by its citation.
 */
export function restoreCitations(text: string, citations: string[]): string {
  return citations.reduce(
    (current, citation, index) => current.split(`CITEREF${index}ENDCITE`).join(citation),
    text,
  );
}
317
+
318
/**
 * Replace rendered author-year citations in Word text with citation placeholders.
 *
 * Parenthesised forms such as `(Author 2021)` or `(Author et al. 2021; Other 2020)`
 * are replaced left to right with `CITEREF0ENDCITE`, `CITEREF1ENDCITE`, ...
 * until `count` placeholders have been issued; later matches are left as they are.
 *
 * @param wordText - Text extracted from the Word document.
 * @param count - Maximum number of placeholders to issue.
 * @returns The text with up to `count` rendered citations replaced.
 */
export function replaceRenderedCitations(wordText: string, count: number): string {
  const renderedCitation = /\((?:[A-Z][a-zé]+(?:\s+et\s+al\.?)?(?:\s*[&,;]\s*[A-Z][a-zé]+(?:\s+et\s+al\.?)?)*\s+\d{4}(?:[a-z])?(?:\s*[,;]\s*(?:[A-Z][a-zé]+(?:\s+et\s+al\.?)?\s+)?\d{4}(?:[a-z])?)*)\)/g;

  let issued = 0;
  return wordText.replace(renderedCitation, (rendered) => {
    if (issued >= count) {
      return rendered;
    }
    const placeholder = `CITEREF${issued}ENDCITE`;
    issued += 1;
    return placeholder;
  });
}
335
+
336
/**
 * Swap markdown images for placeholders before diffing, so each image is
 * handled as one atomic block.
 *
 * Matches `![caption](path)` with an optional trailing `{...}` attribute block.
 * For each image it records:
 * - `label`: the name after `#fig:` / `#tbl:` in the attribute block, if any;
 * - `figureNumber`: the number from a caption starting "Figure N", "Fig. N",
 *   "Table N" or "Tbl. N" (N may also be `S` followed by digits), if any.
 *
 * @param md - Markdown source.
 * @param registry - Accepted for API compatibility; not read by this function.
 * @returns The text with `IMAGEBLOCK<n>ENDIMAGE` placeholders and the protected images.
 */
export function protectImages(md: string, registry: ImageRegistry | null = null): ProtectImagesResult {
  const images: ProtectedImage[] = [];

  const text = md.replace(
    /!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g,
    (original: string, caption: string, path: string, attributes: string | undefined) => {
      const placeholder = `IMAGEBLOCK${images.length}ENDIMAGE`;

      // "#fig:map" inside the attribute block -> "map"
      const label = attributes?.match(/#(fig|tbl):([a-zA-Z0-9_-]+)/)?.[2] ?? null;

      // Word-style caption "Figure N: ..." -> "N"
      const figureNumber =
        caption.match(/^(?:Figure|Fig\.?|Table|Tbl\.?)\s+(\d+|S\d+)[:\.]?\s*/i)?.[1] ?? null;

      images.push({ original, placeholder, label, caption: caption.trim(), path, figureNumber });
      return placeholder;
    },
  );

  return { text, images };
}
384
+
385
/**
 * Put the original images back in place of their placeholders.
 *
 * A placeholder caught inside a deletion (`{--...--}`) or insertion
 * (`{++...++}`) annotation replaces the whole annotation with the original
 * image, so the image is restored unmarked.
 *
 * @param text - Text that still contains image placeholders.
 * @param images - Protected images to restore.
 * @returns The text with every placeholder replaced by its original image markdown.
 */
export function restoreImages(text: string, images: ProtectedImage[]): string {
  let restored = text;

  for (const img of images) {
    // A replacer function inserts the image verbatim. Passing the markdown as a
    // replacement string would expand `$$`, `$&`, `$1`, ... found in captions or paths.
    const insertOriginal = (): string => img.original;

    // {--IMAGEBLOCK0ENDIMAGE--} should restore the original image
    const deletionPattern = new RegExp(`\\{--[^}]*?${img.placeholder}[^}]*?--\\}`, 'g');
    restored = restored.replace(deletionPattern, insertOriginal);

    const insertionPattern = new RegExp(`\\{\\+\\+[^}]*?${img.placeholder}[^}]*?\\+\\+\\}`, 'g');
    restored = restored.replace(insertionPattern, insertOriginal);

    // Normal replacement
    restored = restored.split(img.placeholder).join(img.original);
  }

  return restored;
}
403
+
404
/**
 * Match images extracted from Word to the images of the original markdown.
 *
 * Each Word image is scored against every original that is still unclaimed:
 * - +100 identical non-empty labels;
 * - +90 the registry entry for `fig:<figureNumber>` has the original's label;
 * - +80 identical normalised captions, or +40 when one caption contains the
 *   first 30 characters of the other;
 * - +30 identical file names.
 * The best-scoring original is assigned when its score is at least 40; each
 * original is used at most once. Ties keep the earlier original.
 *
 * Captions are normalised by stripping a leading "Figure N:" / "Table N:"
 * prefix from the Word caption (N may be digits or `S` plus digits),
 * lower-casing and truncating to 50 characters.
 *
 * @param originalImages - Images protected from the original markdown.
 * @param wordImages - Images protected from the Word-derived markdown.
 * @param registry - Optional lookup from figure number to label.
 * @returns Map from Word image placeholder to original image placeholder.
 */
export function matchWordImagesToOriginal(
  originalImages: ProtectedImage[],
  wordImages: ProtectedImage[],
  registry: ImageRegistry | null = null
): Map<string, string> {
  // Accepts the same numbering forms that produce `figureNumber` (digits or S + digits),
  // so supplementary captions like "Figure S1: ..." are normalised as well.
  const numberedCaptionPrefix = /^(?:Figure|Fig\.?|Table|Tbl\.?)\s+(?:\d+|S\d+)[:\.]?\s*/i;
  const fileNameOf = (path: string): string => path.split('/').pop()?.toLowerCase() || '';

  const mapping = new Map<string, string>();
  const usedOriginals = new Set<string>();

  for (const wordImg of wordImages) {
    // Word-side values do not depend on the candidate, so compute them once.
    const wordCaption = wordImg.caption.replace(numberedCaptionPrefix, '').toLowerCase().slice(0, 50);
    const wordFile = fileNameOf(wordImg.path);
    const registryEntry =
      wordImg.figureNumber && registry ? registry.byNumber?.get(`fig:${wordImg.figureNumber}`) : undefined;

    let bestMatch: ProtectedImage | null = null;
    let bestScore = 0;

    for (const origImg of originalImages) {
      if (usedOriginals.has(origImg.placeholder)) continue;

      let score = 0;

      // Match by label (most reliable)
      if (wordImg.label && origImg.label && wordImg.label === origImg.label) {
        score += 100;
      }

      // Match by figure number via registry
      if (registryEntry && registryEntry.label === origImg.label) {
        score += 90;
      }

      // Match by caption similarity (first 50 chars, normalized)
      const origCaption = origImg.caption.toLowerCase().slice(0, 50);
      if (wordCaption && origCaption) {
        if (wordCaption === origCaption) {
          score += 80;
        } else if (
          wordCaption.includes(origCaption.slice(0, 30)) ||
          origCaption.includes(wordCaption.slice(0, 30))
        ) {
          score += 40;
        }
      }

      // Match by file name; two empty names (e.g. paths ending in "/") are not evidence.
      if (wordFile && wordFile === fileNameOf(origImg.path)) {
        score += 30;
      }

      if (score > bestScore) {
        bestScore = score;
        bestMatch = origImg;
      }
    }

    if (bestMatch && bestScore >= 40) {
      mapping.set(wordImg.placeholder, bestMatch.placeholder);
      usedOriginals.add(bestMatch.placeholder);
    }
  }

  return mapping;
}
468
+
469
/**
 * Swap markdown pipe tables for placeholders before diffing, so each table is
 * handled as one atomic block.
 *
 * A candidate is an optional caption line starting with `Table` or `**Table`
 * followed by one or more consecutive lines that start and end with `|`.
 * The final row may end at end of input without a trailing newline. A
 * candidate is only protected when it contains a separator row of dashes
 * (optionally with alignment colons and spaces); other pipe-delimited lines
 * are left untouched.
 *
 * @param md - Markdown source.
 * @returns The text with `TABLEBLOCK<n>ENDTABLE` placeholders (each surrounded by
 *   blank lines) and the protected tables. `cellCount` is the number of `|`
 *   characters in the matched block.
 */
export function protectTables(md: string): ProtectTablesResult {
  const tables: ProtectedTable[] = [];

  // Each row ends with a newline or, for the last row only, at end of input.
  const tablePattern = /(?:^(?:\*\*)?Table[^\n]*\n\n?)?(?:^\|[^\n]+\|(?:\n|(?![\s\S])))+/gm;

  // A full separator row such as |---|---| or | :--- | ---: |
  const separatorRow = /^\|\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|$/m;

  const text = md.replace(tablePattern, (match) => {
    // Verify it's actually a table (has a separator row with dashes)
    const hasSeparator =
      match.includes('|---') ||
      match.includes('| ---') ||
      match.includes('|:--') ||
      separatorRow.test(match);
    if (!hasSeparator) {
      return match; // Not a real table, just lines with pipes
    }

    const idx = tables.length;
    const placeholder = `\n\nTABLEBLOCK${idx}ENDTABLE\n\n`;

    // Count pipes as an approximate cell count for matching in Word
    const cellCount = (match.match(/\|/g) || []).length;

    tables.push({ original: match.trim(), placeholder: placeholder.trim(), cellCount });
    return placeholder;
  });

  return { text, tables };
}
498
+
499
/**
 * Put the original tables back in place of their placeholders.
 *
 * A placeholder caught inside a deletion (`{--...--}`) or insertion
 * (`{++...++}`) annotation replaces the whole annotation with the original
 * table, so the table is restored unmarked.
 *
 * @param text - Text that still contains table placeholders.
 * @param tables - Protected tables to restore.
 * @returns The text with every placeholder replaced by its original table markdown.
 */
export function restoreTables(text: string, tables: ProtectedTable[]): string {
  let restored = text;

  for (const table of tables) {
    // A replacer function inserts the table verbatim. Passing the markdown as a
    // replacement string would expand `$$`, `$&`, ... found in cells (prices, math).
    const insertOriginal = (): string => table.original;

    const deletionPattern = new RegExp(`\\{--[^}]*?${table.placeholder}[^}]*?--\\}`, 'g');
    restored = restored.replace(deletionPattern, insertOriginal);

    const insertionPattern = new RegExp(`\\{\\+\\+[^}]*?${table.placeholder}[^}]*?\\+\\+\\}`, 'g');
    restored = restored.replace(insertionPattern, insertOriginal);

    // Normal replacement
    restored = restored.split(table.placeholder).join(table.original);
  }

  return restored;
}
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Rate Limiter - Prevents API abuse with exponential backoff
3
+ */
4
+
5
/**
 * Tuning knobs accepted by the `RateLimiter` constructor.
 * Every field is optional; omitted fields fall back to the defaults below.
 */
export interface RateLimiterOptions {
  /** Minimum delay between requests, in milliseconds. Defaults to 100. */
  minDelay?: number;

  /** Upper bound for the delay after backoff, in milliseconds. Defaults to 30000. */
  maxDelay?: number;

  /** Maximum number of retry attempts. Defaults to 3. */
  maxRetries?: number;

  /** Multiplier applied to the delay on backoff (and divisor on recovery). Defaults to 2. */
  backoffFactor?: number;
}
11
+
12
/**
 * Spaces out requests and retries failed ones with exponential backoff.
 *
 * The limiter keeps a current delay between `minDelay` and `maxDelay`. It is
 * multiplied by `backoffFactor` on HTTP 429 / 5xx responses and divided by it
 * again after each success.
 */
export class RateLimiter {
  private readonly minDelay: number;
  private readonly maxDelay: number;
  private readonly maxRetries: number;
  private readonly backoffFactor: number;
  private lastRequestTime: number;
  private currentDelay: number;
  private consecutiveErrors: number;

  /**
   * @param options - Optional overrides; see `RateLimiterOptions` for defaults.
   */
  constructor(options: RateLimiterOptions = {}) {
    // `??` (not `||`) so that an explicit 0 — e.g. `maxRetries: 0` to disable
    // retries — is honoured instead of being replaced by the default.
    this.minDelay = options.minDelay ?? 100; // Min delay between requests (ms)
    this.maxDelay = options.maxDelay ?? 30000; // Max delay after backoff (ms)
    this.maxRetries = options.maxRetries ?? 3; // Max retry attempts
    // A factor of 0 would make the recovery step divide by zero, so a falsy
    // value still falls back to the default here.
    this.backoffFactor = options.backoffFactor || 2;
    this.lastRequestTime = 0;
    this.currentDelay = this.minDelay;
    this.consecutiveErrors = 0;
  }

  /**
   * Resolves once at least `currentDelay` milliseconds have passed since the
   * previous call completed, then records the current time as the new
   * reference point.
   */
  async wait(): Promise<void> {
    const now = Date.now();
    const elapsed = now - this.lastRequestTime;
    if (elapsed < this.currentDelay) {
      await new Promise(r => setTimeout(r, this.currentDelay - elapsed));
    }
    this.lastRequestTime = Date.now();
  }

  /** Records a success: clears the error streak and eases the delay back towards `minDelay`. */
  onSuccess(): void {
    this.consecutiveErrors = 0;
    this.currentDelay = Math.max(this.minDelay, this.currentDelay / this.backoffFactor);
  }

  /**
   * Records a failure. The delay grows only for HTTP 429 and 5xx status codes;
   * any other code just counts towards the error streak.
   *
   * @param statusCode - HTTP status of the failed request (0 is used for thrown errors).
   * @returns `true` while the error streak is still within `maxRetries`.
   */
  onError(statusCode: number): boolean {
    this.consecutiveErrors++;
    // Exponential backoff
    if (statusCode === 429 || statusCode >= 500) {
      this.currentDelay = Math.min(this.maxDelay, this.currentDelay * this.backoffFactor);
    }
    return this.consecutiveErrors <= this.maxRetries;
  }

  /**
   * Converts a `Retry-After` header value into milliseconds.
   * Accepts both forms allowed by HTTP: a non-negative integer number of
   * seconds, or an HTTP-date (converted to "time from now", never negative).
   *
   * @returns The delay in milliseconds, or `undefined` when the header is
   *   absent or cannot be interpreted.
   */
  private static parseRetryAfter(value: string | null): number | undefined {
    if (value === null) return undefined;
    const trimmed = value.trim();
    if (/^\d+$/.test(trimmed)) return Number(trimmed) * 1000;
    const when = Date.parse(trimmed);
    return Number.isNaN(when) ? undefined : Math.max(0, when - Date.now());
  }

  /**
   * Performs `fetch(url, options)` under this limiter, retrying on HTTP 429,
   * on 5xx responses and on thrown errors, for at most `maxRetries + 1`
   * attempts. A 5xx response received on the final attempt is returned to the
   * caller rather than thrown.
   *
   * @param url - Request URL.
   * @param options - Passed through to `fetch` unchanged.
   * @returns The first response that is neither a 429 nor a retryable 5xx.
   * @throws The last error thrown by `fetch`, or `Error('Max retries exceeded')`
   *   when retries run out without any thrown error.
   */
  async fetchWithRetry(url: string, options: RequestInit = {}): Promise<Response> {
    let lastError: Error | undefined;

    for (let attempt = 0; attempt <= this.maxRetries; attempt++) {
      await this.wait();

      try {
        const response = await fetch(url, options);

        if (response.status === 429) {
          // Rate limited: take one backoff step, then make sure the delay is
          // at least what the server asked for. An unparsable header is
          // ignored so the delay can never become NaN (which would disable
          // throttling for every later request on this limiter).
          const canRetry = this.onError(429);
          const retryAfterMs = RateLimiter.parseRetryAfter(response.headers.get('Retry-After'));
          if (retryAfterMs !== undefined) {
            this.currentDelay = Math.min(this.maxDelay, Math.max(this.currentDelay, retryAfterMs));
          }
          if (!canRetry) break;
          continue;
        }

        if (response.status >= 500 && attempt < this.maxRetries) {
          // Server error - retry with backoff
          if (!this.onError(response.status)) break;
          continue;
        }

        this.onSuccess();
        return response;
      } catch (err) {
        // Thrown values are not guaranteed to be Error instances.
        lastError = err instanceof Error ? err : new Error(String(err));
        if (!this.onError(0)) break;
      }
    }

    throw lastError ?? new Error('Max retries exceeded');
  }
}
90
+
91
// Module-level limiter instances, one per API, shared by every importer.

/** Limiter with a 100 ms minimum delay and a 10 s backoff ceiling. */
export const crossrefLimiter = new RateLimiter({
  minDelay: 100,
  maxDelay: 10_000,
});

/** Limiter with a 100 ms minimum delay and a 10 s backoff ceiling. */
export const dataciteLimiter = new RateLimiter({
  minDelay: 100,
  maxDelay: 10_000,
});

/** Limiter with a 200 ms minimum delay and a 15 s backoff ceiling. */
export const doiOrgLimiter = new RateLimiter({
  minDelay: 200,
  maxDelay: 15_000,
});