docrev 0.8.5 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +25 -1
- package/dist/lib/annotations.d.ts.map +1 -1
- package/dist/lib/annotations.js +6 -0
- package/dist/lib/annotations.js.map +1 -1
- package/dist/lib/build.d.ts +6 -1
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +67 -1
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/build.d.ts.map +1 -1
- package/dist/lib/commands/build.js +26 -7
- package/dist/lib/commands/build.js.map +1 -1
- package/dist/lib/commands/response.d.ts.map +1 -1
- package/dist/lib/commands/response.js +50 -2
- package/dist/lib/commands/response.js.map +1 -1
- package/dist/lib/commands/sections.d.ts.map +1 -1
- package/dist/lib/commands/sections.js +28 -9
- package/dist/lib/commands/sections.js.map +1 -1
- package/dist/lib/csl.d.ts +38 -0
- package/dist/lib/csl.d.ts.map +1 -0
- package/dist/lib/csl.js +170 -0
- package/dist/lib/csl.js.map +1 -0
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +20 -7
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/journals.d.ts.map +1 -1
- package/dist/lib/journals.js +24 -0
- package/dist/lib/journals.js.map +1 -1
- package/dist/lib/plugins.d.ts +11 -0
- package/dist/lib/plugins.d.ts.map +1 -1
- package/dist/lib/plugins.js +21 -1
- package/dist/lib/plugins.js.map +1 -1
- package/dist/lib/pptx-template.d.ts +17 -22
- package/dist/lib/pptx-template.d.ts.map +1 -1
- package/dist/lib/pptx-template.js +296 -552
- package/dist/lib/pptx-template.js.map +1 -1
- package/dist/lib/schema.d.ts.map +1 -1
- package/dist/lib/schema.js +4 -0
- package/dist/lib/schema.js.map +1 -1
- package/dist/lib/types.d.ts +19 -1
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word.d.ts +24 -11
- package/dist/lib/word.d.ts.map +1 -1
- package/dist/lib/word.js +233 -32
- package/dist/lib/word.js.map +1 -1
- package/lib/annotations.ts +8 -0
- package/lib/build.ts +75 -2
- package/lib/commands/build.ts +25 -7
- package/lib/commands/response.ts +55 -2
- package/lib/commands/sections.ts +31 -9
- package/lib/csl.ts +191 -0
- package/lib/import.ts +21 -7
- package/lib/journals.ts +25 -1
- package/lib/plugins.ts +35 -1
- package/lib/pptx-template.ts +346 -502
- package/lib/schema.ts +4 -0
- package/lib/types.ts +20 -1
- package/lib/word.ts +253 -38
- package/package.json +1 -2
- package/lib/apply-buildup-colors.py +0 -88
package/lib/annotations.ts
CHANGED
|
@@ -281,6 +281,9 @@ export function stripAnnotations(text: string, options: StripOptions = {}): stri
|
|
|
281
281
|
text = text.replace(PATTERNS.comment, '');
|
|
282
282
|
}
|
|
283
283
|
|
|
284
|
+
// Strip pandoc highlight spans: [text]{.mark} → text
|
|
285
|
+
text = text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');
|
|
286
|
+
|
|
284
287
|
// Clean up partial/orphaned markers within the loop
|
|
285
288
|
// This handles cases where nested annotations leave behind fragments
|
|
286
289
|
|
|
@@ -319,6 +322,11 @@ export function stripAnnotations(text: string, options: StripOptions = {}): stri
|
|
|
319
322
|
text = text.replace(/\{~~/g, '');
|
|
320
323
|
text = text.replace(/~>/g, '');
|
|
321
324
|
|
|
325
|
+
// Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
|
|
326
|
+
// was inside a comment. A [ is orphan if no matching ] follows before
|
|
327
|
+
// the next [ or end of line.
|
|
328
|
+
text = text.replace(/\[(?![^\[\]]*\])/g, '');
|
|
329
|
+
|
|
322
330
|
return text;
|
|
323
331
|
}
|
|
324
332
|
|
package/lib/build.ts
CHANGED
|
@@ -21,7 +21,9 @@ import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js';
|
|
|
21
21
|
import { runPostprocess } from './postprocess.js';
|
|
22
22
|
import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js';
|
|
23
23
|
import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
|
|
24
|
-
import type { Author } from './types.js';
|
|
24
|
+
import type { Author, JournalFormatting } from './types.js';
|
|
25
|
+
import { getJournalProfile } from './journals.js';
|
|
26
|
+
import { resolveCSL } from './csl.js';
|
|
25
27
|
|
|
26
28
|
// =============================================================================
|
|
27
29
|
// Constants
|
|
@@ -250,6 +252,69 @@ export const DEFAULT_CONFIG: BuildConfig = {
|
|
|
250
252
|
// Public API
|
|
251
253
|
// =============================================================================
|
|
252
254
|
|
|
255
|
+
/**
 * Merge journal formatting defaults into a config.
 * Priority: DEFAULT_CONFIG < journal formatting < rev.yaml explicit settings
 *
 * "Explicitly set by the user" is detected indirectly: a journal value is
 * applied to a field only while the incoming config still holds the
 * DEFAULT_CONFIG value for that field (compared with JSON.stringify so that
 * arrays/objects compare by content). A user who explicitly writes the default
 * value is therefore treated as not having set it.
 *
 * @param config - Config already merged from DEFAULT_CONFIG and rev.yaml; not mutated
 * @param formatting - Formatting section of the journal profile
 * @param directory - Project directory, passed to resolveCSL to look up a local CSL file
 * @returns A shallow copy of `config` with journal defaults applied
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  // Shared by the pdf/docx/crossref sections: copy the current section and
  // overlay every journal value whose field still equals the built-in default.
  const overlayUnsetFields = (
    current: object | undefined,
    defaults: object,
    journalValues: object,
  ): Record<string, unknown> => {
    const userValues = (current || {}) as Record<string, unknown>;
    const defaultValues = defaults as Record<string, unknown>;
    const result = { ...current } as Record<string, unknown>;
    for (const [key, value] of Object.entries(journalValues)) {
      if (value !== undefined && JSON.stringify(userValues[key]) === JSON.stringify(defaultValues[key])) {
        result[key] = value;
      }
    }
    return result;
  };

  const merged = { ...config };

  // CSL: only apply if user hasn't set one
  if (formatting.csl && !config.csl) {
    // Prefer a locally resolved file. If none is found, keep the bare name —
    // pandoc --citeproc can sometimes resolve it, and the user can fetch it
    // with `rev profiles --fetch-csl`.
    merged.csl = resolveCSL(formatting.csl, directory) || formatting.csl;
  }

  // PDF settings: merge only unset fields
  if (formatting.pdf) {
    merged.pdf = overlayUnsetFields(config.pdf, DEFAULT_CONFIG.pdf, formatting.pdf) as BuildConfig['pdf'];
  }

  // DOCX settings: merge only unset fields
  if (formatting.docx) {
    merged.docx = overlayUnsetFields(config.docx, DEFAULT_CONFIG.docx, formatting.docx) as BuildConfig['docx'];
  }

  // Crossref settings: merge only unset fields
  if (formatting.crossref) {
    merged.crossref = overlayUnsetFields(config.crossref, DEFAULT_CONFIG.crossref, formatting.crossref) as BuildConfig['crossref'];
  }

  return merged;
}
|
|
317
|
+
|
|
253
318
|
/**
|
|
254
319
|
* Load rev.yaml config from directory
|
|
255
320
|
* @param directory - Project directory path
|
|
@@ -273,7 +338,7 @@ export function loadConfig(directory: string): BuildConfig {
|
|
|
273
338
|
const userConfig = YAML.parse(content) || {};
|
|
274
339
|
|
|
275
340
|
// Deep merge with defaults
|
|
276
|
-
|
|
341
|
+
let config: BuildConfig = {
|
|
277
342
|
...DEFAULT_CONFIG,
|
|
278
343
|
...userConfig,
|
|
279
344
|
crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
|
|
@@ -287,6 +352,14 @@ export function loadConfig(directory: string): BuildConfig {
|
|
|
287
352
|
_configPath: configPath,
|
|
288
353
|
};
|
|
289
354
|
|
|
355
|
+
// Apply journal formatting defaults (between DEFAULT_CONFIG and user settings)
|
|
356
|
+
if (userConfig.journal) {
|
|
357
|
+
const profile = getJournalProfile(userConfig.journal);
|
|
358
|
+
if (profile?.formatting) {
|
|
359
|
+
config = mergeJournalFormatting(config, profile.formatting, directory);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
290
363
|
return config;
|
|
291
364
|
} catch (err) {
|
|
292
365
|
const error = err as Error;
|
package/lib/commands/build.ts
CHANGED
|
@@ -42,6 +42,7 @@ interface InstallOptions {
|
|
|
42
42
|
|
|
43
43
|
interface BuildOptions {
|
|
44
44
|
dir: string;
|
|
45
|
+
journal?: string;
|
|
45
46
|
crossref?: boolean;
|
|
46
47
|
toc?: boolean;
|
|
47
48
|
showChanges?: boolean;
|
|
@@ -270,13 +271,6 @@ export function register(program: Command, pkg?: { version?: string }): void {
|
|
|
270
271
|
console.log(chalk.yellow(' ✗ pandoc-crossref not found'));
|
|
271
272
|
}
|
|
272
273
|
|
|
273
|
-
try {
|
|
274
|
-
await import('mammoth');
|
|
275
|
-
console.log(chalk.green(' ✓ mammoth (Word parsing)'));
|
|
276
|
-
} catch {
|
|
277
|
-
console.log(chalk.red(' ✗ mammoth not found - run: npm install'));
|
|
278
|
-
}
|
|
279
|
-
|
|
280
274
|
console.log('');
|
|
281
275
|
|
|
282
276
|
if (hasPandocInstalled && hasCrossref) {
|
|
@@ -484,6 +478,7 @@ export function register(program: Command, pkg?: { version?: string }): void {
|
|
|
484
478
|
.description('Build PDF/DOCX/TEX/PPTX/Beamer from sections')
|
|
485
479
|
.argument('[formats...]', 'Output formats: pdf, docx, tex, beamer, pptx, all', ['pdf', 'docx'])
|
|
486
480
|
.option('-d, --dir <directory>', 'Project directory', '.')
|
|
481
|
+
.option('-j, --journal <name>', 'Use journal profile for build formatting defaults')
|
|
487
482
|
.option('--no-crossref', 'Skip pandoc-crossref filter')
|
|
488
483
|
.option('--toc', 'Include table of contents')
|
|
489
484
|
.option('--show-changes', 'Export DOCX with visible track changes (audit mode)')
|
|
@@ -515,11 +510,34 @@ export function register(program: Command, pkg?: { version?: string }): void {
|
|
|
515
510
|
process.exit(1);
|
|
516
511
|
}
|
|
517
512
|
|
|
513
|
+
// Apply journal formatting from CLI flag (overrides rev.yaml journal field)
|
|
514
|
+
let journalName: string | undefined;
|
|
515
|
+
if (options.journal) {
|
|
516
|
+
const { getJournalProfile } = await import('../journals.js');
|
|
517
|
+
const { mergeJournalFormatting } = await import('../build.js');
|
|
518
|
+
const profile = getJournalProfile(options.journal);
|
|
519
|
+
if (!profile) {
|
|
520
|
+
console.error(fmt.status('error', `Unknown journal: ${options.journal}`));
|
|
521
|
+
console.error(chalk.dim('Use "rev validate --list" to see available profiles'));
|
|
522
|
+
process.exit(1);
|
|
523
|
+
}
|
|
524
|
+
journalName = profile.name;
|
|
525
|
+
if (profile.formatting) {
|
|
526
|
+
Object.assign(config, mergeJournalFormatting(config, profile.formatting, dir));
|
|
527
|
+
}
|
|
528
|
+
} else if ((config as unknown as { journal?: string }).journal) {
|
|
529
|
+
// Journal set in rev.yaml — already applied by loadConfig, just get name for display
|
|
530
|
+
const { getJournalProfile } = await import('../journals.js');
|
|
531
|
+
const profile = getJournalProfile((config as unknown as { journal?: string }).journal!);
|
|
532
|
+
if (profile) journalName = profile.name;
|
|
533
|
+
}
|
|
534
|
+
|
|
518
535
|
console.log(fmt.header(`Building ${config.title || 'document'}`));
|
|
519
536
|
console.log();
|
|
520
537
|
|
|
521
538
|
const targetFormats = formats.length > 0 ? formats : ['pdf', 'docx'];
|
|
522
539
|
const tocEnabled = options.toc || config.pdf?.toc || config.docx?.toc;
|
|
540
|
+
if (journalName) console.log(chalk.dim(` Journal: ${journalName}`));
|
|
523
541
|
console.log(chalk.dim(` Formats: ${targetFormats.join(', ')}`));
|
|
524
542
|
console.log(chalk.dim(` Crossref: ${hasPandocCrossref() && options.crossref !== false ? 'enabled' : 'disabled'}`));
|
|
525
543
|
if (tocEnabled) console.log(chalk.dim(` TOC: enabled`));
|
package/lib/commands/response.ts
CHANGED
|
@@ -33,6 +33,8 @@ interface ProfilesOptions {
|
|
|
33
33
|
new?: string;
|
|
34
34
|
project?: boolean;
|
|
35
35
|
dirs?: boolean;
|
|
36
|
+
fetchCsl?: string;
|
|
37
|
+
listCsl?: boolean;
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
interface AnonymizeOptions {
|
|
@@ -126,7 +128,9 @@ export function register(program: Command): void {
|
|
|
126
128
|
const custom = journals.filter(j => j.custom);
|
|
127
129
|
|
|
128
130
|
for (const j of builtIn) {
|
|
129
|
-
|
|
131
|
+
const profile = getJournalProfile(j.id);
|
|
132
|
+
const fmtTag = profile?.formatting ? chalk.green(' [formatting]') : '';
|
|
133
|
+
console.log(` ${chalk.bold(j.id)} - ${j.name}${fmtTag}`);
|
|
130
134
|
if (j.url) console.log(chalk.dim(` ${j.url}`));
|
|
131
135
|
}
|
|
132
136
|
|
|
@@ -134,13 +138,16 @@ export function register(program: Command): void {
|
|
|
134
138
|
console.log();
|
|
135
139
|
console.log(chalk.cyan(' Custom Profiles:'));
|
|
136
140
|
for (const j of custom) {
|
|
137
|
-
|
|
141
|
+
const profile = getJournalProfile(j.id);
|
|
142
|
+
const fmtTag = profile?.formatting ? chalk.green(' [formatting]') : '';
|
|
143
|
+
console.log(` ${chalk.bold(j.id)} - ${j.name} ${chalk.cyan('[custom]')}${fmtTag}`);
|
|
138
144
|
if (j.url) console.log(chalk.dim(` ${j.url}`));
|
|
139
145
|
}
|
|
140
146
|
}
|
|
141
147
|
|
|
142
148
|
console.log();
|
|
143
149
|
console.log(chalk.dim('Usage: rev validate --journal <name>'));
|
|
150
|
+
console.log(chalk.dim('Profiles with [formatting] can also be used with: rev build -j <name>'));
|
|
144
151
|
console.log(chalk.dim('Manage custom profiles: rev profiles'));
|
|
145
152
|
return;
|
|
146
153
|
}
|
|
@@ -224,6 +231,8 @@ export function register(program: Command): void {
|
|
|
224
231
|
.option('--new <name>', 'Create a new profile template')
|
|
225
232
|
.option('--project', 'Create profile in project directory (with --new)')
|
|
226
233
|
.option('--dirs', 'Show profile directory locations')
|
|
234
|
+
.option('--fetch-csl <name>', 'Download a CSL citation style to cache')
|
|
235
|
+
.option('--list-csl', 'List cached CSL citation styles')
|
|
227
236
|
.action(async (options: ProfilesOptions) => {
|
|
228
237
|
const {
|
|
229
238
|
listCustomProfiles,
|
|
@@ -232,6 +241,50 @@ export function register(program: Command): void {
|
|
|
232
241
|
} = await import('../plugins.js');
|
|
233
242
|
const { listJournals } = await import('../journals.js');
|
|
234
243
|
|
|
244
|
+
if (options.listCsl) {
|
|
245
|
+
const { listCachedCSL, getCSLCacheDir } = await import('../csl.js');
|
|
246
|
+
const cached = listCachedCSL();
|
|
247
|
+
console.log(fmt.header('Cached CSL Styles'));
|
|
248
|
+
console.log(chalk.dim(` ${getCSLCacheDir()}`));
|
|
249
|
+
console.log();
|
|
250
|
+
if (cached.length === 0) {
|
|
251
|
+
console.log(chalk.dim(' No cached styles. Download with: rev profiles --fetch-csl <name>'));
|
|
252
|
+
} else {
|
|
253
|
+
for (const c of cached) {
|
|
254
|
+
console.log(` ${chalk.bold(c.name)}`);
|
|
255
|
+
}
|
|
256
|
+
console.log();
|
|
257
|
+
console.log(chalk.dim(` ${cached.length} cached style(s)`));
|
|
258
|
+
}
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
if (options.fetchCsl) {
|
|
263
|
+
const { fetchCSL, resolveCSL, getCSLAliases } = await import('../csl.js');
|
|
264
|
+
|
|
265
|
+
// Check if already cached
|
|
266
|
+
const existing = resolveCSL(options.fetchCsl);
|
|
267
|
+
if (existing) {
|
|
268
|
+
console.log(fmt.status('info', `Already cached: ${existing}`));
|
|
269
|
+
return;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
const spin = fmt.spinner(`Downloading CSL style "${options.fetchCsl}"...`).start();
|
|
273
|
+
const result = await fetchCSL(options.fetchCsl);
|
|
274
|
+
spin.stop();
|
|
275
|
+
|
|
276
|
+
if (result) {
|
|
277
|
+
console.log(fmt.status('success', `Downloaded: ${result}`));
|
|
278
|
+
} else {
|
|
279
|
+
console.error(fmt.status('error', `Could not download CSL style "${options.fetchCsl}"`));
|
|
280
|
+
const aliases = getCSLAliases();
|
|
281
|
+
const names = Object.keys(aliases).sort().join(', ');
|
|
282
|
+
console.error(chalk.dim(` Known short names: ${names}`));
|
|
283
|
+
process.exit(1);
|
|
284
|
+
}
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
287
|
+
|
|
235
288
|
if (options.dirs) {
|
|
236
289
|
const dirs = getPluginDirs();
|
|
237
290
|
console.log(fmt.header('Profile Directories'));
|
package/lib/commands/sections.ts
CHANGED
|
@@ -204,11 +204,10 @@ async function bootstrapFromWord(docx: string, options: BootstrapOptions): Promi
|
|
|
204
204
|
console.log(chalk.cyan(`Bootstrapping project from ${path.basename(docx)}...\n`));
|
|
205
205
|
|
|
206
206
|
try {
|
|
207
|
-
const
|
|
207
|
+
const { extractTextFromWord } = await import('../word.js');
|
|
208
208
|
const { default: YAML } = await import('yaml');
|
|
209
209
|
|
|
210
|
-
const
|
|
211
|
-
const text = result.value;
|
|
210
|
+
const text = await extractTextFromWord(docx);
|
|
212
211
|
|
|
213
212
|
const sections = detectSectionsFromWord(text);
|
|
214
213
|
|
|
@@ -328,6 +327,13 @@ export function register(program: Command): void {
|
|
|
328
327
|
|
|
329
328
|
console.log(chalk.cyan(`Comparing ${path.basename(docx)} against ${path.basename(original)}...`));
|
|
330
329
|
|
|
330
|
+
// Warn if pandoc is missing
|
|
331
|
+
const { hasPandoc: hasPandocImport, getInstallInstructions: getInstallImport } = await import('../dependencies.js');
|
|
332
|
+
if (!hasPandocImport()) {
|
|
333
|
+
console.log(chalk.yellow(`\n Warning: Pandoc not installed. Track changes extracted from XML (formatting may differ).`));
|
|
334
|
+
console.log(chalk.dim(` Install for best results: ${getInstallImport('pandoc')}\n`));
|
|
335
|
+
}
|
|
336
|
+
|
|
331
337
|
try {
|
|
332
338
|
const { importFromWord } = await import('../import.js');
|
|
333
339
|
const { annotated, stats } = await importFromWord(docx, original, {
|
|
@@ -386,14 +392,14 @@ export function register(program: Command): void {
|
|
|
386
392
|
}
|
|
387
393
|
|
|
388
394
|
try {
|
|
389
|
-
const
|
|
390
|
-
const
|
|
395
|
+
const { extractTextFromWord } = await import('../word.js');
|
|
396
|
+
const text = await extractTextFromWord(docx);
|
|
391
397
|
|
|
392
398
|
if (options.output) {
|
|
393
|
-
fs.writeFileSync(options.output,
|
|
399
|
+
fs.writeFileSync(options.output, text, 'utf-8');
|
|
394
400
|
console.error(chalk.green(`Extracted to ${options.output}`));
|
|
395
401
|
} else {
|
|
396
|
-
process.stdout.write(
|
|
402
|
+
process.stdout.write(text);
|
|
397
403
|
}
|
|
398
404
|
} catch (err) {
|
|
399
405
|
const error = err as Error;
|
|
@@ -564,6 +570,14 @@ export function register(program: Command): void {
|
|
|
564
570
|
process.exit(1);
|
|
565
571
|
}
|
|
566
572
|
|
|
573
|
+
// Check pandoc availability upfront and warn
|
|
574
|
+
const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
|
|
575
|
+
if (!hasPandoc()) {
|
|
576
|
+
console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
|
|
577
|
+
console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
|
|
578
|
+
console.log();
|
|
579
|
+
}
|
|
580
|
+
|
|
567
581
|
const spin = fmt.spinner(`Importing ${path.basename(docx)}...`).start();
|
|
568
582
|
|
|
569
583
|
try {
|
|
@@ -579,12 +593,20 @@ export function register(program: Command): void {
|
|
|
579
593
|
const comments = await extractWordComments(docx);
|
|
580
594
|
const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(docx);
|
|
581
595
|
|
|
582
|
-
//
|
|
583
|
-
// Mammoth only extracts plain text which loses all formatting
|
|
596
|
+
// Extract Word text (uses pandoc if available, falls back to XML extraction)
|
|
584
597
|
const wordExtraction = await extractFromWord(docx, { mediaDir: options.dir });
|
|
585
598
|
let wordText = wordExtraction.text;
|
|
586
599
|
const wordTables = wordExtraction.tables || [];
|
|
587
600
|
|
|
601
|
+
// Log extraction messages (warnings about pandoc, track change stats, etc.)
|
|
602
|
+
for (const msg of wordExtraction.messages || []) {
|
|
603
|
+
if (msg.type === 'warning') {
|
|
604
|
+
spin.stop();
|
|
605
|
+
console.log(fmt.status('warning', msg.message));
|
|
606
|
+
spin.start();
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
588
610
|
// Restore crossref on FULL text BEFORE splitting into sections
|
|
589
611
|
// This ensures duplicate labels from track changes are handled correctly
|
|
590
612
|
// (the same figure may appear multiple times in old/new versions)
|
package/lib/csl.ts
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSL citation style resolution and caching
|
|
3
|
+
*
|
|
4
|
+
* Resolves short CSL names (e.g. "nature") to local file paths,
|
|
5
|
+
* downloading from the CSL repository if needed.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as fs from 'fs';
|
|
9
|
+
import * as path from 'path';
|
|
10
|
+
import * as os from 'os';
|
|
11
|
+
import * as https from 'https';
|
|
12
|
+
|
|
13
|
+
// =============================================================================
|
|
14
|
+
// Constants
|
|
15
|
+
// =============================================================================
|
|
16
|
+
|
|
17
|
+
/** Cache directory for downloaded CSL files */
|
|
18
|
+
const CSL_CACHE_DIR = path.join(os.homedir(), '.rev', 'csl');
|
|
19
|
+
|
|
20
|
+
/** GitHub raw URL for the CSL styles repository */
|
|
21
|
+
const CSL_REPO_BASE = 'https://raw.githubusercontent.com/citation-style-language/styles/master';
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Short name → CSL filename mapping for common styles.
|
|
25
|
+
* Names that match their filename exactly don't need an entry here.
|
|
26
|
+
*/
|
|
27
|
+
const CSL_ALIASES: Record<string, string> = {
|
|
28
|
+
'apa': 'apa',
|
|
29
|
+
'chicago': 'chicago-author-date',
|
|
30
|
+
'vancouver': 'vancouver',
|
|
31
|
+
'ieee': 'ieee',
|
|
32
|
+
'nature': 'nature',
|
|
33
|
+
'science': 'science',
|
|
34
|
+
'cell': 'cell',
|
|
35
|
+
'pnas': 'pnas',
|
|
36
|
+
'plos': 'plos',
|
|
37
|
+
'elife': 'elife',
|
|
38
|
+
'ecology-letters': 'ecology-letters',
|
|
39
|
+
'ecology': 'ecology',
|
|
40
|
+
'ama': 'american-medical-association',
|
|
41
|
+
'acs': 'american-chemical-society',
|
|
42
|
+
'rsc': 'royal-society-of-chemistry',
|
|
43
|
+
'harvard': 'harvard-cite-them-right',
|
|
44
|
+
'mla': 'modern-language-association',
|
|
45
|
+
'elsevier': 'elsevier-harvard',
|
|
46
|
+
'springer': 'springer-basic-author-date',
|
|
47
|
+
'biomed-central': 'biomed-central',
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
// =============================================================================
|
|
51
|
+
// Public API
|
|
52
|
+
// =============================================================================
|
|
53
|
+
|
|
54
|
+
/**
 * Return the directory in which downloaded CSL style files are cached.
 *
 * @returns The value of the module-level CSL_CACHE_DIR constant
 */
export function getCSLCacheDir(): string {
  const cacheDir = CSL_CACHE_DIR;
  return cacheDir;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Resolve a CSL name or path to a local file path.
 *
 * Resolution order:
 * 1. An absolute path that points at an existing file is returned as-is
 * 2. With `projectDir`: `<projectDir>/<nameOrPath>`, then the same with `.csl` appended
 * 3. The `~/.rev/csl/` cache, after mapping short names through the alias table
 * 4. Otherwise null (caller can then use fetchCSL to download)
 *
 * Only regular files count as a match. A project sub-directory that happens
 * to share a style's short name (e.g. `science/`) is ignored rather than
 * being returned as the CSL path.
 *
 * @param nameOrPath - Short style name, file name, or path
 * @param projectDir - Optional project directory to search first
 * @returns Path to an existing CSL file, or null when nothing was found
 */
export function resolveCSL(nameOrPath: string, projectDir?: string): string | null {
  // existsSync would also accept directories; stat and require a regular file.
  const isFile = (candidate: string): boolean => {
    try {
      return fs.statSync(candidate).isFile();
    } catch {
      return false;
    }
  };

  // An empty name can never identify a style (and would otherwise resolve to projectDir itself).
  if (!nameOrPath) {
    return null;
  }

  // Already a file path that exists
  if (path.isAbsolute(nameOrPath) && isFile(nameOrPath)) {
    return nameOrPath;
  }

  // Relative path in project directory
  if (projectDir) {
    const projectPath = path.join(projectDir, nameOrPath);
    if (isFile(projectPath)) {
      return projectPath;
    }
    // Try with .csl extension
    const projectPathCsl = projectPath.endsWith('.csl') ? projectPath : `${projectPath}.csl`;
    if (isFile(projectPathCsl)) {
      return projectPathCsl;
    }
  }

  // Resolve short name to filename
  const baseName = resolveCSLName(nameOrPath);
  const fileName = baseName.endsWith('.csl') ? baseName : `${baseName}.csl`;

  // Check cache
  const cachePath = path.join(CSL_CACHE_DIR, fileName);
  if (isFile(cachePath)) {
    return cachePath;
  }

  return null;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Download a CSL style from the CSL repository to the local cache.
 *
 * The name is mapped through the alias table, `.csl` is appended if missing,
 * and the file is fetched from CSL_REPO_BASE. Every failure (unknown style,
 * network error, unwritable cache directory, a name that would escape the
 * cache directory) yields null rather than an exception.
 *
 * @param name - Short style name or CSL file name (may include a repository
 *   sub-directory such as `dependent/<style>`)
 * @returns Path to the cached file, or null on failure
 */
export async function fetchCSL(name: string): Promise<string | null> {
  const baseName = resolveCSLName(name);
  const fileName = baseName.endsWith('.csl') ? baseName : `${baseName}.csl`;
  const url = `${CSL_REPO_BASE}/${fileName}`;

  // Refuse names containing `..` segments that would write outside the cache.
  const cacheRoot = path.resolve(CSL_CACHE_DIR);
  const cachePath = path.resolve(cacheRoot, fileName);
  if (!cachePath.startsWith(cacheRoot + path.sep)) {
    return null;
  }

  try {
    const content = await httpGet(url);
    if (!content) {
      return null;
    }

    // Create the cache directory (and any sub-directory in the style name)
    // inside the try block so filesystem errors also map to null.
    fs.mkdirSync(path.dirname(cachePath), { recursive: true });
    fs.writeFileSync(cachePath, content, 'utf-8');
    return cachePath;
  } catch {
    return null;
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Enumerate the CSL styles currently stored in the cache directory.
 *
 * @returns One entry per `*.csl` file directly inside the cache directory,
 *   sorted by file name; `name` is the file name without the `.csl` suffix
 *   and `path` is the file's location. Empty when the directory does not exist.
 */
export function listCachedCSL(): Array<{ name: string; path: string }> {
  const styles: Array<{ name: string; path: string }> = [];

  if (fs.existsSync(CSL_CACHE_DIR)) {
    const styleFiles = fs.readdirSync(CSL_CACHE_DIR).filter(entry => entry.endsWith('.csl'));
    styleFiles.sort();

    for (const styleFile of styleFiles) {
      styles.push({
        name: path.basename(styleFile, '.csl'),
        path: path.join(CSL_CACHE_DIR, styleFile),
      });
    }
  }

  return styles;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Return the known short-name → CSL file name aliases.
 *
 * @returns A fresh shallow copy of the alias table, so callers cannot
 *   modify the module-level mapping
 */
export function getCSLAliases(): Record<string, string> {
  const aliases: Record<string, string> = Object.assign({}, CSL_ALIASES);
  return aliases;
}
|
|
153
|
+
|
|
154
|
+
// =============================================================================
|
|
155
|
+
// Internal helpers
|
|
156
|
+
// =============================================================================
|
|
157
|
+
|
|
158
|
+
/**
 * Resolve a short name to a CSL filename (without extension).
 *
 * The name is lower-cased and a trailing `.csl` is removed, then looked up in
 * the alias table. Names without an alias are returned in normalized form.
 *
 * @param name - Short style name or CSL file name
 * @returns The CSL file base name to use for cache lookups and downloads
 */
function resolveCSLName(name: string): string {
  const normalized = name.toLowerCase().replace(/\.csl$/, '');

  // Own-property check: a plain `CSL_ALIASES[normalized]` would also match
  // inherited keys such as "constructor" or "tostring"-like names and return
  // a non-string value.
  const alias = Object.prototype.hasOwnProperty.call(CSL_ALIASES, normalized)
    ? CSL_ALIASES[normalized]
    : undefined;

  return alias || normalized;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Simple HTTPS GET that follows redirects.
 *
 * Never rejects: every failure (non-200 status, network error, timeout,
 * invalid or non-HTTPS URL, more than 5 redirects) resolves to null.
 *
 * @param url - Absolute https:// URL to fetch
 * @param redirectCount - Number of redirects already followed (internal)
 * @returns The response body decoded as UTF-8, or null on failure
 */
function httpGet(url: string, redirectCount = 0): Promise<string | null> {
  if (redirectCount > 5) return Promise.resolve(null);

  return new Promise((resolve) => {
    try {
      const request = https.get(url, (res) => {
        const status = res.statusCode ?? 0;
        const location = res.headers.location;

        // Follow redirects
        if ([301, 302, 303, 307, 308].includes(status) && location) {
          // Drain the body so the socket is released.
          res.resume();
          let nextUrl: string;
          try {
            // Location may be relative; resolve it against the current URL.
            nextUrl = new URL(location, url).toString();
          } catch {
            resolve(null);
            return;
          }
          resolve(httpGet(nextUrl, redirectCount + 1));
          return;
        }

        if (status !== 200) {
          res.resume();
          resolve(null);
          return;
        }

        // Decode as a stream so multi-byte characters split across chunk
        // boundaries are not corrupted.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk: string) => { data += chunk; });
        res.on('end', () => resolve(data));
        res.on('error', () => resolve(null));
      });

      request.on('error', () => resolve(null));
      // Do not hang forever on a stalled connection.
      request.setTimeout(30000, () => {
        request.destroy();
        resolve(null);
      });
    } catch {
      // https.get throws synchronously for invalid URLs or non-https protocols.
      resolve(null);
    }
  });
}
|
package/lib/import.ts
CHANGED
|
@@ -743,13 +743,27 @@ export async function extractFromWord(
|
|
|
743
743
|
}
|
|
744
744
|
}
|
|
745
745
|
} catch (pandocErr: any) {
|
|
746
|
-
//
|
|
747
|
-
|
|
748
|
-
const
|
|
749
|
-
const
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
746
|
+
// Pandoc not available — use XML-based extraction with track change support
|
|
747
|
+
const { extractPlainTextWithTrackChanges } = await import('./word.js');
|
|
748
|
+
const { getInstallInstructions } = await import('./dependencies.js');
|
|
749
|
+
const installCmd = getInstallInstructions('pandoc');
|
|
750
|
+
|
|
751
|
+
const xmlResult = await extractPlainTextWithTrackChanges(docxPath);
|
|
752
|
+
text = xmlResult.text;
|
|
753
|
+
hasTrackChanges = xmlResult.hasTrackChanges;
|
|
754
|
+
trackChangeStats = xmlResult.stats;
|
|
755
|
+
|
|
756
|
+
if (hasTrackChanges) {
|
|
757
|
+
messages.push({
|
|
758
|
+
type: 'warning',
|
|
759
|
+
message: `Pandoc not installed. Using built-in XML extractor (${trackChangeStats.insertions} insertions, ${trackChangeStats.deletions} deletions preserved). Formatting may differ. Install pandoc for best results: ${installCmd}`
|
|
760
|
+
});
|
|
761
|
+
} else {
|
|
762
|
+
messages.push({
|
|
763
|
+
type: 'warning',
|
|
764
|
+
message: `Pandoc not installed. Using built-in XML extractor (no track changes found). Install pandoc for better formatting: ${installCmd}`
|
|
765
|
+
});
|
|
766
|
+
}
|
|
753
767
|
}
|
|
754
768
|
|
|
755
769
|
// Extract comments directly from docx XML
|
package/lib/journals.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import * as fs from 'fs';
|
|
7
7
|
import * as path from 'path';
|
|
8
|
-
import type { JournalProfile, JournalRequirements, ValidationResult } from './types.js';
|
|
8
|
+
import type { JournalProfile, JournalRequirements, JournalFormatting, ValidationResult } from './types.js';
|
|
9
9
|
import { loadCustomProfiles } from './plugins.js';
|
|
10
10
|
import { countWords } from './utils.js';
|
|
11
11
|
|
|
@@ -23,6 +23,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
|
|
|
23
23
|
figures: { max: 6 },
|
|
24
24
|
sections: ['Abstract', 'Introduction', 'Results', 'Discussion', 'Methods'],
|
|
25
25
|
},
|
|
26
|
+
formatting: {
|
|
27
|
+
csl: 'nature',
|
|
28
|
+
pdf: { fontsize: '11pt', geometry: 'margin=2.5cm', linestretch: 2 },
|
|
29
|
+
},
|
|
26
30
|
},
|
|
27
31
|
|
|
28
32
|
science: {
|
|
@@ -34,6 +38,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
|
|
|
34
38
|
figures: { max: 4 },
|
|
35
39
|
sections: ['Abstract', 'Introduction', 'Results', 'Discussion'],
|
|
36
40
|
},
|
|
41
|
+
formatting: {
|
|
42
|
+
csl: 'science',
|
|
43
|
+
pdf: { fontsize: '12pt', geometry: 'margin=1in', linestretch: 2 },
|
|
44
|
+
},
|
|
37
45
|
},
|
|
38
46
|
|
|
39
47
|
'plos-one': {
|
|
@@ -44,6 +52,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
|
|
|
44
52
|
references: { doiRequired: false },
|
|
45
53
|
sections: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
|
|
46
54
|
},
|
|
55
|
+
formatting: {
|
|
56
|
+
csl: 'plos',
|
|
57
|
+
pdf: { fontsize: '12pt', geometry: 'margin=1in', linestretch: 2 },
|
|
58
|
+
},
|
|
47
59
|
},
|
|
48
60
|
|
|
49
61
|
'pnas': {
|
|
@@ -55,6 +67,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
|
|
|
55
67
|
figures: { max: 6 },
|
|
56
68
|
sections: ['Abstract', 'Introduction', 'Results', 'Discussion'],
|
|
57
69
|
},
|
|
70
|
+
formatting: {
|
|
71
|
+
csl: 'pnas',
|
|
72
|
+
pdf: { documentclass: 'article', fontsize: '9pt', geometry: 'margin=2cm', linestretch: 1.2, numbersections: false },
|
|
73
|
+
},
|
|
58
74
|
},
|
|
59
75
|
|
|
60
76
|
'ecology-letters': {
|
|
@@ -97,6 +113,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
|
|
|
97
113
|
references: { doiRequired: true },
|
|
98
114
|
sections: ['Abstract', 'Introduction', 'Results', 'Discussion', 'Methods'],
|
|
99
115
|
},
|
|
116
|
+
formatting: {
|
|
117
|
+
csl: 'elife',
|
|
118
|
+
pdf: { fontsize: '11pt', geometry: 'margin=2.5cm', linestretch: 1.5 },
|
|
119
|
+
},
|
|
100
120
|
},
|
|
101
121
|
|
|
102
122
|
'cell': {
|
|
@@ -108,6 +128,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
|
|
|
108
128
|
figures: { max: 7 },
|
|
109
129
|
sections: ['Abstract', 'Introduction', 'Results', 'Discussion'],
|
|
110
130
|
},
|
|
131
|
+
formatting: {
|
|
132
|
+
csl: 'cell',
|
|
133
|
+
pdf: { fontsize: '12pt', geometry: 'margin=2.5cm', linestretch: 2 },
|
|
134
|
+
},
|
|
111
135
|
},
|
|
112
136
|
|
|
113
137
|
'current-biology': {
|