docrev 0.8.5 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.gitattributes +1 -0
  2. package/README.md +25 -1
  3. package/dist/lib/annotations.d.ts.map +1 -1
  4. package/dist/lib/annotations.js +6 -0
  5. package/dist/lib/annotations.js.map +1 -1
  6. package/dist/lib/build.d.ts +6 -1
  7. package/dist/lib/build.d.ts.map +1 -1
  8. package/dist/lib/build.js +67 -1
  9. package/dist/lib/build.js.map +1 -1
  10. package/dist/lib/commands/build.d.ts.map +1 -1
  11. package/dist/lib/commands/build.js +26 -7
  12. package/dist/lib/commands/build.js.map +1 -1
  13. package/dist/lib/commands/response.d.ts.map +1 -1
  14. package/dist/lib/commands/response.js +50 -2
  15. package/dist/lib/commands/response.js.map +1 -1
  16. package/dist/lib/commands/sections.d.ts.map +1 -1
  17. package/dist/lib/commands/sections.js +28 -9
  18. package/dist/lib/commands/sections.js.map +1 -1
  19. package/dist/lib/csl.d.ts +38 -0
  20. package/dist/lib/csl.d.ts.map +1 -0
  21. package/dist/lib/csl.js +170 -0
  22. package/dist/lib/csl.js.map +1 -0
  23. package/dist/lib/import.d.ts.map +1 -1
  24. package/dist/lib/import.js +20 -7
  25. package/dist/lib/import.js.map +1 -1
  26. package/dist/lib/journals.d.ts.map +1 -1
  27. package/dist/lib/journals.js +24 -0
  28. package/dist/lib/journals.js.map +1 -1
  29. package/dist/lib/plugins.d.ts +11 -0
  30. package/dist/lib/plugins.d.ts.map +1 -1
  31. package/dist/lib/plugins.js +21 -1
  32. package/dist/lib/plugins.js.map +1 -1
  33. package/dist/lib/pptx-template.d.ts +17 -22
  34. package/dist/lib/pptx-template.d.ts.map +1 -1
  35. package/dist/lib/pptx-template.js +296 -552
  36. package/dist/lib/pptx-template.js.map +1 -1
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +4 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/types.d.ts +19 -1
  41. package/dist/lib/types.d.ts.map +1 -1
  42. package/dist/lib/word.d.ts +24 -11
  43. package/dist/lib/word.d.ts.map +1 -1
  44. package/dist/lib/word.js +233 -32
  45. package/dist/lib/word.js.map +1 -1
  46. package/lib/annotations.ts +8 -0
  47. package/lib/build.ts +75 -2
  48. package/lib/commands/build.ts +25 -7
  49. package/lib/commands/response.ts +55 -2
  50. package/lib/commands/sections.ts +31 -9
  51. package/lib/csl.ts +191 -0
  52. package/lib/import.ts +21 -7
  53. package/lib/journals.ts +25 -1
  54. package/lib/plugins.ts +35 -1
  55. package/lib/pptx-template.ts +346 -502
  56. package/lib/schema.ts +4 -0
  57. package/lib/types.ts +20 -1
  58. package/lib/word.ts +253 -38
  59. package/package.json +1 -2
  60. package/lib/apply-buildup-colors.py +0 -88
@@ -281,6 +281,9 @@ export function stripAnnotations(text: string, options: StripOptions = {}): stri
281
281
  text = text.replace(PATTERNS.comment, '');
282
282
  }
283
283
 
284
+ // Strip pandoc highlight spans: [text]{.mark} → text
285
+ text = text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');
286
+
284
287
  // Clean up partial/orphaned markers within the loop
285
288
  // This handles cases where nested annotations leave behind fragments
286
289
 
@@ -319,6 +322,11 @@ export function stripAnnotations(text: string, options: StripOptions = {}): stri
319
322
  text = text.replace(/\{~~/g, '');
320
323
  text = text.replace(/~>/g, '');
321
324
 
325
+ // Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
326
+ // was inside a comment. A [ is orphan if no matching ] follows before
327
+ // the next [ or the end of the text (the lookahead is not limited to one line).
328
+ text = text.replace(/\[(?![^\[\]]*\])/g, '');
329
+
322
330
  return text;
323
331
  }
324
332
 
package/lib/build.ts CHANGED
@@ -21,7 +21,9 @@ import { getThemePath, getThemeNames, PPTX_THEMES } from './pptx-themes.js';
21
21
  import { runPostprocess } from './postprocess.js';
22
22
  import { hasPandoc, hasPandocCrossref, hasLatex } from './dependencies.js';
23
23
  import { buildImageRegistry, writeImageRegistry } from './image-registry.js';
24
- import type { Author } from './types.js';
24
+ import type { Author, JournalFormatting } from './types.js';
25
+ import { getJournalProfile } from './journals.js';
26
+ import { resolveCSL } from './csl.js';
25
27
 
26
28
  // =============================================================================
27
29
  // Constants
@@ -250,6 +252,69 @@ export const DEFAULT_CONFIG: BuildConfig = {
250
252
  // Public API
251
253
  // =============================================================================
252
254
 
255
/**
 * Layer a journal profile's formatting defaults onto a build config.
 * Intended priority: DEFAULT_CONFIG < journal formatting < rev.yaml explicit settings.
 *
 * A pdf/docx/crossref field is treated as "not set by the user" when its JSON
 * serialization equals the corresponding DEFAULT_CONFIG value, so a user value
 * that happens to equal the default is replaced by the journal value as well.
 *
 * @param config - Config to start from; it is shallow-copied, not mutated
 * @param formatting - Formatting block of the journal profile
 * @param directory - Project directory, passed to resolveCSL for the CSL lookup
 * @returns A new config with the journal defaults applied
 */
export function mergeJournalFormatting(config: BuildConfig, formatting: JournalFormatting, directory: string): BuildConfig {
  // Copy the user's section, then take every journal value whose key still
  // holds the built-in default.
  const overlay = (
    userSection: object | undefined,
    builtIn: object,
    journalSection: object,
  ): Record<string, unknown> => {
    const current = (userSection || {}) as Record<string, unknown>;
    const baseline = builtIn as Record<string, unknown>;
    const result: Record<string, unknown> = { ...userSection };
    for (const [key, value] of Object.entries(journalSection)) {
      if (value === undefined) continue;
      if (JSON.stringify(current[key]) !== JSON.stringify(baseline[key])) continue;
      result[key] = value;
    }
    return result;
  };

  const merged = { ...config };

  // CSL: only when the user has none. Prefer a locally resolved file; otherwise
  // keep the bare name — pandoc --citeproc can sometimes resolve it, and the
  // user can fetch with rev profiles --fetch-csl
  if (formatting.csl && !config.csl) {
    merged.csl = resolveCSL(formatting.csl, directory) || formatting.csl;
  }

  if (formatting.pdf) {
    merged.pdf = overlay(config.pdf, DEFAULT_CONFIG.pdf, formatting.pdf) as BuildConfig['pdf'];
  }

  if (formatting.docx) {
    merged.docx = overlay(config.docx, DEFAULT_CONFIG.docx, formatting.docx) as BuildConfig['docx'];
  }

  if (formatting.crossref) {
    merged.crossref = overlay(config.crossref, DEFAULT_CONFIG.crossref, formatting.crossref) as BuildConfig['crossref'];
  }

  return merged;
}
317
+
253
318
  /**
254
319
  * Load rev.yaml config from directory
255
320
  * @param directory - Project directory path
@@ -273,7 +338,7 @@ export function loadConfig(directory: string): BuildConfig {
273
338
  const userConfig = YAML.parse(content) || {};
274
339
 
275
340
  // Deep merge with defaults
276
- const config: BuildConfig = {
341
+ let config: BuildConfig = {
277
342
  ...DEFAULT_CONFIG,
278
343
  ...userConfig,
279
344
  crossref: { ...DEFAULT_CONFIG.crossref, ...userConfig.crossref },
@@ -287,6 +352,14 @@ export function loadConfig(directory: string): BuildConfig {
287
352
  _configPath: configPath,
288
353
  };
289
354
 
355
+ // Apply journal formatting defaults (between DEFAULT_CONFIG and user settings)
356
+ if (userConfig.journal) {
357
+ const profile = getJournalProfile(userConfig.journal);
358
+ if (profile?.formatting) {
359
+ config = mergeJournalFormatting(config, profile.formatting, directory);
360
+ }
361
+ }
362
+
290
363
  return config;
291
364
  } catch (err) {
292
365
  const error = err as Error;
@@ -42,6 +42,7 @@ interface InstallOptions {
42
42
 
43
43
  interface BuildOptions {
44
44
  dir: string;
45
+ journal?: string;
45
46
  crossref?: boolean;
46
47
  toc?: boolean;
47
48
  showChanges?: boolean;
@@ -270,13 +271,6 @@ export function register(program: Command, pkg?: { version?: string }): void {
270
271
  console.log(chalk.yellow(' ✗ pandoc-crossref not found'));
271
272
  }
272
273
 
273
- try {
274
- await import('mammoth');
275
- console.log(chalk.green(' ✓ mammoth (Word parsing)'));
276
- } catch {
277
- console.log(chalk.red(' ✗ mammoth not found - run: npm install'));
278
- }
279
-
280
274
  console.log('');
281
275
 
282
276
  if (hasPandocInstalled && hasCrossref) {
@@ -484,6 +478,7 @@ export function register(program: Command, pkg?: { version?: string }): void {
484
478
  .description('Build PDF/DOCX/TEX/PPTX/Beamer from sections')
485
479
  .argument('[formats...]', 'Output formats: pdf, docx, tex, beamer, pptx, all', ['pdf', 'docx'])
486
480
  .option('-d, --dir <directory>', 'Project directory', '.')
481
+ .option('-j, --journal <name>', 'Use journal profile for build formatting defaults')
487
482
  .option('--no-crossref', 'Skip pandoc-crossref filter')
488
483
  .option('--toc', 'Include table of contents')
489
484
  .option('--show-changes', 'Export DOCX with visible track changes (audit mode)')
@@ -515,11 +510,34 @@ export function register(program: Command, pkg?: { version?: string }): void {
515
510
  process.exit(1);
516
511
  }
517
512
 
513
+ // Apply journal formatting from CLI flag (overrides rev.yaml journal field)
514
+ let journalName: string | undefined;
515
+ if (options.journal) {
516
+ const { getJournalProfile } = await import('../journals.js');
517
+ const { mergeJournalFormatting } = await import('../build.js');
518
+ const profile = getJournalProfile(options.journal);
519
+ if (!profile) {
520
+ console.error(fmt.status('error', `Unknown journal: ${options.journal}`));
521
+ console.error(chalk.dim('Use "rev validate --list" to see available profiles'));
522
+ process.exit(1);
523
+ }
524
+ journalName = profile.name;
525
+ if (profile.formatting) {
526
+ Object.assign(config, mergeJournalFormatting(config, profile.formatting, dir));
527
+ }
528
+ } else if ((config as unknown as { journal?: string }).journal) {
529
+ // Journal set in rev.yaml — already applied by loadConfig, just get name for display
530
+ const { getJournalProfile } = await import('../journals.js');
531
+ const profile = getJournalProfile((config as unknown as { journal?: string }).journal!);
532
+ if (profile) journalName = profile.name;
533
+ }
534
+
518
535
  console.log(fmt.header(`Building ${config.title || 'document'}`));
519
536
  console.log();
520
537
 
521
538
  const targetFormats = formats.length > 0 ? formats : ['pdf', 'docx'];
522
539
  const tocEnabled = options.toc || config.pdf?.toc || config.docx?.toc;
540
+ if (journalName) console.log(chalk.dim(` Journal: ${journalName}`));
523
541
  console.log(chalk.dim(` Formats: ${targetFormats.join(', ')}`));
524
542
  console.log(chalk.dim(` Crossref: ${hasPandocCrossref() && options.crossref !== false ? 'enabled' : 'disabled'}`));
525
543
  if (tocEnabled) console.log(chalk.dim(` TOC: enabled`));
@@ -33,6 +33,8 @@ interface ProfilesOptions {
33
33
  new?: string;
34
34
  project?: boolean;
35
35
  dirs?: boolean;
36
+ fetchCsl?: string;
37
+ listCsl?: boolean;
36
38
  }
37
39
 
38
40
  interface AnonymizeOptions {
@@ -126,7 +128,9 @@ export function register(program: Command): void {
126
128
  const custom = journals.filter(j => j.custom);
127
129
 
128
130
  for (const j of builtIn) {
129
- console.log(` ${chalk.bold(j.id)} - ${j.name}`);
131
+ const profile = getJournalProfile(j.id);
132
+ const fmtTag = profile?.formatting ? chalk.green(' [formatting]') : '';
133
+ console.log(` ${chalk.bold(j.id)} - ${j.name}${fmtTag}`);
130
134
  if (j.url) console.log(chalk.dim(` ${j.url}`));
131
135
  }
132
136
 
@@ -134,13 +138,16 @@ export function register(program: Command): void {
134
138
  console.log();
135
139
  console.log(chalk.cyan(' Custom Profiles:'));
136
140
  for (const j of custom) {
137
- console.log(` ${chalk.bold(j.id)} - ${j.name} ${chalk.cyan('[custom]')}`);
141
+ const profile = getJournalProfile(j.id);
142
+ const fmtTag = profile?.formatting ? chalk.green(' [formatting]') : '';
143
+ console.log(` ${chalk.bold(j.id)} - ${j.name} ${chalk.cyan('[custom]')}${fmtTag}`);
138
144
  if (j.url) console.log(chalk.dim(` ${j.url}`));
139
145
  }
140
146
  }
141
147
 
142
148
  console.log();
143
149
  console.log(chalk.dim('Usage: rev validate --journal <name>'));
150
+ console.log(chalk.dim('Profiles with [formatting] can also be used with: rev build -j <name>'));
144
151
  console.log(chalk.dim('Manage custom profiles: rev profiles'));
145
152
  return;
146
153
  }
@@ -224,6 +231,8 @@ export function register(program: Command): void {
224
231
  .option('--new <name>', 'Create a new profile template')
225
232
  .option('--project', 'Create profile in project directory (with --new)')
226
233
  .option('--dirs', 'Show profile directory locations')
234
+ .option('--fetch-csl <name>', 'Download a CSL citation style to cache')
235
+ .option('--list-csl', 'List cached CSL citation styles')
227
236
  .action(async (options: ProfilesOptions) => {
228
237
  const {
229
238
  listCustomProfiles,
@@ -232,6 +241,50 @@ export function register(program: Command): void {
232
241
  } = await import('../plugins.js');
233
242
  const { listJournals } = await import('../journals.js');
234
243
 
244
+ if (options.listCsl) {
245
+ const { listCachedCSL, getCSLCacheDir } = await import('../csl.js');
246
+ const cached = listCachedCSL();
247
+ console.log(fmt.header('Cached CSL Styles'));
248
+ console.log(chalk.dim(` ${getCSLCacheDir()}`));
249
+ console.log();
250
+ if (cached.length === 0) {
251
+ console.log(chalk.dim(' No cached styles. Download with: rev profiles --fetch-csl <name>'));
252
+ } else {
253
+ for (const c of cached) {
254
+ console.log(` ${chalk.bold(c.name)}`);
255
+ }
256
+ console.log();
257
+ console.log(chalk.dim(` ${cached.length} cached style(s)`));
258
+ }
259
+ return;
260
+ }
261
+
262
+ if (options.fetchCsl) {
263
+ const { fetchCSL, resolveCSL, getCSLAliases } = await import('../csl.js');
264
+
265
+ // Check if already cached
266
+ const existing = resolveCSL(options.fetchCsl);
267
+ if (existing) {
268
+ console.log(fmt.status('info', `Already cached: ${existing}`));
269
+ return;
270
+ }
271
+
272
+ const spin = fmt.spinner(`Downloading CSL style "${options.fetchCsl}"...`).start();
273
+ const result = await fetchCSL(options.fetchCsl);
274
+ spin.stop();
275
+
276
+ if (result) {
277
+ console.log(fmt.status('success', `Downloaded: ${result}`));
278
+ } else {
279
+ console.error(fmt.status('error', `Could not download CSL style "${options.fetchCsl}"`));
280
+ const aliases = getCSLAliases();
281
+ const names = Object.keys(aliases).sort().join(', ');
282
+ console.error(chalk.dim(` Known short names: ${names}`));
283
+ process.exit(1);
284
+ }
285
+ return;
286
+ }
287
+
235
288
  if (options.dirs) {
236
289
  const dirs = getPluginDirs();
237
290
  console.log(fmt.header('Profile Directories'));
@@ -204,11 +204,10 @@ async function bootstrapFromWord(docx: string, options: BootstrapOptions): Promi
204
204
  console.log(chalk.cyan(`Bootstrapping project from ${path.basename(docx)}...\n`));
205
205
 
206
206
  try {
207
- const mammoth = await import('mammoth');
207
+ const { extractTextFromWord } = await import('../word.js');
208
208
  const { default: YAML } = await import('yaml');
209
209
 
210
- const result = await mammoth.extractRawText({ path: docx });
211
- const text = result.value;
210
+ const text = await extractTextFromWord(docx);
212
211
 
213
212
  const sections = detectSectionsFromWord(text);
214
213
 
@@ -328,6 +327,13 @@ export function register(program: Command): void {
328
327
 
329
328
  console.log(chalk.cyan(`Comparing ${path.basename(docx)} against ${path.basename(original)}...`));
330
329
 
330
+ // Warn if pandoc is missing
331
+ const { hasPandoc: hasPandocImport, getInstallInstructions: getInstallImport } = await import('../dependencies.js');
332
+ if (!hasPandocImport()) {
333
+ console.log(chalk.yellow(`\n Warning: Pandoc not installed. Track changes extracted from XML (formatting may differ).`));
334
+ console.log(chalk.dim(` Install for best results: ${getInstallImport('pandoc')}\n`));
335
+ }
336
+
331
337
  try {
332
338
  const { importFromWord } = await import('../import.js');
333
339
  const { annotated, stats } = await importFromWord(docx, original, {
@@ -386,14 +392,14 @@ export function register(program: Command): void {
386
392
  }
387
393
 
388
394
  try {
389
- const mammoth = await import('mammoth');
390
- const result = await mammoth.extractRawText({ path: docx });
395
+ const { extractTextFromWord } = await import('../word.js');
396
+ const text = await extractTextFromWord(docx);
391
397
 
392
398
  if (options.output) {
393
- fs.writeFileSync(options.output, result.value, 'utf-8');
399
+ fs.writeFileSync(options.output, text, 'utf-8');
394
400
  console.error(chalk.green(`Extracted to ${options.output}`));
395
401
  } else {
396
- process.stdout.write(result.value);
402
+ process.stdout.write(text);
397
403
  }
398
404
  } catch (err) {
399
405
  const error = err as Error;
@@ -564,6 +570,14 @@ export function register(program: Command): void {
564
570
  process.exit(1);
565
571
  }
566
572
 
573
+ // Check pandoc availability upfront and warn
574
+ const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
575
+ if (!hasPandoc()) {
576
+ console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
577
+ console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
578
+ console.log();
579
+ }
580
+
567
581
  const spin = fmt.spinner(`Importing ${path.basename(docx)}...`).start();
568
582
 
569
583
  try {
@@ -579,12 +593,20 @@ export function register(program: Command): void {
579
593
  const comments = await extractWordComments(docx);
580
594
  const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(docx);
581
595
 
582
- // Use pandoc for extraction to preserve markdown formatting (bold, tables, etc.)
583
- // Mammoth only extracts plain text which loses all formatting
596
+ // Extract Word text (uses pandoc if available, falls back to XML extraction)
584
597
  const wordExtraction = await extractFromWord(docx, { mediaDir: options.dir });
585
598
  let wordText = wordExtraction.text;
586
599
  const wordTables = wordExtraction.tables || [];
587
600
 
601
+ // Log extraction messages (warnings about pandoc, track change stats, etc.)
602
+ for (const msg of wordExtraction.messages || []) {
603
+ if (msg.type === 'warning') {
604
+ spin.stop();
605
+ console.log(fmt.status('warning', msg.message));
606
+ spin.start();
607
+ }
608
+ }
609
+
588
610
  // Restore crossref on FULL text BEFORE splitting into sections
589
611
  // This ensures duplicate labels from track changes are handled correctly
590
612
  // (the same figure may appear multiple times in old/new versions)
package/lib/csl.ts ADDED
@@ -0,0 +1,191 @@
1
+ /**
2
+ * CSL citation style resolution and caching
3
+ *
4
+ * Resolves short CSL names (e.g. "nature") to local file paths,
5
+ * downloading from the CSL repository if needed.
6
+ */
7
+
8
+ import * as fs from 'fs';
9
+ import * as path from 'path';
10
+ import * as os from 'os';
11
+ import * as https from 'https';
12
+
13
+ // =============================================================================
14
+ // Constants
15
+ // =============================================================================
16
+
17
/** Directory under the user's home where downloaded CSL files are cached. */
const CSL_CACHE_DIR = path.join(os.homedir(), '.rev', 'csl');

/** Base URL for raw files in the citation-style-language/styles repository. */
const CSL_REPO_BASE = 'https://raw.githubusercontent.com/citation-style-language/styles/master';

/**
 * Short name → CSL filename (without extension) for common styles.
 * A name that has no entry here is used as the filename unchanged.
 */
const CSL_ALIASES: Record<string, string> = {
  apa: 'apa',
  chicago: 'chicago-author-date',
  vancouver: 'vancouver',
  ieee: 'ieee',
  nature: 'nature',
  science: 'science',
  cell: 'cell',
  pnas: 'pnas',
  plos: 'plos',
  elife: 'elife',
  'ecology-letters': 'ecology-letters',
  ecology: 'ecology',
  ama: 'american-medical-association',
  acs: 'american-chemical-society',
  rsc: 'royal-society-of-chemistry',
  harvard: 'harvard-cite-them-right',
  mla: 'modern-language-association',
  elsevier: 'elsevier-harvard',
  springer: 'springer-basic-author-date',
  'biomed-central': 'biomed-central',
};
49
+
50
+ // =============================================================================
51
+ // Public API
52
+ // =============================================================================
53
+
54
/**
 * Report where downloaded CSL styles are cached.
 *
 * @returns The directory path held in `CSL_CACHE_DIR`
 */
export function getCSLCacheDir(): string {
  const cacheDir: string = CSL_CACHE_DIR;
  return cacheDir;
}
60
+
61
/**
 * Resolve a CSL name or path to a local file path.
 *
 * Resolution order:
 * 1. `nameOrPath` itself, when it is an absolute path to an existing file
 * 2. `<projectDir>/<nameOrPath>`, then the same path with `.csl` appended
 *    (only when `projectDir` is given)
 * 3. The CSL cache directory, after mapping a short name to its filename
 * 4. `null` (caller can then use fetchCSL to download)
 *
 * Only regular files count as a match. A directory that happens to share the
 * style's name (e.g. a project folder called `nature/`) is skipped instead of
 * being returned as the style file.
 *
 * @param nameOrPath - Short style name, filename, or path
 * @param projectDir - Project directory searched before the cache
 * @returns Path to the CSL file, or null when nothing matched
 */
export function resolveCSL(nameOrPath: string, projectDir?: string): string | null {
  // existsSync would also accept directories; stat and require a regular file.
  const isFile = (candidate: string): boolean => {
    try {
      return fs.statSync(candidate).isFile();
    } catch {
      return false;
    }
  };

  // Already an absolute path to a file
  if (path.isAbsolute(nameOrPath) && isFile(nameOrPath)) {
    return nameOrPath;
  }

  // Relative to the project directory, with and without the .csl extension
  if (projectDir) {
    const projectPath = path.join(projectDir, nameOrPath);
    if (isFile(projectPath)) {
      return projectPath;
    }
    const projectPathCsl = projectPath.endsWith('.csl') ? projectPath : `${projectPath}.csl`;
    if (isFile(projectPathCsl)) {
      return projectPathCsl;
    }
  }

  // Fall back to the cache, using the alias-resolved filename
  const baseName = resolveCSLName(nameOrPath);
  const fileName = baseName.endsWith('.csl') ? baseName : `${baseName}.csl`;
  const cachePath = path.join(CSL_CACHE_DIR, fileName);

  return isFile(cachePath) ? cachePath : null;
}
101
+
102
/**
 * Download a CSL style from the CSL repository to the local cache.
 *
 * The name is mapped through resolveCSLName and must resolve to a location
 * inside the cache directory. Names that would escape it (for example ones
 * containing `..` segments) are rejected without any network or disk access.
 *
 * @param name - Short style name or CSL filename
 * @returns Path to the cached file, or null on failure (rejected name,
 *          download failure, empty response, or a filesystem error)
 */
export async function fetchCSL(name: string): Promise<string | null> {
  const baseName = resolveCSLName(name);
  const fileName = baseName.endsWith('.csl') ? baseName : `${baseName}.csl`;
  const cachePath = path.join(CSL_CACHE_DIR, fileName);

  // Containment check: the target must stay below the cache directory.
  const relative = path.relative(CSL_CACHE_DIR, cachePath);
  const escapesCache =
    relative === '' ||
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  if (escapesCache) {
    return null;
  }

  const url = `${CSL_REPO_BASE}/${fileName}`;

  try {
    const content = await httpGet(url);
    if (!content) {
      return null;
    }
    // Directory creation lives inside the try so a filesystem error yields
    // null like every other failure instead of throwing to the caller.
    fs.mkdirSync(path.dirname(cachePath), { recursive: true });
    fs.writeFileSync(cachePath, content, 'utf-8');
    return cachePath;
  } catch {
    return null;
  }
}
129
+
130
/**
 * Enumerate the `.csl` files currently in the cache directory.
 *
 * @returns One entry per cached style, sorted by filename: `name` is the
 *          filename without `.csl`, `path` is its location in the cache.
 *          Empty when the cache directory does not exist.
 */
export function listCachedCSL(): Array<{ name: string; path: string }> {
  const styles: Array<{ name: string; path: string }> = [];

  if (fs.existsSync(CSL_CACHE_DIR)) {
    const cslFiles = fs.readdirSync(CSL_CACHE_DIR).filter(entry => entry.endsWith('.csl'));
    cslFiles.sort();
    for (const file of cslFiles) {
      styles.push({
        name: path.basename(file, '.csl'),
        path: path.join(CSL_CACHE_DIR, file),
      });
    }
  }

  return styles;
}
146
+
147
/**
 * Expose the short-name → CSL filename table.
 *
 * @returns A shallow copy, so callers cannot modify the module's own table
 */
export function getCSLAliases(): Record<string, string> {
  const copy: Record<string, string> = Object.assign({}, CSL_ALIASES);
  return copy;
}
153
+
154
+ // =============================================================================
155
+ // Internal helpers
156
+ // =============================================================================
157
+
158
/**
 * Resolve a short name to a CSL filename (without extension).
 *
 * The name is lower-cased and a trailing `.csl` is removed before the alias
 * lookup. Only the table's own keys are consulted, so names that collide with
 * inherited object members (`constructor`, `__proto__`, ...) fall through to
 * the normalized name instead of yielding a non-string value.
 *
 * @param name - Short style name or CSL filename
 * @returns The aliased filename, or the normalized name when no alias exists
 */
function resolveCSLName(name: string): string {
  const normalized = name.toLowerCase().replace(/\.csl$/, '');
  const alias = Object.prototype.hasOwnProperty.call(CSL_ALIASES, normalized)
    ? CSL_ALIASES[normalized]
    : undefined;
  return alias || normalized;
}
165
+
166
/**
 * Simple HTTPS GET that follows redirects.
 *
 * Never rejects: every failure resolves to null. That covers a non-200 status,
 * more than five redirects, a malformed URL or redirect target, a redirect to
 * a non-https URL, a network error, and 30 seconds of socket inactivity.
 *
 * @param url - Absolute https URL to fetch
 * @param redirectCount - Redirects followed so far (internal recursion counter)
 * @returns The response body decoded as UTF-8, or null on failure
 */
function httpGet(url: string, redirectCount = 0): Promise<string | null> {
  if (redirectCount > 5) return Promise.resolve(null);

  return new Promise((resolve) => {
    try {
      const request = https.get(url, (res) => {
        const status = res.statusCode ?? 0;
        const location = res.headers.location;

        // Follow redirects
        if ([301, 302, 303, 307, 308].includes(status) && location) {
          // Drain the unread body so the socket is released.
          res.resume();
          let target: string;
          try {
            // Location may be relative; resolve it against the current URL.
            target = new URL(location, url).toString();
          } catch {
            resolve(null);
            return;
          }
          resolve(httpGet(target, redirectCount + 1));
          return;
        }

        if (status !== 200) {
          res.resume();
          resolve(null);
          return;
        }

        // Decode as a stream so a multi-byte character split across two
        // chunks is not corrupted by per-chunk string conversion.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk: string) => { data += chunk; });
        res.on('end', () => resolve(data));
        res.on('error', () => resolve(null));
      });

      request.on('error', () => resolve(null));
      // Abort a stalled connection; destroy() with an error triggers the handler above.
      request.setTimeout(30_000, () => {
        request.destroy(new Error('CSL download timed out'));
      });
    } catch {
      // https.get throws synchronously for malformed or non-https URLs.
      resolve(null);
    }
  });
}
package/lib/import.ts CHANGED
@@ -743,13 +743,27 @@ export async function extractFromWord(
743
743
  }
744
744
  }
745
745
  } catch (pandocErr: any) {
746
- // Fall back to mammoth if pandoc fails
747
- messages.push({ type: 'warning', message: 'Pandoc failed, using mammoth (equations and images may not be preserved)' });
748
- const mammoth = await import('mammoth');
749
- const textResult = await mammoth.extractRawText({ path: docxPath });
750
- const htmlResult = await mammoth.convertToHtml({ path: docxPath });
751
- text = textResult.value;
752
- messages = [...textResult.messages, ...htmlResult.messages].map(m => ({ type: 'warning' as const, message: String(m) }));
746
+ // Pandoc failed or is unavailable — use XML-based extraction with track change support
747
+ const { extractPlainTextWithTrackChanges } = await import('./word.js');
748
+ const { getInstallInstructions } = await import('./dependencies.js');
749
+ const installCmd = getInstallInstructions('pandoc');
750
+
751
+ const xmlResult = await extractPlainTextWithTrackChanges(docxPath);
752
+ text = xmlResult.text;
753
+ hasTrackChanges = xmlResult.hasTrackChanges;
754
+ trackChangeStats = xmlResult.stats;
755
+
756
+ if (hasTrackChanges) {
757
+ messages.push({
758
+ type: 'warning',
759
+ message: `Pandoc not installed. Using built-in XML extractor (${trackChangeStats.insertions} insertions, ${trackChangeStats.deletions} deletions preserved). Formatting may differ. Install pandoc for best results: ${installCmd}`
760
+ });
761
+ } else {
762
+ messages.push({
763
+ type: 'warning',
764
+ message: `Pandoc not installed. Using built-in XML extractor (no track changes found). Install pandoc for better formatting: ${installCmd}`
765
+ });
766
+ }
753
767
  }
754
768
 
755
769
  // Extract comments directly from docx XML
package/lib/journals.ts CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  import * as fs from 'fs';
7
7
  import * as path from 'path';
8
- import type { JournalProfile, JournalRequirements, ValidationResult } from './types.js';
8
+ import type { JournalProfile, JournalRequirements, JournalFormatting, ValidationResult } from './types.js';
9
9
  import { loadCustomProfiles } from './plugins.js';
10
10
  import { countWords } from './utils.js';
11
11
 
@@ -23,6 +23,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
23
23
  figures: { max: 6 },
24
24
  sections: ['Abstract', 'Introduction', 'Results', 'Discussion', 'Methods'],
25
25
  },
26
+ formatting: {
27
+ csl: 'nature',
28
+ pdf: { fontsize: '11pt', geometry: 'margin=2.5cm', linestretch: 2 },
29
+ },
26
30
  },
27
31
 
28
32
  science: {
@@ -34,6 +38,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
34
38
  figures: { max: 4 },
35
39
  sections: ['Abstract', 'Introduction', 'Results', 'Discussion'],
36
40
  },
41
+ formatting: {
42
+ csl: 'science',
43
+ pdf: { fontsize: '12pt', geometry: 'margin=1in', linestretch: 2 },
44
+ },
37
45
  },
38
46
 
39
47
  'plos-one': {
@@ -44,6 +52,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
44
52
  references: { doiRequired: false },
45
53
  sections: ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion'],
46
54
  },
55
+ formatting: {
56
+ csl: 'plos',
57
+ pdf: { fontsize: '12pt', geometry: 'margin=1in', linestretch: 2 },
58
+ },
47
59
  },
48
60
 
49
61
  'pnas': {
@@ -55,6 +67,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
55
67
  figures: { max: 6 },
56
68
  sections: ['Abstract', 'Introduction', 'Results', 'Discussion'],
57
69
  },
70
+ formatting: {
71
+ csl: 'pnas',
72
+ pdf: { documentclass: 'article', fontsize: '9pt', geometry: 'margin=2cm', linestretch: 1.2, numbersections: false },
73
+ },
58
74
  },
59
75
 
60
76
  'ecology-letters': {
@@ -97,6 +113,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
97
113
  references: { doiRequired: true },
98
114
  sections: ['Abstract', 'Introduction', 'Results', 'Discussion', 'Methods'],
99
115
  },
116
+ formatting: {
117
+ csl: 'elife',
118
+ pdf: { fontsize: '11pt', geometry: 'margin=2.5cm', linestretch: 1.5 },
119
+ },
100
120
  },
101
121
 
102
122
  'cell': {
@@ -108,6 +128,10 @@ export const JOURNAL_PROFILES: Record<string, JournalProfile> = {
108
128
  figures: { max: 7 },
109
129
  sections: ['Abstract', 'Introduction', 'Results', 'Discussion'],
110
130
  },
131
+ formatting: {
132
+ csl: 'cell',
133
+ pdf: { fontsize: '12pt', geometry: 'margin=2.5cm', linestretch: 2 },
134
+ },
111
135
  },
112
136
 
113
137
  'current-biology': {