openrxiv 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/cli/src/api/api-client.d.ts +96 -0
  2. package/dist/cli/src/api/api-client.d.ts.map +1 -0
  3. package/dist/cli/src/api/api-client.js +257 -0
  4. package/dist/cli/src/aws/bucket-explorer.d.ts +26 -0
  5. package/dist/cli/src/aws/bucket-explorer.d.ts.map +1 -0
  6. package/dist/cli/src/aws/bucket-explorer.js +220 -0
  7. package/dist/cli/src/aws/config.d.ts +18 -0
  8. package/dist/cli/src/aws/config.d.ts.map +1 -0
  9. package/dist/cli/src/aws/config.js +191 -0
  10. package/dist/cli/src/aws/downloader.d.ts +13 -0
  11. package/dist/cli/src/aws/downloader.d.ts.map +1 -0
  12. package/dist/cli/src/aws/downloader.js +115 -0
  13. package/dist/cli/src/aws/month-lister.d.ts +18 -0
  14. package/dist/cli/src/aws/month-lister.d.ts.map +1 -0
  15. package/dist/cli/src/aws/month-lister.js +90 -0
  16. package/dist/cli/src/commands/batch-process.d.ts +3 -0
  17. package/dist/cli/src/commands/batch-process.d.ts.map +1 -0
  18. package/dist/cli/src/commands/batch-process.js +557 -0
  19. package/dist/cli/src/commands/config.d.ts +3 -0
  20. package/dist/cli/src/commands/config.d.ts.map +1 -0
  21. package/dist/cli/src/commands/config.js +42 -0
  22. package/dist/cli/src/commands/download.d.ts +3 -0
  23. package/dist/cli/src/commands/download.d.ts.map +1 -0
  24. package/dist/cli/src/commands/download.js +76 -0
  25. package/dist/cli/src/commands/list.d.ts +3 -0
  26. package/dist/cli/src/commands/list.d.ts.map +1 -0
  27. package/dist/cli/src/commands/list.js +18 -0
  28. package/dist/cli/src/commands/month-info.d.ts +3 -0
  29. package/dist/cli/src/commands/month-info.d.ts.map +1 -0
  30. package/dist/cli/src/commands/month-info.js +213 -0
  31. package/dist/cli/src/commands/summary.d.ts +3 -0
  32. package/dist/cli/src/commands/summary.d.ts.map +1 -0
  33. package/dist/cli/src/commands/summary.js +249 -0
  34. package/dist/cli/src/index.d.ts +3 -0
  35. package/dist/cli/src/index.d.ts.map +1 -0
  36. package/dist/cli/src/index.js +35 -0
  37. package/dist/cli/src/utils/batches.d.ts +9 -0
  38. package/dist/cli/src/utils/batches.d.ts.map +1 -0
  39. package/dist/cli/src/utils/batches.js +61 -0
  40. package/dist/cli/src/utils/batches.test.d.ts +2 -0
  41. package/dist/cli/src/utils/batches.test.d.ts.map +1 -0
  42. package/dist/cli/src/utils/batches.test.js +119 -0
  43. package/dist/cli/src/utils/default-server.d.ts +3 -0
  44. package/dist/cli/src/utils/default-server.d.ts.map +1 -0
  45. package/dist/cli/src/utils/default-server.js +20 -0
  46. package/dist/cli/src/utils/index.d.ts +5 -0
  47. package/dist/cli/src/utils/index.d.ts.map +1 -0
  48. package/dist/cli/src/utils/index.js +5 -0
  49. package/dist/cli/src/utils/meca-processor.d.ts +28 -0
  50. package/dist/cli/src/utils/meca-processor.d.ts.map +1 -0
  51. package/dist/cli/src/utils/meca-processor.js +503 -0
  52. package/dist/cli/src/utils/meca-processor.test.d.ts +2 -0
  53. package/dist/cli/src/utils/meca-processor.test.d.ts.map +1 -0
  54. package/dist/cli/src/utils/meca-processor.test.js +123 -0
  55. package/dist/cli/src/utils/months.d.ts +36 -0
  56. package/dist/cli/src/utils/months.d.ts.map +1 -0
  57. package/dist/cli/src/utils/months.js +135 -0
  58. package/dist/cli/src/utils/months.test.d.ts +2 -0
  59. package/dist/cli/src/utils/months.test.d.ts.map +1 -0
  60. package/dist/cli/src/utils/months.test.js +209 -0
  61. package/dist/cli/src/utils/requester-pays-error.d.ts +6 -0
  62. package/dist/cli/src/utils/requester-pays-error.d.ts.map +1 -0
  63. package/dist/cli/src/utils/requester-pays-error.js +20 -0
  64. package/dist/cli/src/version.d.ts +3 -0
  65. package/dist/cli/src/version.d.ts.map +1 -0
  66. package/dist/cli/src/version.js +2 -0
  67. package/dist/cli.cjs +98815 -0
  68. package/dist/utils/src/biorxiv-parser.d.ts +51 -0
  69. package/dist/utils/src/biorxiv-parser.d.ts.map +1 -0
  70. package/dist/utils/src/biorxiv-parser.js +126 -0
  71. package/dist/utils/src/folder-structure.d.ts +44 -0
  72. package/dist/utils/src/folder-structure.d.ts.map +1 -0
  73. package/dist/utils/src/folder-structure.js +207 -0
  74. package/dist/utils/src/index.d.ts +3 -0
  75. package/dist/utils/src/index.d.ts.map +1 -0
  76. package/dist/utils/src/index.js +3 -0
  77. package/package.json +76 -0
@@ -0,0 +1,76 @@
1
+ import { Command } from 'commander';
2
+ import axios from 'axios';
3
+ import { downloadFile } from '../aws/downloader.js';
4
+ import { setGlobalRequesterPays } from '../aws/config.js';
5
+ import { displayRequesterPaysError } from '../utils/requester-pays-error.js';
6
/**
 * `download` sub-command.
 *
 * Looks the DOI up via `GET {api-url}/v1/works/{prefix}/{suffix}`, reads the
 * `s3Key` from the response and hands it to `downloadFile`, saving the result
 * as `<sanitised-doi>.meca`.
 *
 * The process exits with code 1 when the DOI is malformed, the lookup fails,
 * the response carries no `s3Key`, or the download itself fails.
 */
export const downloadCommand = new Command('download')
    .description('Download MECA files from the bioRxiv/medRxiv S3 bucket by DOI')
    .argument('<doi>', 'DOI of the paper (e.g., "10.1101/2024.01.15.123456")')
    .option('-o, --output <dir>', 'Output directory for downloaded files', './downloads')
    .option('-a, --api-url <url>', 'API base URL', 'https://biorxiv.curvenote.dev')
    .option('--requester-pays', 'Enable requester-pays for S3 bucket access')
    .action(async (doi, options) => {
    try {
        // Validate DOI format: a non-empty prefix and suffix separated by the FIRST slash.
        const slashIndex = doi.indexOf('/');
        if (slashIndex <= 0 || slashIndex === doi.length - 1) {
            console.error('❌ Invalid DOI format. Expected format: 10.1101/2024.01.15.123456');
            process.exit(1);
        }
        // Split on the first slash only, so a suffix that itself contains "/" is kept whole
        // (String.prototype.split with a limit would silently drop the remainder).
        const doiPrefix = doi.slice(0, slashIndex);
        const doiSuffix = doi.slice(slashIndex + 1);
        console.log(`🔍 Looking up paper with DOI: ${doi}`);
        console.log(`📡 API URL: ${options.apiUrl}`);
        // Look up the paper in the API
        const response = await axios.get(`${options.apiUrl}/v1/works/${doiPrefix}/${doiSuffix}`);
        if (!response.data || !response.data.s3Key) {
            console.error('❌ Paper not found or no S3 key available');
            process.exit(1);
        }
        const paper = response.data;
        console.log(`📄 Found paper: ${paper.title || 'Unknown title'}`);
        console.log(`📦 S3 Key: ${paper.s3Key}`);
        // Set requester-pays if flag is provided
        if (options.requesterPays) {
            setGlobalRequesterPays(true);
            console.log(`💰 Requester-pays enabled for S3 access`);
        }
        // Create a filesystem-safe filename from the DOI
        const safeDoi = doi.replace(/[^a-zA-Z0-9.-]/g, '_');
        const filename = `${safeDoi}.meca`;
        console.log(`📥 Downloading MECA file as: ${filename}`);
        // Download the file using the S3 key from the API
        try {
            await downloadFile(paper.s3Key, { ...options, filename });
            console.log(`✅ Successfully downloaded MECA file for DOI: ${doi}`);
        }
        catch (downloadError) {
            // These two error markers are treated as requester-pays problems and get
            // the dedicated help text; everything else is reported verbatim.
            const errorMessage = downloadError instanceof Error ? downloadError.message : String(downloadError);
            if (errorMessage.includes('UnknownError') || errorMessage.includes('AccessDenied')) {
                displayRequesterPaysError();
            }
            else {
                console.error('❌ Download failed:', errorMessage);
            }
            process.exit(1);
        }
    }
    catch (error) {
        if (axios.isAxiosError(error)) {
            // `response` is absent for network-level failures, so guard before reading it.
            const status = error.response == null ? undefined : error.response.status;
            if (status === 404) {
                console.error('❌ Article not found with the specified DOI');
            }
            else if (status === 401) {
                console.error('❌ Authentication failed. Please check your API key');
            }
            else {
                const payload = error.response == null ? undefined : error.response.data;
                console.error('❌ API error:', payload || error.message);
            }
        }
        else {
            console.error('❌ Error looking up paper:', error);
        }
        process.exit(1);
    }
});
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare const listCommand: Command;
3
+ //# sourceMappingURL=list.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"list.d.ts","sourceRoot":"","sources":["../../../../src/commands/list.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAIpC,eAAO,MAAM,WAAW,SAapB,CAAC"}
@@ -0,0 +1,18 @@
1
+ import { Command } from 'commander';
2
+ import { listBucketContent } from '../aws/bucket-explorer.js';
3
+ import { getDefaultServer } from '../utils/index.js';
4
/**
 * `list` sub-command: forwards the parsed CLI options to `listBucketContent`
 * and exits with code 1 if that call fails.
 */
export const listCommand = new Command('list');
listCommand.description('List available content in the bioRxiv or medRxiv S3 bucket');
listCommand.option('-m, --month <month>', 'Filter by specific month (e.g., "2024-01")');
listCommand.option('-b, --batch <batch>', 'Filter by specific batch (e.g., "Batch_01")');
listCommand.option('-l, --limit <number>', 'Limit the number of results', '50');
listCommand.option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer());
listCommand.action(async (cliOptions) => {
    try {
        await listBucketContent(cliOptions);
    }
    catch (failure) {
        console.error('Error listing content:', failure);
        process.exit(1);
    }
});
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare const monthInfoCommand: Command;
3
+ //# sourceMappingURL=month-info.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"month-info.d.ts","sourceRoot":"","sources":["../../../../src/commands/month-info.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQpC,eAAO,MAAM,gBAAgB,SAczB,CAAC"}
@@ -0,0 +1,213 @@
1
+ import { Command } from 'commander';
2
+ import { ListObjectsV2Command } from '@aws-sdk/client-s3';
3
+ import chalk from 'chalk';
4
+ import { getS3Client } from '../aws/config.js';
5
+ import { getFolderStructure } from 'biorxiv-utils';
6
+ import { getBucketName } from '../aws/bucket-explorer.js';
7
+ import { getDefaultServer } from '../utils/index.js';
8
/**
 * `month-info` sub-command: delegates to `listMonthMetadata` with the parsed
 * options and exits with code 1 when it rejects.
 */
export const monthInfoCommand = new Command('month-info');
monthInfoCommand.description('List detailed metadata for all files in a specific month or batch from bioRxiv or medRxiv');
monthInfoCommand.option('-m, --month <month>', 'Month to list (e.g., "January_2024" or "2024-01")');
monthInfoCommand.option('-b, --batch <batch>', 'Batch to list (e.g., "1", "batch-1", "Batch_01")');
monthInfoCommand.option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer());
monthInfoCommand.action(async (cliOptions) => {
    try {
        await listMonthMetadata(cliOptions);
    }
    catch (failure) {
        console.error('Error listing month metadata:', failure);
        process.exit(1);
    }
});
22
/**
 * Lists every object under the S3 prefix for the requested month or batch and
 * prints a statistical summary.
 *
 * @param options Parsed CLI options: `month`, `batch` and optional `server`
 *   (defaults to `getDefaultServer()`). At least one of `month` / `batch` is
 *   required; otherwise an error is printed and the process exits with code 1.
 * @throws Error wrapping the underlying message when the S3 listing fails.
 */
async function listMonthMetadata(options) {
    const { month, batch, server = getDefaultServer() } = options;
    // Validate the arguments before building an S3 client, so a usage error
    // never depends on AWS configuration being available.
    if (!month && !batch) {
        console.error('❌ Error: Either --month or --batch option must be specified');
        process.exit(1);
    }
    const client = await getS3Client();
    const bucketName = getBucketName(server);
    // Determine folder structure based on options
    const contentStructure = getFolderStructure({ month, batch, server });
    const prefix = contentStructure.prefix;
    const description = month ? `Month: ${month}` : `Batch: ${batch}`;
    console.log(chalk.blue(`📅 Month/Batch Information: ${description}`));
    console.log(chalk.blue('===================================='));
    console.log(chalk.gray(`🔍 Content Type: ${contentStructure.type === 'current' ? 'Current Content' : 'Back Content'}`));
    if (contentStructure.batch) {
        console.log(chalk.gray(`🔍 Batch: ${contentStructure.batch}`));
    }
    console.log(chalk.gray(`🔍 Scanning S3 prefix: ${prefix}`));
    console.log('');
    const allFiles = [];
    let continuationToken;
    let batchCount = 0; // counts listing pages fetched, not bioRxiv batches
    try {
        // Page through the listing until S3 stops returning a continuation token.
        do {
            batchCount++;
            console.log(chalk.gray(`📦 Fetching batch ${batchCount}...`));
            const command = new ListObjectsV2Command({
                Bucket: bucketName,
                Prefix: prefix,
                MaxKeys: 1000,
                ContinuationToken: continuationToken,
                RequestPayer: 'requester',
            });
            const response = await client.send(command);
            if (response.Contents) {
                for (const item of response.Contents) {
                    if (!item.Key)
                        continue;
                    const fileName = item.Key.split('/').pop() || 'unknown';
                    // Derive the extension from the file name only: splitting the whole
                    // key on "." returns the entire key when no dot is present, and part
                    // of a directory name when only a folder contains a dot.
                    const dotIndex = fileName.lastIndexOf('.');
                    const fileExtension = dotIndex > 0 ? fileName.slice(dotIndex + 1) || 'none' : 'none';
                    allFiles.push({
                        key: item.Key,
                        size: item.Size || 0,
                        lastModified: item.LastModified || new Date(),
                        type: getContentType(item.Key),
                        fileName,
                        fileExtension,
                    });
                }
                console.log(chalk.gray(` Found ${response.Contents.length} files in this batch`));
            }
            continuationToken = response.NextContinuationToken;
        } while (continuationToken);
        console.log(chalk.green(`✅ Total files found: ${allFiles.length}`));
        console.log('');
        displaySummary(allFiles, month || batch || 'unknown', server);
    }
    catch (error) {
        if (error instanceof Error) {
            throw new Error(`Failed to list month metadata: ${error.message}`);
        }
        throw error;
    }
}
89
/**
 * Classifies an S3 key by its (case-sensitive) file suffix.
 *
 * @param key Object key to inspect.
 * @returns `'meca'`, `'pdf'` or `'xml'` when the key ends with that extension,
 *   otherwise `'other'`.
 */
function getContentType(key) {
    const recognised = ['meca', 'pdf', 'xml'];
    const hit = recognised.find((ext) => key.endsWith(`.${ext}`));
    return hit === undefined ? 'other' : hit;
}
98
/**
 * Prints summary statistics for a listing: content structure, file-type
 * breakdown, MECA size statistics, upload date range, a per-day histogram and
 * the batch analysis.
 *
 * @param files Listed file records; reads `type`, `size` and `lastModified`.
 * @param month Label passed to `getFolderStructure` as `month`.
 * @param server Server identifier; defaults to `getDefaultServer()`.
 */
function displaySummary(files, month, server = getDefaultServer()) {
    console.log(chalk.blue.bold('📊 Summary Statistics'));
    console.log(chalk.blue('===================='));
    console.log('');
    // Show content structure info if available
    try {
        // NOTE(review): the caller passes `month || batch`, so this value may be a
        // batch label handed over as `month`; any resulting error is dropped below.
        const contentStructure = getFolderStructure({ month, server });
        console.log(chalk.cyan('📁 Content Structure:'));
        console.log(` Type: ${chalk.yellow(contentStructure.type === 'current' ? 'Current Content' : 'Back Content')}`);
        if (contentStructure.batch) {
            console.log(` Batch: ${chalk.yellow(contentStructure.batch)}`);
        }
        console.log('');
    }
    catch (error) {
        // Deliberately best-effort: the structure section is optional decoration.
    }
    // Nothing to aggregate: avoid printing an "Invalid Date" range and empty sections.
    if (files.length === 0) {
        console.log(chalk.yellow('No files found for this selection; nothing to summarize.'));
        console.log('');
        return;
    }
    // File type breakdown
    const typeCounts = files.reduce((acc, file) => {
        acc[file.type] = (acc[file.type] || 0) + 1;
        return acc;
    }, {});
    console.log(chalk.cyan('📁 File Types:'));
    for (const [type, count] of Object.entries(typeCounts)) {
        const percentage = ((count / files.length) * 100).toFixed(1);
        console.log(` ${chalk.yellow(type.toUpperCase())}: ${chalk.green(count)} (${percentage}%)`);
    }
    console.log('');
    // Size statistics. A single reduce avoids `Math.min(...sizes)`, whose spread
    // can exceed the engine's argument limit on very large listings.
    const mecaFiles = files.filter((f) => f.type === 'meca');
    if (mecaFiles.length > 0) {
        const stats = mecaFiles.reduce((acc, f) => ({
            total: acc.total + f.size,
            min: Math.min(acc.min, f.size),
            max: Math.max(acc.max, f.size),
        }), { total: 0, min: Infinity, max: -Infinity });
        const avgSize = stats.total / mecaFiles.length;
        console.log(chalk.cyan('📦 MECA File Sizes:'));
        console.log(` Total: ${chalk.green(formatFileSize(stats.total))}`);
        console.log(` Average: ${chalk.green(formatFileSize(avgSize))}`);
        console.log(` Range: ${chalk.green(formatFileSize(stats.min))} - ${chalk.green(formatFileSize(stats.max))}`);
        console.log('');
    }
    // Date range (same no-spread approach as above)
    const range = files.reduce((acc, f) => {
        const time = f.lastModified.getTime();
        return { min: Math.min(acc.min, time), max: Math.max(acc.max, time) };
    }, { min: Infinity, max: -Infinity });
    const earliest = new Date(range.min);
    const latest = new Date(range.max);
    console.log(chalk.cyan('📅 Upload Date Range:'));
    console.log(` Earliest: ${chalk.green(earliest.toLocaleDateString())}`);
    console.log(` Latest: ${chalk.green(latest.toLocaleDateString())}`);
    console.log('');
    // Upload date histogram
    const sortedDates = displayUploadDateHistogram(files);
    console.log('');
    // Show batch analysis
    console.log('');
    analyzeBatchPatterns(sortedDates);
}
155
/**
 * Prints a bar chart of files per upload day and returns the per-day counts.
 *
 * Days are labelled with `toLocaleDateString()` but ordered by the real
 * timestamps of the files. Re-parsing the locale label with `new Date(label)`
 * is unreliable, because formats such as "15.01.2024" or "15/01/2024" are
 * not parsed consistently, which scrambles the chronological order.
 *
 * @param files File records; reads `lastModified` (a `Date`).
 * @returns Array of `[dateLabel, count]` pairs in chronological order.
 */
function displayUploadDateHistogram(files) {
    console.log(chalk.cyan('📊 Upload Date Distribution:'));
    console.log(chalk.cyan('============================'));
    console.log('');
    // Group files by displayed date, remembering the earliest timestamp per group
    // so the groups can be ordered without parsing the label back into a date.
    const dateGroups = new Map();
    for (const file of files) {
        const dateKey = file.lastModified.toLocaleDateString();
        const time = file.lastModified.getTime();
        const group = dateGroups.get(dateKey);
        if (group) {
            group.count += 1;
            group.earliest = Math.min(group.earliest, time);
        }
        else {
            dateGroups.set(dateKey, { count: 1, earliest: time });
        }
    }
    // Sort dates chronologically, then reduce to the [label, count] shape callers expect.
    const sortedDates = Array.from(dateGroups.entries())
        .sort((a, b) => a[1].earliest - b[1].earliest)
        .map(([date, group]) => [date, group.count]);
    // Find the maximum count for scaling
    const maxCount = sortedDates.reduce((max, [, count]) => Math.max(max, count), 0);
    const maxBarLength = 50; // Maximum bar length in characters
    // Display histogram
    for (const [date, count] of sortedDates) {
        const barLength = Math.round((count / maxCount) * maxBarLength);
        const bar = '█'.repeat(barLength);
        const percentage = ((count / files.length) * 100).toFixed(1);
        // Color code by upload volume relative to the busiest day
        let countColor = chalk.green; // Low volume
        if (count > maxCount * 0.8) {
            countColor = chalk.red; // High volume
        }
        else if (count > maxCount * 0.5) {
            countColor = chalk.yellow; // Medium volume
        }
        console.log(`${chalk.cyan(date.padStart(10))} ${countColor(count.toString().padStart(4))} ${chalk.gray('│')} ${chalk.blue(bar)} ${chalk.gray(`(${percentage}%)`)}`);
    }
    return sortedDates;
}
192
/**
 * Prints the "Batch Analysis" section: number of active days and the mean
 * number of files per day.
 *
 * @param dateGroups Array of `[dateLabel, count]` pairs. When empty, only the
 *   section header is printed.
 */
function analyzeBatchPatterns(dateGroups) {
    console.log(chalk.cyan('🔍 Batch Analysis'));
    console.log(chalk.cyan('================='));
    console.log('');
    const activeDays = dateGroups.length;
    if (activeDays !== 0) {
        // Sum the per-day counts with a plain loop.
        let fileTotal = 0;
        for (const entry of dateGroups) {
            fileTotal += entry[1];
        }
        const perDay = fileTotal / activeDays;
        console.log(` Total active days: ${chalk.green(activeDays)}`);
        console.log(` Average files per day: ${chalk.green(perDay.toFixed(1))}`);
        console.log('');
    }
}
206
/**
 * Formats a byte count as a human-readable string using 1024-based units.
 *
 * @param bytes Size in bytes; may be fractional (e.g. an average).
 * @returns A string such as `"1.5 MB"`. Zero, negative and non-finite inputs
 *   yield `"0 B"`; values beyond the largest unit are expressed in that unit.
 */
function formatFileSize(bytes) {
    // Guard inputs for which log() is undefined or meaningless.
    if (!Number.isFinite(bytes) || bytes <= 0)
        return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
    const exponent = Math.floor(Math.log(bytes) / Math.log(k));
    // Clamp: fractional byte counts give a negative exponent, and huge values
    // would otherwise index past the end of the unit table.
    const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare const summaryCommand: Command;
3
+ //# sourceMappingURL=summary.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../../../src/commands/summary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,cAAc,SAyGvB,CAAC"}
@@ -0,0 +1,249 @@
1
+ import { Command } from 'commander';
2
+ import chalk from 'chalk';
3
+ import boxen from 'boxen';
4
+ import { createOpenRxivApiClient, getServerFromDOI } from '../api/api-client.js';
5
+ import { parseBiorxivURL } from 'biorxiv-utils';
6
+ import { getDefaultServer } from '../utils/index.js';
7
/**
 * `summary` sub-command: resolves a bioRxiv/medRxiv URL or DOI, fetches the
 * preprint's details and version list from the API, and renders them.
 *
 * When the paper is not found and the selected server is `biorxiv`, medRxiv is
 * tried as a fallback. The version list is requested from whichever server
 * actually returned the paper. Exits with code 1 on an unparsable URL, when no
 * content is found, or on any thrown error.
 */
export const summaryCommand = new Command('summary')
    .description('Get a summary of a bioRxiv preprint from a URL or DOI')
    .argument('<url-or-doi>', 'bioRxiv URL or DOI to summarize')
    .option('-m, --more', 'Show additional details and full abstract')
    .option('-s, --server <server>', 'Specify server (biorxiv or medrxiv)', getDefaultServer())
    .action(async (urlOrDoi, options) => {
    try {
        const headerServer = options.server != null ? options.server : '';
        console.log(chalk.blue.bold(`🔬 ${headerServer} Preprint Summary`));
        console.log(chalk.blue('================================\n'));
        // Parse the input (could be URL or DOI)
        let doi;
        const looksLikeUrl = ['biorxiv.org', 'medrxiv.org', 'doi.org'].some((host) => urlOrDoi.includes(host));
        if (looksLikeUrl) {
            const parsedUrl = parseBiorxivURL(urlOrDoi);
            if (!parsedUrl) {
                console.log(chalk.red('❌ Invalid bioRxiv URL'));
                process.exit(1);
            }
            doi = parsedUrl.doi;
            console.log(chalk.cyan(`📝 URL: ${urlOrDoi}`));
        }
        else {
            doi = urlOrDoi;
            console.log(chalk.cyan(`🔍 DOI: ${doi}`));
        }
        console.log('');
        // Determine server if possible.
        // NOTE(review): --server has a default from getDefaultServer(), so the
        // getServerFromDOI fallback only runs if that default is null/undefined — confirm.
        let server = options.server != null ? options.server : getServerFromDOI(urlOrDoi);
        console.log(chalk.blue(`🌐 Server: ${server}`));
        console.log('');
        // Create API client
        const apiClient = createOpenRxivApiClient({
            server,
            format: 'json',
            timeout: 15000,
        });
        // Tracks the client for the server that actually holds the paper.
        let activeClient = apiClient;
        // Show loading indicator
        console.log(chalk.yellow('⏳ Fetching preprint information...'));
        console.log('');
        // Get content details
        let contentDetail = await apiClient.getContentDetail(doi);
        // If not found on bioRxiv, try medRxiv as fallback
        if (!contentDetail && server === 'biorxiv') {
            console.log(chalk.yellow('⚠️ Paper not found on bioRxiv, trying medRxiv...'));
            const medrxivApiClient = createOpenRxivApiClient({
                server: 'medrxiv',
                format: 'json',
                timeout: 15000,
            });
            contentDetail = await medrxivApiClient.getContentDetail(doi);
            if (contentDetail) {
                console.log(chalk.green('✅ Found paper on medRxiv!'));
                server = 'medrxiv'; // Update server for display
                contentDetail.server = 'medrxiv'; // Ensure the content detail has the correct server
                activeClient = medrxivApiClient;
            }
        }
        if (!contentDetail) {
            console.log(chalk.red('❌ No content found for this DOI on either bioRxiv or medRxiv'));
            console.log(chalk.yellow("💡 This might be a new preprint that hasn't been indexed yet"));
            process.exit(1);
        }
        // Fetch versions once, from the server that returned the paper. Querying the
        // original server first would be a wasted request that may also throw.
        const allVersions = await activeClient.getAllVersions(doi);
        // Display summary
        const isVerbose = options.more === true;
        displaySummary(contentDetail, allVersions, isVerbose);
    }
    catch (error) {
        console.error(chalk.red('❌ Error:'), error instanceof Error ? error.message : 'Unknown error');
        process.exit(1);
    }
});
95
/**
 * Renders a preprint summary as a series of boxed sections: title, basic
 * information, authors, abstract, funding, versions (verbose only) and footer.
 *
 * @param contentDetail Preprint record; reads `title`, `doi`, `server`,
 *   `category`, `license`, `type`, `published`, `date`, `version`, `jatsxml`,
 *   `authors`, `author_corresponding`, `author_corresponding_institution`,
 *   `abstract` and `funding`.
 * @param allVersions Array of version records, or a falsy value when unavailable.
 * @param verbose When true, boxes are not width-limited to 80 columns and the
 *   extra detail rows and the versions section are shown.
 */
function displaySummary(contentDetail, allVersions, verbose = false) {
    const versionCount = allVersions ? allVersions.length : 0;
    // Title in a prominent box
    console.log(boxen(chalk.green.bold.underline(contentDetail.title), summaryBoxOptions(verbose, {
        borderStyle: 'double',
        borderColor: 'green',
        backgroundColor: 'black',
    })));
    // Basic info
    const basicInfo = [
        `${chalk.cyan('DOI:')} ${contentDetail.doi}`,
        `${chalk.cyan('Server:')} ${contentDetail.server}`,
        `${chalk.cyan('Category:')} ${chalk.yellow(contentDetail.category)}`,
        `${chalk.cyan('License:')} ${chalk.yellow(contentDetail.license)}`,
        `${chalk.cyan('Type:')} ${chalk.yellow(contentDetail.type)}`,
        `${chalk.cyan('Published:')} ${contentDetail.published === 'NA'
            ? chalk.gray('Not published')
            : chalk.green(contentDetail.published)}`,
        `${chalk.cyan('Total versions:')} ${versionCount}`,
        ...(verbose
            ? [
                `${chalk.cyan('Date:')} ${contentDetail.date}`,
                `${chalk.cyan('Version:')} ${contentDetail.version}`,
                ...(contentDetail.jatsxml
                    ? [`${chalk.cyan('JATS XML:')} ${chalk.underline.blue(contentDetail.jatsxml)}`]
                    : []),
            ]
            : []),
    ].join('\n');
    console.log(boxen(basicInfo, summaryBoxOptions(verbose, {
        borderColor: 'blue',
        title: chalk.blue.bold('📋 Basic Information'),
        titleAlignment: 'left',
    })));
    // Authors
    const authorsInfo = [
        `${chalk.cyan('Authors:')} ${contentDetail.authors}`,
        ...(contentDetail.author_corresponding
            ? [`${chalk.cyan('Corresponding:')} ${chalk.green(contentDetail.author_corresponding)}`]
            : []),
        ...(contentDetail.author_corresponding_institution
            ? [
                `${chalk.cyan('Institution:')} ${chalk.gray(contentDetail.author_corresponding_institution)}`,
            ]
            : []),
    ].join('\n');
    console.log(boxen(authorsInfo, summaryBoxOptions(verbose, {
        borderColor: 'cyan',
        title: chalk.cyan.bold('👥 Authors'),
        titleAlignment: 'left',
    })));
    // Abstract
    if (contentDetail.abstract) {
        console.log(boxen(contentDetail.abstract, summaryBoxOptions(verbose, {
            borderColor: 'yellow',
            title: chalk.yellow.bold('📖 Abstract'),
            titleAlignment: 'left',
        })));
    }
    // Funding information
    if (contentDetail.funding && contentDetail.funding.length > 0) {
        const fundingInfo = contentDetail.funding
            .map((fund, index) => {
            let fundText = `${chalk.cyan(index + 1)}. ${chalk.green(fund.name)}`;
            if (fund.id) {
                fundText += `\n ${chalk.gray('ID:')} ${fund.id} (${fund['id-type']})`;
            }
            if (fund.award) {
                fundText += `\n ${chalk.gray('Award:')} ${fund.award}`;
            }
            return fundText;
        })
            .join('\n\n');
        console.log(boxen(fundingInfo, summaryBoxOptions(verbose, {
            borderColor: 'magenta',
            title: chalk.magenta.bold('💰 Funding'),
            titleAlignment: 'left',
        })));
    }
    // Versions - only show when --more is provided
    if (verbose && versionCount > 0) {
        const versionsInfo = allVersions
            .map((version) => {
            let versionText = `${chalk.cyan(`v${version.version}`)} (${version.date})`;
            versionText += `\n ${chalk.gray('Type:')} ${version.type}`;
            // Only add an ellipsis when the title was actually cut.
            versionText += `\n ${chalk.gray('Title:')} ${truncateSummaryTitle(version.title, 60)}`;
            if (version.jatsxml) {
                versionText += `\n ${chalk.gray('JATS XML:')} ${chalk.underline.blue(version.jatsxml)}`;
            }
            return versionText;
        })
            .join('\n\n');
        console.log(boxen(versionsInfo, summaryBoxOptions(verbose, {
            borderColor: 'green',
            title: chalk.green.bold('🔄 Versions'),
            titleAlignment: 'left',
        })));
    }
    // Footer
    const baseUrl = contentDetail.server === 'medrxiv'
        ? `https://www.medrxiv.org/content/${contentDetail.doi}`
        : `https://www.biorxiv.org/content/${contentDetail.doi}`;
    const footerInfo = [
        `💡 View online: ${chalk.underline.blue(baseUrl)}`,
        ...(versionCount > 1 && !verbose
            ? [
                `📚 This preprint has ${versionCount} versions. Use --more to see additional details.`,
            ]
            : []),
    ].join('\n');
    console.log(boxen(footerInfo, summaryBoxOptions(verbose, {
        borderColor: 'gray',
        backgroundColor: 'black',
    })));
}
/** Builds boxen options shared by every section; non-verbose output is capped at 80 columns. */
function summaryBoxOptions(verbose, overrides) {
    return {
        padding: 1,
        margin: 1,
        borderStyle: 'round',
        textAlignment: 'left',
        ...overrides,
        ...(verbose ? {} : { width: 80 }),
    };
}
/** Shortens a title to `maxLength` characters, appending "..." only when it was cut; tolerates a missing title. */
function truncateSummaryTitle(title, maxLength) {
    const text = title == null ? '' : String(title);
    return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":""}
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { listCommand } from './commands/list.js';
4
+ import { downloadCommand } from './commands/download.js';
5
+ import { configCommand } from './commands/config.js';
6
+ import { summaryCommand } from './commands/summary.js';
7
+ import { monthInfoCommand } from './commands/month-info.js';
8
+ import { batchProcessCommand } from './commands/batch-process.js';
9
+ import { setGlobalRequesterPays } from './aws/config.js';
10
+ import version from './version.js';
11
+ import { getCliName } from './utils/index.js';
12
// CLI entry point: builds the root commander program, registers every
// sub-command and the global options, then parses process.argv.
const cliName = getCliName();
const program = new Command();
program
    .name(cliName)
    .description(`CLI tool to download bioRxiv/medRxiv MECA files from AWS S3 for text and data mining`)
    .version(`v${version}`, '-v, --version', `Print the current version of the ${cliName} CLI`);
// Add commands
program.addCommand(listCommand);
program.addCommand(downloadCommand);
program.addCommand(configCommand);
program.addCommand(summaryCommand);
program.addCommand(monthInfoCommand);
program.addCommand(batchProcessCommand);
// Global options
program.option('-d, --debug', 'Enable debug mode');
program.option('--requester-pays', 'Enable requester pays for local development (required when not on EC2 with IAM role)');
// Apply the global requester-pays flag while options are being parsed.
// Setting it after program.parse() is too late: parse() invokes the matched
// command's action, whose synchronous prefix (for example the getS3Client()
// call in month-info) has already run by the time parse() returns.
program.on('option:requester-pays', () => {
    setGlobalRequesterPays(true);
    console.log('Requester pays enabled - you will be charged for S3 requests');
});
// Parse command line arguments
program.parse();
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Parse batch input to support ranges like "1-10" or "batch-1,batch-2"
3
+ */
4
+ export declare function parseBatchInput(batchInput: string): string[];
5
+ /**
6
+ * Validate batch format
7
+ */
8
+ export declare function validateBatchFormat(batch: string): boolean;
9
+ //# sourceMappingURL=batches.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batches.d.ts","sourceRoot":"","sources":["../../../../src/utils/batches.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CAgE5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAG1D"}