openrxiv 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/src/api/api-client.d.ts +96 -0
- package/dist/cli/src/api/api-client.d.ts.map +1 -0
- package/dist/cli/src/api/api-client.js +257 -0
- package/dist/cli/src/aws/bucket-explorer.d.ts +26 -0
- package/dist/cli/src/aws/bucket-explorer.d.ts.map +1 -0
- package/dist/cli/src/aws/bucket-explorer.js +220 -0
- package/dist/cli/src/aws/config.d.ts +18 -0
- package/dist/cli/src/aws/config.d.ts.map +1 -0
- package/dist/cli/src/aws/config.js +191 -0
- package/dist/cli/src/aws/downloader.d.ts +13 -0
- package/dist/cli/src/aws/downloader.d.ts.map +1 -0
- package/dist/cli/src/aws/downloader.js +115 -0
- package/dist/cli/src/aws/month-lister.d.ts +18 -0
- package/dist/cli/src/aws/month-lister.d.ts.map +1 -0
- package/dist/cli/src/aws/month-lister.js +90 -0
- package/dist/cli/src/commands/batch-process.d.ts +3 -0
- package/dist/cli/src/commands/batch-process.d.ts.map +1 -0
- package/dist/cli/src/commands/batch-process.js +557 -0
- package/dist/cli/src/commands/config.d.ts +3 -0
- package/dist/cli/src/commands/config.d.ts.map +1 -0
- package/dist/cli/src/commands/config.js +42 -0
- package/dist/cli/src/commands/download.d.ts +3 -0
- package/dist/cli/src/commands/download.d.ts.map +1 -0
- package/dist/cli/src/commands/download.js +76 -0
- package/dist/cli/src/commands/list.d.ts +3 -0
- package/dist/cli/src/commands/list.d.ts.map +1 -0
- package/dist/cli/src/commands/list.js +18 -0
- package/dist/cli/src/commands/month-info.d.ts +3 -0
- package/dist/cli/src/commands/month-info.d.ts.map +1 -0
- package/dist/cli/src/commands/month-info.js +213 -0
- package/dist/cli/src/commands/summary.d.ts +3 -0
- package/dist/cli/src/commands/summary.d.ts.map +1 -0
- package/dist/cli/src/commands/summary.js +249 -0
- package/dist/cli/src/index.d.ts +3 -0
- package/dist/cli/src/index.d.ts.map +1 -0
- package/dist/cli/src/index.js +35 -0
- package/dist/cli/src/utils/batches.d.ts +9 -0
- package/dist/cli/src/utils/batches.d.ts.map +1 -0
- package/dist/cli/src/utils/batches.js +61 -0
- package/dist/cli/src/utils/batches.test.d.ts +2 -0
- package/dist/cli/src/utils/batches.test.d.ts.map +1 -0
- package/dist/cli/src/utils/batches.test.js +119 -0
- package/dist/cli/src/utils/default-server.d.ts +3 -0
- package/dist/cli/src/utils/default-server.d.ts.map +1 -0
- package/dist/cli/src/utils/default-server.js +20 -0
- package/dist/cli/src/utils/index.d.ts +5 -0
- package/dist/cli/src/utils/index.d.ts.map +1 -0
- package/dist/cli/src/utils/index.js +5 -0
- package/dist/cli/src/utils/meca-processor.d.ts +28 -0
- package/dist/cli/src/utils/meca-processor.d.ts.map +1 -0
- package/dist/cli/src/utils/meca-processor.js +503 -0
- package/dist/cli/src/utils/meca-processor.test.d.ts +2 -0
- package/dist/cli/src/utils/meca-processor.test.d.ts.map +1 -0
- package/dist/cli/src/utils/meca-processor.test.js +123 -0
- package/dist/cli/src/utils/months.d.ts +36 -0
- package/dist/cli/src/utils/months.d.ts.map +1 -0
- package/dist/cli/src/utils/months.js +135 -0
- package/dist/cli/src/utils/months.test.d.ts +2 -0
- package/dist/cli/src/utils/months.test.d.ts.map +1 -0
- package/dist/cli/src/utils/months.test.js +209 -0
- package/dist/cli/src/utils/requester-pays-error.d.ts +6 -0
- package/dist/cli/src/utils/requester-pays-error.d.ts.map +1 -0
- package/dist/cli/src/utils/requester-pays-error.js +20 -0
- package/dist/cli/src/version.d.ts +3 -0
- package/dist/cli/src/version.d.ts.map +1 -0
- package/dist/cli/src/version.js +2 -0
- package/dist/cli.cjs +98815 -0
- package/dist/utils/src/biorxiv-parser.d.ts +51 -0
- package/dist/utils/src/biorxiv-parser.d.ts.map +1 -0
- package/dist/utils/src/biorxiv-parser.js +126 -0
- package/dist/utils/src/folder-structure.d.ts +44 -0
- package/dist/utils/src/folder-structure.d.ts.map +1 -0
- package/dist/utils/src/folder-structure.js +207 -0
- package/dist/utils/src/index.d.ts +3 -0
- package/dist/utils/src/index.d.ts.map +1 -0
- package/dist/utils/src/index.js +3 -0
- package/package.json +76 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import axios from 'axios';
|
|
3
|
+
import { downloadFile } from '../aws/downloader.js';
|
|
4
|
+
import { setGlobalRequesterPays } from '../aws/config.js';
|
|
5
|
+
import { displayRequesterPaysError } from '../utils/requester-pays-error.js';
|
|
6
|
+
/**
 * `download` sub-command.
 *
 * Looks the DOI up at `<api-url>/v1/works/<prefix>/<suffix>`, takes the
 * `s3Key` from the response and hands it to `downloadFile`, saving the
 * result under a filesystem-safe name derived from the DOI.
 *
 * Exits the process with status 1 on an invalid DOI, a failed lookup, a
 * response without `s3Key`, or a failed download.
 */
export const downloadCommand = new Command('download')
    .description('Download MECA files from the bioRxiv/medRxiv S3 bucket by DOI')
    .argument('<doi>', 'DOI of the paper (e.g., "10.1101/2024.01.15.123456")')
    .option('-o, --output <dir>', 'Output directory for downloaded files', './downloads')
    .option('-a, --api-url <url>', 'API base URL', 'https://biorxiv.curvenote.dev')
    .option('--requester-pays', 'Enable requester-pays for S3 bucket access')
    .action(async (doi, options) => {
    try {
        // Split on the FIRST slash only. `doi.split('/', 2)` would silently
        // drop everything after a second slash, truncating such suffixes.
        const slashIndex = doi.indexOf('/');
        const doiPrefix = slashIndex === -1 ? '' : doi.slice(0, slashIndex);
        const doiSuffix = slashIndex === -1 ? '' : doi.slice(slashIndex + 1);
        // Both halves must be present ("10.1101/" and "/abc" are rejected too).
        if (!doiPrefix || !doiSuffix) {
            console.error('❌ Invalid DOI format. Expected format: 10.1101/2024.01.15.123456');
            process.exit(1);
        }
        console.log(`🔍 Looking up paper with DOI: ${doi}`);
        console.log(`📡 API URL: ${options.apiUrl}`);
        // Look up the paper in the API
        const response = await axios.get(`${options.apiUrl}/v1/works/${doiPrefix}/${doiSuffix}`);
        if (!response.data || !response.data.s3Key) {
            console.error('❌ Paper not found or no S3 key available');
            process.exit(1);
        }
        const paper = response.data;
        console.log(`📄 Found paper: ${paper.title || 'Unknown title'}`);
        console.log(`📦 S3 Key: ${paper.s3Key}`);
        // Set requester-pays if flag is provided
        if (options.requesterPays) {
            setGlobalRequesterPays(true);
            console.log(`💰 Requester-pays enabled for S3 access`);
        }
        // Create a filesystem-safe filename from the DOI
        const safeDoi = doi.replace(/[^a-zA-Z0-9.-]/g, '_');
        const filename = `${safeDoi}.meca`;
        console.log(`📥 Downloading MECA file as: ${filename}`);
        // Download the file using the S3 key from the API
        try {
            await downloadFile(paper.s3Key, { ...options, filename });
            console.log(`✅ Successfully downloaded MECA file for DOI: ${doi}`);
        }
        catch (downloadError) {
            const errorMessage = downloadError instanceof Error ? downloadError.message : String(downloadError);
            // These two markers are treated as requester-pays problems and get
            // the dedicated help text instead of the raw message.
            if (errorMessage.includes('UnknownError') || errorMessage.includes('AccessDenied')) {
                displayRequesterPaysError();
            }
            else {
                console.error('❌ Download failed:', errorMessage);
            }
            process.exit(1);
        }
    }
    catch (error) {
        if (axios.isAxiosError(error)) {
            // `response` is absent when the request never got an HTTP reply.
            const status = error.response ? error.response.status : undefined;
            if (status === 404) {
                console.error('❌ Article not found with the specified DOI');
            }
            else if (status === 401) {
                console.error('❌ Authentication failed. Please check your API key');
            }
            else {
                const responseData = error.response ? error.response.data : undefined;
                console.error('❌ API error:', responseData || error.message);
            }
        }
        else {
            console.error('❌ Error looking up paper:', error);
        }
        process.exit(1);
    }
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"list.d.ts","sourceRoot":"","sources":["../../../../src/commands/list.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAIpC,eAAO,MAAM,WAAW,SAapB,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { listBucketContent } from '../aws/bucket-explorer.js';
|
|
3
|
+
import { getDefaultServer } from '../utils/index.js';
|
|
4
|
+
/**
 * `list` sub-command: forwards the parsed options (month, batch, limit,
 * server) to `listBucketContent`.
 */
export const listCommand = new Command('list')
    .description('List available content in the bioRxiv or medRxiv S3 bucket')
    .option('-m, --month <month>', 'Filter by specific month (e.g., "2024-01")')
    .option('-b, --batch <batch>', 'Filter by specific batch (e.g., "Batch_01")')
    .option('-l, --limit <number>', 'Limit the number of results', '50')
    .option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer())
    .action(handleListCommand);
/**
 * Action handler for `list`. Any failure is logged and turns into exit
 * status 1.
 */
async function handleListCommand(options) {
    try {
        await listBucketContent(options);
    }
    catch (failure) {
        console.error('Error listing content:', failure);
        process.exit(1);
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"month-info.d.ts","sourceRoot":"","sources":["../../../../src/commands/month-info.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQpC,eAAO,MAAM,gBAAgB,SAczB,CAAC"}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { ListObjectsV2Command } from '@aws-sdk/client-s3';
|
|
3
|
+
import chalk from 'chalk';
|
|
4
|
+
import { getS3Client } from '../aws/config.js';
|
|
5
|
+
import { getFolderStructure } from 'biorxiv-utils';
|
|
6
|
+
import { getBucketName } from '../aws/bucket-explorer.js';
|
|
7
|
+
import { getDefaultServer } from '../utils/index.js';
|
|
8
|
+
/**
 * `month-info` sub-command: forwards the parsed options (month, batch,
 * server) to `listMonthMetadata`.
 */
export const monthInfoCommand = new Command('month-info')
    .description('List detailed metadata for all files in a specific month or batch from bioRxiv or medRxiv')
    .option('-m, --month <month>', 'Month to list (e.g., "January_2024" or "2024-01")')
    .option('-b, --batch <batch>', 'Batch to list (e.g., "1", "batch-1", "Batch_01")')
    .option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer())
    .action(handleMonthInfoCommand);
/**
 * Action handler for `month-info`. Any failure is logged and turns into
 * exit status 1.
 */
async function handleMonthInfoCommand(options) {
    try {
        await listMonthMetadata(options);
    }
    catch (failure) {
        console.error('Error listing month metadata:', failure);
        process.exit(1);
    }
}
|
|
22
|
+
/**
 * Lists every object under the S3 prefix for the requested month or batch
 * and prints summary statistics via `displaySummary`.
 *
 * @param options Parsed CLI options: `month`, `batch` and `server`
 *   (`server` falls back to `getDefaultServer()`).
 * @throws Error prefixed with "Failed to list month metadata: " when the
 *   listing or the summary fails with an `Error`; other thrown values are
 *   rethrown unchanged.
 *
 * Exits the process with status 1 when neither `month` nor `batch` is given.
 */
async function listMonthMetadata(options) {
    const { month, batch, server = getDefaultServer() } = options;
    // Validate the arguments before creating an S3 client.
    if (!month && !batch) {
        console.error('❌ Error: Either --month or --batch option must be specified');
        process.exit(1);
    }
    const client = await getS3Client();
    const bucketName = getBucketName(server);
    // Determine folder structure based on options
    const contentStructure = getFolderStructure({ month, batch, server });
    const prefix = contentStructure.prefix;
    const description = month ? `Month: ${month}` : `Batch: ${batch}`;
    console.log(chalk.blue(`📅 Month/Batch Information: ${description}`));
    console.log(chalk.blue('===================================='));
    console.log(chalk.gray(`🔍 Content Type: ${contentStructure.type === 'current' ? 'Current Content' : 'Back Content'}`));
    if (contentStructure.batch) {
        console.log(chalk.gray(`🔍 Batch: ${contentStructure.batch}`));
    }
    console.log(chalk.gray(`🔍 Scanning S3 prefix: ${prefix}`));
    console.log('');
    const allFiles = [];
    let continuationToken;
    let batchCount = 0;
    try {
        // Page through the listing, 1000 keys per request, until S3 stops
        // returning a continuation token.
        do {
            batchCount++;
            console.log(chalk.gray(`📦 Fetching batch ${batchCount}...`));
            const command = new ListObjectsV2Command({
                Bucket: bucketName,
                Prefix: prefix,
                MaxKeys: 1000,
                ContinuationToken: continuationToken,
                RequestPayer: 'requester',
            });
            const response = await client.send(command);
            if (response.Contents) {
                for (const item of response.Contents) {
                    if (!item.Key)
                        continue;
                    // Take the extension from the last path segment only. Splitting
                    // the whole key on '.' returns the entire key when the file has
                    // no extension, so 'none' was never reported.
                    const baseName = item.Key.split('/').pop() || '';
                    const dotIndex = baseName.lastIndexOf('.');
                    const extension = dotIndex === -1 ? '' : baseName.slice(dotIndex + 1);
                    allFiles.push({
                        key: item.Key,
                        size: item.Size || 0,
                        lastModified: item.LastModified || new Date(),
                        type: getContentType(item.Key),
                        fileName: baseName || 'unknown',
                        fileExtension: extension || 'none',
                    });
                }
                console.log(chalk.gray(` Found ${response.Contents.length} files in this batch`));
            }
            continuationToken = response.NextContinuationToken;
        } while (continuationToken);
        console.log(chalk.green(`✅ Total files found: ${allFiles.length}`));
        console.log('');
        displaySummary(allFiles, month || batch || 'unknown', server);
    }
    catch (error) {
        if (error instanceof Error) {
            throw new Error(`Failed to list month metadata: ${error.message}`);
        }
        throw error;
    }
}
|
|
89
|
+
/**
 * Classifies an S3 key by its (case-sensitive) file suffix.
 *
 * @param key Object key to classify.
 * @returns 'meca', 'pdf' or 'xml' when the key ends with that extension,
 *   otherwise 'other'.
 */
function getContentType(key) {
    const knownTypes = ['meca', 'pdf', 'xml'];
    const matched = knownTypes.find((candidate) => key.endsWith(`.${candidate}`));
    return matched === undefined ? 'other' : matched;
}
|
|
98
|
+
function displaySummary(files, month, server = getDefaultServer()) {
|
|
99
|
+
console.log(chalk.blue.bold('📊 Summary Statistics'));
|
|
100
|
+
console.log(chalk.blue('===================='));
|
|
101
|
+
console.log('');
|
|
102
|
+
// Show content structure info if available
|
|
103
|
+
try {
|
|
104
|
+
const contentStructure = getFolderStructure({ month, server });
|
|
105
|
+
console.log(chalk.cyan('📁 Content Structure:'));
|
|
106
|
+
console.log(` Type: ${chalk.yellow(contentStructure.type === 'current' ? 'Current Content' : 'Back Content')}`);
|
|
107
|
+
if (contentStructure.batch) {
|
|
108
|
+
console.log(` Batch: ${chalk.yellow(contentStructure.batch)}`);
|
|
109
|
+
}
|
|
110
|
+
console.log('');
|
|
111
|
+
}
|
|
112
|
+
catch (error) {
|
|
113
|
+
// Ignore errors in summary display
|
|
114
|
+
}
|
|
115
|
+
// File type breakdown
|
|
116
|
+
const typeCounts = files.reduce((acc, file) => {
|
|
117
|
+
acc[file.type] = (acc[file.type] || 0) + 1;
|
|
118
|
+
return acc;
|
|
119
|
+
}, {});
|
|
120
|
+
console.log(chalk.cyan('📁 File Types:'));
|
|
121
|
+
for (const [type, count] of Object.entries(typeCounts)) {
|
|
122
|
+
const percentage = ((count / files.length) * 100).toFixed(1);
|
|
123
|
+
console.log(` ${chalk.yellow(type.toUpperCase())}: ${chalk.green(count)} (${percentage}%)`);
|
|
124
|
+
}
|
|
125
|
+
console.log('');
|
|
126
|
+
// Size statistics
|
|
127
|
+
const mecaFiles = files.filter((f) => f.type === 'meca');
|
|
128
|
+
if (mecaFiles.length > 0) {
|
|
129
|
+
const sizes = mecaFiles.map((f) => f.size);
|
|
130
|
+
const totalSize = sizes.reduce((sum, size) => sum + size, 0);
|
|
131
|
+
const avgSize = totalSize / sizes.length;
|
|
132
|
+
const minSize = Math.min(...sizes);
|
|
133
|
+
const maxSize = Math.max(...sizes);
|
|
134
|
+
console.log(chalk.cyan('📦 MECA File Sizes:'));
|
|
135
|
+
console.log(` Total: ${chalk.green(formatFileSize(totalSize))}`);
|
|
136
|
+
console.log(` Average: ${chalk.green(formatFileSize(avgSize))}`);
|
|
137
|
+
console.log(` Range: ${chalk.green(formatFileSize(minSize))} - ${chalk.green(formatFileSize(maxSize))}`);
|
|
138
|
+
console.log('');
|
|
139
|
+
}
|
|
140
|
+
// Date range
|
|
141
|
+
const dates = files.map((f) => f.lastModified);
|
|
142
|
+
const earliest = new Date(Math.min(...dates.map((d) => d.getTime())));
|
|
143
|
+
const latest = new Date(Math.max(...dates.map((d) => d.getTime())));
|
|
144
|
+
console.log(chalk.cyan('📅 Upload Date Range:'));
|
|
145
|
+
console.log(` Earliest: ${chalk.green(earliest.toLocaleDateString())}`);
|
|
146
|
+
console.log(` Latest: ${chalk.green(latest.toLocaleDateString())}`);
|
|
147
|
+
console.log('');
|
|
148
|
+
// Upload date histogram
|
|
149
|
+
const sortedDates = displayUploadDateHistogram(files);
|
|
150
|
+
console.log('');
|
|
151
|
+
// Show batch analysis
|
|
152
|
+
console.log('');
|
|
153
|
+
analyzeBatchPatterns(sortedDates);
|
|
154
|
+
}
|
|
155
|
+
function displayUploadDateHistogram(files) {
|
|
156
|
+
console.log(chalk.cyan('📊 Upload Date Distribution:'));
|
|
157
|
+
console.log(chalk.cyan('============================'));
|
|
158
|
+
console.log('');
|
|
159
|
+
// Group files by date
|
|
160
|
+
const dateGroups = new Map();
|
|
161
|
+
for (const file of files) {
|
|
162
|
+
const dateKey = file.lastModified.toLocaleDateString();
|
|
163
|
+
dateGroups.set(dateKey, (dateGroups.get(dateKey) || 0) + 1);
|
|
164
|
+
}
|
|
165
|
+
// Sort dates chronologically
|
|
166
|
+
const sortedDates = Array.from(dateGroups.entries()).sort((a, b) => {
|
|
167
|
+
return new Date(a[0]).getTime() - new Date(b[0]).getTime();
|
|
168
|
+
});
|
|
169
|
+
// Find the maximum count for scaling
|
|
170
|
+
const maxCount = Math.max(...Array.from(dateGroups.values()));
|
|
171
|
+
const maxBarLength = 50; // Maximum bar length in characters
|
|
172
|
+
// Display histogram
|
|
173
|
+
for (const [date, count] of sortedDates) {
|
|
174
|
+
const barLength = Math.round((count / maxCount) * maxBarLength);
|
|
175
|
+
const bar = '█'.repeat(barLength);
|
|
176
|
+
const percentage = ((count / files.length) * 100).toFixed(1);
|
|
177
|
+
// Color code by upload volume
|
|
178
|
+
let countColor = chalk.green;
|
|
179
|
+
if (count > maxCount * 0.8) {
|
|
180
|
+
countColor = chalk.red; // High volume
|
|
181
|
+
}
|
|
182
|
+
else if (count > maxCount * 0.5) {
|
|
183
|
+
countColor = chalk.yellow; // Medium volume
|
|
184
|
+
}
|
|
185
|
+
else {
|
|
186
|
+
countColor = chalk.green; // Low volume
|
|
187
|
+
}
|
|
188
|
+
console.log(`${chalk.cyan(date.padStart(10))} ${countColor(count.toString().padStart(4))} ${chalk.gray('│')} ${chalk.blue(bar)} ${chalk.gray(`(${percentage}%)`)}`);
|
|
189
|
+
}
|
|
190
|
+
return sortedDates;
|
|
191
|
+
}
|
|
192
|
+
function analyzeBatchPatterns(dateGroups) {
|
|
193
|
+
console.log(chalk.cyan('🔍 Batch Analysis'));
|
|
194
|
+
console.log(chalk.cyan('================='));
|
|
195
|
+
console.log('');
|
|
196
|
+
if (dateGroups.length === 0)
|
|
197
|
+
return;
|
|
198
|
+
// Analyze upload patterns
|
|
199
|
+
const totalDays = dateGroups.length;
|
|
200
|
+
const totalFiles = dateGroups.reduce((sum, [, count]) => sum + count, 0);
|
|
201
|
+
const avgFilesPerDay = totalFiles / totalDays;
|
|
202
|
+
console.log(` Total active days: ${chalk.green(totalDays)}`);
|
|
203
|
+
console.log(` Average files per day: ${chalk.green(avgFilesPerDay.toFixed(1))}`);
|
|
204
|
+
console.log('');
|
|
205
|
+
}
|
|
206
|
+
/**
 * Formats a byte count using binary (1024-based) units, with at most two
 * decimal places.
 *
 * @param bytes Size in bytes; may be fractional (e.g. an average).
 * @returns A string such as '1.5 KB'. Zero, negative and non-finite inputs
 *   yield '0 B'.
 */
function formatFileSize(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0)
        return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'];
    const exponent = Math.floor(Math.log(bytes) / Math.log(k));
    // Clamp the unit index: values below 1 byte give a negative exponent and
    // very large values run past the table; both used to print "undefined".
    const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../../../src/commands/summary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,cAAc,SAyGvB,CAAC"}
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import chalk from 'chalk';
|
|
3
|
+
import boxen from 'boxen';
|
|
4
|
+
import { createOpenRxivApiClient, getServerFromDOI } from '../api/api-client.js';
|
|
5
|
+
import { parseBiorxivURL } from 'biorxiv-utils';
|
|
6
|
+
import { getDefaultServer } from '../utils/index.js';
|
|
7
|
+
/**
 * `summary` sub-command.
 *
 * Accepts a bioRxiv/medRxiv/doi.org URL or a bare DOI, fetches the content
 * detail and version list from the selected server and prints them with
 * `displaySummary`. When the selected server is 'biorxiv' and nothing is
 * found, medRxiv is tried once as a fallback.
 *
 * Exits the process with status 1 on an unparseable URL, when no content is
 * found, or on any thrown error.
 */
export const summaryCommand = new Command('summary')
    .description('Get a summary of a bioRxiv preprint from a URL or DOI')
    .argument('<url-or-doi>', 'bioRxiv URL or DOI to summarize')
    .option('-m, --more', 'Show additional details and full abstract')
    .option('-s, --server <server>', 'Specify server (biorxiv or medrxiv)', getDefaultServer())
    .action(async (urlOrDoi, options) => {
    // Every request in this command uses the same format and timeout.
    const createClient = (targetServer) => createOpenRxivApiClient({
        server: targetServer,
        format: 'json',
        timeout: 15000,
    });
    try {
        console.log(chalk.blue.bold(`🔬 ${options.server != null ? options.server : ''} Preprint Summary`));
        console.log(chalk.blue('================================\n'));
        // Parse the input (could be URL or DOI)
        let doi;
        const looksLikeUrl = urlOrDoi.includes('biorxiv.org') ||
            urlOrDoi.includes('medrxiv.org') ||
            urlOrDoi.includes('doi.org');
        if (looksLikeUrl) {
            const parsedUrl = parseBiorxivURL(urlOrDoi);
            if (!parsedUrl) {
                console.log(chalk.red('❌ Invalid bioRxiv URL'));
                process.exit(1);
            }
            doi = parsedUrl.doi;
            console.log(chalk.cyan(`📝 URL: ${urlOrDoi}`));
        }
        else {
            doi = urlOrDoi;
            console.log(chalk.cyan(`🔍 DOI: ${doi}`));
        }
        console.log('');
        // Determine server if possible
        const server = options.server != null ? options.server : getServerFromDOI(urlOrDoi);
        console.log(chalk.blue(`🌐 Server: ${server}`));
        console.log('');
        // `activeClient` always points at the server that holds the paper, so
        // the version list below is requested exactly once, from that server.
        let activeClient = createClient(server);
        // Show loading indicator
        console.log(chalk.yellow('⏳ Fetching preprint information...'));
        console.log('');
        // Get content details
        let contentDetail = await activeClient.getContentDetail(doi);
        // If not found on bioRxiv, try medRxiv as fallback
        if (!contentDetail && server === 'biorxiv') {
            console.log(chalk.yellow('⚠️ Paper not found on bioRxiv, trying medRxiv...'));
            const medrxivApiClient = createClient('medrxiv');
            contentDetail = await medrxivApiClient.getContentDetail(doi);
            if (contentDetail) {
                console.log(chalk.green('✅ Found paper on medRxiv!'));
                contentDetail.server = 'medrxiv'; // Ensure the content detail has the correct server
                activeClient = medrxivApiClient;
            }
        }
        if (!contentDetail) {
            console.log(chalk.red('❌ No content found for this DOI on either bioRxiv or medRxiv'));
            console.log(chalk.yellow("💡 This might be a new preprint that hasn't been indexed yet"));
            process.exit(1);
        }
        // Get all versions from the server that actually has the paper
        const allVersions = await activeClient.getAllVersions(doi);
        // Display summary
        const isVerbose = options.more === true;
        displaySummary(contentDetail, allVersions, isVerbose);
    }
    catch (error) {
        console.error(chalk.red('❌ Error:'), error instanceof Error ? error.message : 'Unknown error');
        process.exit(1);
    }
});
|
|
95
|
+
function displaySummary(contentDetail, allVersions, verbose = false) {
|
|
96
|
+
// Title in a prominent box
|
|
97
|
+
const titleBox = boxen(chalk.green.bold.underline(contentDetail.title), {
|
|
98
|
+
padding: 1,
|
|
99
|
+
margin: 1,
|
|
100
|
+
borderStyle: 'double',
|
|
101
|
+
borderColor: 'green',
|
|
102
|
+
backgroundColor: 'black',
|
|
103
|
+
textAlignment: 'left',
|
|
104
|
+
...(verbose ? {} : { width: 80 }),
|
|
105
|
+
});
|
|
106
|
+
console.log(titleBox);
|
|
107
|
+
// Basic info
|
|
108
|
+
const basicInfo = [
|
|
109
|
+
`${chalk.cyan('DOI:')} ${contentDetail.doi}`,
|
|
110
|
+
`${chalk.cyan('Server:')} ${contentDetail.server}`,
|
|
111
|
+
`${chalk.cyan('Category:')} ${chalk.yellow(contentDetail.category)}`,
|
|
112
|
+
`${chalk.cyan('License:')} ${chalk.yellow(contentDetail.license)}`,
|
|
113
|
+
`${chalk.cyan('Type:')} ${chalk.yellow(contentDetail.type)}`,
|
|
114
|
+
`${chalk.cyan('Published:')} ${contentDetail.published === 'NA'
|
|
115
|
+
? chalk.gray('Not published')
|
|
116
|
+
: chalk.green(contentDetail.published)}`,
|
|
117
|
+
`${chalk.cyan('Total versions:')} ${allVersions ? allVersions.length : 0}`,
|
|
118
|
+
...(verbose
|
|
119
|
+
? [
|
|
120
|
+
`${chalk.cyan('Date:')} ${contentDetail.date}`,
|
|
121
|
+
`${chalk.cyan('Version:')} ${contentDetail.version}`,
|
|
122
|
+
...(contentDetail.jatsxml
|
|
123
|
+
? [`${chalk.cyan('JATS XML:')} ${chalk.underline.blue(contentDetail.jatsxml)}`]
|
|
124
|
+
: []),
|
|
125
|
+
]
|
|
126
|
+
: []),
|
|
127
|
+
].join('\n');
|
|
128
|
+
const basicInfoBox = boxen(basicInfo, {
|
|
129
|
+
padding: 1,
|
|
130
|
+
margin: 1,
|
|
131
|
+
borderStyle: 'round',
|
|
132
|
+
borderColor: 'blue',
|
|
133
|
+
title: chalk.blue.bold('📋 Basic Information'),
|
|
134
|
+
titleAlignment: 'left',
|
|
135
|
+
textAlignment: 'left',
|
|
136
|
+
...(verbose ? {} : { width: 80 }),
|
|
137
|
+
});
|
|
138
|
+
console.log(basicInfoBox);
|
|
139
|
+
// Authors
|
|
140
|
+
const authorsInfo = [
|
|
141
|
+
`${chalk.cyan('Authors:')} ${contentDetail.authors}`,
|
|
142
|
+
...(contentDetail.author_corresponding
|
|
143
|
+
? [`${chalk.cyan('Corresponding:')} ${chalk.green(contentDetail.author_corresponding)}`]
|
|
144
|
+
: []),
|
|
145
|
+
...(contentDetail.author_corresponding_institution
|
|
146
|
+
? [
|
|
147
|
+
`${chalk.cyan('Institution:')} ${chalk.gray(contentDetail.author_corresponding_institution)}`,
|
|
148
|
+
]
|
|
149
|
+
: []),
|
|
150
|
+
].join('\n');
|
|
151
|
+
const authorsBox = boxen(authorsInfo, {
|
|
152
|
+
padding: 1,
|
|
153
|
+
margin: 1,
|
|
154
|
+
borderStyle: 'round',
|
|
155
|
+
borderColor: 'cyan',
|
|
156
|
+
title: chalk.cyan.bold('👥 Authors'),
|
|
157
|
+
titleAlignment: 'left',
|
|
158
|
+
textAlignment: 'left',
|
|
159
|
+
...(verbose ? {} : { width: 80 }),
|
|
160
|
+
});
|
|
161
|
+
console.log(authorsBox);
|
|
162
|
+
// Abstract
|
|
163
|
+
if (contentDetail.abstract) {
|
|
164
|
+
const abstractBox = boxen(contentDetail.abstract, {
|
|
165
|
+
padding: 1,
|
|
166
|
+
margin: 1,
|
|
167
|
+
borderStyle: 'round',
|
|
168
|
+
borderColor: 'yellow',
|
|
169
|
+
title: chalk.yellow.bold('📖 Abstract'),
|
|
170
|
+
titleAlignment: 'left',
|
|
171
|
+
textAlignment: 'left',
|
|
172
|
+
...(verbose ? {} : { width: 80 }),
|
|
173
|
+
});
|
|
174
|
+
console.log(abstractBox);
|
|
175
|
+
}
|
|
176
|
+
// Funding information
|
|
177
|
+
if (contentDetail.funding && contentDetail.funding.length > 0) {
|
|
178
|
+
const fundingInfo = contentDetail.funding
|
|
179
|
+
.map((fund, index) => {
|
|
180
|
+
let fundText = `${chalk.cyan(index + 1)}. ${chalk.green(fund.name)}`;
|
|
181
|
+
if (fund.id) {
|
|
182
|
+
fundText += `\n ${chalk.gray('ID:')} ${fund.id} (${fund['id-type']})`;
|
|
183
|
+
}
|
|
184
|
+
if (fund.award) {
|
|
185
|
+
fundText += `\n ${chalk.gray('Award:')} ${fund.award}`;
|
|
186
|
+
}
|
|
187
|
+
return fundText;
|
|
188
|
+
})
|
|
189
|
+
.join('\n\n');
|
|
190
|
+
const fundingBox = boxen(fundingInfo, {
|
|
191
|
+
padding: 1,
|
|
192
|
+
margin: 1,
|
|
193
|
+
borderStyle: 'round',
|
|
194
|
+
borderColor: 'magenta',
|
|
195
|
+
title: chalk.magenta.bold('💰 Funding'),
|
|
196
|
+
titleAlignment: 'left',
|
|
197
|
+
textAlignment: 'left',
|
|
198
|
+
...(verbose ? {} : { width: 80 }),
|
|
199
|
+
});
|
|
200
|
+
console.log(fundingBox);
|
|
201
|
+
}
|
|
202
|
+
// Versions - only show when --more is provided
|
|
203
|
+
if (verbose && allVersions && allVersions.length > 0) {
|
|
204
|
+
const versionsInfo = [
|
|
205
|
+
...allVersions.map((version, index) => {
|
|
206
|
+
let versionText = `${chalk.cyan(`v${version.version}`)} (${version.date})`;
|
|
207
|
+
versionText += `\n ${chalk.gray('Type:')} ${version.type}`;
|
|
208
|
+
versionText += `\n ${chalk.gray('Title:')} ${version.title.substring(0, 60)}...`;
|
|
209
|
+
if (version.jatsxml) {
|
|
210
|
+
versionText += `\n ${chalk.gray('JATS XML:')} ${chalk.underline.blue(version.jatsxml)}`;
|
|
211
|
+
}
|
|
212
|
+
return versionText;
|
|
213
|
+
}),
|
|
214
|
+
].join('\n\n');
|
|
215
|
+
const versionsBox = boxen(versionsInfo, {
|
|
216
|
+
padding: 1,
|
|
217
|
+
margin: 1,
|
|
218
|
+
borderStyle: 'round',
|
|
219
|
+
borderColor: 'green',
|
|
220
|
+
title: chalk.green.bold('🔄 Versions'),
|
|
221
|
+
titleAlignment: 'left',
|
|
222
|
+
textAlignment: 'left',
|
|
223
|
+
...(verbose ? {} : { width: 80 }),
|
|
224
|
+
});
|
|
225
|
+
console.log(versionsBox);
|
|
226
|
+
}
|
|
227
|
+
// Footer
|
|
228
|
+
const baseUrl = contentDetail.server === 'medrxiv'
|
|
229
|
+
? `https://www.medrxiv.org/content/${contentDetail.doi}`
|
|
230
|
+
: `https://www.biorxiv.org/content/${contentDetail.doi}`;
|
|
231
|
+
const footerInfo = [
|
|
232
|
+
`💡 View online: ${chalk.underline.blue(baseUrl)}`,
|
|
233
|
+
...(allVersions && allVersions.length > 1 && !verbose
|
|
234
|
+
? [
|
|
235
|
+
`📚 This preprint has ${allVersions.length} versions. Use --more to see additional details.`,
|
|
236
|
+
]
|
|
237
|
+
: []),
|
|
238
|
+
].join('\n');
|
|
239
|
+
const footerBox = boxen(footerInfo, {
|
|
240
|
+
padding: 1,
|
|
241
|
+
margin: 1,
|
|
242
|
+
borderStyle: 'round',
|
|
243
|
+
borderColor: 'gray',
|
|
244
|
+
backgroundColor: 'black',
|
|
245
|
+
textAlignment: 'left',
|
|
246
|
+
...(verbose ? {} : { width: 80 }),
|
|
247
|
+
});
|
|
248
|
+
console.log(footerBox);
|
|
249
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from 'commander';
|
|
3
|
+
import { listCommand } from './commands/list.js';
|
|
4
|
+
import { downloadCommand } from './commands/download.js';
|
|
5
|
+
import { configCommand } from './commands/config.js';
|
|
6
|
+
import { summaryCommand } from './commands/summary.js';
|
|
7
|
+
import { monthInfoCommand } from './commands/month-info.js';
|
|
8
|
+
import { batchProcessCommand } from './commands/batch-process.js';
|
|
9
|
+
import { setGlobalRequesterPays } from './aws/config.js';
|
|
10
|
+
import version from './version.js';
|
|
11
|
+
import { getCliName } from './utils/index.js';
|
|
12
|
+
// CLI entry point: builds the root command, registers the sub-commands and
// global options, then parses process.argv.
const cliName = getCliName();
const program = new Command();
program
    .name(cliName)
    .description(`CLI tool to download bioRxiv/medRxiv MECA files from AWS S3 for text and data mining`)
    .version(`v${version}`, '-v, --version', `Print the current version of the ${cliName} CLI`);
// Add commands
program.addCommand(listCommand);
program.addCommand(downloadCommand);
program.addCommand(configCommand);
program.addCommand(summaryCommand);
program.addCommand(monthInfoCommand);
program.addCommand(batchProcessCommand);
// Global options
program.option('-d, --debug', 'Enable debug mode');
program.option('--requester-pays', 'Enable requester pays for local development (required when not on EC2 with IAM role)');
// Apply the global requester-pays flag while options are being parsed.
// parse() starts the matched command's action before it returns, so setting
// the flag after parse() would be too late for code the action runs
// synchronously (for example its first S3 client creation).
program.on('option:requester-pays', () => {
    setGlobalRequesterPays(true);
    console.log('Requester pays enabled - you will be charged for S3 requests');
});
// Parse command line arguments
program.parse();
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse batch input to support ranges like "1-10" or "batch-1,batch-2"
|
|
3
|
+
*/
|
|
4
|
+
export declare function parseBatchInput(batchInput: string): string[];
|
|
5
|
+
/**
|
|
6
|
+
* Validate batch format
|
|
7
|
+
*/
|
|
8
|
+
export declare function validateBatchFormat(batch: string): boolean;
|
|
9
|
+
//# sourceMappingURL=batches.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batches.d.ts","sourceRoot":"","sources":["../../../../src/utils/batches.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CAgE5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAG1D"}
|