openrxiv-cli 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/api-client.d.ts +96 -0
- package/dist/api/api-client.d.ts.map +1 -0
- package/dist/api/api-client.js +257 -0
- package/dist/aws/bucket-explorer.d.ts +26 -0
- package/dist/aws/bucket-explorer.d.ts.map +1 -0
- package/dist/aws/bucket-explorer.js +220 -0
- package/dist/aws/config.d.ts +5 -0
- package/dist/aws/config.d.ts.map +1 -0
- package/dist/aws/config.js +36 -0
- package/dist/aws/downloader.d.ts +13 -0
- package/dist/aws/downloader.d.ts.map +1 -0
- package/dist/aws/downloader.js +115 -0
- package/dist/aws/month-lister.d.ts +18 -0
- package/dist/aws/month-lister.d.ts.map +1 -0
- package/dist/aws/month-lister.js +90 -0
- package/dist/commands/batch-info.d.ts +3 -0
- package/dist/commands/batch-info.d.ts.map +1 -0
- package/dist/commands/batch-info.js +213 -0
- package/dist/commands/batch-process.d.ts +3 -0
- package/dist/commands/batch-process.d.ts.map +1 -0
- package/dist/commands/batch-process.js +557 -0
- package/dist/commands/download.d.ts +3 -0
- package/dist/commands/download.d.ts.map +1 -0
- package/dist/commands/download.js +76 -0
- package/dist/commands/index.d.ts +6 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +5 -0
- package/dist/commands/list.d.ts +3 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +18 -0
- package/dist/commands/summary.d.ts +3 -0
- package/dist/commands/summary.d.ts.map +1 -0
- package/dist/commands/summary.js +249 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +35 -0
- package/dist/utils/batches.d.ts +9 -0
- package/dist/utils/batches.d.ts.map +1 -0
- package/dist/utils/batches.js +61 -0
- package/dist/utils/batches.test.d.ts +2 -0
- package/dist/utils/batches.test.d.ts.map +1 -0
- package/dist/utils/batches.test.js +119 -0
- package/dist/utils/default-server.d.ts +3 -0
- package/dist/utils/default-server.d.ts.map +1 -0
- package/dist/utils/default-server.js +20 -0
- package/dist/utils/index.d.ts +5 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +5 -0
- package/dist/utils/meca-processor.d.ts +28 -0
- package/dist/utils/meca-processor.d.ts.map +1 -0
- package/dist/utils/meca-processor.js +503 -0
- package/dist/utils/meca-processor.test.d.ts +2 -0
- package/dist/utils/meca-processor.test.d.ts.map +1 -0
- package/dist/utils/meca-processor.test.js +123 -0
- package/dist/utils/months.d.ts +36 -0
- package/dist/utils/months.d.ts.map +1 -0
- package/dist/utils/months.js +135 -0
- package/dist/utils/months.test.d.ts +2 -0
- package/dist/utils/months.test.d.ts.map +1 -0
- package/dist/utils/months.test.js +209 -0
- package/dist/utils/requester-pays-error.d.ts +6 -0
- package/dist/utils/requester-pays-error.d.ts.map +1 -0
- package/dist/utils/requester-pays-error.js +20 -0
- package/dist/version.d.ts +3 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +2 -0
- package/package.json +67 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { listBucketContent } from '../aws/bucket-explorer.js';
|
|
3
|
+
import { getDefaultServer } from '../utils/index.js';
|
|
4
|
+
/**
 * `list` sub-command: lists content of the bioRxiv / medRxiv S3 bucket.
 *
 * The parsed options object (month, batch, limit, server) is handed to
 * `listBucketContent` unchanged. Any error raised while listing is printed to
 * stderr and the process exits with status 1.
 */
export const listCommand = new Command('list');
listCommand.description('List available content in the bioRxiv or medRxiv S3 bucket');
listCommand.option('-m, --month <month>', 'Filter by specific month (e.g., "2024-01")');
listCommand.option('-b, --batch <batch>', 'Filter by specific batch (e.g., "Batch_01")');
// The default is the string '50'; it is passed on without numeric conversion here.
listCommand.option('-l, --limit <number>', 'Limit the number of results', '50');
listCommand.option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer());
listCommand.action(async (cliOptions) => {
    try {
        await listBucketContent(cliOptions);
    }
    catch (listingError) {
        console.error('Error listing content:', listingError);
        process.exit(1);
    }
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../src/commands/summary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,cAAc,SAyGvB,CAAC"}
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import chalk from 'chalk';
|
|
3
|
+
import boxen from 'boxen';
|
|
4
|
+
import { createOpenRxivApiClient, getServerFromDOI } from '../api/api-client.js';
|
|
5
|
+
import { parseBiorxivURL } from 'openrxiv-utils';
|
|
6
|
+
import { getDefaultServer } from '../utils/index.js';
|
|
7
|
+
/**
 * `summary` sub-command: looks up a preprint by URL or DOI and prints a
 * formatted summary.
 *
 * Flow:
 *  1. Resolve the DOI (URLs are parsed with `parseBiorxivURL`).
 *  2. Query the selected server; if that server is bioRxiv and nothing is
 *     found, retry on medRxiv.
 *  3. Fetch the version list from whichever server actually returned the
 *     record, then render everything with `displaySummary`.
 *
 * Any failure is reported and the process exits with status 1.
 */
export const summaryCommand = new Command('summary')
    .description('Get a summary of a bioRxiv preprint from a URL or DOI')
    .argument('<url-or-doi>', 'bioRxiv URL or DOI to summarize')
    .option('-m, --more', 'Show additional details and full abstract')
    .option('-s, --server <server>', 'Specify server (biorxiv or medrxiv)', getDefaultServer())
    .action(async (urlOrDoi, options) => {
        // Every API client in this command shares the same format and timeout.
        const clientFor = (targetServer) => createOpenRxivApiClient({
            server: targetServer,
            format: 'json',
            timeout: 15000,
        });
        try {
            const headingServer = options.server != null ? options.server : '';
            console.log(chalk.blue.bold(`🔬 ${headingServer} Preprint Summary`));
            console.log(chalk.blue('================================\n'));
            // Parse the input (could be URL or DOI)
            let doi;
            const looksLikeUrl = urlOrDoi.includes('biorxiv.org') ||
                urlOrDoi.includes('medrxiv.org') ||
                urlOrDoi.includes('doi.org');
            if (looksLikeUrl) {
                const parsedUrl = parseBiorxivURL(urlOrDoi);
                if (!parsedUrl) {
                    console.log(chalk.red('❌ Invalid bioRxiv URL'));
                    process.exit(1);
                }
                doi = parsedUrl.doi;
                console.log(chalk.cyan(`📝 URL: ${urlOrDoi}`));
            }
            else {
                doi = urlOrDoi;
                console.log(chalk.cyan(`🔍 DOI: ${doi}`));
            }
            console.log('');
            // `--server` is registered with a default value, so the DOI-based
            // lookup only runs when the option has been cleared explicitly.
            // NOTE(review): the raw input (possibly a URL) is passed to
            // getServerFromDOI, as before - confirm that helper accepts URLs.
            const server = options.server != null ? options.server : getServerFromDOI(urlOrDoi);
            console.log(chalk.blue(`🌐 Server: ${server}`));
            console.log('');
            // The client that ends up holding the record; reused for versions.
            let activeClient = clientFor(server);
            console.log(chalk.yellow('⏳ Fetching preprint information...'));
            console.log('');
            let contentDetail = await activeClient.getContentDetail(doi);
            // If not found on bioRxiv, try medRxiv as a fallback.
            if (!contentDetail && server === 'biorxiv') {
                console.log(chalk.yellow('⚠️ Paper not found on bioRxiv, trying medRxiv...'));
                const medrxivClient = clientFor('medrxiv');
                contentDetail = await medrxivClient.getContentDetail(doi);
                if (contentDetail) {
                    console.log(chalk.green('✅ Found paper on medRxiv!'));
                    // displaySummary reads the server from the record itself.
                    contentDetail.server = 'medrxiv';
                    activeClient = medrxivClient;
                }
            }
            if (!contentDetail) {
                console.log(chalk.red('❌ No content found for this DOI on either bioRxiv or medRxiv'));
                console.log(chalk.yellow("💡 This might be a new preprint that hasn't been indexed yet"));
                process.exit(1);
            }
            // Ask only the server that returned the record. Previously the
            // primary server was always queried first, even after a fallback,
            // costing an extra request that could fail the whole command.
            const allVersions = await activeClient.getAllVersions(doi);
            displaySummary(contentDetail, allVersions, options.more === true);
        }
        catch (error) {
            console.error(chalk.red('❌ Error:'), error instanceof Error ? error.message : 'Unknown error');
            process.exit(1);
        }
    });
|
|
95
|
+
/**
 * Print a boxed, colourised summary of one preprint to stdout.
 *
 * Sections are emitted in this order: title, basic information, authors,
 * abstract (when present), funding (when present), version history (only
 * with `verbose`), and a footer linking to the preprint online.
 *
 * @param contentDetail Preprint record. Fields read here: title, doi, server,
 *   category, license, type, published, date, version, jatsxml, authors,
 *   author_corresponding, author_corresponding_institution, abstract, funding.
 * @param allVersions Array of version records (version, date, type, title,
 *   jatsxml), or a falsy value when none are available.
 * @param verbose When true, extra fields and the version list are shown and
 *   no explicit box width is passed; otherwise every box is 80 columns wide.
 */
function displaySummary(contentDetail, allVersions, verbose = false) {
    const widthOption = verbose ? {} : { width: 80 };
    const versionCount = allVersions ? allVersions.length : 0;
    // Title in a prominent box
    console.log(boxen(chalk.green.bold.underline(contentDetail.title), {
        padding: 1,
        margin: 1,
        borderStyle: 'double',
        borderColor: 'green',
        backgroundColor: 'black',
        textAlignment: 'left',
        ...widthOption,
    }));
    // Basic info
    const basicLines = [
        `${chalk.cyan('DOI:')} ${contentDetail.doi}`,
        `${chalk.cyan('Server:')} ${contentDetail.server}`,
        `${chalk.cyan('Category:')} ${chalk.yellow(contentDetail.category)}`,
        `${chalk.cyan('License:')} ${chalk.yellow(contentDetail.license)}`,
        `${chalk.cyan('Type:')} ${chalk.yellow(contentDetail.type)}`,
        // The record uses the literal string 'NA' for "not published".
        `${chalk.cyan('Published:')} ${contentDetail.published === 'NA'
            ? chalk.gray('Not published')
            : chalk.green(contentDetail.published)}`,
        `${chalk.cyan('Total versions:')} ${versionCount}`,
    ];
    if (verbose) {
        basicLines.push(`${chalk.cyan('Date:')} ${contentDetail.date}`);
        basicLines.push(`${chalk.cyan('Version:')} ${contentDetail.version}`);
        if (contentDetail.jatsxml) {
            basicLines.push(`${chalk.cyan('JATS XML:')} ${chalk.underline.blue(contentDetail.jatsxml)}`);
        }
    }
    console.log(summarySectionBox(basicLines.join('\n'), chalk.blue.bold('📋 Basic Information'), 'blue', widthOption));
    // Authors
    const authorLines = [`${chalk.cyan('Authors:')} ${contentDetail.authors}`];
    if (contentDetail.author_corresponding) {
        authorLines.push(`${chalk.cyan('Corresponding:')} ${chalk.green(contentDetail.author_corresponding)}`);
    }
    if (contentDetail.author_corresponding_institution) {
        authorLines.push(`${chalk.cyan('Institution:')} ${chalk.gray(contentDetail.author_corresponding_institution)}`);
    }
    console.log(summarySectionBox(authorLines.join('\n'), chalk.cyan.bold('👥 Authors'), 'cyan', widthOption));
    // Abstract
    if (contentDetail.abstract) {
        console.log(summarySectionBox(contentDetail.abstract, chalk.yellow.bold('📖 Abstract'), 'yellow', widthOption));
    }
    // Funding information
    if (contentDetail.funding && contentDetail.funding.length > 0) {
        const fundingText = contentDetail.funding
            .map((fund, index) => {
                let fundText = `${chalk.cyan(index + 1)}. ${chalk.green(fund.name)}`;
                if (fund.id) {
                    // Only mention the identifier type when the record has one,
                    // instead of printing "(undefined)".
                    const idType = fund['id-type'] ? ` (${fund['id-type']})` : '';
                    fundText += `\n ${chalk.gray('ID:')} ${fund.id}${idType}`;
                }
                if (fund.award) {
                    fundText += `\n ${chalk.gray('Award:')} ${fund.award}`;
                }
                return fundText;
            })
            .join('\n\n');
        console.log(summarySectionBox(fundingText, chalk.magenta.bold('💰 Funding'), 'magenta', widthOption));
    }
    // Versions - only show when --more is provided
    if (verbose && versionCount > 0) {
        const versionsText = allVersions
            .map((version) => {
                let versionText = `${chalk.cyan(`v${version.version}`)} (${version.date})`;
                versionText += `\n ${chalk.gray('Type:')} ${version.type}`;
                versionText += `\n ${chalk.gray('Title:')} ${truncateWithEllipsis(version.title, 60)}`;
                if (version.jatsxml) {
                    versionText += `\n ${chalk.gray('JATS XML:')} ${chalk.underline.blue(version.jatsxml)}`;
                }
                return versionText;
            })
            .join('\n\n');
        console.log(summarySectionBox(versionsText, chalk.green.bold('🔄 Versions'), 'green', widthOption));
    }
    // Footer
    const baseUrl = contentDetail.server === 'medrxiv'
        ? `https://www.medrxiv.org/content/${contentDetail.doi}`
        : `https://www.biorxiv.org/content/${contentDetail.doi}`;
    const footerLines = [`💡 View online: ${chalk.underline.blue(baseUrl)}`];
    if (versionCount > 1 && !verbose) {
        footerLines.push(`📚 This preprint has ${versionCount} versions. Use --more to see additional details.`);
    }
    console.log(boxen(footerLines.join('\n'), {
        padding: 1,
        margin: 1,
        borderStyle: 'round',
        borderColor: 'gray',
        backgroundColor: 'black',
        textAlignment: 'left',
        ...widthOption,
    }));
}
/** Build one round-bordered, left-aligned, titled section box. */
function summarySectionBox(text, title, borderColor, widthOption) {
    return boxen(text, {
        padding: 1,
        margin: 1,
        borderStyle: 'round',
        borderColor,
        title,
        titleAlignment: 'left',
        textAlignment: 'left',
        ...widthOption,
    });
}
/** Cut `text` to `maxLength` characters, appending "..." only when something was removed. */
function truncateWithEllipsis(text, maxLength) {
    return text.length > maxLength ? `${text.substring(0, maxLength)}...` : text;
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
export * from './commands/index.js';
|
|
3
|
+
export { default as version } from './version.js';
|
|
4
|
+
export { getCliName } from './utils/index.js';
|
|
5
|
+
export { setGlobalRequesterPays } from './aws/config.js';
|
|
6
|
+
export declare function createCLI(): Command;
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAYpC,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,MAAM,cAAc,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,sBAAsB,EAAE,MAAM,iBAAiB,CAAC;AAEzD,wBAAgB,SAAS,IAAI,OAAO,CAqCnC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { Command } from 'commander';
|
|
2
|
+
import { listCommand, downloadCommand, summaryCommand, monthInfoCommand, batchProcessCommand, } from './commands/index.js';
|
|
3
|
+
import { getCliName } from './utils/index.js';
|
|
4
|
+
import { setGlobalRequesterPays } from './aws/config.js';
|
|
5
|
+
import version from './version.js';
|
|
6
|
+
export * from './commands/index.js';
|
|
7
|
+
export { default as version } from './version.js';
|
|
8
|
+
export { getCliName } from './utils/index.js';
|
|
9
|
+
export { setGlobalRequesterPays } from './aws/config.js';
|
|
10
|
+
/**
 * Build the command-line program, register all sub-commands and global
 * options, and parse `process.argv`.
 *
 * The program name comes from `getCliName()`. Note that this function parses
 * the command line itself, so calling it runs the selected sub-command.
 *
 * @returns The configured (and already parsed) commander program.
 */
export function createCLI() {
    const cliName = getCliName();
    const program = new Command();
    program
        .name(cliName)
        .description(`CLI tool to download bioRxiv/medRxiv MECA files from AWS S3 for text and data mining`)
        .version(`v${version}`, '-v, --version', `Print the current version of the ${cliName} CLI`);
    // Add commands
    for (const command of [
        listCommand,
        downloadCommand,
        summaryCommand,
        monthInfoCommand,
        batchProcessCommand,
    ]) {
        program.addCommand(command);
    }
    // Global options
    program.option('-d, --debug', 'Enable debug mode');
    program.option('--requester-pays', 'Enable requester pays for local development (required when not on EC2 with IAM role)');
    // Apply the global requester-pays flag before the sub-command's action
    // handler is invoked. Doing this after parse() is too late: parse() calls
    // the handler, so its code has already started running by the time
    // parse() returns.
    program.hook('preAction', (thisCommand) => {
        if (thisCommand.opts().requesterPays) {
            setGlobalRequesterPays(true);
            console.log('Requester pays enabled - you will be charged for S3 requests');
        }
    });
    // Parse command line arguments
    program.parse();
    return program;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse batch input to support ranges like "1-10" or "batch-1,batch-2"
|
|
3
|
+
*/
|
|
4
|
+
export declare function parseBatchInput(batchInput: string): string[];
|
|
5
|
+
/**
|
|
6
|
+
* Validate batch format
|
|
7
|
+
*/
|
|
8
|
+
export declare function validateBatchFormat(batch: string): boolean;
|
|
9
|
+
//# sourceMappingURL=batches.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batches.d.ts","sourceRoot":"","sources":["../../src/utils/batches.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CAgE5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAG1D"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/** Largest number of batches a single "start-end" range may expand to. */
const MAX_BATCHES_PER_RANGE = 100;
/**
 * Expand a purely numeric "start-end" range into its members.
 *
 * @returns The expanded batch numbers as strings, or `null` when `part` is
 *   not a numeric range (so the caller can treat it as a plain batch name).
 * @throws Error when the range is reversed, too large, or its endpoints are
 *   not exactly representable integers.
 */
function expandBatchRange(part) {
    const rangeMatch = part.match(/^(\d+)-(\d+)$/);
    if (!rangeMatch) {
        return null;
    }
    const start = parseInt(rangeMatch[1], 10);
    const end = parseInt(rangeMatch[2], 10);
    // Beyond 2^53 - 1, `i + 1 === i`, so an incrementing loop would never end.
    if (!Number.isSafeInteger(start) || !Number.isSafeInteger(end)) {
        throw new Error(`Invalid batch range: ${part} is outside the supported integer range`);
    }
    if (start > end) {
        throw new Error(`Invalid batch range: start (${start}) cannot be greater than end (${end})`);
    }
    const count = end - start + 1;
    if (count > MAX_BATCHES_PER_RANGE) {
        throw new Error(`Batch range too large: ${count} batches. Maximum allowed: ${MAX_BATCHES_PER_RANGE}`);
    }
    const batches = [];
    for (let i = start; i <= end; i++) {
        batches.push(i.toString());
    }
    return batches;
}
/**
 * Parse batch input to support ranges like "1-10" or "batch-1,batch-2".
 *
 * Accepted forms:
 *  - a single batch name or number ("7", "Batch_01"),
 *  - a numeric range ("1-10"), expanded inclusively,
 *  - a comma-separated mix of the two ("1-3,batch-1,5").
 *
 * Surrounding whitespace is ignored. Empty entries in a comma-separated list
 * are dropped, while an input without commas always yields exactly one entry
 * (so "" yields [""]).
 *
 * @param batchInput Raw batch selection string.
 * @returns Batch identifiers in input order; duplicates are kept.
 * @throws Error when a numeric range is reversed, spans more than 100
 *   batches, or uses numbers too large to represent exactly.
 */
export function parseBatchInput(batchInput) {
    if (batchInput.includes(',')) {
        const allBatches = [];
        for (const rawPart of batchInput.split(',')) {
            const part = rawPart.trim();
            if (part.length === 0) {
                continue;
            }
            const expanded = expandBatchRange(part);
            if (expanded) {
                allBatches.push(...expanded);
            }
            else {
                allBatches.push(part);
            }
        }
        return allBatches;
    }
    // Single range or single batch; trimmed like the comma-separated parts.
    const single = batchInput.trim();
    const expanded = expandBatchRange(single);
    return expanded ? expanded : [single];
}
|
|
55
|
+
/**
 * Tell whether a batch identifier is well formed.
 *
 * A valid identifier is non-empty and consists only of ASCII letters, digits,
 * underscores and hyphens - e.g. "1", "batch-1", "Batch_01".
 */
export function validateBatchFormat(batch) {
    const allowedCharacters = /^[A-Za-z0-9_-]+$/;
    return allowedCharacters.test(batch);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batches.test.d.ts","sourceRoot":"","sources":["../../src/utils/batches.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { parseBatchInput, validateBatchFormat } from './batches.js';
|
|
3
|
+
// Test suite for the batch helpers: parseBatchInput (single names, numeric
// ranges, comma-separated and mixed input, edge cases) and validateBatchFormat.
describe('Batch Utilities', () => {
    describe('parseBatchInput', () => {
        // Shared assertion: parsing `input` must yield exactly `expected`.
        const expectParsed = (input, expected) => {
            expect(parseBatchInput(input)).toEqual(expected);
        };
        describe('single batches', () => {
            it('should parse single numeric batch', () => {
                expectParsed('1', ['1']);
                expectParsed('42', ['42']);
            });
            it('should parse single named batch', () => {
                for (const name of ['batch-1', 'Batch_01', 'historical-content']) {
                    expectParsed(name, [name]);
                }
            });
        });
        describe('numeric ranges', () => {
            it('should parse simple ranges', () => {
                expectParsed('1-3', ['1', '2', '3']);
                expectParsed('5-10', ['5', '6', '7', '8', '9', '10']);
                expectParsed('1-1', ['1']);
            });
            it('should handle large ranges', () => {
                const expanded = parseBatchInput('1-100');
                expect(expanded).toHaveLength(100);
                expect(expanded[0]).toBe('1');
                expect(expanded[99]).toBe('100');
            });
            it('should reject invalid ranges', () => {
                const reversed = () => parseBatchInput('10-5');
                const singleItem = () => parseBatchInput('5-5');
                expect(reversed).toThrow('Invalid batch range: start (10) cannot be greater than end (5)');
                expect(singleItem).not.toThrow(); // Valid single-item range
            });
            it('should reject ranges that are too large', () => {
                expect(() => parseBatchInput('1-102')).toThrow('Batch range too large: 102 batches. Maximum allowed: 100');
                expect(() => parseBatchInput('1-101')).toThrow('Batch range too large: 101 batches. Maximum allowed: 100');
                expect(() => parseBatchInput('1-100')).not.toThrow(); // Valid maximum range
            });
        });
        describe('comma-separated lists', () => {
            it('should parse simple comma-separated lists', () => {
                expectParsed('1,2,3', ['1', '2', '3']);
                expectParsed('batch-1,batch-2,batch-3', ['batch-1', 'batch-2', 'batch-3']);
            });
            it('should handle whitespace in comma-separated lists', () => {
                expectParsed('1, 2, 3', ['1', '2', '3']);
                expectParsed(' 1 , 2 , 3 ', ['1', '2', '3']);
            });
            it('should filter out empty entries', () => {
                expectParsed('1,,2,3', ['1', '2', '3']);
                expectParsed('1, ,2,3', ['1', '2', '3']);
            });
        });
        describe('mixed formats', () => {
            it('should handle ranges within comma-separated lists', () => {
                expectParsed('1-3,5,7-9', ['1', '2', '3', '5', '7', '8', '9']);
                expectParsed('1-5,10,15-17', ['1', '2', '3', '4', '5', '10', '15', '16', '17']);
            });
            it('should handle complex mixed formats', () => {
                expectParsed('1-3,batch-1,5-7,historical', [
                    '1', '2', '3',
                    'batch-1',
                    '5', '6', '7',
                    'historical',
                ]);
            });
        });
        describe('edge cases', () => {
            it('should handle empty string', () => {
                expectParsed('', ['']);
            });
            it('should handle single comma', () => {
                expectParsed(',', []);
            });
            it('should handle multiple commas', () => {
                expectParsed(',,', []);
                expectParsed('1,,2', ['1', '2']);
            });
        });
    });
    describe('validateBatchFormat', () => {
        // Shared assertion: `name` must validate to `verdict`.
        const expectVerdict = (name, verdict) => {
            expect(validateBatchFormat(name)).toBe(verdict);
        };
        it('should accept valid batch names', () => {
            const accepted = ['1', '42', 'batch-1', 'Batch_01', 'historical-content', 'content_2023'];
            for (const name of accepted) {
                expectVerdict(name, true);
            }
        });
        it('should reject invalid batch names', () => {
            expectVerdict('', false);
            expectVerdict('batch 1', false); // space not allowed
            expectVerdict('batch.1', false); // dot not allowed
            expectVerdict('batch/1', false); // slash not allowed
            expectVerdict('batch@1', false); // @ not allowed
        });
        it('should handle special characters correctly', () => {
            expectVerdict('batch-1', true); // hyphen allowed
            expectVerdict('batch_1', true); // underscore allowed
            expectVerdict('Batch01', true); // alphanumeric allowed
        });
    });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"default-server.d.ts","sourceRoot":"","sources":["../../src/utils/default-server.ts"],"names":[],"mappings":"AAEA,wBAAgB,UAAU,IAAI,UAAU,GAAG,SAAS,GAAG,SAAS,CAY/D;AAED,wBAAgB,gBAAgB,IAAI,SAAS,GAAG,SAAS,CAOxD"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
/**
 * Work out which alias the CLI was launched under.
 *
 * Only the file name of the running script (`process.argv[1]`) is inspected,
 * case-insensitively; directory names are ignored.
 *
 * @returns 'biorxiv' or 'medrxiv' when the script name contains that word
 *   ('biorxiv' is checked first), otherwise 'openrxiv'. Also 'openrxiv' when
 *   no script path is available (e.g. `node -e` or the REPL).
 */
export function getCliName() {
    // process.argv[1] contains the script path, which includes the alias
    const scriptPath = process.argv[1];
    // argv[1] can be missing; path.basename() throws on non-string input, and
    // this function runs at import time through the commands' option defaults.
    const commandName = typeof scriptPath === 'string' ? path.basename(scriptPath).toLowerCase() : '';
    if (commandName.includes('biorxiv')) {
        return 'biorxiv';
    }
    if (commandName.includes('medrxiv')) {
        return 'medrxiv';
    }
    return 'openrxiv';
}
|
|
14
|
+
/**
 * Choose the preprint server used when `--server` is not given.
 *
 * @returns 'medrxiv' when the CLI name reported by `getCliName()` contains
 *   "medrxiv" (case-insensitive); 'biorxiv' in every other case.
 */
export function getDefaultServer() {
    const launchedAsMedrxiv = getCliName().toLowerCase().includes('medrxiv');
    return launchedAsMedrxiv ? 'medrxiv' : 'biorxiv';
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AACA,cAAc,aAAa,CAAC;AAC5B,cAAc,qBAAqB,CAAC;AACpC,cAAc,cAAc,CAAC;AAC7B,cAAc,qBAAqB,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Options accepted by `processMecaFile` (its second parameter).
 *
 * `batch`, `server`, `apiUrl` and `s3Key` are required; `output`, `apiKey`
 * and `selective` are optional. `server` is limited to 'biorxiv' or 'medrxiv'.
 * NOTE(review): the meaning of the individual fields is not visible in this
 * declaration file - confirm against src/utils/meca-processor.ts.
 */
export interface ProcessMecaOptions {
|
|
2
|
+
batch: string;
|
|
3
|
+
server: 'biorxiv' | 'medrxiv';
|
|
4
|
+
apiUrl: string;
|
|
5
|
+
output?: string;
|
|
6
|
+
s3Key: string;
|
|
7
|
+
apiKey?: string;
|
|
8
|
+
selective?: boolean;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Value the promise returned by `processMecaFile` resolves to: a success
 * flag plus, optionally, the extracted paper data and an error string.
 * NOTE(review): presumably `paper` is set on success and `error` on failure -
 * confirm against the implementation. `paper` is untyped (`any`).
 */
export interface ProcessMecaResult {
|
|
11
|
+
success: boolean;
|
|
12
|
+
paper?: any;
|
|
13
|
+
error?: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Process a MECA file and extract metadata
|
|
17
|
+
* @param mecaPath Path to the MECA file (local file path)
|
|
18
|
+
* @param options Processing options
|
|
19
|
+
* @returns ProcessMecaResult with success status and extracted paper data
|
|
20
|
+
*/
|
|
21
|
+
export declare function processMecaFile(mecaPath: string, options: ProcessMecaOptions): Promise<ProcessMecaResult>;
|
|
22
|
+
/**
|
|
23
|
+
* Preprocess XML content to fix common HTML entities that cause parsing errors
|
|
24
|
+
* @param xmlContent Raw XML content
|
|
25
|
+
* @returns Preprocessed XML content with entities replaced
|
|
26
|
+
*/
|
|
27
|
+
export declare function preprocessXMLContent(xmlContent: string): string;
|
|
28
|
+
//# sourceMappingURL=meca-processor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"meca-processor.d.ts","sourceRoot":"","sources":["../../src/utils/meca-processor.ts"],"names":[],"mappings":"AAqCA,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,GAAG,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;GAKG;AACH,wBAAsB,eAAe,CACnC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,iBAAiB,CAAC,CAkE5B;AAwQD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAwD/D"}
|