openrxiv-cli 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/dist/api/api-client.d.ts +96 -0
  2. package/dist/api/api-client.d.ts.map +1 -0
  3. package/dist/api/api-client.js +257 -0
  4. package/dist/aws/bucket-explorer.d.ts +26 -0
  5. package/dist/aws/bucket-explorer.d.ts.map +1 -0
  6. package/dist/aws/bucket-explorer.js +220 -0
  7. package/dist/aws/config.d.ts +5 -0
  8. package/dist/aws/config.d.ts.map +1 -0
  9. package/dist/aws/config.js +36 -0
  10. package/dist/aws/downloader.d.ts +13 -0
  11. package/dist/aws/downloader.d.ts.map +1 -0
  12. package/dist/aws/downloader.js +115 -0
  13. package/dist/aws/month-lister.d.ts +18 -0
  14. package/dist/aws/month-lister.d.ts.map +1 -0
  15. package/dist/aws/month-lister.js +90 -0
  16. package/dist/commands/batch-info.d.ts +3 -0
  17. package/dist/commands/batch-info.d.ts.map +1 -0
  18. package/dist/commands/batch-info.js +213 -0
  19. package/dist/commands/batch-process.d.ts +3 -0
  20. package/dist/commands/batch-process.d.ts.map +1 -0
  21. package/dist/commands/batch-process.js +557 -0
  22. package/dist/commands/download.d.ts +3 -0
  23. package/dist/commands/download.d.ts.map +1 -0
  24. package/dist/commands/download.js +76 -0
  25. package/dist/commands/index.d.ts +6 -0
  26. package/dist/commands/index.d.ts.map +1 -0
  27. package/dist/commands/index.js +5 -0
  28. package/dist/commands/list.d.ts +3 -0
  29. package/dist/commands/list.d.ts.map +1 -0
  30. package/dist/commands/list.js +18 -0
  31. package/dist/commands/summary.d.ts +3 -0
  32. package/dist/commands/summary.d.ts.map +1 -0
  33. package/dist/commands/summary.js +249 -0
  34. package/dist/index.d.ts +7 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +35 -0
  37. package/dist/utils/batches.d.ts +9 -0
  38. package/dist/utils/batches.d.ts.map +1 -0
  39. package/dist/utils/batches.js +61 -0
  40. package/dist/utils/batches.test.d.ts +2 -0
  41. package/dist/utils/batches.test.d.ts.map +1 -0
  42. package/dist/utils/batches.test.js +119 -0
  43. package/dist/utils/default-server.d.ts +3 -0
  44. package/dist/utils/default-server.d.ts.map +1 -0
  45. package/dist/utils/default-server.js +20 -0
  46. package/dist/utils/index.d.ts +5 -0
  47. package/dist/utils/index.d.ts.map +1 -0
  48. package/dist/utils/index.js +5 -0
  49. package/dist/utils/meca-processor.d.ts +28 -0
  50. package/dist/utils/meca-processor.d.ts.map +1 -0
  51. package/dist/utils/meca-processor.js +503 -0
  52. package/dist/utils/meca-processor.test.d.ts +2 -0
  53. package/dist/utils/meca-processor.test.d.ts.map +1 -0
  54. package/dist/utils/meca-processor.test.js +123 -0
  55. package/dist/utils/months.d.ts +36 -0
  56. package/dist/utils/months.d.ts.map +1 -0
  57. package/dist/utils/months.js +135 -0
  58. package/dist/utils/months.test.d.ts +2 -0
  59. package/dist/utils/months.test.d.ts.map +1 -0
  60. package/dist/utils/months.test.js +209 -0
  61. package/dist/utils/requester-pays-error.d.ts +6 -0
  62. package/dist/utils/requester-pays-error.d.ts.map +1 -0
  63. package/dist/utils/requester-pays-error.js +20 -0
  64. package/dist/version.d.ts +3 -0
  65. package/dist/version.d.ts.map +1 -0
  66. package/dist/version.js +2 -0
  67. package/package.json +67 -0
@@ -0,0 +1,18 @@
1
+ import { Command } from 'commander';
2
+ import { listBucketContent } from '../aws/bucket-explorer.js';
3
+ import { getDefaultServer } from '../utils/index.js';
4
/**
 * `list` subcommand: forwards the parsed CLI options to `listBucketContent`.
 * The `--server` default comes from `getDefaultServer()`, evaluated once when
 * this module is loaded.
 */
export const listCommand = new Command('list');
listCommand
    .description('List available content in the bioRxiv or medRxiv S3 bucket')
    .option('-m, --month <month>', 'Filter by specific month (e.g., "2024-01")')
    .option('-b, --batch <batch>', 'Filter by specific batch (e.g., "Batch_01")')
    .option('-l, --limit <number>', 'Limit the number of results', '50')
    .option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer())
    .action(async (cliOptions) => {
        try {
            await listBucketContent(cliOptions);
        }
        catch (failure) {
            // Any failure while listing is reported and ends the process with a non-zero status.
            console.error('Error listing content:', failure);
            process.exit(1);
        }
    });
@@ -0,0 +1,3 @@
1
+ import { Command } from 'commander';
2
+ export declare const summaryCommand: Command;
3
+ //# sourceMappingURL=summary.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../src/commands/summary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,eAAO,MAAM,cAAc,SAyGvB,CAAC"}
@@ -0,0 +1,249 @@
1
+ import { Command } from 'commander';
2
+ import chalk from 'chalk';
3
+ import boxen from 'boxen';
4
+ import { createOpenRxivApiClient, getServerFromDOI } from '../api/api-client.js';
5
+ import { parseBiorxivURL } from 'openrxiv-utils';
6
+ import { getDefaultServer } from '../utils/index.js';
7
/**
 * `summary` subcommand: resolves a DOI (given directly or extracted from a
 * bioRxiv/medRxiv/doi.org URL), fetches its content detail and version list
 * through the OpenRxiv API client, and prints them with `displaySummary`.
 *
 * When the lookup on `biorxiv` returns nothing, the same DOI is retried on
 * `medrxiv`. Versions are always requested from whichever server actually
 * returned the content detail.
 *
 * Exits the process with status 1 on an unparsable URL, when no content is
 * found, or on any thrown error.
 */
export const summaryCommand = new Command('summary')
    .description('Get a summary of a bioRxiv preprint from a URL or DOI')
    .argument('<url-or-doi>', 'bioRxiv URL or DOI to summarize')
    .option('-m, --more', 'Show additional details and full abstract')
    .option('-s, --server <server>', 'Specify server (biorxiv or medrxiv)', getDefaultServer())
    .action(async (urlOrDoi, options) => {
        // Every lookup in this command uses the same format and timeout; only the server varies.
        const clientFor = (serverName) => createOpenRxivApiClient({
            server: serverName,
            format: 'json',
            timeout: 15000,
        });
        try {
            console.log(chalk.blue.bold(`🔬 ${options.server != null ? options.server : ''} Preprint Summary`));
            console.log(chalk.blue('================================\n'));
            // Parse the input (could be URL or DOI)
            let doi;
            const looksLikeUrl = urlOrDoi.includes('biorxiv.org') ||
                urlOrDoi.includes('medrxiv.org') ||
                urlOrDoi.includes('doi.org');
            if (looksLikeUrl) {
                const parsedUrl = parseBiorxivURL(urlOrDoi);
                if (!parsedUrl) {
                    console.log(chalk.red('❌ Invalid bioRxiv URL'));
                    process.exit(1);
                }
                doi = parsedUrl.doi;
                console.log(chalk.cyan(`📝 URL: ${urlOrDoi}`));
            }
            else {
                doi = urlOrDoi;
                console.log(chalk.cyan(`🔍 DOI: ${doi}`));
            }
            console.log('');
            // Determine server if possible.
            // NOTE(review): --server is declared with a default value, so the
            // getServerFromDOI fallback looks unreachable in practice — confirm.
            const server = options.server != null ? options.server : getServerFromDOI(urlOrDoi);
            console.log(chalk.blue(`🌐 Server: ${server}`));
            console.log('');
            // The client that ends up holding the record; switched if the fallback succeeds.
            let activeClient = clientFor(server);
            console.log(chalk.yellow('⏳ Fetching preprint information...'));
            console.log('');
            let contentDetail = await activeClient.getContentDetail(doi);
            // If not found on bioRxiv, try medRxiv as a fallback
            if (!contentDetail && server === 'biorxiv') {
                console.log(chalk.yellow('⚠️ Paper not found on bioRxiv, trying medRxiv...'));
                const medrxivClient = clientFor('medrxiv');
                contentDetail = await medrxivClient.getContentDetail(doi);
                if (contentDetail) {
                    console.log(chalk.green('✅ Found paper on medRxiv!'));
                    contentDetail.server = 'medrxiv'; // Ensure the displayed server matches where it was found
                    activeClient = medrxivClient;
                }
            }
            if (!contentDetail) {
                console.log(chalk.red('❌ No content found for this DOI on either bioRxiv or medRxiv'));
                console.log(chalk.yellow("💡 This might be a new preprint that hasn't been indexed yet"));
                process.exit(1);
            }
            // Ask only the server that returned the record for its versions
            // (previously the original server was queried first and its result discarded).
            const allVersions = await activeClient.getAllVersions(doi);
            displaySummary(contentDetail, allVersions, options.more === true);
        }
        catch (error) {
            console.error(chalk.red('❌ Error:'), error instanceof Error ? error.message : 'Unknown error');
            process.exit(1);
        }
    });
95
/**
 * Print a preprint summary to stdout as a series of boxed sections:
 * title, basic information, authors, abstract (if present), funding
 * (if any entries), versions (verbose only) and a footer with the online link.
 *
 * @param contentDetail Record for the preprint. Fields read here: title, doi,
 *   server, category, license, type, published, date, version, jatsxml,
 *   authors, author_corresponding, author_corresponding_institution,
 *   abstract, funding.
 * @param allVersions Version records (version, date, type, title, jatsxml),
 *   or a falsy value when none are available.
 * @param verbose When true, boxes are not constrained to 80 columns and the
 *   extra fields plus the versions section are shown.
 */
function displaySummary(contentDetail, allVersions, verbose = false) {
    // Non-verbose output is pinned to 80 columns; verbose leaves the width to boxen.
    const widthOption = verbose ? {} : { width: 80 };
    const versionCount = allVersions ? allVersions.length : 0;
    // Shared look for the titled, round-bordered sections.
    const printSection = (text, borderColor, title) => {
        console.log(boxen(text, {
            padding: 1,
            margin: 1,
            borderStyle: 'round',
            borderColor,
            title,
            titleAlignment: 'left',
            textAlignment: 'left',
            ...widthOption,
        }));
    };
    // Title in a prominent box
    console.log(boxen(chalk.green.bold.underline(contentDetail.title), {
        padding: 1,
        margin: 1,
        borderStyle: 'double',
        borderColor: 'green',
        backgroundColor: 'black',
        textAlignment: 'left',
        ...widthOption,
    }));
    // Basic info
    const basicLines = [
        `${chalk.cyan('DOI:')} ${contentDetail.doi}`,
        `${chalk.cyan('Server:')} ${contentDetail.server}`,
        `${chalk.cyan('Category:')} ${chalk.yellow(contentDetail.category)}`,
        `${chalk.cyan('License:')} ${chalk.yellow(contentDetail.license)}`,
        `${chalk.cyan('Type:')} ${chalk.yellow(contentDetail.type)}`,
        `${chalk.cyan('Published:')} ${contentDetail.published === 'NA'
            ? chalk.gray('Not published')
            : chalk.green(contentDetail.published)}`,
        `${chalk.cyan('Total versions:')} ${versionCount}`,
    ];
    if (verbose) {
        basicLines.push(`${chalk.cyan('Date:')} ${contentDetail.date}`);
        basicLines.push(`${chalk.cyan('Version:')} ${contentDetail.version}`);
        if (contentDetail.jatsxml) {
            basicLines.push(`${chalk.cyan('JATS XML:')} ${chalk.underline.blue(contentDetail.jatsxml)}`);
        }
    }
    printSection(basicLines.join('\n'), 'blue', chalk.blue.bold('📋 Basic Information'));
    // Authors
    const authorLines = [`${chalk.cyan('Authors:')} ${contentDetail.authors}`];
    if (contentDetail.author_corresponding) {
        authorLines.push(`${chalk.cyan('Corresponding:')} ${chalk.green(contentDetail.author_corresponding)}`);
    }
    if (contentDetail.author_corresponding_institution) {
        authorLines.push(`${chalk.cyan('Institution:')} ${chalk.gray(contentDetail.author_corresponding_institution)}`);
    }
    printSection(authorLines.join('\n'), 'cyan', chalk.cyan.bold('👥 Authors'));
    // Abstract
    if (contentDetail.abstract) {
        printSection(contentDetail.abstract, 'yellow', chalk.yellow.bold('📖 Abstract'));
    }
    // Funding information
    if (contentDetail.funding && contentDetail.funding.length > 0) {
        const fundingText = contentDetail.funding
            .map((fund, index) => {
                let entry = `${chalk.cyan(index + 1)}. ${chalk.green(fund.name)}`;
                if (fund.id) {
                    entry += `\n ${chalk.gray('ID:')} ${fund.id} (${fund['id-type']})`;
                }
                if (fund.award) {
                    entry += `\n ${chalk.gray('Award:')} ${fund.award}`;
                }
                return entry;
            })
            .join('\n\n');
        printSection(fundingText, 'magenta', chalk.magenta.bold('💰 Funding'));
    }
    // Versions - only show when --more is provided
    if (verbose && versionCount > 0) {
        const versionsText = allVersions
            .map((version) => {
                // Only mark the title as truncated when it really was cut, and
                // tolerate version records without a title.
                const fullTitle = typeof version.title === 'string' ? version.title : '';
                const shownTitle = fullTitle.length > 60 ? `${fullTitle.substring(0, 60)}...` : fullTitle;
                let entry = `${chalk.cyan(`v${version.version}`)} (${version.date})`;
                entry += `\n ${chalk.gray('Type:')} ${version.type}`;
                entry += `\n ${chalk.gray('Title:')} ${shownTitle}`;
                if (version.jatsxml) {
                    entry += `\n ${chalk.gray('JATS XML:')} ${chalk.underline.blue(version.jatsxml)}`;
                }
                return entry;
            })
            .join('\n\n');
        printSection(versionsText, 'green', chalk.green.bold('🔄 Versions'));
    }
    // Footer
    const baseUrl = contentDetail.server === 'medrxiv'
        ? `https://www.medrxiv.org/content/${contentDetail.doi}`
        : `https://www.biorxiv.org/content/${contentDetail.doi}`;
    const footerLines = [`💡 View online: ${chalk.underline.blue(baseUrl)}`];
    if (versionCount > 1 && !verbose) {
        footerLines.push(`📚 This preprint has ${allVersions.length} versions. Use --more to see additional details.`);
    }
    console.log(boxen(footerLines.join('\n'), {
        padding: 1,
        margin: 1,
        borderStyle: 'round',
        borderColor: 'gray',
        backgroundColor: 'black',
        textAlignment: 'left',
        ...widthOption,
    }));
}
@@ -0,0 +1,7 @@
1
+ import { Command } from 'commander';
2
+ export * from './commands/index.js';
3
+ export { default as version } from './version.js';
4
+ export { getCliName } from './utils/index.js';
5
+ export { setGlobalRequesterPays } from './aws/config.js';
6
+ export declare function createCLI(): Command;
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAYpC,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,OAAO,IAAI,OAAO,EAAE,MAAM,cAAc,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,sBAAsB,EAAE,MAAM,iBAAiB,CAAC;AAEzD,wBAAgB,SAAS,IAAI,OAAO,CAqCnC"}
package/dist/index.js ADDED
@@ -0,0 +1,35 @@
1
+ import { Command } from 'commander';
2
+ import { listCommand, downloadCommand, summaryCommand, monthInfoCommand, batchProcessCommand, } from './commands/index.js';
3
+ import { getCliName } from './utils/index.js';
4
+ import { setGlobalRequesterPays } from './aws/config.js';
5
+ import version from './version.js';
6
+ export * from './commands/index.js';
7
+ export { default as version } from './version.js';
8
+ export { getCliName } from './utils/index.js';
9
+ export { setGlobalRequesterPays } from './aws/config.js';
10
/**
 * Build the CLI program, register all subcommands and global options, and
 * parse `process.argv`.
 *
 * The `--requester-pays` flag is applied from the option's parse event rather
 * than after `program.parse()` returns: parsing invokes the selected
 * subcommand's action, so a check performed after `parse()` would run only
 * once that action had already started.
 *
 * @returns The configured (and already parsed) commander program.
 */
export function createCLI() {
    const cliName = getCliName();
    const program = new Command();
    program
        .name(cliName)
        .description(`CLI tool to download bioRxiv/medRxiv MECA files from AWS S3 for text and data mining`)
        .version(`v${version}`, '-v, --version', `Print the current version of the ${cliName} CLI`);
    // Add commands
    const subcommands = [
        listCommand,
        downloadCommand,
        summaryCommand,
        monthInfoCommand,
        batchProcessCommand,
    ];
    for (const subcommand of subcommands) {
        program.addCommand(subcommand);
    }
    // Global options
    program.option('-d, --debug', 'Enable debug mode');
    program.option('--requester-pays', 'Enable requester pays for local development (required when not on EC2 with IAM role)');
    // Enable requester pays as soon as the flag is seen, i.e. before any action handler runs.
    program.on('option:requester-pays', () => {
        setGlobalRequesterPays(true);
        console.log('Requester pays enabled - you will be charged for S3 requests');
    });
    // Parse command line arguments
    program.parse();
    return program;
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Parse batch input to support ranges like "1-10" or "batch-1,batch-2"
3
+ */
4
+ export declare function parseBatchInput(batchInput: string): string[];
5
+ /**
6
+ * Validate batch format
7
+ */
8
+ export declare function validateBatchFormat(batch: string): boolean;
9
+ //# sourceMappingURL=batches.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batches.d.ts","sourceRoot":"","sources":["../../src/utils/batches.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,EAAE,CAgE5D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAG1D"}
@@ -0,0 +1,61 @@
1
/**
 * Parse batch input to support ranges like "1-10" or "batch-1,batch-2".
 *
 * Accepted forms:
 *  - a single batch name ("7", "Batch_01") — returned unchanged in a one-item array;
 *  - a numeric range "start-end" — expanded to every integer in it, as strings;
 *  - a comma-separated list of either form — entries are trimmed, empty
 *    entries are dropped, and ranges are expanded in place.
 *
 * Input without a comma is not trimmed, so "" yields [""].
 *
 * @param batchInput Raw value supplied by the user.
 * @returns The list of batch identifiers.
 * @throws Error when a range has start > end, spans more than 100 batches, or
 *   has bounds beyond Number.MAX_SAFE_INTEGER (where incrementing stops
 *   advancing and expansion would never terminate).
 */
export function parseBatchInput(batchInput) {
    // Expands one token: a "start-end" range becomes its members, anything else is kept as-is.
    const expandToken = (token) => {
        const rangeMatch = /^(\d+)-(\d+)$/.exec(token);
        if (!rangeMatch) {
            return [token];
        }
        const start = parseInt(rangeMatch[1], 10);
        const end = parseInt(rangeMatch[2], 10);
        if (start > end) {
            throw new Error(`Invalid batch range: start (${start}) cannot be greater than end (${end})`);
        }
        if (end - start >= 100) {
            throw new Error(`Batch range too large: ${end - start + 1} batches. Maximum allowed: 100`);
        }
        if (!Number.isSafeInteger(end)) {
            throw new Error('Invalid batch range: values must not exceed Number.MAX_SAFE_INTEGER');
        }
        return Array.from({ length: end - start + 1 }, (_, offset) => (start + offset).toString());
    };
    if (!batchInput.includes(',')) {
        return expandToken(batchInput);
    }
    const allBatches = [];
    for (const rawPart of batchInput.split(',')) {
        const part = rawPart.trim();
        if (part.length > 0) {
            allBatches.push(...expandToken(part));
        }
    }
    return allBatches;
}
55
+ /**
56
+ * Validate batch format
57
+ */
58
+ export function validateBatchFormat(batch) {
59
+ // Allow numeric batches (1, 2, 3) or named batches (batch-1, Batch_01, etc.)
60
+ return /^[\w\-_]+$/.test(batch);
61
+ }
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=batches.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batches.test.d.ts","sourceRoot":"","sources":["../../src/utils/batches.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,119 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { parseBatchInput, validateBatchFormat } from './batches.js';
3
+ describe('Batch Utilities', () => {
4
+ describe('parseBatchInput', () => {
5
+ describe('single batches', () => {
6
+ it('should parse single numeric batch', () => {
7
+ expect(parseBatchInput('1')).toEqual(['1']);
8
+ expect(parseBatchInput('42')).toEqual(['42']);
9
+ });
10
+ it('should parse single named batch', () => {
11
+ expect(parseBatchInput('batch-1')).toEqual(['batch-1']);
12
+ expect(parseBatchInput('Batch_01')).toEqual(['Batch_01']);
13
+ expect(parseBatchInput('historical-content')).toEqual(['historical-content']);
14
+ });
15
+ });
16
+ describe('numeric ranges', () => {
17
+ it('should parse simple ranges', () => {
18
+ expect(parseBatchInput('1-3')).toEqual(['1', '2', '3']);
19
+ expect(parseBatchInput('5-10')).toEqual(['5', '6', '7', '8', '9', '10']);
20
+ expect(parseBatchInput('1-1')).toEqual(['1']);
21
+ });
22
+ it('should handle large ranges', () => {
23
+ const result = parseBatchInput('1-100');
24
+ expect(result).toHaveLength(100);
25
+ expect(result[0]).toBe('1');
26
+ expect(result[99]).toBe('100');
27
+ });
28
+ it('should reject invalid ranges', () => {
29
+ expect(() => parseBatchInput('10-5')).toThrow('Invalid batch range: start (10) cannot be greater than end (5)');
30
+ expect(() => parseBatchInput('5-5')).not.toThrow(); // Valid single-item range
31
+ });
32
+ it('should reject ranges that are too large', () => {
33
+ expect(() => parseBatchInput('1-102')).toThrow('Batch range too large: 102 batches. Maximum allowed: 100');
34
+ expect(() => parseBatchInput('1-101')).toThrow('Batch range too large: 101 batches. Maximum allowed: 100');
35
+ expect(() => parseBatchInput('1-100')).not.toThrow(); // Valid maximum range
36
+ });
37
+ });
38
+ describe('comma-separated lists', () => {
39
+ it('should parse simple comma-separated lists', () => {
40
+ expect(parseBatchInput('1,2,3')).toEqual(['1', '2', '3']);
41
+ expect(parseBatchInput('batch-1,batch-2,batch-3')).toEqual([
42
+ 'batch-1',
43
+ 'batch-2',
44
+ 'batch-3',
45
+ ]);
46
+ });
47
+ it('should handle whitespace in comma-separated lists', () => {
48
+ expect(parseBatchInput('1, 2, 3')).toEqual(['1', '2', '3']);
49
+ expect(parseBatchInput(' 1 , 2 , 3 ')).toEqual(['1', '2', '3']);
50
+ });
51
+ it('should filter out empty entries', () => {
52
+ expect(parseBatchInput('1,,2,3')).toEqual(['1', '2', '3']);
53
+ expect(parseBatchInput('1, ,2,3')).toEqual(['1', '2', '3']);
54
+ });
55
+ });
56
+ describe('mixed formats', () => {
57
+ it('should handle ranges within comma-separated lists', () => {
58
+ expect(parseBatchInput('1-3,5,7-9')).toEqual(['1', '2', '3', '5', '7', '8', '9']);
59
+ expect(parseBatchInput('1-5,10,15-17')).toEqual([
60
+ '1',
61
+ '2',
62
+ '3',
63
+ '4',
64
+ '5',
65
+ '10',
66
+ '15',
67
+ '16',
68
+ '17',
69
+ ]);
70
+ });
71
+ it('should handle complex mixed formats', () => {
72
+ expect(parseBatchInput('1-3,batch-1,5-7,historical')).toEqual([
73
+ '1',
74
+ '2',
75
+ '3',
76
+ 'batch-1',
77
+ '5',
78
+ '6',
79
+ '7',
80
+ 'historical',
81
+ ]);
82
+ });
83
+ });
84
+ describe('edge cases', () => {
85
+ it('should handle empty string', () => {
86
+ expect(parseBatchInput('')).toEqual(['']);
87
+ });
88
+ it('should handle single comma', () => {
89
+ expect(parseBatchInput(',')).toEqual([]);
90
+ });
91
+ it('should handle multiple commas', () => {
92
+ expect(parseBatchInput(',,')).toEqual([]);
93
+ expect(parseBatchInput('1,,2')).toEqual(['1', '2']);
94
+ });
95
+ });
96
+ });
97
+ describe('validateBatchFormat', () => {
98
+ it('should accept valid batch names', () => {
99
+ expect(validateBatchFormat('1')).toBe(true);
100
+ expect(validateBatchFormat('42')).toBe(true);
101
+ expect(validateBatchFormat('batch-1')).toBe(true);
102
+ expect(validateBatchFormat('Batch_01')).toBe(true);
103
+ expect(validateBatchFormat('historical-content')).toBe(true);
104
+ expect(validateBatchFormat('content_2023')).toBe(true);
105
+ });
106
+ it('should reject invalid batch names', () => {
107
+ expect(validateBatchFormat('')).toBe(false);
108
+ expect(validateBatchFormat('batch 1')).toBe(false); // space not allowed
109
+ expect(validateBatchFormat('batch.1')).toBe(false); // dot not allowed
110
+ expect(validateBatchFormat('batch/1')).toBe(false); // slash not allowed
111
+ expect(validateBatchFormat('batch@1')).toBe(false); // @ not allowed
112
+ });
113
+ it('should handle special characters correctly', () => {
114
+ expect(validateBatchFormat('batch-1')).toBe(true); // hyphen allowed
115
+ expect(validateBatchFormat('batch_1')).toBe(true); // underscore allowed
116
+ expect(validateBatchFormat('Batch01')).toBe(true); // alphanumeric allowed
117
+ });
118
+ });
119
+ });
@@ -0,0 +1,3 @@
1
+ export declare function getCliName(): 'openrxiv' | 'biorxiv' | 'medrxiv';
2
+ export declare function getDefaultServer(): 'biorxiv' | 'medrxiv';
3
+ //# sourceMappingURL=default-server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"default-server.d.ts","sourceRoot":"","sources":["../../src/utils/default-server.ts"],"names":[],"mappings":"AAEA,wBAAgB,UAAU,IAAI,UAAU,GAAG,SAAS,GAAG,SAAS,CAY/D;AAED,wBAAgB,gBAAgB,IAAI,SAAS,GAAG,SAAS,CAOxD"}
@@ -0,0 +1,20 @@
1
+ import path from 'node:path';
2
/**
 * Work out which alias the CLI was invoked as, based on the file name of the
 * running script (`process.argv[1]`).
 *
 * @returns 'biorxiv' or 'medrxiv' when the script's base name contains that
 *   word (case-insensitive, 'biorxiv' checked first); otherwise 'openrxiv',
 *   including when no script path is available.
 */
export function getCliName() {
    // process.argv[1] contains the script path, which includes the alias.
    // It can be absent, and path.basename throws on non-strings.
    const scriptPath = process.argv[1];
    const commandName = typeof scriptPath === 'string' ? path.basename(scriptPath).toLowerCase() : '';
    if (commandName.includes('biorxiv')) {
        return 'biorxiv';
    }
    if (commandName.includes('medrxiv')) {
        return 'medrxiv';
    }
    return 'openrxiv';
}
14
/**
 * Pick the default preprint server from the CLI alias in use.
 *
 * @returns 'medrxiv' when `getCliName()` reports the medrxiv alias, otherwise 'biorxiv'.
 */
export function getDefaultServer() {
    // getCliName() only ever returns 'biorxiv', 'medrxiv' or 'openrxiv',
    // so an exact comparison matches the same case as a substring search.
    return getCliName() === 'medrxiv' ? 'medrxiv' : 'biorxiv';
}
@@ -0,0 +1,5 @@
1
+ export * from './months.js';
2
+ export * from './meca-processor.js';
3
+ export * from './batches.js';
4
+ export * from './default-server.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/utils/index.ts"],"names":[],"mappings":"AACA,cAAc,aAAa,CAAC;AAC5B,cAAc,qBAAqB,CAAC;AACpC,cAAc,cAAc,CAAC;AAC7B,cAAc,qBAAqB,CAAC"}
@@ -0,0 +1,5 @@
1
+ // Export all utility functions
2
+ export * from './months.js';
3
+ export * from './meca-processor.js';
4
+ export * from './batches.js';
5
+ export * from './default-server.js';
@@ -0,0 +1,28 @@
1
+ export interface ProcessMecaOptions {
2
+ batch: string;
3
+ server: 'biorxiv' | 'medrxiv';
4
+ apiUrl: string;
5
+ output?: string;
6
+ s3Key: string;
7
+ apiKey?: string;
8
+ selective?: boolean;
9
+ }
10
+ export interface ProcessMecaResult {
11
+ success: boolean;
12
+ paper?: any;
13
+ error?: string;
14
+ }
15
+ /**
16
+ * Process a MECA file and extract metadata
17
+ * @param mecaPath Path to the MECA file (local file path)
18
+ * @param options Processing options
19
+ * @returns ProcessMecaResult with success status and extracted paper data
20
+ */
21
+ export declare function processMecaFile(mecaPath: string, options: ProcessMecaOptions): Promise<ProcessMecaResult>;
22
+ /**
23
+ * Preprocess XML content to fix common HTML entities that cause parsing errors
24
+ * @param xmlContent Raw XML content
25
+ * @returns Preprocessed XML content with entities replaced
26
+ */
27
+ export declare function preprocessXMLContent(xmlContent: string): string;
28
+ //# sourceMappingURL=meca-processor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"meca-processor.d.ts","sourceRoot":"","sources":["../../src/utils/meca-processor.ts"],"names":[],"mappings":"AAqCA,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,GAAG,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;GAKG;AACH,wBAAsB,eAAe,CACnC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,iBAAiB,CAAC,CAkE5B;AAwQD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAwD/D"}