openrxiv 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/cli/src/api/api-client.d.ts +96 -0
  2. package/dist/cli/src/api/api-client.d.ts.map +1 -0
  3. package/dist/cli/src/api/api-client.js +257 -0
  4. package/dist/cli/src/aws/bucket-explorer.d.ts +26 -0
  5. package/dist/cli/src/aws/bucket-explorer.d.ts.map +1 -0
  6. package/dist/cli/src/aws/bucket-explorer.js +220 -0
  7. package/dist/cli/src/aws/config.d.ts +18 -0
  8. package/dist/cli/src/aws/config.d.ts.map +1 -0
  9. package/dist/cli/src/aws/config.js +191 -0
  10. package/dist/cli/src/aws/downloader.d.ts +13 -0
  11. package/dist/cli/src/aws/downloader.d.ts.map +1 -0
  12. package/dist/cli/src/aws/downloader.js +115 -0
  13. package/dist/cli/src/aws/month-lister.d.ts +18 -0
  14. package/dist/cli/src/aws/month-lister.d.ts.map +1 -0
  15. package/dist/cli/src/aws/month-lister.js +90 -0
  16. package/dist/cli/src/commands/batch-process.d.ts +3 -0
  17. package/dist/cli/src/commands/batch-process.d.ts.map +1 -0
  18. package/dist/cli/src/commands/batch-process.js +557 -0
  19. package/dist/cli/src/commands/config.d.ts +3 -0
  20. package/dist/cli/src/commands/config.d.ts.map +1 -0
  21. package/dist/cli/src/commands/config.js +42 -0
  22. package/dist/cli/src/commands/download.d.ts +3 -0
  23. package/dist/cli/src/commands/download.d.ts.map +1 -0
  24. package/dist/cli/src/commands/download.js +76 -0
  25. package/dist/cli/src/commands/list.d.ts +3 -0
  26. package/dist/cli/src/commands/list.d.ts.map +1 -0
  27. package/dist/cli/src/commands/list.js +18 -0
  28. package/dist/cli/src/commands/month-info.d.ts +3 -0
  29. package/dist/cli/src/commands/month-info.d.ts.map +1 -0
  30. package/dist/cli/src/commands/month-info.js +213 -0
  31. package/dist/cli/src/commands/summary.d.ts +3 -0
  32. package/dist/cli/src/commands/summary.d.ts.map +1 -0
  33. package/dist/cli/src/commands/summary.js +249 -0
  34. package/dist/cli/src/index.d.ts +3 -0
  35. package/dist/cli/src/index.d.ts.map +1 -0
  36. package/dist/cli/src/index.js +35 -0
  37. package/dist/cli/src/utils/batches.d.ts +9 -0
  38. package/dist/cli/src/utils/batches.d.ts.map +1 -0
  39. package/dist/cli/src/utils/batches.js +61 -0
  40. package/dist/cli/src/utils/batches.test.d.ts +2 -0
  41. package/dist/cli/src/utils/batches.test.d.ts.map +1 -0
  42. package/dist/cli/src/utils/batches.test.js +119 -0
  43. package/dist/cli/src/utils/default-server.d.ts +3 -0
  44. package/dist/cli/src/utils/default-server.d.ts.map +1 -0
  45. package/dist/cli/src/utils/default-server.js +20 -0
  46. package/dist/cli/src/utils/index.d.ts +5 -0
  47. package/dist/cli/src/utils/index.d.ts.map +1 -0
  48. package/dist/cli/src/utils/index.js +5 -0
  49. package/dist/cli/src/utils/meca-processor.d.ts +28 -0
  50. package/dist/cli/src/utils/meca-processor.d.ts.map +1 -0
  51. package/dist/cli/src/utils/meca-processor.js +503 -0
  52. package/dist/cli/src/utils/meca-processor.test.d.ts +2 -0
  53. package/dist/cli/src/utils/meca-processor.test.d.ts.map +1 -0
  54. package/dist/cli/src/utils/meca-processor.test.js +123 -0
  55. package/dist/cli/src/utils/months.d.ts +36 -0
  56. package/dist/cli/src/utils/months.d.ts.map +1 -0
  57. package/dist/cli/src/utils/months.js +135 -0
  58. package/dist/cli/src/utils/months.test.d.ts +2 -0
  59. package/dist/cli/src/utils/months.test.d.ts.map +1 -0
  60. package/dist/cli/src/utils/months.test.js +209 -0
  61. package/dist/cli/src/utils/requester-pays-error.d.ts +6 -0
  62. package/dist/cli/src/utils/requester-pays-error.d.ts.map +1 -0
  63. package/dist/cli/src/utils/requester-pays-error.js +20 -0
  64. package/dist/cli/src/version.d.ts +3 -0
  65. package/dist/cli/src/version.d.ts.map +1 -0
  66. package/dist/cli/src/version.js +2 -0
  67. package/dist/cli.cjs +98815 -0
  68. package/dist/utils/src/biorxiv-parser.d.ts +51 -0
  69. package/dist/utils/src/biorxiv-parser.d.ts.map +1 -0
  70. package/dist/utils/src/biorxiv-parser.js +126 -0
  71. package/dist/utils/src/folder-structure.d.ts +44 -0
  72. package/dist/utils/src/folder-structure.d.ts.map +1 -0
  73. package/dist/utils/src/folder-structure.js +207 -0
  74. package/dist/utils/src/index.d.ts +3 -0
  75. package/dist/utils/src/index.d.ts.map +1 -0
  76. package/dist/utils/src/index.js +3 -0
  77. package/package.json +76 -0
@@ -0,0 +1,61 @@
1
/** Largest number of batches a single numeric range may expand to. */
const MAX_RANGE_SIZE = 100;
/** Matches an inclusive numeric range such as "1-10" (digits, hyphen, digits). */
const RANGE_PATTERN = /^(\d+)-(\d+)$/;

/**
 * Expand one already-trimmed token into its batch names.
 *
 * A token of the form "<start>-<end>" (digits only) becomes every integer from
 * start to end inclusive, as decimal strings. Any other token is returned
 * unchanged as a single-element list.
 *
 * @param {string} token A single batch token with no surrounding whitespace.
 * @returns {string[]} The batch names the token stands for.
 * @throws {Error} If the range start exceeds its end, or the range covers more
 *   than MAX_RANGE_SIZE batches.
 */
function expandBatchToken(token) {
  const rangeMatch = RANGE_PATTERN.exec(token);
  if (!rangeMatch) {
    // Not a numeric range: treat as a literal batch name (e.g. "batch-1").
    return [token];
  }
  const start = parseInt(rangeMatch[1], 10);
  const end = parseInt(rangeMatch[2], 10);
  if (start > end) {
    throw new Error(`Invalid batch range: start (${start}) cannot be greater than end (${end})`);
  }
  const count = end - start + 1;
  if (count > MAX_RANGE_SIZE) {
    throw new Error(`Batch range too large: ${count} batches. Maximum allowed: ${MAX_RANGE_SIZE}`);
  }
  const batches = [];
  for (let i = start; i <= end; i++) {
    batches.push(i.toString());
  }
  return batches;
}

/**
 * Parse a batch specification into a flat list of batch names.
 *
 * Supported forms:
 *  - a single batch name: "7" or "batch-1"
 *  - an inclusive numeric range: "1-10"
 *  - a comma-separated mix of the two: "1-3,batch-1,5-7"
 *
 * Surrounding whitespace is ignored for every token, including a lone
 * (comma-free) value, so " 1-3 " expands the same way as "1-3". Empty entries
 * in a comma-separated list are dropped. Input without a comma always yields at
 * least one element, so an empty string yields [""].
 *
 * @param {string} batchInput Raw batch specification.
 * @returns {string[]} Batch names in the order they were specified.
 * @throws {Error} If any range has start > end or spans more than 100 batches.
 */
export function parseBatchInput(batchInput) {
  if (batchInput.includes(',')) {
    return batchInput
      .split(',')
      .map((part) => part.trim())
      .filter((part) => part.length > 0)
      .flatMap(expandBatchToken);
  }
  // Trim here as well so a lone range or name with stray whitespace is handled
  // exactly like the same token inside a comma-separated list.
  return expandBatchToken(batchInput.trim());
}
55
/**
 * Pattern a batch name must match in full: one or more word characters
 * (letters, digits, underscore) or hyphens.
 */
const BATCH_NAME_PATTERN = /^[\w\-_]+$/;

/**
 * Check whether a batch name uses only the permitted characters.
 *
 * Accepts numeric names ("1", "42") and named batches ("batch-1", "Batch_01").
 * The empty string and names containing any other character (space, dot,
 * slash, "@", ...) are rejected.
 *
 * @param {string} batch Batch name to check.
 * @returns {boolean} true when the whole name matches the allowed pattern.
 */
export function validateBatchFormat(batch) {
  const isWellFormed = BATCH_NAME_PATTERN.test(batch);
  return isWellFormed;
}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=batches.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"batches.test.d.ts","sourceRoot":"","sources":["../../../../src/utils/batches.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,119 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { parseBatchInput, validateBatchFormat } from './batches.js';
3
// Test suite for parseBatchInput and validateBatchFormat.
describe('Batch Utilities', () => {
  describe('parseBatchInput', () => {
    // Checks, in order, that each [input, expected] pair parses to exactly `expected`.
    const expectAllParsed = (pairs) => {
      for (const [rawInput, expectedBatches] of pairs) {
        expect(parseBatchInput(rawInput)).toEqual(expectedBatches);
      }
    };

    describe('single batches', () => {
      it('should parse single numeric batch', () => {
        expectAllParsed([
          ['1', ['1']],
          ['42', ['42']],
        ]);
      });
      it('should parse single named batch', () => {
        expectAllParsed([
          ['batch-1', ['batch-1']],
          ['Batch_01', ['Batch_01']],
          ['historical-content', ['historical-content']],
        ]);
      });
    });

    describe('numeric ranges', () => {
      it('should parse simple ranges', () => {
        expectAllParsed([
          ['1-3', ['1', '2', '3']],
          ['5-10', ['5', '6', '7', '8', '9', '10']],
          ['1-1', ['1']],
        ]);
      });
      it('should handle large ranges', () => {
        const expanded = parseBatchInput('1-100');
        expect(expanded).toHaveLength(100);
        expect(expanded[0]).toBe('1');
        expect(expanded[99]).toBe('100');
      });
      it('should reject invalid ranges', () => {
        const reversed = () => parseBatchInput('10-5');
        expect(reversed).toThrow('Invalid batch range: start (10) cannot be greater than end (5)');
        // A range whose start equals its end is a valid single-item range.
        const singleItem = () => parseBatchInput('5-5');
        expect(singleItem).not.toThrow();
      });
      it('should reject ranges that are too large', () => {
        const tooLarge = [
          ['1-102', 'Batch range too large: 102 batches. Maximum allowed: 100'],
          ['1-101', 'Batch range too large: 101 batches. Maximum allowed: 100'],
        ];
        for (const [rawInput, message] of tooLarge) {
          expect(() => parseBatchInput(rawInput)).toThrow(message);
        }
        // Exactly 100 batches is the largest range that is accepted.
        expect(() => parseBatchInput('1-100')).not.toThrow();
      });
    });

    describe('comma-separated lists', () => {
      it('should parse simple comma-separated lists', () => {
        expectAllParsed([
          ['1,2,3', ['1', '2', '3']],
          ['batch-1,batch-2,batch-3', ['batch-1', 'batch-2', 'batch-3']],
        ]);
      });
      it('should handle whitespace in comma-separated lists', () => {
        expectAllParsed([
          ['1, 2, 3', ['1', '2', '3']],
          [' 1 , 2 , 3 ', ['1', '2', '3']],
        ]);
      });
      it('should filter out empty entries', () => {
        expectAllParsed([
          ['1,,2,3', ['1', '2', '3']],
          ['1, ,2,3', ['1', '2', '3']],
        ]);
      });
    });

    describe('mixed formats', () => {
      it('should handle ranges within comma-separated lists', () => {
        expectAllParsed([
          ['1-3,5,7-9', ['1', '2', '3', '5', '7', '8', '9']],
          ['1-5,10,15-17', ['1', '2', '3', '4', '5', '10', '15', '16', '17']],
        ]);
      });
      it('should handle complex mixed formats', () => {
        expectAllParsed([
          [
            '1-3,batch-1,5-7,historical',
            ['1', '2', '3', 'batch-1', '5', '6', '7', 'historical'],
          ],
        ]);
      });
    });

    describe('edge cases', () => {
      it('should handle empty string', () => {
        expectAllParsed([['', ['']]]);
      });
      it('should handle single comma', () => {
        expectAllParsed([[',', []]]);
      });
      it('should handle multiple commas', () => {
        expectAllParsed([
          [',,', []],
          ['1,,2', ['1', '2']],
        ]);
      });
    });
  });

  describe('validateBatchFormat', () => {
    // Checks that validateBatchFormat returns `expected` for every name in `names`.
    const expectVerdict = (names, expected) => {
      for (const candidate of names) {
        expect(validateBatchFormat(candidate)).toBe(expected);
      }
    };

    it('should accept valid batch names', () => {
      expectVerdict(['1', '42', 'batch-1', 'Batch_01', 'historical-content', 'content_2023'], true);
    });
    it('should reject invalid batch names', () => {
      // Empty name, then space, dot, slash and "@" — none of these are allowed.
      expectVerdict(['', 'batch 1', 'batch.1', 'batch/1', 'batch@1'], false);
    });
    it('should handle special characters correctly', () => {
      // Hyphen, underscore and plain alphanumerics are all allowed.
      expectVerdict(['batch-1', 'batch_1', 'Batch01'], true);
    });
  });
});
@@ -0,0 +1,3 @@
1
+ export declare function getCliName(): 'openrxiv' | 'biorxiv' | 'medrxiv'; // Name of the CLI alias in use; always one of these three literals.
2
+ export declare function getDefaultServer(): 'biorxiv' | 'medrxiv'; // Default server name; always one of these two literals.
3
+ //# sourceMappingURL=default-server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"default-server.d.ts","sourceRoot":"","sources":["../../../../src/utils/default-server.ts"],"names":[],"mappings":"AAEA,wBAAgB,UAAU,IAAI,UAAU,GAAG,SAAS,GAAG,SAAS,CAY/D;AAED,wBAAgB,gBAAgB,IAAI,SAAS,GAAG,SAAS,CAOxD"}
@@ -0,0 +1,20 @@
1
+ import path from 'node:path';
2
/**
 * Determine which CLI alias the current process was started as.
 *
 * The alias is taken from the file name (basename) of the script path in
 * `process.argv[1]` and matched case-insensitively as a substring, with
 * "biorxiv" checked before "medrxiv".
 *
 * @returns {'openrxiv' | 'biorxiv' | 'medrxiv'} 'biorxiv' or 'medrxiv' when the
 *   script file name contains that word; 'openrxiv' otherwise, including when
 *   no script path is available.
 */
export function getCliName() {
  // process.argv[1] is missing when Node runs without a script file (REPL,
  // `node -e`). path.basename() throws on a non-string argument, so fall back
  // to an empty name, which resolves to the generic 'openrxiv' alias.
  const scriptPath = process.argv[1];
  const commandName = typeof scriptPath === 'string' ? path.basename(scriptPath).toLowerCase() : '';
  if (commandName.includes('biorxiv')) {
    return 'biorxiv';
  }
  if (commandName.includes('medrxiv')) {
    return 'medrxiv';
  }
  return 'openrxiv';
}
14
/**
 * Pick the default server for the CLI alias currently in use.
 *
 * @returns {'biorxiv' | 'medrxiv'} 'medrxiv' when getCliName() reports the
 *   medrxiv alias; 'biorxiv' for every other alias.
 */
export function getDefaultServer() {
  // getCliName() only ever returns 'openrxiv', 'biorxiv' or 'medrxiv', so an
  // exact comparison selects the same case as a case-insensitive substring test.
  return getCliName() === 'medrxiv' ? 'medrxiv' : 'biorxiv';
}
@@ -0,0 +1,5 @@
1
+ export * from './months.js';
2
+ export * from './meca-processor.js';
3
+ export * from './batches.js';
4
+ export * from './default-server.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/utils/index.ts"],"names":[],"mappings":"AACA,cAAc,aAAa,CAAC;AAC5B,cAAc,qBAAqB,CAAC;AACpC,cAAc,cAAc,CAAC;AAC7B,cAAc,qBAAqB,CAAC"}
@@ -0,0 +1,5 @@
1
+ // Export all utility functions
2
+ export * from './months.js';
3
+ export * from './meca-processor.js';
4
+ export * from './batches.js';
5
+ export * from './default-server.js';
@@ -0,0 +1,28 @@
1
+ export interface ProcessMecaOptions { // Options object taken as the second parameter of processMecaFile.
2
+ batch: string; // Required. NOTE(review): presumably the batch identifier the MECA file belongs to — confirm.
3
+ server: 'biorxiv' | 'medrxiv'; // Required; limited to these two literal server names.
4
+ apiUrl: string; // Required. NOTE(review): presumably the base URL of the API — confirm.
5
+ output?: string; // Optional. NOTE(review): meaning not visible here; presumably an output location — confirm.
6
+ s3Key: string; // Required. NOTE(review): presumably the S3 object key of the MECA file — confirm.
7
+ apiKey?: string; // Optional. NOTE(review): presumably a credential for the API at apiUrl — confirm.
8
+ selective?: boolean; // Optional flag; its effect is not visible in this declaration.
9
+ }
10
+ export interface ProcessMecaResult { // Value that the promise returned by processMecaFile resolves to.
11
+ success: boolean; // Success status of the processing run.
12
+ paper?: any; // Optional, untyped; the extracted paper data.
13
+ error?: string; // Optional. NOTE(review): presumably an error message set when success is false — confirm.
14
+ }
15
+ /**
16
+ * Process a MECA file and extract metadata
17
+ * @param mecaPath Path to the MECA file (local file path)
18
+ * @param options Processing options
19
+ * @returns ProcessMecaResult with success status and extracted paper data
20
+ */
21
+ export declare function processMecaFile(mecaPath: string, options: ProcessMecaOptions): Promise<ProcessMecaResult>;
22
+ /**
23
+ * Preprocess XML content to fix common HTML entities that cause parsing errors
24
+ * @param xmlContent Raw XML content
25
+ * @returns Preprocessed XML content with entities replaced
26
+ */
27
+ export declare function preprocessXMLContent(xmlContent: string): string;
28
+ //# sourceMappingURL=meca-processor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"meca-processor.d.ts","sourceRoot":"","sources":["../../../../src/utils/meca-processor.ts"],"names":[],"mappings":"AAqCA,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,GAAG,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;GAKG;AACH,wBAAsB,eAAe,CACnC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,kBAAkB,GAC1B,OAAO,CAAC,iBAAiB,CAAC,CAkE5B;AAwQD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAwD/D"}