openrxiv 0.0.0 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +224 -0
  2. package/dist/cli.cjs +31725 -63733
  3. package/package.json +8 -32
  4. package/dist/cli/src/api/api-client.d.ts +0 -96
  5. package/dist/cli/src/api/api-client.d.ts.map +0 -1
  6. package/dist/cli/src/api/api-client.js +0 -257
  7. package/dist/cli/src/aws/bucket-explorer.d.ts +0 -26
  8. package/dist/cli/src/aws/bucket-explorer.d.ts.map +0 -1
  9. package/dist/cli/src/aws/bucket-explorer.js +0 -220
  10. package/dist/cli/src/aws/config.d.ts +0 -18
  11. package/dist/cli/src/aws/config.d.ts.map +0 -1
  12. package/dist/cli/src/aws/config.js +0 -191
  13. package/dist/cli/src/aws/downloader.d.ts +0 -13
  14. package/dist/cli/src/aws/downloader.d.ts.map +0 -1
  15. package/dist/cli/src/aws/downloader.js +0 -115
  16. package/dist/cli/src/aws/month-lister.d.ts +0 -18
  17. package/dist/cli/src/aws/month-lister.d.ts.map +0 -1
  18. package/dist/cli/src/aws/month-lister.js +0 -90
  19. package/dist/cli/src/commands/batch-process.d.ts +0 -3
  20. package/dist/cli/src/commands/batch-process.d.ts.map +0 -1
  21. package/dist/cli/src/commands/batch-process.js +0 -557
  22. package/dist/cli/src/commands/config.d.ts +0 -3
  23. package/dist/cli/src/commands/config.d.ts.map +0 -1
  24. package/dist/cli/src/commands/config.js +0 -42
  25. package/dist/cli/src/commands/download.d.ts +0 -3
  26. package/dist/cli/src/commands/download.d.ts.map +0 -1
  27. package/dist/cli/src/commands/download.js +0 -76
  28. package/dist/cli/src/commands/list.d.ts +0 -3
  29. package/dist/cli/src/commands/list.d.ts.map +0 -1
  30. package/dist/cli/src/commands/list.js +0 -18
  31. package/dist/cli/src/commands/month-info.d.ts +0 -3
  32. package/dist/cli/src/commands/month-info.d.ts.map +0 -1
  33. package/dist/cli/src/commands/month-info.js +0 -213
  34. package/dist/cli/src/commands/summary.d.ts +0 -3
  35. package/dist/cli/src/commands/summary.d.ts.map +0 -1
  36. package/dist/cli/src/commands/summary.js +0 -249
  37. package/dist/cli/src/index.d.ts +0 -3
  38. package/dist/cli/src/index.d.ts.map +0 -1
  39. package/dist/cli/src/index.js +0 -35
  40. package/dist/cli/src/utils/batches.d.ts +0 -9
  41. package/dist/cli/src/utils/batches.d.ts.map +0 -1
  42. package/dist/cli/src/utils/batches.js +0 -61
  43. package/dist/cli/src/utils/batches.test.d.ts +0 -2
  44. package/dist/cli/src/utils/batches.test.d.ts.map +0 -1
  45. package/dist/cli/src/utils/batches.test.js +0 -119
  46. package/dist/cli/src/utils/default-server.d.ts +0 -3
  47. package/dist/cli/src/utils/default-server.d.ts.map +0 -1
  48. package/dist/cli/src/utils/default-server.js +0 -20
  49. package/dist/cli/src/utils/index.d.ts +0 -5
  50. package/dist/cli/src/utils/index.d.ts.map +0 -1
  51. package/dist/cli/src/utils/index.js +0 -5
  52. package/dist/cli/src/utils/meca-processor.d.ts +0 -28
  53. package/dist/cli/src/utils/meca-processor.d.ts.map +0 -1
  54. package/dist/cli/src/utils/meca-processor.js +0 -503
  55. package/dist/cli/src/utils/meca-processor.test.d.ts +0 -2
  56. package/dist/cli/src/utils/meca-processor.test.d.ts.map +0 -1
  57. package/dist/cli/src/utils/meca-processor.test.js +0 -123
  58. package/dist/cli/src/utils/months.d.ts +0 -36
  59. package/dist/cli/src/utils/months.d.ts.map +0 -1
  60. package/dist/cli/src/utils/months.js +0 -135
  61. package/dist/cli/src/utils/months.test.d.ts +0 -2
  62. package/dist/cli/src/utils/months.test.d.ts.map +0 -1
  63. package/dist/cli/src/utils/months.test.js +0 -209
  64. package/dist/cli/src/utils/requester-pays-error.d.ts +0 -6
  65. package/dist/cli/src/utils/requester-pays-error.d.ts.map +0 -1
  66. package/dist/cli/src/utils/requester-pays-error.js +0 -20
  67. package/dist/cli/src/version.d.ts +0 -3
  68. package/dist/cli/src/version.d.ts.map +0 -1
  69. package/dist/cli/src/version.js +0 -2
  70. package/dist/utils/src/biorxiv-parser.d.ts +0 -51
  71. package/dist/utils/src/biorxiv-parser.d.ts.map +0 -1
  72. package/dist/utils/src/biorxiv-parser.js +0 -126
  73. package/dist/utils/src/folder-structure.d.ts +0 -44
  74. package/dist/utils/src/folder-structure.d.ts.map +0 -1
  75. package/dist/utils/src/folder-structure.js +0 -207
  76. package/dist/utils/src/index.d.ts +0 -3
  77. package/dist/utils/src/index.d.ts.map +0 -1
  78. package/dist/utils/src/index.js +0 -3
@@ -1,135 +0,0 @@
1
- /**
2
- * Month utility functions for batch processing
3
- */
4
/**
 * Build the list of "YYYY-MM" month keys from the current month back to
 * December 2018 (inclusive), newest first.
 *
 * If the system clock reports a year before 2018 the result is empty.
 *
 * @returns {string[]} Month keys in descending order.
 */
export function generateMonthRange() {
    const today = new Date();
    const collected = [];
    // The cursor is pinned to day 1, so stepping back one month at a time
    // can never overflow into a neighbouring month.
    for (
        const cursor = new Date(today.getFullYear(), today.getMonth(), 1);
        cursor.getFullYear() >= 2018;
        cursor.setMonth(cursor.getMonth() - 1)
    ) {
        const monthPart = String(cursor.getMonth() + 1).padStart(2, '0');
        const key = `${cursor.getFullYear()}-${monthPart}`;
        collected.push(key);
        // 2018-12 is the oldest month to include; stop right after adding it.
        if (key === '2018-12') {
            break;
        }
    }
    return collected;
}
26
/**
 * Expand a user-supplied month specification into a list of month keys.
 *
 * The input may be a single month ("2025-01"), a year wildcard ("2025-*"),
 * or a comma-separated mix of both. Every entry is trimmed and empty entries
 * are dropped, whether or not the input contains a comma, so " 2025-01 "
 * and "2025-01" give the same result and an empty string yields [].
 * Non-wildcard entries are passed through without format validation;
 * duplicates and input order are preserved.
 *
 * @param {string} monthInput - Raw month specification.
 * @returns {string[]} Expanded month keys, in input order.
 * @throws {Error} If an entry contains "*" but is not of the form "YYYY-*".
 */
export function parseMonthInput(monthInput) {
    return monthInput
        .split(',')
        .map((entry) => entry.trim())
        .filter((entry) => entry.length > 0)
        .flatMap((entry) => (entry.includes('*') ? parseWildcardPattern(entry) : [entry]));
}
/**
 * Expand a year wildcard such as "2025-*" into the months of that year.
 *
 * Months later than the current month are never returned: the current year
 * is cut off at the current month, and a year in the future yields [].
 *
 * @param {string} pattern - Wildcard of the exact form "YYYY-*".
 * @returns {string[]} "YYYY-MM" keys in ascending order.
 * @throws {Error} If the pattern is not of the form "YYYY-*".
 */
export function parseWildcardPattern(pattern) {
    const yearMatch = pattern.match(/^(\d{4})-\*$/);
    if (!yearMatch) {
        throw new Error(`Invalid wildcard pattern: ${pattern}. Use format like "2025-*"`);
    }
    const yearText = yearMatch[1];
    const year = parseInt(yearText, 10);
    const now = new Date();
    const currentYear = now.getFullYear();
    if (year > currentYear) {
        // Every month of a future year is in the future.
        return [];
    }
    // Past years get all 12 months; the current year stops at the current month.
    const lastMonth = year === currentYear ? now.getMonth() + 1 : 12;
    const months = [];
    for (let month = 1; month <= lastMonth; month++) {
        months.push(`${yearText}-${String(month).padStart(2, '0')}`);
    }
    return months;
}
81
/**
 * Check that a string is a month key of the form "YYYY-MM".
 *
 * The month must be 01-12 and the year must not exceed 2100; there is no
 * lower bound on the year.
 *
 * @param {string} month - Candidate month key.
 * @returns {boolean} True when the key is well-formed and within range.
 */
export function validateMonthFormat(month) {
    if (!/^\d{4}-\d{2}$/.test(month)) {
        return false;
    }
    const [yearNum, monthInt] = month.split('-').map((piece) => parseInt(piece, 10));
    const outOfRange = yearNum > 2100 || monthInt < 1 || monthInt > 12;
    return !outOfRange;
}
94
/**
 * Sort "YYYY-MM" month keys from oldest to newest.
 *
 * The array is sorted in place and the same array instance is returned.
 *
 * @param {string[]} months - Month keys to sort (mutated).
 * @returns {string[]} The input array, now in ascending order.
 */
export function sortMonthsChronologically(months) {
    const toYearAndMonth = (key) => key.split('-').map(Number);
    months.sort((left, right) => {
        const [leftYear, leftMonth] = toYearAndMonth(left);
        const [rightYear, rightMonth] = toYearAndMonth(right);
        // Year decides first; month only breaks ties within the same year.
        return leftYear !== rightYear ? leftYear - rightYear : leftMonth - rightMonth;
    });
    return months;
}
107
/**
 * Format the current local-time month as a "YYYY-MM" key.
 *
 * @returns {string} The current month, e.g. "2025-01".
 */
export function getCurrentMonth() {
    const today = new Date();
    // getMonth() is zero-based, so shift by one before zero-padding.
    const monthPart = `${today.getMonth() + 1}`.padStart(2, '0');
    return [today.getFullYear(), monthPart].join('-');
}
116
/**
 * Format the month before the current local-time month as a "YYYY-MM" key.
 *
 * January rolls back to December of the previous year.
 *
 * @returns {string} The previous month, e.g. "2024-12".
 */
export function getPreviousMonth() {
    const now = new Date();
    // Anchor on day 1 of the previous month. Shifting the month of "today"
    // directly would overflow on days 29-31 (e.g. Mar 31 -> "Feb 31" ->
    // Mar 3) and report the current month instead of the previous one.
    const previous = new Date(now.getFullYear(), now.getMonth() - 1, 1);
    const year = previous.getFullYear();
    const month = String(previous.getMonth() + 1).padStart(2, '0');
    return `${year}-${month}`;
}
126
/**
 * Tell whether a "YYYY-MM" month key lies after the current local-time month.
 *
 * The current month itself is not considered to be in the future.
 *
 * @param {string} month - Month key to test.
 * @returns {boolean} True only for months strictly after the current one.
 */
export function isFutureMonth(month) {
    const today = new Date();
    const thisYear = today.getFullYear();
    const [year, monthNum] = month.split('-').map(Number);
    if (year > thisYear) {
        return true;
    }
    if (year !== thisYear) {
        return false;
    }
    // Same year: compare against the one-based current month.
    return monthNum > today.getMonth() + 1;
}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=months.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"months.test.d.ts","sourceRoot":"","sources":["../../../../src/utils/months.test.ts"],"names":[],"mappings":""}
@@ -1,209 +0,0 @@
1
- import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2
- import { generateMonthRange, parseMonthInput, parseWildcardPattern, validateMonthFormat, sortMonthsChronologically, getCurrentMonth, getPreviousMonth, isFutureMonth, } from './months.js';
3
- import { getFolderStructure, removeDuplicateFolders, sortFoldersChronologically, } from 'biorxiv-utils';
4
- describe('Month Utilities', () => {
5
- beforeEach(() => {
6
- // Mock the current date to 2025-01-15 for consistent testing
7
- const mockDate = new Date('2025-01-15T12:00:00Z');
8
- vi.useFakeTimers();
9
- vi.setSystemTime(mockDate);
10
- });
11
- afterEach(() => {
12
- vi.useRealTimers();
13
- });
14
- describe('generateMonthRange', () => {
15
- it('should generate months from current month back to 2018-12', () => {
16
- const months = generateMonthRange();
17
- // Should start from current month (2025-01)
18
- expect(months[0]).toBe('2025-01');
19
- // Should end at 2018-12
20
- expect(months[months.length - 1]).toBe('2018-12');
21
- // Should include 2018-12
22
- expect(months).toContain('2018-12');
23
- // Should not include 2018-11 or earlier
24
- expect(months).not.toContain('2018-11');
25
- expect(months).not.toContain('2017-12');
26
- // Should have correct number of months
27
- // From 2018-12 through 2025-01 inclusive:
28
- // 1 (2018-12) + 6 * 12 (2019..2024) + 1 (2025-01) = 74 months
29
- expect(months).toHaveLength(74);
30
- });
31
- it('should generate months in descending order (newest first)', () => {
32
- const months = generateMonthRange();
33
- // First few months should be newest
34
- expect(months[0]).toBe('2025-01');
35
- expect(months[1]).toBe('2024-12');
36
- expect(months[2]).toBe('2024-11');
37
- // Last few months should be oldest
38
- expect(months[months.length - 3]).toBe('2019-02');
39
- expect(months[months.length - 2]).toBe('2019-01');
40
- expect(months[months.length - 1]).toBe('2018-12');
41
- });
42
- });
43
- describe('parseMonthInput', () => {
44
- it('should handle single month', () => {
45
- const result = parseMonthInput('2025-01');
46
- expect(result).toEqual(['2025-01']);
47
- });
48
- it('should handle comma-separated months', () => {
49
- const result = parseMonthInput('2025-01,2024-12,2024-11');
50
- expect(result).toEqual(['2025-01', '2024-12', '2024-11']);
51
- });
52
- it('should handle comma-separated months with spaces', () => {
53
- const result = parseMonthInput(' 2025-01 , 2024-12 , 2024-11 ');
54
- expect(result).toEqual(['2025-01', '2024-12', '2024-11']);
55
- });
56
- it('should handle wildcard pattern', () => {
57
- const result = parseMonthInput('2024-*');
58
- expect(result).toHaveLength(12);
59
- expect(result).toContain('2024-01');
60
- expect(result).toContain('2024-12');
61
- });
62
- it('should filter out empty strings', () => {
63
- const result = parseMonthInput('2025-01,,2024-12,');
64
- expect(result).toEqual(['2025-01', '2024-12']);
65
- });
66
- });
67
- describe('parseWildcardPattern', () => {
68
- it('should parse valid wildcard pattern', () => {
69
- const result = parseWildcardPattern('2024-*');
70
- expect(result).toHaveLength(12);
71
- expect(result).toContain('2024-01');
72
- expect(result).toContain('2024-06');
73
- expect(result).toContain('2024-12');
74
- });
75
- it('should handle current year and stop at current month', () => {
76
- const result = parseWildcardPattern('2025-*');
77
- // Since we're mocking 2025-01, should only include January
78
- expect(result).toHaveLength(1);
79
- expect(result).toEqual(['2025-01']);
80
- });
81
- it('should throw error for invalid wildcard pattern', () => {
82
- expect(() => parseWildcardPattern('2024-')).toThrow('Invalid wildcard pattern');
83
- expect(() => parseWildcardPattern('2024-*x')).toThrow('Invalid wildcard pattern');
84
- expect(() => parseWildcardPattern('*-2024')).toThrow('Invalid wildcard pattern');
85
- });
86
- it('should handle past years correctly', () => {
87
- const result = parseWildcardPattern('2020-*');
88
- expect(result).toHaveLength(12);
89
- expect(result).toContain('2020-01');
90
- expect(result).toContain('2020-12');
91
- });
92
- });
93
- describe('validateMonthFormat', () => {
94
- it('should validate correct month formats', () => {
95
- expect(validateMonthFormat('2025-01')).toBe(true);
96
- expect(validateMonthFormat('2024-12')).toBe(true);
97
- expect(validateMonthFormat('2018-12')).toBe(true);
98
- expect(validateMonthFormat('2020-06')).toBe(true);
99
- });
100
- it('should reject invalid month formats', () => {
101
- expect(validateMonthFormat('2025-1')).toBe(false); // Missing leading zero
102
- expect(validateMonthFormat('2025-13')).toBe(false); // Invalid month
103
- expect(validateMonthFormat('2025-00')).toBe(false); // Invalid month
104
- expect(validateMonthFormat('2025-1a')).toBe(false); // Non-numeric
105
- expect(validateMonthFormat('2025')).toBe(false); // Missing month
106
- expect(validateMonthFormat('2025-01-01')).toBe(false); // Too many parts
107
- });
108
- it('should reject months outside valid year range', () => {
109
- expect(validateMonthFormat('2101-01')).toBe(false); // After 2100
110
- });
111
- it('should accept boundary years', () => {
112
- expect(validateMonthFormat('2018-01')).toBe(true);
113
- expect(validateMonthFormat('2018-12')).toBe(true);
114
- expect(validateMonthFormat('2100-01')).toBe(true);
115
- expect(validateMonthFormat('2100-12')).toBe(true);
116
- });
117
- });
118
- describe('sortMonthsChronologically', () => {
119
- it('should sort months in chronological order (oldest first)', () => {
120
- const months = ['2025-01', '2024-12', '2024-01', '2025-02'];
121
- const sorted = sortMonthsChronologically(months);
122
- expect(sorted).toEqual(['2024-01', '2024-12', '2025-01', '2025-02']);
123
- });
124
- it('should handle months within same year', () => {
125
- const months = ['2024-12', '2024-01', '2024-06'];
126
- const sorted = sortMonthsChronologically(months);
127
- expect(sorted).toEqual(['2024-01', '2024-06', '2024-12']);
128
- });
129
- it('should handle single month', () => {
130
- const months = ['2024-06'];
131
- const sorted = sortMonthsChronologically(months);
132
- expect(sorted).toEqual(['2024-06']);
133
- });
134
- it('should handle empty array', () => {
135
- const months = [];
136
- const sorted = sortMonthsChronologically(months);
137
- expect(sorted).toEqual([]);
138
- });
139
- });
140
- describe('getCurrentMonth', () => {
141
- it('should return current month in YYYY-MM format', () => {
142
- const currentMonth = getCurrentMonth();
143
- expect(currentMonth).toBe('2025-01');
144
- });
145
- });
146
- describe('getPreviousMonth', () => {
147
- it('should return previous month in YYYY-MM format', () => {
148
- const previousMonth = getPreviousMonth();
149
- expect(previousMonth).toBe('2024-12');
150
- });
151
- it('should handle year boundary correctly', () => {
152
- // Mock to January 2024 to test year boundary
153
- vi.setSystemTime(new Date('2024-01-15T12:00:00Z'));
154
- const previousMonth = getPreviousMonth();
155
- expect(previousMonth).toBe('2023-12');
156
- });
157
- });
158
- describe('isFutureMonth', () => {
159
- it('should identify future months correctly', () => {
160
- expect(isFutureMonth('2025-02')).toBe(true);
161
- expect(isFutureMonth('2025-12')).toBe(true);
162
- expect(isFutureMonth('2026-01')).toBe(true);
163
- });
164
- it('should identify current and past months correctly', () => {
165
- expect(isFutureMonth('2025-01')).toBe(false); // Current month
166
- expect(isFutureMonth('2024-12')).toBe(false); // Past month
167
- expect(isFutureMonth('2024-01')).toBe(false); // Past month
168
- expect(isFutureMonth('2018-12')).toBe(false); // Past month
169
- });
170
- it('should handle year boundaries correctly', () => {
171
- expect(isFutureMonth('2024-12')).toBe(false); // Past year
172
- expect(isFutureMonth('2026-01')).toBe(true); // Future year
173
- });
174
- });
175
- describe('Integration tests', () => {
176
- it('should handle complete workflow: parse, validate, sort, deduplicate', () => {
177
- const input = '2025-01,2024-12,2025-01,2024-11,2024-12';
178
- // Parse input
179
- const parsed = parseMonthInput(input);
180
- expect(parsed).toEqual(['2025-01', '2024-12', '2025-01', '2024-11', '2024-12']);
181
- // Validate all months
182
- const valid = parsed.filter(validateMonthFormat);
183
- expect(valid).toEqual(['2025-01', '2024-12', '2025-01', '2024-11', '2024-12']);
184
- // Remove duplicates
185
- const folders = valid.map((month) => getFolderStructure({ month }));
186
- const unique = removeDuplicateFolders(folders);
187
- expect(unique.length).toEqual(3);
188
- // Sort chronologically
189
- const sorted = sortFoldersChronologically(unique);
190
- expect(sorted.length).toEqual(3);
191
- expect(sorted[0].batch).toEqual('November_2024');
192
- expect(sorted[1].batch).toEqual('December_2024');
193
- expect(sorted[2].batch).toEqual('January_2025');
194
- });
195
- it('should handle wildcard with validation and sorting', () => {
196
- const input = '2024-*';
197
- // Parse wildcard
198
- const parsed = parseMonthInput(input);
199
- expect(parsed).toHaveLength(12);
200
- // Validate all months
201
- const valid = parsed.filter(validateMonthFormat);
202
- expect(valid).toHaveLength(12);
203
- // Sort chronologically
204
- const sorted = sortMonthsChronologically(valid);
205
- expect(sorted[0]).toBe('2024-01');
206
- expect(sorted[11]).toBe('2024-12');
207
- });
208
- });
209
- });
@@ -1,6 +0,0 @@
1
- /**
2
- * Utility function to display the requester-pays error message
3
- * Used when S3 operations fail due to requester-pays bucket requirements
4
- */
5
- export declare function displayRequesterPaysError(): void;
6
- //# sourceMappingURL=requester-pays-error.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"requester-pays-error.d.ts","sourceRoot":"","sources":["../../../../src/utils/requester-pays-error.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,yBAAyB,IAAI,IAAI,CAehD"}
@@ -1,20 +0,0 @@
1
- /**
2
- * Print a fixed, multi-line troubleshooting message for requester-pays S3 failures via console.error.
3
- * Takes no arguments and returns nothing; it only logs, it does not throw or exit.
4
- */
5
- export function displayRequesterPaysError() {
6
- console.error(`
7
- ❌ Operation failed: S3 bucket requires requester-pays
8
- 💡 This bucket has requester-pays enabled, which means:
9
- • You need to pay for data transfer costs
10
- • Your AWS credentials must be configured
11
- • The bucket policy must allow your account
12
-
13
- 🔧 To fix this:
14
- 1. Ensure your AWS credentials are configured
15
- 2. Verify you have permission to access the bucket
16
- 3. Add the --requester-pays flag to your command
17
-
18
- 📚 For more help, see: https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html
19
- `);
20
- }
@@ -1,3 +0,0 @@
1
- declare const version = "0.0.0";
2
- export default version;
3
- //# sourceMappingURL=version.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"version.d.ts","sourceRoot":"","sources":["../../../src/version.ts"],"names":[],"mappings":"AAAA,QAAA,MAAM,OAAO,UAAU,CAAC;AACxB,eAAe,OAAO,CAAC"}
@@ -1,2 +0,0 @@
1
- const version = '0.0.0'; // Version string for this build; exported as the module default below
2
- export default version;
@@ -1,51 +0,0 @@
1
- /**
2
- * Utility functions for parsing bioRxiv URLs and DOIs
3
- */
4
- export interface ParsedBiorxivURL { // Result object returned by parseBiorxivURL for an accepted URL
5
- doi: string; // DOI as extracted by extractDOIFromURL; may still end in a "vN" version suffix
6
- baseDOI: string; // doi with any trailing "vN" version suffix removed
7
- version: string | null; // Digits after the trailing "v", without the "v" (e.g. "3"); null when absent. Note: DOIParts.version keeps the "v"
8
- fullURL: string; // The URL exactly as it was passed in
9
- isValid: boolean; // Always true on a returned object; rejected input yields null instead
10
- }
11
- export interface DOIParts { // Components of a DOI as returned by parseDOI
12
- doi: string; // The DOI exactly as it was passed in
13
- prefix: string; // Text before the "/" (always "10.1101" for an accepted DOI)
14
- suffix: string; // Text after the "/" with any trailing "vN" removed
15
- date: string | null; // "YYYY-MM-DD" for date-based DOIs; null for legacy numeric DOIs
16
- identifier: string; // The 6-8 digit numeric identifier
17
- version: string | null; // Version tag including the "v" (e.g. "v2"); null when absent
18
- }
19
- /**
20
- * Extract DOI from a bioRxiv URL
21
- */
22
- export declare function extractDOIFromURL(url: string): string | null;
23
- /**
24
- * Parse a bioRxiv DOI into its components
25
- * Supports both legacy numeric format (2019 and earlier) and current date-based format (2019+)
26
- */
27
- export declare function parseDOI(doi: string): DOIParts | null;
28
- /**
29
- * Extract base DOI (without version)
30
- * Works with both legacy numeric and current date-based formats
31
- */
32
- export declare function extractBaseDOI(doi: string): string;
33
- /**
34
- * Extract version from DOI
35
- * Works with both legacy numeric and current date-based formats
36
- */
37
- export declare function extractVersion(doi: string): string | null;
38
- /**
39
- * Check if a DOI is a valid bioRxiv DOI
40
- * Supports both legacy numeric and current date-based formats
41
- */
42
- export declare function isValidBiorxivDOI(doi: string): boolean;
43
- /**
44
- * Check if a URL is a valid bioRxiv URL
45
- */
46
- export declare function isValidBiorxivURL(url: string): boolean;
47
- /**
48
- * Parse a bioRxiv URL and extract all relevant information
49
- */
50
- export declare function parseBiorxivURL(url: string): ParsedBiorxivURL | null;
51
- //# sourceMappingURL=biorxiv-parser.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"biorxiv-parser.d.ts","sourceRoot":"","sources":["../../../../utils/src/biorxiv-parser.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAoC5D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAsCrD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAGlD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAGzD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAEtD;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAiBpE"}
@@ -1,126 +0,0 @@
1
- /**
2
- * Utility functions for parsing bioRxiv URLs and DOIs
3
- */
4
/**
 * Extract the DOI portion from a bioRxiv/medRxiv content URL, a doi.org
 * URL, or a bare DOI string starting with "10.1101/".
 *
 * Query strings and fragments are ignored for URL inputs. Trailing slashes
 * and trailing page suffixes are removed, including stacked ones such as
 * ".full.pdf" or ".full.pdf+html", so that the result is the DOI itself
 * (still carrying its "vN" version suffix, if any). The result is not
 * validated here.
 *
 * @param {string} url - URL or bare DOI.
 * @returns {string | null} The extracted DOI, or null when the input
 *   matches none of the recognised shapes.
 */
export function extractDOIFromURL(url) {
    // Checked in this order: bioRxiv content, medRxiv content, doi.org.
    const urlPatterns = [
        /biorxiv\.org\/content\/([^?#]+)/,
        /medrxiv\.org\/content\/([^?#]+)/,
        /doi\.org\/([^?#]+)/,
    ];
    let doi = null;
    for (const pattern of urlPatterns) {
        const match = url.match(pattern);
        if (match) {
            doi = match[1];
            break;
        }
    }
    // Fall back to treating the whole input as a bare DOI.
    if (doi === null && url.startsWith('10.1101/')) {
        doi = url;
    }
    if (doi === null) {
        return null;
    }
    // Strip the whole run of page suffixes rather than just the last one,
    // otherwise ".full.pdf" would leave ".full" behind.
    const pageSuffixes = /(?:\.(?:supplementary-material|article-metrics|article-info|full-text|full|abstract|pdf|suppl)(?:\+html)?)+$/;
    return doi.replace(/\/+$/, '').replace(pageSuffixes, '');
}
41
/**
 * Split a bioRxiv-style DOI into its components.
 *
 * Two shapes are accepted, each with an optional trailing "vN" version:
 *   - date-based: 10.1101/YYYY.MM.DD.XXXXXX
 *   - legacy numeric: 10.1101/XXXXXX
 * where the identifier is 6 to 8 digits long.
 *
 * @param {string} doi - DOI to parse.
 * @returns {{doi: string, prefix: string, suffix: string, date: (string|null),
 *   identifier: string, version: (string|null)} | null} The parsed parts
 *   (version keeps its leading "v"; date is "YYYY-MM-DD" or null for legacy
 *   DOIs), or null when the DOI matches neither shape.
 */
export function parseDOI(doi) {
    // One pattern covers both shapes: the date segment is simply optional.
    const found = doi.match(/^10\.1101\/(?:(\d{4})\.(\d{2})\.(\d{2})\.)?(\d{6,8})(v\d+)?$/);
    if (found === null) {
        return null;
    }
    const [, year, month, day, identifier, versionTag] = found;
    const slashAt = doi.indexOf('/');
    const afterSlash = doi.slice(slashAt + 1);
    return {
        doi,
        prefix: doi.slice(0, slashAt),
        // The version tag, when present, is always the tail of the DOI.
        suffix: versionTag ? afterSlash.slice(0, -versionTag.length) : afterSlash,
        date: year ? `${year}-${month}-${day}` : null,
        identifier,
        version: versionTag || null,
    };
}
79
/**
 * Return a DOI with its trailing "vN" version suffix removed.
 *
 * The input is not validated; any string ending in "v" followed by digits
 * is shortened, and anything else is returned unchanged.
 *
 * @param {string} doi - DOI, with or without a version suffix.
 * @returns {string} The DOI without the version suffix.
 */
export function extractBaseDOI(doi) {
    const versionStart = doi.search(/v\d+$/);
    return versionStart === -1 ? doi : doi.slice(0, versionStart);
}
87
/**
 * Read the version number from the end of a DOI.
 *
 * Only the digits are returned; the leading "v" is not included
 * (e.g. "...577295v3" gives "3").
 *
 * @param {string} doi - DOI, with or without a version suffix.
 * @returns {string | null} The version digits, or null when the DOI does
 *   not end in "v" followed by digits.
 */
export function extractVersion(doi) {
    const [, digits = null] = doi.match(/v(\d+)$/) || [];
    return digits;
}
95
/**
 * Tell whether a DOI is one that parseDOI accepts, i.e. it has either the
 * legacy numeric or the date-based 10.1101 shape.
 *
 * @param {string} doi - DOI to check.
 * @returns {boolean} True when parseDOI returns a result for the DOI.
 */
export function isValidBiorxivDOI(doi) {
    const parsed = parseDOI(doi);
    return parsed !== null;
}
102
/**
 * Tell whether a DOI can be extracted from the input and that DOI is valid.
 *
 * Anything extractDOIFromURL understands is accepted, so this also covers
 * medRxiv content URLs, doi.org URLs and bare "10.1101/..." DOIs.
 *
 * @param {string} url - URL or bare DOI to check.
 * @returns {boolean} True when a valid DOI could be extracted.
 */
export function isValidBiorxivURL(url) {
    const doi = extractDOIFromURL(url);
    if (doi === null) {
        return false;
    }
    return isValidBiorxivDOI(doi);
}
109
/**
 * Extract and validate the DOI in a URL and return it with derived fields.
 *
 * @param {string} url - URL or bare DOI to parse.
 * @returns {{doi: string, baseDOI: string, version: (string|null),
 *   fullURL: string, isValid: boolean} | null} Null when no valid DOI can
 *   be extracted. Otherwise: the extracted DOI, the DOI without its version
 *   suffix, the version digits without the leading "v" (or null), the input
 *   URL unchanged, and isValid set to true.
 */
export function parseBiorxivURL(url) {
    const doi = extractDOIFromURL(url);
    const usable = Boolean(doi) && isValidBiorxivDOI(doi);
    if (!usable) {
        return null;
    }
    return {
        doi,
        baseDOI: extractBaseDOI(doi),
        version: extractVersion(doi),
        fullURL: url,
        isValid: true,
    };
}
@@ -1,44 +0,0 @@
1
- /**
2
- * Utility functions for determining bioRxiv folder structure
3
- * based on the date requested.
4
- *
5
- * The bioRxiv structure is:
6
- * - Before late 2018: Files are in Back_Content/Batch_[nn]/ folders
7
- * - After late 2018: Files are in Current_Content/[Month]_[Year]/ folders
8
- */
9
- export interface FolderStructure {
10
- server: 'biorxiv' | 'medrxiv';
11
- type: 'current' | 'back';
12
- prefix: string;
13
- batch: string;
14
- }
15
- export interface FolderStructureOptions {
16
- server?: 'biorxiv' | 'medrxiv';
17
- month?: string;
18
- batch?: string;
19
- }
20
- /**
21
- * Normalizes batch input to the standard "Batch_XX" format
22
- * @param batch - Batch input in various formats (e.g., "1", "batch-1", "Batch_01", "batch_01")
23
- * @param server - Server type to determine batch format (e.g., "biorxiv", "medrxiv")
24
- * @returns Normalized batch string in appropriate format
25
- */
26
- export declare function normalizeBatch(batch: string | number, server?: string): string;
27
- /**
28
- * Determines the folder structure for a given month or batch
29
- * @param options - Options containing month or batch
30
- * @returns FolderStructure with the appropriate prefix and type
31
- */
32
- export declare function getFolderStructure(options: FolderStructureOptions): FolderStructure;
33
- export declare function removeDuplicateFolders(folders: FolderStructure[]): FolderStructure[];
34
- /**
35
- * Sort folders chronologically, putting batches before months
36
- */
37
- export declare function sortFoldersChronologically(folders: FolderStructure[]): FolderStructure[];
38
- /**
39
- * Normalizes various month formats to YYYY-MM
40
- * @param month - Month in various formats
41
- * @returns Normalized YYYY-MM format or null if invalid
42
- */
43
- export declare function normalizeMonthToYYYYMM(month: string): string | null;
44
- //# sourceMappingURL=folder-structure.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"folder-structure.d.ts","sourceRoot":"","sources":["../../../../utils/src/folder-structure.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,MAAM,GAAE,MAA2B,GAClC,MAAM,CAsCR;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,sBAAsB,GAAG,eAAe,CAuDnF;AAED,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,eAAe,EAAE,CAWpF;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,eAAe,EAAE,CAsBxF;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAuBnE"}