openrxiv 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/cli/src/api/api-client.d.ts +96 -0
  2. package/dist/cli/src/api/api-client.d.ts.map +1 -0
  3. package/dist/cli/src/api/api-client.js +257 -0
  4. package/dist/cli/src/aws/bucket-explorer.d.ts +26 -0
  5. package/dist/cli/src/aws/bucket-explorer.d.ts.map +1 -0
  6. package/dist/cli/src/aws/bucket-explorer.js +220 -0
  7. package/dist/cli/src/aws/config.d.ts +18 -0
  8. package/dist/cli/src/aws/config.d.ts.map +1 -0
  9. package/dist/cli/src/aws/config.js +191 -0
  10. package/dist/cli/src/aws/downloader.d.ts +13 -0
  11. package/dist/cli/src/aws/downloader.d.ts.map +1 -0
  12. package/dist/cli/src/aws/downloader.js +115 -0
  13. package/dist/cli/src/aws/month-lister.d.ts +18 -0
  14. package/dist/cli/src/aws/month-lister.d.ts.map +1 -0
  15. package/dist/cli/src/aws/month-lister.js +90 -0
  16. package/dist/cli/src/commands/batch-process.d.ts +3 -0
  17. package/dist/cli/src/commands/batch-process.d.ts.map +1 -0
  18. package/dist/cli/src/commands/batch-process.js +557 -0
  19. package/dist/cli/src/commands/config.d.ts +3 -0
  20. package/dist/cli/src/commands/config.d.ts.map +1 -0
  21. package/dist/cli/src/commands/config.js +42 -0
  22. package/dist/cli/src/commands/download.d.ts +3 -0
  23. package/dist/cli/src/commands/download.d.ts.map +1 -0
  24. package/dist/cli/src/commands/download.js +76 -0
  25. package/dist/cli/src/commands/list.d.ts +3 -0
  26. package/dist/cli/src/commands/list.d.ts.map +1 -0
  27. package/dist/cli/src/commands/list.js +18 -0
  28. package/dist/cli/src/commands/month-info.d.ts +3 -0
  29. package/dist/cli/src/commands/month-info.d.ts.map +1 -0
  30. package/dist/cli/src/commands/month-info.js +213 -0
  31. package/dist/cli/src/commands/summary.d.ts +3 -0
  32. package/dist/cli/src/commands/summary.d.ts.map +1 -0
  33. package/dist/cli/src/commands/summary.js +249 -0
  34. package/dist/cli/src/index.d.ts +3 -0
  35. package/dist/cli/src/index.d.ts.map +1 -0
  36. package/dist/cli/src/index.js +35 -0
  37. package/dist/cli/src/utils/batches.d.ts +9 -0
  38. package/dist/cli/src/utils/batches.d.ts.map +1 -0
  39. package/dist/cli/src/utils/batches.js +61 -0
  40. package/dist/cli/src/utils/batches.test.d.ts +2 -0
  41. package/dist/cli/src/utils/batches.test.d.ts.map +1 -0
  42. package/dist/cli/src/utils/batches.test.js +119 -0
  43. package/dist/cli/src/utils/default-server.d.ts +3 -0
  44. package/dist/cli/src/utils/default-server.d.ts.map +1 -0
  45. package/dist/cli/src/utils/default-server.js +20 -0
  46. package/dist/cli/src/utils/index.d.ts +5 -0
  47. package/dist/cli/src/utils/index.d.ts.map +1 -0
  48. package/dist/cli/src/utils/index.js +5 -0
  49. package/dist/cli/src/utils/meca-processor.d.ts +28 -0
  50. package/dist/cli/src/utils/meca-processor.d.ts.map +1 -0
  51. package/dist/cli/src/utils/meca-processor.js +503 -0
  52. package/dist/cli/src/utils/meca-processor.test.d.ts +2 -0
  53. package/dist/cli/src/utils/meca-processor.test.d.ts.map +1 -0
  54. package/dist/cli/src/utils/meca-processor.test.js +123 -0
  55. package/dist/cli/src/utils/months.d.ts +36 -0
  56. package/dist/cli/src/utils/months.d.ts.map +1 -0
  57. package/dist/cli/src/utils/months.js +135 -0
  58. package/dist/cli/src/utils/months.test.d.ts +2 -0
  59. package/dist/cli/src/utils/months.test.d.ts.map +1 -0
  60. package/dist/cli/src/utils/months.test.js +209 -0
  61. package/dist/cli/src/utils/requester-pays-error.d.ts +6 -0
  62. package/dist/cli/src/utils/requester-pays-error.d.ts.map +1 -0
  63. package/dist/cli/src/utils/requester-pays-error.js +20 -0
  64. package/dist/cli/src/version.d.ts +3 -0
  65. package/dist/cli/src/version.d.ts.map +1 -0
  66. package/dist/cli/src/version.js +2 -0
  67. package/dist/cli.cjs +98815 -0
  68. package/dist/utils/src/biorxiv-parser.d.ts +51 -0
  69. package/dist/utils/src/biorxiv-parser.d.ts.map +1 -0
  70. package/dist/utils/src/biorxiv-parser.js +126 -0
  71. package/dist/utils/src/folder-structure.d.ts +44 -0
  72. package/dist/utils/src/folder-structure.d.ts.map +1 -0
  73. package/dist/utils/src/folder-structure.js +207 -0
  74. package/dist/utils/src/index.d.ts +3 -0
  75. package/dist/utils/src/index.d.ts.map +1 -0
  76. package/dist/utils/src/index.js +3 -0
  77. package/package.json +76 -0
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Utility functions for parsing bioRxiv URLs and DOIs
3
+ */
4
+ export interface ParsedBiorxivURL {
5
+ doi: string;
6
+ baseDOI: string;
7
+ version: string | null;
8
+ fullURL: string;
9
+ isValid: boolean;
10
+ }
11
+ export interface DOIParts {
12
+ doi: string;
13
+ prefix: string;
14
+ suffix: string;
15
+ date: string | null;
16
+ identifier: string;
17
+ version: string | null;
18
+ }
19
+ /**
20
+ * Extract DOI from a bioRxiv URL
21
+ */
22
+ export declare function extractDOIFromURL(url: string): string | null;
23
+ /**
24
+ * Parse a bioRxiv DOI into its components
25
+ * Supports both legacy numeric format (2019 and earlier) and current date-based format (2019+)
26
+ */
27
+ export declare function parseDOI(doi: string): DOIParts | null;
28
+ /**
29
+ * Extract base DOI (without version)
30
+ * Works with both legacy numeric and current date-based formats
31
+ */
32
+ export declare function extractBaseDOI(doi: string): string;
33
+ /**
34
+ * Extract version from DOI
35
+ * Works with both legacy numeric and current date-based formats
36
+ */
37
+ export declare function extractVersion(doi: string): string | null;
38
+ /**
39
+ * Check if a DOI is a valid bioRxiv DOI
40
+ * Supports both legacy numeric and current date-based formats
41
+ */
42
+ export declare function isValidBiorxivDOI(doi: string): boolean;
43
+ /**
44
+ * Check if a URL is a valid bioRxiv URL
45
+ */
46
+ export declare function isValidBiorxivURL(url: string): boolean;
47
+ /**
48
+ * Parse a bioRxiv URL and extract all relevant information
49
+ */
50
+ export declare function parseBiorxivURL(url: string): ParsedBiorxivURL | null;
51
+ //# sourceMappingURL=biorxiv-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"biorxiv-parser.d.ts","sourceRoot":"","sources":["../../../../utils/src/biorxiv-parser.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAoC5D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAsCrD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAGlD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAGzD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAEtD;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAiBpE"}
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Utility functions for parsing bioRxiv URLs and DOIs
3
+ */
4
/**
 * Extract the DOI portion from a bioRxiv/medRxiv content URL, a doi.org URL,
 * or a bare `10.1101/...` DOI string.
 *
 * The first recognised marker wins, checked in this order:
 * `biorxiv.org/content/`, `medrxiv.org/content/`, `doi.org/`, then a string
 * that starts with `10.1101/`. For URL inputs the DOI is everything after the
 * marker up to (not including) the first `?` or `#`.
 *
 * Trailing page-view suffixes (`.article-info`, `.full`, `.abstract`, `.pdf`,
 * `.suppl`) are removed, including chained ones such as `.full.pdf`.
 *
 * @param url - URL or bare DOI to inspect
 * @returns The extracted DOI (any `vN` version suffix is kept), or null when
 *          the input matches none of the supported shapes
 */
export function extractDOIFromURL(url) {
  // [marker that selects the branch, pattern that captures the DOI]
  const urlSources = [
    ['biorxiv.org/content/', /biorxiv\.org\/content\/([^?#]+)/],
    ['medrxiv.org/content/', /medrxiv\.org\/content\/([^?#]+)/],
    ['doi.org/', /doi\.org\/([^?#]+)/],
  ];

  let doi = null;
  const source = urlSources.find(([marker]) => url.includes(marker));
  if (source) {
    // Once a marker is present no other branch is tried, even if the
    // capture fails (e.g. nothing follows the marker).
    const match = url.match(source[1]);
    if (match && match[1]) {
      doi = match[1];
    }
  } else if (url.startsWith('10.1101/')) {
    // Direct DOI input
    doi = url;
  }

  if (!doi) {
    return null;
  }

  // Strip every trailing view suffix, not just the last one, so that
  // ".full.pdf" does not leave a stray ".full" behind.
  return doi.replace(/(?:\.(?:article-info|full|abstract|pdf|suppl))+$/, '');
}
41
/**
 * Split a `10.1101/...` DOI into its components.
 *
 * Two shapes are recognised:
 *  - date-based: `10.1101/YYYY.MM.DD.<6-8 digits>[vN]`
 *  - legacy numeric: `10.1101/<6-8 digits>[vN]`
 *
 * @param doi - DOI string to parse
 * @returns The parsed parts, or null when the DOI matches neither shape.
 *          `suffix` has any `vN` marker removed, `date` is `YYYY-MM-DD` for
 *          date-based DOIs and null for legacy ones, and `version` keeps its
 *          leading `v` (null when absent).
 */
export function parseDOI(doi) {
  const datedShape = /^10\.1101\/(\d{4})\.(\d{2})\.(\d{2})\.(\d{6,8})(v\d+)?$/;
  const legacyShape = /^10\.1101\/(\d{6,8})(v\d+)?$/;

  let date;
  let identifier;
  let version;

  const dated = doi.match(datedShape);
  if (dated) {
    date = `${dated[1]}-${dated[2]}-${dated[3]}`;
    identifier = dated[4];
    version = dated[5];
  } else {
    const legacy = doi.match(legacyShape);
    if (!legacy) {
      return null;
    }
    date = null;
    identifier = legacy[1];
    version = legacy[2];
  }

  // Both shapes are anchored and contain exactly one "/", so cutting at the
  // first slash yields the registrant prefix and the full suffix.
  const slashAt = doi.indexOf('/');
  const prefix = doi.slice(0, slashAt);
  const suffix = doi.slice(slashAt + 1).replace(/(v\d+)$/, '');

  return {
    doi,
    prefix,
    suffix,
    date,
    identifier,
    version: version || null,
  };
}
79
/**
 * Return the DOI with a trailing version marker (`v` followed by digits)
 * removed. Input without such a marker is returned unchanged.
 *
 * @param doi - DOI string, with or without a `vN` suffix
 * @returns The DOI without its version suffix
 */
export function extractBaseDOI(doi) {
  const trailingVersion = /v\d+$/;
  return doi.replace(trailingVersion, '');
}
87
/**
 * Read the version number from a DOI that ends in `v<digits>`.
 *
 * @param doi - DOI string, with or without a `vN` suffix
 * @returns The digits after the trailing `v` (without the `v`), or null when
 *          the DOI has no version suffix
 */
export function extractVersion(doi) {
  const found = doi.match(/v(\d+)$/);
  if (!found) {
    return null;
  }
  return found[1];
}
95
/**
 * Report whether a DOI is accepted by `parseDOI`, i.e. it matches either the
 * legacy numeric or the date-based `10.1101/...` shape.
 *
 * @param doi - DOI string to check
 * @returns true when `parseDOI` yields a result, false otherwise
 */
export function isValidBiorxivDOI(doi) {
  const parts = parseDOI(doi);
  return parts !== null;
}
102
/**
 * Report whether a URL (or bare DOI) contains a DOI that passes
 * `isValidBiorxivDOI`.
 *
 * @param url - URL or DOI string to check
 * @returns false when no DOI can be extracted, otherwise the validity of the
 *          extracted DOI
 */
export function isValidBiorxivURL(url) {
  const candidate = extractDOIFromURL(url);
  if (candidate === null) {
    return false;
  }
  return isValidBiorxivDOI(candidate);
}
109
/**
 * Parse a URL (or bare DOI) into its DOI, base DOI and version.
 *
 * @param url - URL or DOI string to parse
 * @returns null when no valid DOI can be extracted; otherwise an object with
 *          the extracted `doi`, its `baseDOI` (version suffix removed), the
 *          `version` from `extractVersion` (null when absent), the original
 *          input as `fullURL`, and `isValid: true`
 */
export function parseBiorxivURL(url) {
  const doi = extractDOIFromURL(url);
  const usable = Boolean(doi) && isValidBiorxivDOI(doi);
  if (!usable) {
    return null;
  }
  return {
    doi,
    baseDOI: extractBaseDOI(doi),
    version: extractVersion(doi),
    fullURL: url,
    isValid: true,
  };
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Utility functions for determining bioRxiv folder structure
3
+ * based on the date requested.
4
+ *
5
+ * The bioRxiv structure is:
6
+ * - Before late 2018: Files are in Back_Content/Batch_[nn]/ folders
7
+ * - After late 2018: Files are in Current_Content/[Month]_[Year]/ folders
8
+ */
9
+ export interface FolderStructure {
10
+ server: 'biorxiv' | 'medrxiv';
11
+ type: 'current' | 'back';
12
+ prefix: string;
13
+ batch: string;
14
+ }
15
+ export interface FolderStructureOptions {
16
+ server?: 'biorxiv' | 'medrxiv';
17
+ month?: string;
18
+ batch?: string;
19
+ }
20
+ /**
21
+ * Normalizes batch input to the standard "Batch_XX" format
22
+ * @param batch - Batch input in various formats (e.g., "1", "batch-1", "Batch_01", "batch_01")
23
+ * @param server - Server type to determine batch format (e.g., "biorxiv", "medrxiv")
24
+ * @returns Normalized batch string in appropriate format
25
+ */
26
+ export declare function normalizeBatch(batch: string | number, server?: string): string;
27
+ /**
28
+ * Determines the folder structure for a given month or batch
29
+ * @param options - Options containing month or batch
30
+ * @returns FolderStructure with the appropriate prefix and type
31
+ */
32
+ export declare function getFolderStructure(options: FolderStructureOptions): FolderStructure;
33
+ export declare function removeDuplicateFolders(folders: FolderStructure[]): FolderStructure[];
34
+ /**
35
+ * Sort folders chronologically, putting batches before months
36
+ */
37
+ export declare function sortFoldersChronologically(folders: FolderStructure[]): FolderStructure[];
38
+ /**
39
+ * Normalizes various month formats to YYYY-MM
40
+ * @param month - Month in various formats
41
+ * @returns Normalized YYYY-MM format or null if invalid
42
+ */
43
+ export declare function normalizeMonthToYYYYMM(month: string): string | null;
44
+ //# sourceMappingURL=folder-structure.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"folder-structure.d.ts","sourceRoot":"","sources":["../../../../utils/src/folder-structure.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,MAAM,GAAE,MAA2B,GAClC,MAAM,CAsCR;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,sBAAsB,GAAG,eAAe,CAuDnF;AAED,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,eAAe,EAAE,CAWpF;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,eAAe,EAAE,CAsBxF;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAuBnE"}
@@ -0,0 +1,207 @@
1
+ /**
2
+ * Utility functions for determining bioRxiv folder structure
3
+ * based on the date requested.
4
+ *
5
+ * The bioRxiv structure is:
6
+ * - Before late 2018: Files are in Back_Content/Batch_[nn]/ folders
7
+ * - After late 2018: Files are in Current_Content/[Month]_[Year]/ folders
8
+ */
9
+ /**
10
+ * Normalizes batch input to the standard "Batch_XX" format
11
+ * @param batch - Batch input in various formats (e.g., "1", "batch-1", "Batch_01", "batch_01")
12
+ * @param server - Server type to determine batch format (e.g., "biorxiv", "medrxiv")
13
+ * @returns Normalized batch string in appropriate format
14
+ */
15
+ export function normalizeBatch(batch, server = getDefaultServer()) {
16
+ if (typeof batch === 'number') {
17
+ if (batch < 1) {
18
+ throw new Error(`Invalid batch format: ${batch}. Expected a positive number or batch identifier.`);
19
+ }
20
+ const batchNum = batch.toString().padStart(2, '0');
21
+ return server.toLowerCase() === 'medrxiv' ? `medRxiv_Batch_${batchNum}` : `Batch_${batchNum}`;
22
+ }
23
+ // Remove common prefixes and normalize
24
+ const normalized = batch
25
+ .toLowerCase()
26
+ .replace(/^batch[-_]?/i, '') // Remove "batch", "batch-", "batch_"
27
+ .replace(/^medrxiv[-_]?batch[-_]?/i, '') // Remove "medrxiv_batch", "medrxiv-batch", etc.
28
+ .replace(/^0+/, '') // Remove leading zeros
29
+ .trim();
30
+ const matchInt = normalized.match(/^\d+$/);
31
+ if (!matchInt) {
32
+ throw new Error(`Invalid batch format: ${batch}. Expected a positive number or batch identifier.`);
33
+ }
34
+ // Parse the number and format it
35
+ const batchNum = parseInt(normalized, 10);
36
+ if (isNaN(batchNum) || batchNum < 1) {
37
+ throw new Error(`Invalid batch format: ${batch}. Expected a positive number or batch identifier.`);
38
+ }
39
+ const formattedBatchNum = batchNum.toString().padStart(2, '0');
40
+ return server.toLowerCase() === 'medrxiv'
41
+ ? `medRxiv_Batch_${formattedBatchNum}`
42
+ : `Batch_${formattedBatchNum}`;
43
+ }
44
/**
 * Determines the folder structure for a given month or batch.
 *
 * Exactly one of `options.month` and `options.batch` must be set. A batch
 * maps to `Back_Content/<normalized batch>/`. A month on or after December
 * 2018 maps to `Current_Content/<MonthName>_<Year>/`. Earlier months cannot be
 * mapped to a batch automatically and are rejected.
 *
 * @param options - Options containing month or batch, and optionally the
 *        server (falls back to 'biorxiv' when omitted)
 * @returns FolderStructure with the appropriate prefix and type
 * @throws {Error} When both or neither of month/batch are given, when the
 *         month cannot be normalized, when the month predates the
 *         Current_Content period, or when the batch is invalid
 */
export function getFolderStructure(options) {
  if (options.month && options.batch) {
    throw new Error('Either month or batch must be specified, not both');
  }
  if (!options.month && !options.batch) {
    throw new Error('Either month or batch must be specified');
  }

  // Resolve the server once and pass it on explicitly, so batch naming uses
  // the same fallback as the returned `server` field instead of depending on
  // normalizeBatch's own default.
  const server = options.server || 'biorxiv';

  if (options.batch) {
    // If batch is specified, use Back_Content structure
    const normalizedBatch = normalizeBatch(options.batch, server);
    return {
      server,
      type: 'back',
      prefix: `Back_Content/${normalizedBatch}/`,
      batch: normalizedBatch,
    };
  }

  // Only the month case remains: the guards above ensure it is set.
  const normalizedMonth = normalizeMonthToYYYYMM(options.month);
  if (!normalizedMonth) {
    throw new Error(`Invalid month format: ${options.month}. Expected YYYY-MM or Month_YYYY format.`);
  }
  const [year, monthNum] = normalizedMonth.split('-').map(Number);

  // bioRxiv switched from Back_Content to Current_Content in late 2018
  // We'll use December 2018 as the cutoff point to be safe
  const cutoffDate = new Date(2018, 11, 1); // December 1, 2018 (0-indexed month)
  const requestedDate = new Date(year, monthNum - 1, 1);
  if (requestedDate < cutoffDate) {
    // Back_Content period - we don't know which batch holds this month,
    // so the user has to name the batch explicitly.
    throw new Error(`Date ${options.month} is in the Back_Content period. Please specify a batch using --batch option. ` +
      `Available batches can be listed with 'biorxiv list' command.`);
  }

  // Use Current_Content structure
  const monthName = getMonthName(monthNum);
  return {
    server,
    type: 'current',
    prefix: `Current_Content/${monthName}_${year}/`,
    batch: `${monthName}_${year}`,
  };
}
96
/**
 * Return the folders with later duplicates removed, keeping the first
 * occurrence of each. Two folders are duplicates when their `batch`,
 * `server`, `type` and `prefix` are all strictly equal. The input array is
 * not modified.
 *
 * @param folders - Folder descriptors, possibly containing repeats
 * @returns A new array containing the first occurrence of each folder
 */
export function removeDuplicateFolders(folders) {
  const sameFolder = (left, right) =>
    left.batch === right.batch &&
    left.server === right.server &&
    left.type === right.type &&
    left.prefix === right.prefix;

  return folders.filter((candidate, position) => {
    // Keep the candidate only if it is the first entry equal to itself.
    for (let i = 0; i < folders.length; i++) {
      if (sameFolder(folders[i], candidate)) {
        return i === position;
      }
    }
    return false;
  });
}
102
/**
 * Sort folders chronologically, putting batches before months.
 *
 * Batches (`type: 'back'`) are ordered by the number embedded in `batch`,
 * ascending. Months (`type: 'current'`) are ordered oldest first. Folders
 * whose order cannot be determined compare as equal.
 *
 * NOTE: the array is sorted in place and the same array is returned.
 *
 * @param folders - Folder descriptors to sort
 * @returns The same `folders` array, sorted
 */
export function sortFoldersChronologically(folders) {
  const monthNames = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
  ];

  // Timestamp for a "Month_YYYY" label. The label is parsed explicitly
  // because Date parsing of non-ISO strings is implementation-defined; other
  // label shapes fall back to the Date constructor.
  const monthTimestamp = (label) => {
    const parts = /^([A-Za-z]+)[-_](\d{4})$/.exec(label);
    const monthIndex = parts ? monthNames.indexOf(parts[1].toLowerCase()) : -1;
    if (monthIndex === -1) {
      return new Date(label).getTime();
    }
    return new Date(Number(parts[2]), monthIndex, 1).getTime();
  };

  // Number made of all digits in the label, e.g. "medRxiv_Batch_07" -> 7.
  const batchNumber = (label) => parseInt(label.replace(/\D/g, ''), 10);

  // An undeterminable difference (NaN) is treated as "equal".
  const orZero = (difference) => (Number.isNaN(difference) ? 0 : difference);

  return folders.sort((a, b) => {
    // Put batches before months
    if (a.type === 'back' && b.type === 'current') {
      return -1;
    }
    if (a.type === 'current' && b.type === 'back') {
      return 1;
    }
    // For batches, sort by batch number
    if (a.type === 'back' && b.type === 'back') {
      return orZero(batchNumber(a.batch) - batchNumber(b.batch));
    }
    // For months, sort chronologically (oldest first)
    if (a.type === 'current' && b.type === 'current') {
      return orZero(monthTimestamp(a.batch) - monthTimestamp(b.batch));
    }
    return 0;
  });
}
127
/**
 * Normalizes a month given as `YYYY-MM` or as `<MonthName>_<YYYY>` /
 * `<MonthName>-<YYYY>` to `YYYY-MM`.
 *
 * @param month - Month in one of the supported formats
 * @returns The month as `YYYY-MM`, or null when the input has an unsupported
 *          shape, a month number outside 01-12, or an unrecognised month name
 */
export function normalizeMonthToYYYYMM(month) {
  // Numeric form: return as-is once the month part is known to be 01-12.
  const numericForm = month.match(/^\d{4}-\d{2}$/);
  if (numericForm) {
    const monthPart = Number(month.split('-')[1]);
    return monthPart >= 1 && monthPart <= 12 ? month : null;
  }

  // Named form, e.g. "November_2018".
  const namedForm = month.match(/^([A-Za-z]+)(?:[-_])(\d{4})$/);
  if (!namedForm) {
    return null;
  }
  const [, monthName, year] = namedForm;
  const monthNum = getMonthNumber(monthName);
  if (monthNum === null) {
    return null;
  }
  return `${year}-${monthNum.toString().padStart(2, '0')}`;
}
153
/**
 * Gets the month number for an English month name.
 * Accepts the full name or its first three letters, in any letter case.
 *
 * @param monthName - Month name, e.g. "November" or "nov"
 * @returns Month number (1-12) or null if the name is not recognised
 */
function getMonthNumber(monthName) {
  const fullNames = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
  ];
  const wanted = monthName.toLowerCase();
  // A name matches on its full spelling or on its three-letter abbreviation.
  const position = fullNames.findIndex((full) => full === wanted || full.slice(0, 3) === wanted);
  if (position === -1) {
    return null;
  }
  return position + 1;
}
180
/**
 * Gets the capitalised English month name for a month number.
 *
 * @param monthNum - Month number (1-12)
 * @returns Month name (e.g., "January")
 * @throws {Error} When the number is below 1 or above 12
 */
function getMonthName(monthNum) {
  if (monthNum < 1 || monthNum > 12) {
    throw new Error(`Invalid month number: ${monthNum}. Must be 1-12.`);
  }
  const names = 'January February March April May June July August September October November December'.split(' ');
  const zeroBased = monthNum - 1;
  return names[zeroBased];
}
205
/**
 * Server assumed when a caller does not name one.
 *
 * Used as the default for `normalizeBatch`'s `server` parameter. It returns
 * 'biorxiv', the same fallback `getFolderStructure` applies to
 * `options.server`, so omitting the server no longer throws.
 *
 * @returns The default server identifier, 'biorxiv'
 */
function getDefaultServer() {
  return 'biorxiv';
}
@@ -0,0 +1,3 @@
1
+ export * from './biorxiv-parser.js';
2
+ export * from './folder-structure.js';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../utils/src/index.ts"],"names":[],"mappings":"AACA,cAAc,qBAAqB,CAAC;AACpC,cAAc,uBAAuB,CAAC"}
@@ -0,0 +1,3 @@
1
+ // Export all utility functions
2
+ export * from './biorxiv-parser.js';
3
+ export * from './folder-structure.js';
package/package.json ADDED
@@ -0,0 +1,76 @@
1
+ {
2
+ "name": "openrxiv",
3
+ "version": "0.0.0",
4
+ "description": "CLI tool to download openRxiv MECA files from AWS S3 for text and data mining",
5
+ "main": "dist/src/index.js",
6
+ "bin": {
7
+ "biorxiv": "./dist/cli.cjs",
8
+ "openrxiv": "./dist/cli.cjs",
9
+ "medrxiv": "./dist/cli.cjs"
10
+ },
11
+ "files": [
12
+ "dist"
13
+ ],
14
+ "type": "module",
15
+ "scripts": {
16
+ "clean": "rimraf dist",
17
+ "unlink": "npm uninstall -g biorxiv",
18
+ "link": "npm run unlink; npm link;",
19
+ "dev": "npm run link && esbuild src/index.ts --bundle --outfile=dist/cli.cjs --platform=node --external:fsevents --target=node14 --watch",
20
+ "lint": "eslint \"src/**/!(*.spec).ts\" -c ../../.eslintrc.cjs",
21
+ "lint:format": "npx prettier --check \"src/**/*.ts\"",
22
+ "test": "vitest run",
23
+ "copy:version": "echo \"const version = '\"$npm_package_version\"';\nexport default version;\" > src/version.ts",
24
+ "test:watch": "vitest watch",
25
+ "build:esm": "tsc",
26
+ "build:cli": "esbuild src/index.ts --bundle --outfile=dist/cli.cjs --platform=node --external:fsevents --target=node14",
27
+ "build": "npm-run-all -l clean copy:version -p build:cli build:esm"
28
+ },
29
+ "keywords": [
30
+ "biorxiv",
31
+ "cli",
32
+ "aws",
33
+ "s3",
34
+ "download",
35
+ "meca",
36
+ "research",
37
+ "text-mining",
38
+ "data-mining"
39
+ ],
40
+ "author": "Curvenote",
41
+ "license": "MIT",
42
+ "engines": {
43
+ "node": ">=18.0.0"
44
+ },
45
+ "dependencies": {
46
+ "@aws-sdk/client-s3": "^3.0.0",
47
+ "@aws-sdk/s3-request-presigner": "^3.0.0",
48
+ "axios": "^1.6.0",
49
+ "biorxiv-utils": "^0.0.0",
50
+ "boxen": "^8.0.1",
51
+ "character-entities": "^2.0.2",
52
+ "chalk": "^5.0.0",
53
+ "cli-progress": "^3.12.0",
54
+ "commander": "^11.0.0",
55
+ "conf": "^10.0.0",
56
+ "inquirer": "^9.0.0",
57
+ "jszip": "^3.10.1",
58
+ "ora": "^7.0.0",
59
+ "adm-zip": "^0.5.10",
60
+ "unified": "^11.0.0",
61
+ "xast-util-from-xml": "^4.0.0",
62
+ "p-limit": "^7.0.0"
63
+ },
64
+ "devDependencies": {
65
+ "@types/cli-progress": "^3.11.0",
66
+ "@types/inquirer": "^9.0.0"
67
+ },
68
+ "repository": {
69
+ "type": "git",
70
+ "url": "https://github.com/continuous-foundation/openrxiv.git"
71
+ },
72
+ "bugs": {
73
+ "url": "https://github.com/continuous-foundation/openrxiv/issues"
74
+ },
75
+ "homepage": "https://github.com/continuous-foundation/openrxiv#readme"
76
+ }