openrxiv 0.0.0 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +224 -0
  2. package/dist/cli.cjs +31725 -63733
  3. package/package.json +8 -32
  4. package/dist/cli/src/api/api-client.d.ts +0 -96
  5. package/dist/cli/src/api/api-client.d.ts.map +0 -1
  6. package/dist/cli/src/api/api-client.js +0 -257
  7. package/dist/cli/src/aws/bucket-explorer.d.ts +0 -26
  8. package/dist/cli/src/aws/bucket-explorer.d.ts.map +0 -1
  9. package/dist/cli/src/aws/bucket-explorer.js +0 -220
  10. package/dist/cli/src/aws/config.d.ts +0 -18
  11. package/dist/cli/src/aws/config.d.ts.map +0 -1
  12. package/dist/cli/src/aws/config.js +0 -191
  13. package/dist/cli/src/aws/downloader.d.ts +0 -13
  14. package/dist/cli/src/aws/downloader.d.ts.map +0 -1
  15. package/dist/cli/src/aws/downloader.js +0 -115
  16. package/dist/cli/src/aws/month-lister.d.ts +0 -18
  17. package/dist/cli/src/aws/month-lister.d.ts.map +0 -1
  18. package/dist/cli/src/aws/month-lister.js +0 -90
  19. package/dist/cli/src/commands/batch-process.d.ts +0 -3
  20. package/dist/cli/src/commands/batch-process.d.ts.map +0 -1
  21. package/dist/cli/src/commands/batch-process.js +0 -557
  22. package/dist/cli/src/commands/config.d.ts +0 -3
  23. package/dist/cli/src/commands/config.d.ts.map +0 -1
  24. package/dist/cli/src/commands/config.js +0 -42
  25. package/dist/cli/src/commands/download.d.ts +0 -3
  26. package/dist/cli/src/commands/download.d.ts.map +0 -1
  27. package/dist/cli/src/commands/download.js +0 -76
  28. package/dist/cli/src/commands/list.d.ts +0 -3
  29. package/dist/cli/src/commands/list.d.ts.map +0 -1
  30. package/dist/cli/src/commands/list.js +0 -18
  31. package/dist/cli/src/commands/month-info.d.ts +0 -3
  32. package/dist/cli/src/commands/month-info.d.ts.map +0 -1
  33. package/dist/cli/src/commands/month-info.js +0 -213
  34. package/dist/cli/src/commands/summary.d.ts +0 -3
  35. package/dist/cli/src/commands/summary.d.ts.map +0 -1
  36. package/dist/cli/src/commands/summary.js +0 -249
  37. package/dist/cli/src/index.d.ts +0 -3
  38. package/dist/cli/src/index.d.ts.map +0 -1
  39. package/dist/cli/src/index.js +0 -35
  40. package/dist/cli/src/utils/batches.d.ts +0 -9
  41. package/dist/cli/src/utils/batches.d.ts.map +0 -1
  42. package/dist/cli/src/utils/batches.js +0 -61
  43. package/dist/cli/src/utils/batches.test.d.ts +0 -2
  44. package/dist/cli/src/utils/batches.test.d.ts.map +0 -1
  45. package/dist/cli/src/utils/batches.test.js +0 -119
  46. package/dist/cli/src/utils/default-server.d.ts +0 -3
  47. package/dist/cli/src/utils/default-server.d.ts.map +0 -1
  48. package/dist/cli/src/utils/default-server.js +0 -20
  49. package/dist/cli/src/utils/index.d.ts +0 -5
  50. package/dist/cli/src/utils/index.d.ts.map +0 -1
  51. package/dist/cli/src/utils/index.js +0 -5
  52. package/dist/cli/src/utils/meca-processor.d.ts +0 -28
  53. package/dist/cli/src/utils/meca-processor.d.ts.map +0 -1
  54. package/dist/cli/src/utils/meca-processor.js +0 -503
  55. package/dist/cli/src/utils/meca-processor.test.d.ts +0 -2
  56. package/dist/cli/src/utils/meca-processor.test.d.ts.map +0 -1
  57. package/dist/cli/src/utils/meca-processor.test.js +0 -123
  58. package/dist/cli/src/utils/months.d.ts +0 -36
  59. package/dist/cli/src/utils/months.d.ts.map +0 -1
  60. package/dist/cli/src/utils/months.js +0 -135
  61. package/dist/cli/src/utils/months.test.d.ts +0 -2
  62. package/dist/cli/src/utils/months.test.d.ts.map +0 -1
  63. package/dist/cli/src/utils/months.test.js +0 -209
  64. package/dist/cli/src/utils/requester-pays-error.d.ts +0 -6
  65. package/dist/cli/src/utils/requester-pays-error.d.ts.map +0 -1
  66. package/dist/cli/src/utils/requester-pays-error.js +0 -20
  67. package/dist/cli/src/version.d.ts +0 -3
  68. package/dist/cli/src/version.d.ts.map +0 -1
  69. package/dist/cli/src/version.js +0 -2
  70. package/dist/utils/src/biorxiv-parser.d.ts +0 -51
  71. package/dist/utils/src/biorxiv-parser.d.ts.map +0 -1
  72. package/dist/utils/src/biorxiv-parser.js +0 -126
  73. package/dist/utils/src/folder-structure.d.ts +0 -44
  74. package/dist/utils/src/folder-structure.d.ts.map +0 -1
  75. package/dist/utils/src/folder-structure.js +0 -207
  76. package/dist/utils/src/index.d.ts +0 -3
  77. package/dist/utils/src/index.d.ts.map +0 -1
  78. package/dist/utils/src/index.js +0 -3
@@ -1,207 +0,0 @@
1
- /**
2
- * Utility functions for determining bioRxiv folder structure
3
- * based on the date requested.
4
- *
5
- * The bioRxiv structure is:
6
- * - Before late 2018: Files are in Back_Content/Batch_[nn]/ folders
7
- * - After late 2018: Files are in Current_Content/[Month]_[Year]/ folders
8
- */
9
/**
 * Normalizes batch input to the standard "Batch_XX" format.
 *
 * @param batch - Batch as a positive integer, or a string in various formats
 *   (e.g., "1", "batch-1", "Batch_01", "batch_01", "medRxiv_Batch_01").
 *   Surrounding whitespace in string input is ignored.
 * @param server - Server name selecting the output format. Compared
 *   case-insensitively: "medrxiv" yields "medRxiv_Batch_XX", anything else "Batch_XX".
 * @returns Normalized batch string with the number zero-padded to at least two digits
 * @throws Error if the batch is not a positive integer or a recognised batch identifier
 */
export function normalizeBatch(batch, server = getDefaultServer()) {
  const invalidBatch = () =>
    new Error(`Invalid batch format: ${batch}. Expected a positive number or batch identifier.`);

  let batchNum;
  if (typeof batch === 'number') {
    // `batch < 1` alone lets NaN and fractions through (NaN < 1 is false),
    // which would produce names such as "Batch_NaN" or "Batch_1.5".
    if (!Number.isInteger(batch) || batch < 1) {
      throw invalidBatch();
    }
    batchNum = batch;
  } else if (typeof batch === 'string') {
    // Trim first so padded input like "  batch_7 " still matches the anchored
    // prefix patterns; trim again for whitespace left after the prefix.
    const digits = batch
      .trim()
      .toLowerCase()
      .replace(/^batch[-_]?/, '') // Remove "batch", "batch-", "batch_"
      .replace(/^medrxiv[-_]?batch[-_]?/, '') // Remove "medrxiv_batch", "medrxiv-batch", etc.
      .trim();
    if (!/^\d+$/.test(digits)) {
      throw invalidBatch();
    }
    // Leading zeros are harmless with an explicit radix; "0"/"00" parse to 0
    // and are rejected below.
    batchNum = Number.parseInt(digits, 10);
    if (batchNum < 1) {
      throw invalidBatch();
    }
  } else {
    throw invalidBatch();
  }

  const formattedBatchNum = batchNum.toString().padStart(2, '0');
  return server.toLowerCase() === 'medrxiv'
    ? `medRxiv_Batch_${formattedBatchNum}`
    : `Batch_${formattedBatchNum}`;
}
44
/**
 * Determines the folder structure for a given month or batch.
 *
 * Exactly one of `options.month` and `options.batch` must be provided.
 * A batch maps to `Back_Content/<batch>/`; a month from December 2018 onward
 * maps to `Current_Content/<Month>_<Year>/`. Earlier months cannot be mapped
 * to a batch automatically and are rejected.
 *
 * @param options - Options containing `month` or `batch`, and optionally `server`
 *   (falls back to 'biorxiv' when not set)
 * @returns FolderStructure with `server`, `type`, `prefix` and `batch`
 * @throws Error if both or neither of month/batch are given, if the month
 *   format is invalid, or if the month falls in the Back_Content period
 */
export function getFolderStructure(options) {
  if (options.month && options.batch) {
    throw new Error('Either month or batch must be specified, not both');
  }
  if (!options.month && !options.batch) {
    throw new Error('Either month or batch must be specified');
  }

  // Resolve the server once and hand it to normalizeBatch explicitly, so an
  // unset options.server never triggers normalizeBatch's default-server lookup.
  const server = options.server || 'biorxiv';

  if (options.batch) {
    // If batch is specified, use Back_Content structure
    const normalizedBatch = normalizeBatch(options.batch, server);
    return {
      server,
      type: 'back',
      prefix: `Back_Content/${normalizedBatch}/`,
      batch: normalizedBatch,
    };
  }

  // Normalize month format to YYYY-MM
  const normalizedMonth = normalizeMonthToYYYYMM(options.month);
  if (!normalizedMonth) {
    throw new Error(`Invalid month format: ${options.month}. Expected YYYY-MM or Month_YYYY format.`);
  }
  const [year, monthNum] = normalizedMonth.split('-').map(Number);

  // bioRxiv switched from Back_Content to Current_Content in late 2018;
  // December 2018 is used as the first Current_Content month to be safe.
  const isBackContentPeriod = year < 2018 || (year === 2018 && monthNum < 12);
  if (isBackContentPeriod) {
    // We don't know which batch holds this month, so the user must name it.
    throw new Error(`Date ${options.month} is in the Back_Content period. Please specify a batch using --batch option. ` +
      `Available batches can be listed with 'biorxiv list' command.`);
  }

  // Use Current_Content structure
  const monthName = getMonthName(monthNum);
  return {
    server,
    type: 'current',
    prefix: `Current_Content/${monthName}_${year}/`,
    batch: `${monthName}_${year}`,
  };
}
96
/**
 * Removes duplicate folder entries, keeping the first occurrence of each.
 *
 * Two folders are duplicates when their `batch`, `server`, `type` and
 * `prefix` all match. The input array is not modified.
 *
 * @param folders - Array of folder structure objects
 * @returns A new array with duplicates removed, in the original order
 */
export function removeDuplicateFolders(folders) {
  // Track identities already seen so each folder is checked once, instead of
  // rescanning the array for every element.
  const seen = new Set();
  return folders.filter((folder) => {
    // JSON-encoding the tuple keeps field boundaries unambiguous.
    // NOTE(review): assumes the four fields are strings, as getFolderStructure produces.
    const key = JSON.stringify([folder.batch, folder.server, folder.type, folder.prefix]);
    if (seen.has(key)) {
      return false;
    }
    seen.add(key);
    return true;
  });
}
102
/**
 * Sorts folders in place: batches ('back') come before months ('current');
 * batches are ordered by ascending batch number and months from oldest to
 * newest.
 *
 * @param folders - Array of folder structure objects with `type` and `batch`
 * @returns The same array instance, sorted
 */
export function sortFoldersChronologically(folders) {
  const monthNames = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
  ];

  // Numeric part of a batch label, e.g. "medRxiv_Batch_02" -> 2.
  const batchNumberOf = (label) => Number.parseInt(label.replace(/\D/g, ''), 10);

  // Timestamp for a "Month_YYYY" label. The label is parsed explicitly because
  // Date parsing of non-ISO strings is implementation-defined and may yield
  // Invalid Date for the underscore form.
  const monthTimestampOf = (label) => {
    const parts = /^([A-Za-z]+)[-_ ](\d{4})$/.exec(label);
    if (parts !== null) {
      const wanted = parts[1].toLowerCase();
      const monthIndex = monthNames.findIndex((name) => name === wanted || name.slice(0, 3) === wanted);
      if (monthIndex !== -1) {
        return new Date(Number(parts[2]), monthIndex, 1).getTime();
      }
    }
    // Unrecognised label: fall back to the engine's own parsing.
    return new Date(label).getTime();
  };

  return folders.sort((a, b) => {
    // Put batches before months
    if (a.type === 'back' && b.type === 'current') {
      return -1;
    }
    if (a.type === 'current' && b.type === 'back') {
      return 1;
    }
    let difference = 0;
    if (a.type === 'back' && b.type === 'back') {
      // For batches, sort by batch number
      difference = batchNumberOf(a.batch) - batchNumberOf(b.batch);
    } else if (a.type === 'current' && b.type === 'current') {
      // For months, sort chronologically (oldest first)
      difference = monthTimestampOf(a.batch) - monthTimestampOf(b.batch);
    }
    // Labels that cannot be interpreted compare as equal and keep their order.
    return Number.isNaN(difference) ? 0 : difference;
  });
}
127
/**
 * Converts a month given as "YYYY-MM" or "<MonthName>_YYYY" /
 * "<MonthName>-YYYY" (e.g., "November_2018") into "YYYY-MM".
 *
 * @param month - Month string in one of the supported formats
 * @returns The month as "YYYY-MM", or null when the input is not recognised
 *   or names a month outside 1-12
 */
export function normalizeMonthToYYYYMM(month) {
  // Numeric form: return it unchanged once the month part is known to be valid.
  if (month.match(/^\d{4}-\d{2}$/)) {
    const monthPart = Number(month.split('-')[1]);
    const isValidMonth = monthPart >= 1 && monthPart <= 12;
    return isValidMonth ? month : null;
  }

  // Named form: a month name, one separator, then a four-digit year.
  const named = month.match(/^([A-Za-z]+)(?:[-_])(\d{4})$/);
  if (!named) {
    return null;
  }
  const [, nameText, yearText] = named;
  const resolved = getMonthNumber(nameText);
  if (resolved === null) {
    return null;
  }
  const paddedMonth = resolved.toString().padStart(2, '0');
  return `${yearText}-${paddedMonth}`;
}
153
/**
 * Looks up the calendar number of a month from its English name.
 *
 * The full name is tried first; if that fails, the name is compared with the
 * three-letter abbreviations ("jan", "feb", ...). Matching ignores case.
 *
 * @param monthName - Full or three-letter month name
 * @returns Month number (1-12) or null if the name is not recognised
 */
function getMonthNumber(monthName) {
  const fullNames = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
  ];
  const wanted = monthName.toLowerCase();

  // Pass 1: exact full-name match.
  for (let i = 0; i < fullNames.length; i += 1) {
    if (fullNames[i] === wanted) {
      return i + 1;
    }
  }

  // Pass 2: match against the first three letters of each name.
  for (let i = 0; i < fullNames.length; i += 1) {
    if (fullNames[i].slice(0, 3) === wanted) {
      return i + 1;
    }
  }

  return null;
}
180
/**
 * Gets the capitalised English month name for a month number.
 *
 * @param monthNum - Month number (1-12); must be an integer
 * @returns Month name (e.g., "January")
 * @throws Error if monthNum is not an integer in the range 1-12
 */
function getMonthName(monthNum) {
  const monthNames = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
  ];
  // A plain range check lets NaN and fractions through (every comparison with
  // NaN is false), which would index the array with a non-integer and return
  // undefined instead of failing.
  if (!Number.isInteger(monthNum) || monthNum < 1 || monthNum > 12) {
    throw new Error(`Invalid month number: ${monthNum}. Must be 1-12.`);
  }
  return monthNames[monthNum - 1];
}
205
/**
 * Supplies the server name used when a caller does not pass one
 * (it is the default value of normalizeBatch's `server` parameter).
 *
 * Returns 'biorxiv', the same fallback getFolderStructure applies when
 * `options.server` is not set, so that omitting the server no longer fails.
 *
 * NOTE(review): this was previously an unimplemented stub that always threw;
 * confirm 'biorxiv' is the intended default for every entry point.
 *
 * @returns The default server name
 */
function getDefaultServer() {
  return 'biorxiv';
}
@@ -1,3 +0,0 @@
1
- export * from './biorxiv-parser.js';
2
- export * from './folder-structure.js';
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../utils/src/index.ts"],"names":[],"mappings":"AACA,cAAc,qBAAqB,CAAC;AACpC,cAAc,uBAAuB,CAAC"}
@@ -1,3 +0,0 @@
1
- // Export all utility functions
2
- export * from './biorxiv-parser.js';
3
- export * from './folder-structure.js';