biorxiv 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/cli/src/api/api-client.d.ts +96 -0
  2. package/dist/cli/src/api/api-client.d.ts.map +1 -0
  3. package/dist/cli/src/api/api-client.js +257 -0
  4. package/dist/cli/src/aws/bucket-explorer.d.ts +26 -0
  5. package/dist/cli/src/aws/bucket-explorer.d.ts.map +1 -0
  6. package/dist/cli/src/aws/bucket-explorer.js +220 -0
  7. package/dist/cli/src/aws/config.d.ts +18 -0
  8. package/dist/cli/src/aws/config.d.ts.map +1 -0
  9. package/dist/cli/src/aws/config.js +191 -0
  10. package/dist/cli/src/aws/downloader.d.ts +13 -0
  11. package/dist/cli/src/aws/downloader.d.ts.map +1 -0
  12. package/dist/cli/src/aws/downloader.js +115 -0
  13. package/dist/cli/src/aws/month-lister.d.ts +18 -0
  14. package/dist/cli/src/aws/month-lister.d.ts.map +1 -0
  15. package/dist/cli/src/aws/month-lister.js +90 -0
  16. package/dist/cli/src/commands/batch-process.d.ts +3 -0
  17. package/dist/cli/src/commands/batch-process.d.ts.map +1 -0
  18. package/dist/cli/src/commands/batch-process.js +557 -0
  19. package/dist/cli/src/commands/config.d.ts +3 -0
  20. package/dist/cli/src/commands/config.d.ts.map +1 -0
  21. package/dist/cli/src/commands/config.js +42 -0
  22. package/dist/cli/src/commands/download.d.ts +3 -0
  23. package/dist/cli/src/commands/download.d.ts.map +1 -0
  24. package/dist/cli/src/commands/download.js +76 -0
  25. package/dist/cli/src/commands/list.d.ts +3 -0
  26. package/dist/cli/src/commands/list.d.ts.map +1 -0
  27. package/dist/cli/src/commands/list.js +18 -0
  28. package/dist/cli/src/commands/month-info.d.ts +3 -0
  29. package/dist/cli/src/commands/month-info.d.ts.map +1 -0
  30. package/dist/cli/src/commands/month-info.js +213 -0
  31. package/dist/cli/src/commands/summary.d.ts +3 -0
  32. package/dist/cli/src/commands/summary.d.ts.map +1 -0
  33. package/dist/cli/src/commands/summary.js +249 -0
  34. package/dist/cli/src/index.d.ts +3 -0
  35. package/dist/cli/src/index.d.ts.map +1 -0
  36. package/dist/cli/src/index.js +35 -0
  37. package/dist/cli/src/utils/batches.d.ts +9 -0
  38. package/dist/cli/src/utils/batches.d.ts.map +1 -0
  39. package/dist/cli/src/utils/batches.js +61 -0
  40. package/dist/cli/src/utils/batches.test.d.ts +2 -0
  41. package/dist/cli/src/utils/batches.test.d.ts.map +1 -0
  42. package/dist/cli/src/utils/batches.test.js +119 -0
  43. package/dist/cli/src/utils/default-server.d.ts +3 -0
  44. package/dist/cli/src/utils/default-server.d.ts.map +1 -0
  45. package/dist/cli/src/utils/default-server.js +20 -0
  46. package/dist/cli/src/utils/index.d.ts +5 -0
  47. package/dist/cli/src/utils/index.d.ts.map +1 -0
  48. package/dist/cli/src/utils/index.js +5 -0
  49. package/dist/cli/src/utils/meca-processor.d.ts +28 -0
  50. package/dist/cli/src/utils/meca-processor.d.ts.map +1 -0
  51. package/dist/cli/src/utils/meca-processor.js +503 -0
  52. package/dist/cli/src/utils/meca-processor.test.d.ts +2 -0
  53. package/dist/cli/src/utils/meca-processor.test.d.ts.map +1 -0
  54. package/dist/cli/src/utils/meca-processor.test.js +123 -0
  55. package/dist/cli/src/utils/months.d.ts +36 -0
  56. package/dist/cli/src/utils/months.d.ts.map +1 -0
  57. package/dist/cli/src/utils/months.js +135 -0
  58. package/dist/cli/src/utils/months.test.d.ts +2 -0
  59. package/dist/cli/src/utils/months.test.d.ts.map +1 -0
  60. package/dist/cli/src/utils/months.test.js +209 -0
  61. package/dist/cli/src/utils/requester-pays-error.d.ts +6 -0
  62. package/dist/cli/src/utils/requester-pays-error.d.ts.map +1 -0
  63. package/dist/cli/src/utils/requester-pays-error.js +20 -0
  64. package/dist/cli/src/version.d.ts +3 -0
  65. package/dist/cli/src/version.d.ts.map +1 -0
  66. package/dist/cli/src/version.js +2 -0
  67. package/dist/cli.cjs +98815 -0
  68. package/dist/utils/src/biorxiv-parser.d.ts +51 -0
  69. package/dist/utils/src/biorxiv-parser.d.ts.map +1 -0
  70. package/dist/utils/src/biorxiv-parser.js +126 -0
  71. package/dist/utils/src/folder-structure.d.ts +44 -0
  72. package/dist/utils/src/folder-structure.d.ts.map +1 -0
  73. package/dist/utils/src/folder-structure.js +207 -0
  74. package/dist/utils/src/index.d.ts +3 -0
  75. package/dist/utils/src/index.d.ts.map +1 -0
  76. package/dist/utils/src/index.js +3 -0
  77. package/package.json +76 -0
@@ -0,0 +1,96 @@
1
+ /**
2
+ * openRxiv API Client
3
+ * Provides access to bioRxiv and medRxiv APIs for fetching preprint metadata
4
+ */
5
+ export interface WorkDetails {
6
+ doi: string;
7
+ title: string;
8
+ authors: string;
9
+ author_corresponding: string;
10
+ author_corresponding_institution: string;
11
+ date: string;
12
+ version: string;
13
+ type: string;
14
+ license: string;
15
+ category: string;
16
+ jats_xml_path: string;
17
+ abstract: string;
18
+ funding: FundingInfo[];
19
+ published: string;
20
+ server: 'biorxiv' | 'medrxiv';
21
+ }
22
+ export interface FundingInfo {
23
+ name: string;
24
+ id: string;
25
+ 'id-type': string;
26
+ award: string;
27
+ }
28
+ export interface ApiResponse {
29
+ collection: WorkDetails[];
30
+ messages: ApiMessage[];
31
+ }
32
+ export interface ApiMessage {
33
+ cursor: number;
34
+ count: number;
35
+ total: number;
36
+ limit: number;
37
+ offset: number;
38
+ }
39
+ export interface ApiOptions {
40
+ format?: 'json' | 'xml' | 'html';
41
+ server?: 'biorxiv' | 'medrxiv';
42
+ timeout?: number;
43
+ }
44
+ export declare class OpenRxivApiClient {
45
+ private options;
46
+ private baseUrl;
47
+ private defaultTimeout;
48
+ constructor(options?: ApiOptions);
49
+ /**
50
+ * Get content details for a specific DOI
51
+ * Endpoint: /details/[server]/[DOI]/na/[format]
52
+ * Note: The API expects base DOIs without version numbers
53
+ */
54
+ getContentDetail(doi: string, options?: Partial<ApiOptions>): Promise<WorkDetails | null>;
55
+ /**
56
+ * Get content details for multiple DOIs
57
+ */
58
+ getContentDetails(dois: string[], options?: Partial<ApiOptions>): Promise<(WorkDetails | null)[]>;
59
+ /**
60
+ * Get all versions of a preprint
61
+ * This is useful for versioned preprints where the API returns multiple versions
62
+ */
63
+ getAllVersions(doi: string, options?: Partial<ApiOptions>): Promise<WorkDetails[]>;
64
+ /**
65
+ * Get content details for a date range
66
+ * Endpoint: /details/[server]/[start_date]/[end_date]/[cursor]/[format]
67
+ */
68
+ getContentDetailsByDateRange(startDate: string, endDate: string, cursor?: number, options?: Partial<ApiOptions>): Promise<ApiResponse | null>;
69
+ /**
70
+ * Get content details for recent posts
71
+ * Endpoint: /details/[server]/[count]/[cursor]/[format]
72
+ */
73
+ getRecentContentDetails(count: number, cursor?: number, options?: Partial<ApiOptions>): Promise<ApiResponse | null>;
74
+ /**
75
+ * Get content details for recent days
76
+ * Endpoint: /details/[server]/[days]d/[cursor]/[format]
77
+ */
78
+ getContentDetailsByDays(days: number, cursor?: number, options?: Partial<ApiOptions>): Promise<ApiResponse | null>;
79
+ /**
80
+ * Get content details with subject category filter
81
+ */
82
+ getContentDetailsByCategory(startDate: string, endDate: string, category: string, cursor?: number, options?: Partial<ApiOptions>): Promise<ApiResponse | null>;
83
+ /**
84
+ * Make HTTP request with timeout and error handling
85
+ */
86
+ private makeRequest;
87
+ }
88
+ /**
89
+ * Utility function to create an openRxiv API client (bioRxiv or medRxiv; the server defaults to bioRxiv)
90
+ */
91
+ export declare function createOpenRxivApiClient(options?: ApiOptions): OpenRxivApiClient;
92
+ /**
93
+ * Utility function to guess the server from a DOI (heuristic: 'medrxiv' if the string contains "medrxiv", otherwise 'biorxiv')
94
+ */
95
+ export declare function getServerFromDOI(doi: string): 'biorxiv' | 'medrxiv';
96
+ //# sourceMappingURL=api-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"api-client.d.ts","sourceRoot":"","sources":["../../../../src/api/api-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,gCAAgC,EAAE,MAAM,CAAC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;CAC/B;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,WAAW,EAAE,CAAC;IAC1B,QAAQ,EAAE,UAAU,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;IACjC,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,qBAAa,iBAAiB;IAIhB,OAAO,CAAC,OAAO;IAH3B,OAAO,CAAC,OAAO,CAA6B;IAC5C,OAAO,CAAC,cAAc,CAAS;gBAEX,OAAO,GAAE,UAAe;IAS5C;;;;OAIG;IACG,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;IAyC/F;;OAEG;IACG,iBAAiB,CACrB,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,CAAC,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC;IAelC;;;OAGG;IACG,cAAc,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IAyCxF;;;OAGG;IACG,4BAA4B,CAChC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,MAAM,GAAE,MAAU,EAClB,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;IAwB9B;;;OAGG;IACG,uBAAuB,CAC3B,KAAK,EAAE,MAAM,EACb,MAAM,GAAE,MAAU,EAClB,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;IAwB9B;;;OAGG;IACG,uBAAuB,CAC3B,IAAI,EAAE,MAAM,EACZ,MAAM,GAAE,MAAU,EAClB,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OA
AO,CAAC,WAAW,GAAG,IAAI,CAAC;IAwB9B;;OAEG;IACG,2BAA2B,CAC/B,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,MAAM,GAAE,MAAU,EAClB,OAAO,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,GAC5B,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC;IA0B9B;;OAEG;YACW,WAAW;CA0C1B;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,CAAC,EAAE,UAAU,GAAG,iBAAiB,CAE/E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,CAOnE"}
@@ -0,0 +1,257 @@
1
+ /**
2
+ * openRxiv API Client
3
+ * Provides access to bioRxiv and medRxiv APIs for fetching preprint metadata
4
+ */
5
+ import version from '../version.js';
6
+ export class OpenRxivApiClient {
7
+ constructor(options = {}) {
8
+ this.options = options;
9
+ this.baseUrl = 'https://api.biorxiv.org';
10
+ this.defaultTimeout = 10000; // 10 seconds
11
+ this.options = {
12
+ format: 'json',
13
+ server: 'biorxiv',
14
+ timeout: this.defaultTimeout,
15
+ ...options,
16
+ };
17
+ }
18
+ /**
19
+ * Get content details for a specific DOI
20
+ * Endpoint: /details/[server]/[DOI]/na/[format]
21
+ * Note: The API expects base DOIs without version numbers
22
+ */
23
+ async getContentDetail(doi, options) {
24
+ const opts = { ...this.options, ...options };
25
+ const server = opts.server || 'biorxiv';
26
+ const format = opts.format || 'json';
27
+ try {
28
+ // Remove version number from DOI for API query
29
+ const baseDOI = doi.replace(/v\d+$/, '');
30
+ // Don't encode the DOI - the API expects literal forward slashes
31
+ const url = `${this.baseUrl}/details/${server}/${baseDOI}/na/${format}`;
32
+ console.log(`🔍 Fetching content details from: ${url}`);
33
+ const response = await this.makeRequest(url, opts.timeout);
34
+ if (!response) {
35
+ return null;
36
+ }
37
+ // Parse response based on format
38
+ let data;
39
+ if (format === 'json') {
40
+ data = response;
41
+ }
42
+ else {
43
+ // For XML/HTML, we'd need to parse differently
44
+ throw new Error(`Format ${format} not yet implemented`);
45
+ }
46
+ // Return the first (and should be only) item in the collection
47
+ if (data.collection && data.collection.length > 0) {
48
+ return data.collection[0];
49
+ }
50
+ return null;
51
+ }
52
+ catch (error) {
53
+ console.error(`❌ Error fetching content details for DOI ${doi}:`, error);
54
+ throw new Error(`Failed to fetch content details: ${error instanceof Error ? error.message : 'Unknown error'}`);
55
+ }
56
+ }
57
+ /**
58
+ * Get content details for multiple DOIs
59
+ */
60
+ async getContentDetails(dois, options) {
61
+ const results = await Promise.allSettled(dois.map((doi) => this.getContentDetail(doi, options)));
62
+ return results.map((result) => {
63
+ if (result.status === 'fulfilled') {
64
+ return result.value;
65
+ }
66
+ else {
67
+ console.error('Failed to fetch DOI:', result.reason);
68
+ return null;
69
+ }
70
+ });
71
+ }
72
+ /**
73
+ * Get all versions of a preprint
74
+ * This is useful for versioned preprints where the API returns multiple versions
75
+ */
76
+ async getAllVersions(doi, options) {
77
+ const opts = { ...this.options, ...options };
78
+ const server = opts.server || 'biorxiv';
79
+ const format = opts.format || 'json';
80
+ try {
81
+ // Remove version number from DOI for API query
82
+ const baseDOI = doi.replace(/v\d+$/, '');
83
+ // Don't encode the DOI - the API expects literal forward slashes
84
+ const url = `${this.baseUrl}/details/${server}/${baseDOI}/na/${format}`;
85
+ console.log(`🔍 Fetching all versions from: ${url}`);
86
+ const response = await this.makeRequest(url, opts.timeout);
87
+ if (!response) {
88
+ return [];
89
+ }
90
+ // Parse response based on format
91
+ let data;
92
+ if (format === 'json') {
93
+ data = response;
94
+ }
95
+ else {
96
+ // For XML/HTML, we'd need to parse differently
97
+ throw new Error(`Format ${format} not yet implemented`);
98
+ }
99
+ // Return all versions in the collection
100
+ if (data.collection && data.collection.length > 0) {
101
+ return data.collection;
102
+ }
103
+ return [];
104
+ }
105
+ catch (error) {
106
+ console.error(`❌ Error fetching all versions for DOI ${doi}:`, error);
107
+ throw new Error(`Failed to fetch all versions: ${error instanceof Error ? error.message : 'Unknown error'}`);
108
+ }
109
+ }
110
+ /**
111
+ * Get content details for a date range
112
+ * Endpoint: /details/[server]/[start_date]/[end_date]/[cursor]
113
+ */
114
+ async getContentDetailsByDateRange(startDate, endDate, cursor = 0, options) {
115
+ const opts = { ...this.options, ...options };
116
+ const server = opts.server || 'biorxiv';
117
+ const format = opts.format || 'json';
118
+ try {
119
+ const url = `${this.baseUrl}/details/${server}/${startDate}/${endDate}/${cursor}/${format}`;
120
+ console.log(`🔍 Fetching content details for date range: ${startDate} to ${endDate}`);
121
+ const response = await this.makeRequest(url, opts.timeout);
122
+ if (!response) {
123
+ return null;
124
+ }
125
+ return response;
126
+ }
127
+ catch (error) {
128
+ console.error(`❌ Error fetching content details for date range:`, error);
129
+ throw new Error(`Failed to fetch content details: ${error instanceof Error ? error.message : 'Unknown error'}`);
130
+ }
131
+ }
132
+ /**
133
+ * Get content details for recent posts
134
+ * Endpoint: /details/[server]/[count]/[cursor]
135
+ */
136
+ async getRecentContentDetails(count, cursor = 0, options) {
137
+ const opts = { ...this.options, ...options };
138
+ const server = opts.server || 'biorxiv';
139
+ const format = opts.format || 'json';
140
+ try {
141
+ const url = `${this.baseUrl}/details/${server}/${count}/${cursor}/${format}`;
142
+ console.log(`🔍 Fetching ${count} recent content details`);
143
+ const response = await this.makeRequest(url, opts.timeout);
144
+ if (!response) {
145
+ return null;
146
+ }
147
+ return response;
148
+ }
149
+ catch (error) {
150
+ console.error(`❌ Error fetching recent content details:`, error);
151
+ throw new Error(`Failed to fetch recent content details: ${error instanceof Error ? error.message : 'Unknown error'}`);
152
+ }
153
+ }
154
+ /**
155
+ * Get content details for recent days
156
+ * Endpoint: /details/[server]/[days]d/[cursor]
157
+ */
158
+ async getContentDetailsByDays(days, cursor = 0, options) {
159
+ const opts = { ...this.options, ...options };
160
+ const server = opts.server || 'biorxiv';
161
+ const format = opts.format || 'json';
162
+ try {
163
+ const url = `${this.baseUrl}/details/${server}/${days}d/${cursor}/${format}`;
164
+ console.log(`🔍 Fetching content details for last ${days} days`);
165
+ const response = await this.makeRequest(url, opts.timeout);
166
+ if (!response) {
167
+ return null;
168
+ }
169
+ return response;
170
+ }
171
+ catch (error) {
172
+ console.error(`❌ Error fetching content details for days:`, error);
173
+ throw new Error(`Failed to fetch content details: ${error instanceof Error ? error.message : 'Unknown error'}`);
174
+ }
175
+ }
176
+ /**
177
+ * Get content details with subject category filter
178
+ */
179
+ async getContentDetailsByCategory(startDate, endDate, category, cursor = 0, options) {
180
+ const opts = { ...this.options, ...options };
181
+ const server = opts.server || 'biorxiv';
182
+ const format = opts.format || 'json';
183
+ try {
184
+ // Encode category (replace spaces with underscores or URL encode)
185
+ const encodedCategory = category.replace(/\s+/g, '_');
186
+ const url = `${this.baseUrl}/details/${server}/${startDate}/${endDate}/${cursor}/${format}?category=${encodedCategory}`;
187
+ console.log(`🔍 Fetching content details for category: ${category}`);
188
+ const response = await this.makeRequest(url, opts.timeout);
189
+ if (!response) {
190
+ return null;
191
+ }
192
+ return response;
193
+ }
194
+ catch (error) {
195
+ console.error(`❌ Error fetching content details for category:`, error);
196
+ throw new Error(`Failed to fetch content details: ${error instanceof Error ? error.message : 'Unknown error'}`);
197
+ }
198
+ }
199
+ /**
200
+ * Make HTTP request with timeout and error handling
201
+ */
202
+ async makeRequest(url, timeout) {
203
+ const controller = new AbortController();
204
+ const timeoutId = setTimeout(() => controller.abort(), timeout || this.defaultTimeout);
205
+ try {
206
+ const response = await fetch(url, {
207
+ signal: controller.signal,
208
+ headers: {
209
+ Accept: 'application/json',
210
+ 'User-Agent': `biorxiv-cli/${version}`,
211
+ },
212
+ });
213
+ clearTimeout(timeoutId);
214
+ if (!response.ok) {
215
+ if (response.status === 404) {
216
+ console.log(`📭 No content found for the requested DOI`);
217
+ return null;
218
+ }
219
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
220
+ }
221
+ const contentType = response.headers.get('content-type');
222
+ if (contentType && contentType.includes('application/json')) {
223
+ return await response.json();
224
+ }
225
+ else {
226
+ return await response.text();
227
+ }
228
+ }
229
+ catch (error) {
230
+ clearTimeout(timeoutId);
231
+ if (error instanceof Error) {
232
+ if (error.name === 'AbortError') {
233
+ throw new Error('Request timed out');
234
+ }
235
+ throw error;
236
+ }
237
+ throw new Error('Unknown error occurred');
238
+ }
239
+ }
240
+ }
241
/**
 * Factory wrapper around the `OpenRxivApiClient` constructor.
 *
 * @param options Optional client defaults, passed to the constructor unchanged
 * @returns A newly constructed client
 */
export function createOpenRxivApiClient(options) {
  const client = new OpenRxivApiClient(options);
  return client;
}
247
/**
 * Guess which preprint server a DOI string belongs to.
 *
 * This is only a substring heuristic: anything containing "medrxiv" maps to
 * medRxiv and everything else is assumed to be bioRxiv. Callers that need
 * certainty should query both servers or rely on other metadata.
 *
 * @param doi DOI (or DOI-like string) to inspect
 * @returns 'medrxiv' or 'biorxiv'
 */
export function getServerFromDOI(doi) {
  return doi.includes('medrxiv') ? 'medrxiv' : 'biorxiv';
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Get the S3 bucket name based on the server
3
+ */
4
+ export declare function getBucketName(server?: 'biorxiv' | 'medrxiv'): string;
5
+ export interface ListOptions {
6
+ month?: string;
7
+ batch?: string;
8
+ limit?: number;
9
+ server?: 'biorxiv' | 'medrxiv';
10
+ }
11
+ export interface SearchOptions {
12
+ type?: 'pdf' | 'xml' | 'all';
13
+ limit?: number;
14
+ }
15
+ export interface ContentItem {
16
+ key: string;
17
+ size: number;
18
+ lastModified: Date;
19
+ type: 'meca' | 'pdf' | 'xml' | 'other';
20
+ }
21
+ export declare function listBucketContent(options: ListOptions): Promise<void>;
22
+ export declare function getContentInfo(path: string, options?: {
23
+ detailed?: boolean;
24
+ server?: 'biorxiv' | 'medrxiv';
25
+ }): Promise<void>;
26
+ //# sourceMappingURL=bucket-explorer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bucket-explorer.d.ts","sourceRoot":"","sources":["../../../../src/aws/bucket-explorer.ts"],"names":[],"mappings":"AAOA;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,GAAE,SAAS,GAAG,SAA8B,GAAG,MAAM,CAUxF;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;CAChC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,KAAK,CAAC;IAC7B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,IAAI,CAAC;IACnB,IAAI,EAAE,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,OAAO,CAAC;CACxC;AAED,wBAAsB,iBAAiB,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAuE3E;AA+GD,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE;IAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAA;CAAO,GACnE,OAAO,CAAC,IAAI,CAAC,CAyCf"}
@@ -0,0 +1,220 @@
1
+ import { ListObjectsV2Command, HeadObjectCommand } from '@aws-sdk/client-s3';
2
+ import chalk from 'chalk';
3
+ import { getS3Client } from './config.js';
4
+ import { getFolderStructure } from 'biorxiv-utils';
5
+ import { getDefaultServer } from '../utils/default-server.js';
6
/**
 * Map a server name to its S3 bucket.
 *
 * The lookup is case-insensitive. An unrecognised server is treated as fatal:
 * an error is printed and the process exits with status 1.
 *
 * @param server 'biorxiv' or 'medrxiv'; defaults to `getDefaultServer()`
 * @returns The bucket name for that server
 */
export function getBucketName(server = getDefaultServer()) {
  const bucketsByServer = {
    medrxiv: 'medrxiv-src-monthly',
    biorxiv: 'biorxiv-src-monthly',
  };
  const normalized = server.toLowerCase();
  // Own-property check so inherited keys such as "constructor" are never treated as servers.
  if (Object.prototype.hasOwnProperty.call(bucketsByServer, normalized)) {
    return bucketsByServer[normalized];
  }
  console.error(`❌ Error: Invalid server ${server}, must be "biorxiv" or "medrxiv"`);
  process.exit(1);
}
20
/**
 * Print the objects stored under one month or batch of a server's bucket.
 *
 * With neither `month` nor `batch` set, the available folder structure is
 * printed instead (see `listFolder`). Otherwise up to `limit` objects are
 * listed, following continuation tokens so that limits larger than a single
 * ListObjectsV2 page are honoured.
 *
 * @param options `month`, `batch`, `limit` (default 50) and `server`
 *                (default from `getDefaultServer()`)
 * @returns Resolves once the listing has been printed
 * @throws Error prefixed with "Failed to list bucket content:" when the limit
 *         is not a positive integer or the S3 request fails
 */
export async function listBucketContent(options) {
  // ListObjectsV2 never returns more than this many keys per request.
  const S3_PAGE_SIZE = 1000;
  const client = await getS3Client();
  const { month, batch, limit = 50, server = getDefaultServer() } = options;
  const bucketName = getBucketName(server);
  console.log(chalk.blue(`Listing ${server} bucket content...`));
  console.log(chalk.blue('===================================='));
  try {
    // If no month or batch specified, show the available content structure
    if (!month && !batch) {
      await listFolder(client, server);
      return;
    }
    // The limit may arrive as a string from the command line.
    const maxItems = parseInt(String(limit), 10);
    if (!Number.isInteger(maxItems) || maxItems < 1) {
      throw new Error(`Invalid limit "${limit}", expected a positive integer`);
    }
    // Use folder structure utility to determine the correct prefix
    const folder = getFolderStructure({ month, batch, server });
    console.log(chalk.gray(`🔍 Content Type: ${folder.type === 'current' ? 'Current Content' : 'Back Content'}`));
    if (folder.batch) {
      console.log(chalk.gray(`🔍 Batch: ${folder.batch}`));
    }
    const items = [];
    let continuationToken;
    do {
      const response = await client.send(new ListObjectsV2Command({
        Bucket: bucketName,
        Prefix: folder.prefix,
        MaxKeys: Math.min(maxItems - items.length, S3_PAGE_SIZE),
        ContinuationToken: continuationToken,
        RequestPayer: 'requester',
      }));
      items.push(...(response.Contents || []));
      continuationToken = response.IsTruncated ? response.NextContinuationToken : undefined;
    } while (continuationToken && items.length < maxItems);
    if (items.length === 0) {
      console.log(chalk.yellow('No content found'));
      return;
    }
    console.log(chalk.green(`Found ${items.length} items:`));
    console.log('');
    for (const item of items) {
      if (!item.Key)
        continue;
      const type = getContentType(item.Key);
      const size = formatFileSize(item.Size || 0);
      const date = item.LastModified ? item.LastModified.toLocaleDateString() : 'Unknown';
      console.log(`${chalk.cyan(item.Key)}`);
      console.log(` Type: ${chalk.yellow(type)} | Size: ${chalk.blue(size)} | Modified: ${chalk.gray(date)}`);
      console.log('');
    }
  }
  catch (error) {
    if (error instanceof Error) {
      throw new Error(`Failed to list bucket content: ${error.message}`);
    }
    throw error;
  }
}
75
/**
 * Print an overview of what a server's bucket contains: the monthly folders
 * under `Current_Content/` (newest first) and the batch folders under
 * `Back_Content/` (sorted by name), followed by usage examples.
 *
 * Failures are reported as a warning rather than thrown.
 *
 * @param client S3 client used to send the listing commands
 * @param server Server whose bucket is inspected; defaults to `getDefaultServer()`
 */
async function listFolder(client, server = getDefaultServer()) {
  const monthOrder = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
  ];
  // Folder names look like "<Month>_<Year>"; newest year first, then newest month.
  const newestFirst = (left, right) => {
    const [leftMonth, leftYear] = left.split('_');
    const [rightMonth, rightYear] = right.split('_');
    return leftYear !== rightYear
      ? parseInt(rightYear) - parseInt(leftYear)
      : monthOrder.indexOf(rightMonth) - monthOrder.indexOf(leftMonth);
  };
  // Names of the immediate sub-folders of `root`, or null when there are none.
  const fetchSubfolders = async (bucket, root) => {
    const listing = await client.send(new ListObjectsV2Command({
      Bucket: bucket,
      Prefix: root,
      Delimiter: '/',
      MaxKeys: 1000,
      RequestPayer: 'requester',
    }));
    const found = listing.CommonPrefixes;
    if (!found || found.length === 0) {
      return null;
    }
    return found
      .map((entry) => (entry.Prefix == null ? undefined : entry.Prefix.replace(root, '').replace('/', '')))
      .filter(Boolean);
  };
  const printFolders = (names) => {
    names.forEach((name) => {
      console.log(` ${chalk.green('📁')} ${chalk.cyan(name)}`);
    });
  };

  console.log(chalk.cyan('📁 Available Content Structure'));
  console.log(chalk.cyan('=============================='));
  console.log('');
  try {
    // Monthly folders
    console.log(chalk.blue('📅 Current Content (Monthly):'));
    console.log(chalk.gray(' Recent content organized by month'));
    console.log('');
    const bucketName = getBucketName(server);
    const months = await fetchSubfolders(bucketName, 'Current_Content/');
    if (months === null) {
      console.log(chalk.gray(' No monthly content found'));
    } else {
      printFolders(months.sort(newestFirst));
    }
    console.log('');

    // Historical batches
    console.log(chalk.blue('📦 Back Content (Historical Batches):'));
    console.log(chalk.gray(' Legacy content organized in batches'));
    console.log('');
    const batches = await fetchSubfolders(bucketName, 'Back_Content/');
    if (batches === null) {
      console.log(chalk.gray(' No historical batches found'));
    } else {
      printFolders(batches.sort());
    }
    console.log('');

    console.log(chalk.blue('💡 Usage Examples:'));
    console.log(chalk.gray(` List specific month: ${server} list --month 2024-01`));
    console.log(chalk.gray(` List specific batch: ${server} list --batch Batch_01`));
    console.log(chalk.gray(` List with limit: ${server} list --month 2024-01 --limit 100`));
    console.log('');
  } catch (error) {
    // Best-effort overview: a failed listing is downgraded to a warning.
    if (error instanceof Error) {
      console.log(chalk.yellow(`⚠️ Warning: Could not fetch content structure: ${error.message}`));
      console.log(chalk.gray(' This may be due to AWS permissions or network issues'));
      console.log('');
    }
  }
}
168
/**
 * Print the HEAD information (size, content type, last-modified time and,
 * optionally, user metadata) for one object in a server's bucket.
 *
 * @param path Object key inside the bucket
 * @param options `detailed` (also print metadata, default false) and `server`
 *                (default from `getDefaultServer()`)
 * @returns Resolves once the information has been printed
 * @throws Error prefixed with "Failed to get content info:" when the request fails
 */
export async function getContentInfo(path, options = {}) {
  const s3 = await getS3Client();
  const { detailed = false, server = getDefaultServer() } = options;
  const bucketName = getBucketName(server);
  console.log(chalk.blue(`Getting info for: ${path}`));
  console.log(chalk.blue('=============================='));
  try {
    const head = await s3.send(new HeadObjectCommand({
      Bucket: bucketName,
      Key: path,
      RequestPayer: 'requester',
    }));
    console.log(chalk.green('✓ Content found'));
    console.log('');
    console.log(`Key: ${chalk.cyan(path)}`);
    console.log(`Size: ${chalk.blue(formatFileSize(head.ContentLength || 0))}`);
    console.log(`Type: ${chalk.yellow(head.ContentType || 'Unknown')}`);
    const modified = head.LastModified == null ? undefined : head.LastModified.toLocaleString();
    console.log(`Last Modified: ${chalk.gray(modified || 'Unknown')}`);
    if (detailed && head.Metadata) {
      console.log('');
      console.log(chalk.blue('Metadata:'));
      Object.entries(head.Metadata).forEach(([field, content]) => {
        console.log(` ${field}: ${content}`);
      });
    }
  } catch (error) {
    if (!(error instanceof Error)) {
      throw error;
    }
    throw new Error(`Failed to get content info: ${error.message}`);
  }
}
204
/**
 * Classify an object key by its file extension.
 *
 * @param key Object key to inspect (the match is case-sensitive)
 * @returns 'meca', 'pdf' or 'xml' for the matching extension, otherwise 'other'
 */
function getContentType(key) {
  for (const kind of ['meca', 'pdf', 'xml']) {
    if (key.endsWith(`.${kind}`)) {
      return kind;
    }
  }
  return 'other';
}
213
/**
 * Format a byte count as a short human-readable string using 1024-based
 * units, with at most two decimals (trailing zeros dropped).
 *
 * Non-positive and non-finite inputs are reported as "0 B". Values beyond the
 * largest known unit stay in TB rather than running off the unit table.
 *
 * @param bytes Size in bytes
 * @returns For example "0 B", "1.5 KB" or "5 MB"
 */
function formatFileSize(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) {
    return '0 B';
  }
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
  // Repeated division keeps exact unit boundaries exact, where a logarithm
  // ratio can land just below them, and never leaves the unit table.
  let value = bytes;
  let unit = 0;
  while (value >= k && unit < sizes.length - 1) {
    value /= k;
    unit += 1;
  }
  return parseFloat(value.toFixed(2)) + ' ' + sizes[unit];
}
@@ -0,0 +1,18 @@
1
+ import { S3Client } from '@aws-sdk/client-s3';
2
+ export declare function setGlobalRequesterPays(enabled: boolean): void;
3
+ export declare function getGlobalRequesterPays(): boolean;
4
+ export interface AWSCredentials {
5
+ accessKey?: string;
6
+ secretKey?: string;
7
+ region?: string;
8
+ }
9
+ export declare function setCredentials(options: AWSCredentials): Promise<void>;
10
+ export declare function testConnection(): Promise<void>;
11
+ export declare function showConfig(): Promise<void>;
12
+ export declare function getCredentials(): Promise<{
13
+ accessKeyId: string | undefined;
14
+ secretAccessKey: string | undefined;
15
+ region: string;
16
+ }>;
17
+ export declare function getS3Client(): Promise<S3Client>;
18
+ //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../../../src/aws/config.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAA2C,MAAM,oBAAoB,CAAC;AAsBvF,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAE7D;AAED,wBAAgB,sBAAsB,IAAI,OAAO,CAEhD;AA8BD,MAAM,WAAW,cAAc;IAC7B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,cAAc,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CA0C3E;AAED,wBAAsB,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC,CAwEpD;AAED,wBAAsB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAahD;AAED,wBAAsB,cAAc;;;;GAOnC;AAED,wBAAsB,WAAW,IAAI,OAAO,CAAC,QAAQ,CAAC,CAoCrD"}