openrxiv 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/src/api/api-client.d.ts +96 -0
- package/dist/cli/src/api/api-client.d.ts.map +1 -0
- package/dist/cli/src/api/api-client.js +257 -0
- package/dist/cli/src/aws/bucket-explorer.d.ts +26 -0
- package/dist/cli/src/aws/bucket-explorer.d.ts.map +1 -0
- package/dist/cli/src/aws/bucket-explorer.js +220 -0
- package/dist/cli/src/aws/config.d.ts +18 -0
- package/dist/cli/src/aws/config.d.ts.map +1 -0
- package/dist/cli/src/aws/config.js +191 -0
- package/dist/cli/src/aws/downloader.d.ts +13 -0
- package/dist/cli/src/aws/downloader.d.ts.map +1 -0
- package/dist/cli/src/aws/downloader.js +115 -0
- package/dist/cli/src/aws/month-lister.d.ts +18 -0
- package/dist/cli/src/aws/month-lister.d.ts.map +1 -0
- package/dist/cli/src/aws/month-lister.js +90 -0
- package/dist/cli/src/commands/batch-process.d.ts +3 -0
- package/dist/cli/src/commands/batch-process.d.ts.map +1 -0
- package/dist/cli/src/commands/batch-process.js +557 -0
- package/dist/cli/src/commands/config.d.ts +3 -0
- package/dist/cli/src/commands/config.d.ts.map +1 -0
- package/dist/cli/src/commands/config.js +42 -0
- package/dist/cli/src/commands/download.d.ts +3 -0
- package/dist/cli/src/commands/download.d.ts.map +1 -0
- package/dist/cli/src/commands/download.js +76 -0
- package/dist/cli/src/commands/list.d.ts +3 -0
- package/dist/cli/src/commands/list.d.ts.map +1 -0
- package/dist/cli/src/commands/list.js +18 -0
- package/dist/cli/src/commands/month-info.d.ts +3 -0
- package/dist/cli/src/commands/month-info.d.ts.map +1 -0
- package/dist/cli/src/commands/month-info.js +213 -0
- package/dist/cli/src/commands/summary.d.ts +3 -0
- package/dist/cli/src/commands/summary.d.ts.map +1 -0
- package/dist/cli/src/commands/summary.js +249 -0
- package/dist/cli/src/index.d.ts +3 -0
- package/dist/cli/src/index.d.ts.map +1 -0
- package/dist/cli/src/index.js +35 -0
- package/dist/cli/src/utils/batches.d.ts +9 -0
- package/dist/cli/src/utils/batches.d.ts.map +1 -0
- package/dist/cli/src/utils/batches.js +61 -0
- package/dist/cli/src/utils/batches.test.d.ts +2 -0
- package/dist/cli/src/utils/batches.test.d.ts.map +1 -0
- package/dist/cli/src/utils/batches.test.js +119 -0
- package/dist/cli/src/utils/default-server.d.ts +3 -0
- package/dist/cli/src/utils/default-server.d.ts.map +1 -0
- package/dist/cli/src/utils/default-server.js +20 -0
- package/dist/cli/src/utils/index.d.ts +5 -0
- package/dist/cli/src/utils/index.d.ts.map +1 -0
- package/dist/cli/src/utils/index.js +5 -0
- package/dist/cli/src/utils/meca-processor.d.ts +28 -0
- package/dist/cli/src/utils/meca-processor.d.ts.map +1 -0
- package/dist/cli/src/utils/meca-processor.js +503 -0
- package/dist/cli/src/utils/meca-processor.test.d.ts +2 -0
- package/dist/cli/src/utils/meca-processor.test.d.ts.map +1 -0
- package/dist/cli/src/utils/meca-processor.test.js +123 -0
- package/dist/cli/src/utils/months.d.ts +36 -0
- package/dist/cli/src/utils/months.d.ts.map +1 -0
- package/dist/cli/src/utils/months.js +135 -0
- package/dist/cli/src/utils/months.test.d.ts +2 -0
- package/dist/cli/src/utils/months.test.d.ts.map +1 -0
- package/dist/cli/src/utils/months.test.js +209 -0
- package/dist/cli/src/utils/requester-pays-error.d.ts +6 -0
- package/dist/cli/src/utils/requester-pays-error.d.ts.map +1 -0
- package/dist/cli/src/utils/requester-pays-error.js +20 -0
- package/dist/cli/src/version.d.ts +3 -0
- package/dist/cli/src/version.d.ts.map +1 -0
- package/dist/cli/src/version.js +2 -0
- package/dist/cli.cjs +98815 -0
- package/dist/utils/src/biorxiv-parser.d.ts +51 -0
- package/dist/utils/src/biorxiv-parser.d.ts.map +1 -0
- package/dist/utils/src/biorxiv-parser.js +126 -0
- package/dist/utils/src/folder-structure.d.ts +44 -0
- package/dist/utils/src/folder-structure.d.ts.map +1 -0
- package/dist/utils/src/folder-structure.js +207 -0
- package/dist/utils/src/index.d.ts +3 -0
- package/dist/utils/src/index.d.ts.map +1 -0
- package/dist/utils/src/index.js +3 -0
- package/package.json +76 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import inquirer from 'inquirer';
|
|
2
|
+
import { S3Client, ListObjectsV2Command } from '@aws-sdk/client-s3';
|
|
3
|
+
import chalk from 'chalk';
|
|
4
|
+
import { homedir } from 'os';
|
|
5
|
+
import { join } from 'path';
|
|
6
|
+
import { readFile, writeFile, mkdir } from 'fs/promises';
|
|
7
|
+
import { existsSync } from 'fs';
|
|
8
|
+
// Per-user configuration lives under ~/.biorxiv/config.json.
const CONFIG_DIR = join(homedir(), '.biorxiv');
const CONFIG_FILE = join(CONFIG_DIR, 'config.json');
// Process-wide requester-pays switch; stays off unless enabled explicitly.
let globalRequesterPays = false;
/**
 * Enables or disables requester-pays mode for subsequent S3 operations.
 * @param {boolean} enabled - true to send RequestPayer on S3 commands.
 */
export function setGlobalRequesterPays(enabled) {
    globalRequesterPays = enabled;
}
/**
 * Reports whether requester-pays mode is currently enabled.
 * @returns {boolean}
 */
export function getGlobalRequesterPays() {
    return globalRequesterPays;
}
|
|
18
|
+
/**
 * Creates the config directory if it does not already exist.
 * `mkdir` with `recursive: true` is a no-op for an existing directory,
 * so the previous `existsSync` pre-check was redundant (and a TOCTOU race).
 */
async function ensureConfigDir() {
    await mkdir(CONFIG_DIR, { recursive: true });
}
|
|
23
|
+
/**
 * Loads the CLI configuration from disk, falling back to defaults.
 * A corrupted or partial config file never crashes the CLI: JSON parse
 * errors fall through to defaults, and a parsed file missing the `aws`
 * section is merged with defaults so callers can always rely on
 * `config.aws` existing.
 * @returns {Promise<{aws: {region: string, accessKeyId?: string, secretAccessKey?: string}}>}
 */
async function loadConfig() {
    const defaults = {
        aws: {
            region: 'us-east-1',
        },
    };
    try {
        if (existsSync(CONFIG_FILE)) {
            const data = await readFile(CONFIG_FILE, 'utf-8');
            const parsed = JSON.parse(data);
            // Merge so a legacy/hand-edited file without an `aws` section
            // cannot make `config.aws.x` throw downstream.
            return {
                ...defaults,
                ...parsed,
                aws: { ...defaults.aws, ...(parsed.aws ?? {}) },
            };
        }
    }
    catch (error) {
        // If config file is corrupted, start fresh with defaults.
    }
    return defaults;
}
|
|
39
|
+
/**
 * Persists the configuration object to the user's config file,
 * creating the config directory first if needed.
 * @param {object} config - Full configuration object to serialize.
 */
async function saveConfig(config) {
    await ensureConfigDir();
    const serialized = JSON.stringify(config, null, 2);
    await writeFile(CONFIG_FILE, serialized);
}
|
|
43
|
+
/**
 * Stores AWS credentials in the user config file, prompting interactively
 * for any value not supplied via CLI options. Region defaults to us-east-1.
 * @param {{accessKey?: string, secretKey?: string, region?: string}} options
 */
export async function setCredentials(options) {
    let { accessKey, secretKey, region } = options;
    // Prompt for missing credentials
    if (!accessKey) {
        const answer = await inquirer.prompt([
            {
                type: 'input',
                name: 'accessKey',
                message: 'Enter your AWS Access Key ID:',
                validate: (input) => (input.length > 0 ? true : 'Access Key is required'),
            },
        ]);
        accessKey = answer.accessKey;
    }
    if (!secretKey) {
        const answer = await inquirer.prompt([
            {
                // password type hides the secret while typing
                type: 'password',
                name: 'secretKey',
                message: 'Enter your AWS Secret Access Key:',
                validate: (input) => (input.length > 0 ? true : 'Secret Key is required'),
            },
        ]);
        secretKey = answer.secretKey;
    }
    if (!region) {
        region = 'us-east-1';
    }
    // Save credentials
    const config = await loadConfig();
    // Guard against a hand-edited/legacy config file without an `aws` section,
    // which would otherwise make the assignments below throw.
    config.aws = config.aws ?? {};
    config.aws.accessKeyId = accessKey;
    config.aws.secretAccessKey = secretKey;
    config.aws.region = region;
    await saveConfig(config);
    console.log(chalk.green('✓ AWS credentials saved successfully'));
    console.log(chalk.blue(`Region: ${region}`));
}
|
|
80
|
+
/**
 * Verifies S3 connectivity by listing a single object from the bioRxiv
 * bucket. Falls back to the default AWS credential chain (e.g. EC2 instance
 * metadata) when no explicit credentials are configured. Rethrows any
 * failure after printing diagnostic hints.
 */
export async function testConnection() {
    const credentials = await getCredentials();
    const hasExplicitKeys = Boolean(credentials.accessKeyId && credentials.secretAccessKey);
    // If no credentials are provided, try to use instance metadata (EC2 IAM role)
    if (!hasExplicitKeys) {
        console.log(chalk.yellow('No AWS credentials found in config, attempting to use EC2 instance metadata...'));
    }
    const clientConfig = {
        region: credentials.region,
        requestHandler: {
            httpOptions: {
                timeout: 300000, // 5 minutes timeout for large operations
            },
        },
    };
    if (hasExplicitKeys) {
        clientConfig.credentials = {
            accessKeyId: credentials.accessKeyId,
            secretAccessKey: credentials.secretAccessKey,
        };
    }
    const client = new S3Client(clientConfig);
    try {
        console.log(chalk.blue('Testing connection to bioRxiv bucket...'));
        // Test connection by listing a small amount of content
        const listCommandOptions = {
            Bucket: 'biorxiv-src-monthly',
            MaxKeys: 1,
        };
        // Only add RequestPayer if requester pays is enabled
        if (getGlobalRequesterPays()) {
            listCommandOptions.RequestPayer = 'requester';
        }
        await client.send(new ListObjectsV2Command(listCommandOptions));
        console.log(chalk.green('✓ Successfully connected to bioRxiv bucket'));
        console.log(chalk.blue('Bucket: biorxiv-src-monthly'));
        console.log(chalk.blue('Region: us-east-1'));
        if (getGlobalRequesterPays()) {
            console.log(chalk.blue('Requester-pays: Enabled'));
            console.log(chalk.yellow('⚠️ You will be charged for S3 requests'));
        }
        else {
            console.log(chalk.blue('Requester-pays: Disabled'));
            console.log(chalk.green('✓ No charges for S3 requests (using EC2 IAM role or bucket owner pays)'));
        }
    }
    catch (error) {
        if (error instanceof Error) {
            if (error.message.includes('Access Denied') && !getGlobalRequesterPays()) {
                console.log(chalk.red('✗ Access denied. This bucket requires requester pays.'));
                console.log(chalk.yellow('Try running with --requester-pays flag for local development.'));
                console.log(chalk.blue('On EC2 with proper IAM role, this should work without the flag.'));
            }
            else {
                throw new Error(`Connection failed: ${error.message}`);
            }
        }
        throw error;
    }
}
|
|
141
|
+
/**
 * Prints the current AWS configuration status (key presence and region)
 * to the console.
 */
export async function showConfig() {
    const { accessKeyId, region } = await getCredentials();
    console.log(chalk.blue('Current bioRxiv Configuration:'));
    console.log(chalk.blue('=============================='));
    if (!accessKeyId) {
        console.log(chalk.red('✗ AWS credentials not configured'));
        console.log(chalk.yellow('Run "biorxiv config set-credentials" to configure'));
        return;
    }
    console.log(chalk.green('✓ AWS Access Key: Configured'));
    console.log(chalk.blue(`Region: ${region}`));
}
|
|
154
|
+
/**
 * Reads AWS credentials from the user config file.
 * Keys that were never configured come back as `undefined`; `region`
 * always has a value (defaults to us-east-1).
 * @returns {Promise<{accessKeyId?: string, secretAccessKey?: string, region: string}>}
 */
export async function getCredentials() {
    const config = await loadConfig();
    // Optional chaining guards against a config object missing the `aws`
    // section (e.g. a hand-edited file), which previously threw here.
    return {
        accessKeyId: config.aws?.accessKeyId,
        secretAccessKey: config.aws?.secretAccessKey,
        region: config.aws?.region ?? 'us-east-1',
    };
}
|
|
162
|
+
/**
 * Builds an S3 client from stored credentials, or falls back to the
 * default AWS credential chain (e.g. EC2 instance metadata) when no
 * explicit credentials are configured.
 * @returns {Promise<S3Client>}
 */
export async function getS3Client() {
    const credentials = await getCredentials();
    // Shared between both branches; generous timeout for large operations.
    const requestHandler = {
        httpOptions: {
            timeout: 300000, // 5 minutes timeout for large operations
        },
    };
    // If no credentials are provided, try to use instance metadata (EC2 IAM role)
    if (!credentials.accessKeyId || !credentials.secretAccessKey) {
        console.log(chalk.yellow('No AWS credentials found in config, attempting to use EC2 instance metadata...'));
        // AWS SDK will automatically use instance metadata service for credentials
        return new S3Client({
            region: credentials.region,
            requestHandler,
        });
    }
    // bioRxiv bucket is requester-pays, so we need to configure the client accordingly
    return new S3Client({
        region: credentials.region,
        credentials: {
            accessKeyId: credentials.accessKeyId,
            secretAccessKey: credentials.secretAccessKey,
        },
        requestHandler,
    });
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/** Options accepted by {@link downloadFile}. */
export interface DownloadOptions {
    /** Destination directory for the downloaded file (default: './downloads'). */
    output?: string;
    /** Override for the output file name; defaults to the basename of the S3 key. */
    filename?: string;
    /** Which preprint server's bucket to download from. */
    server?: 'biorxiv' | 'medrxiv';
}
/** Snapshot of an in-flight download's progress. */
export interface DownloadProgress {
    /** Bytes received so far. */
    downloaded: number;
    /** Total bytes expected (from the object's ContentLength). */
    total: number;
    /** Current transfer speed — presumably bytes/second; confirm against implementation. */
    speed: number;
    /** Estimated time remaining — presumably seconds; confirm against implementation. */
    eta: number;
}
/** Downloads the S3 object at `path` to a local file, streaming with a progress bar. */
export declare function downloadFile(path: string, options: DownloadOptions): Promise<void>;
//# sourceMappingURL=downloader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"downloader.d.ts","sourceRoot":"","sources":["../../../../src/aws/downloader.ts"],"names":[],"mappings":"AAaA,MAAM,WAAW,eAAe;IAC9B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;CAChC;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;CACb;AAED,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,CAwHxF"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3';
|
|
2
|
+
import { createWriteStream } from 'fs';
|
|
3
|
+
import { mkdir } from 'fs/promises';
|
|
4
|
+
import { dirname, join } from 'path';
|
|
5
|
+
import { pipeline } from 'stream/promises';
|
|
6
|
+
import { Transform } from 'stream';
|
|
7
|
+
import chalk from 'chalk';
|
|
8
|
+
import ora from 'ora';
|
|
9
|
+
import cliProgress from 'cli-progress';
|
|
10
|
+
import { getS3Client, getGlobalRequesterPays } from './config.js';
|
|
11
|
+
import { getDefaultServer } from '../utils/default-server.js';
|
|
12
|
+
import { getBucketName } from './bucket-explorer.js';
|
|
13
|
+
/**
 * Downloads a single object from the preprint S3 bucket to a local file,
 * streaming through a byte-counting Transform that drives a progress bar.
 * @param {string} path - Full S3 key of the object to download.
 * @param {DownloadOptions} options - Output directory, filename override, server.
 * @throws {Error} with a user-friendly message for access-denied, missing-key,
 *   missing-bucket, and generic failures (original error rethrown after mapping).
 */
export async function downloadFile(path, options) {
    const { output = './downloads', server = getDefaultServer() } = options;
    const bucket = getBucketName(server);
    const client = await getS3Client();
    console.log(chalk.blue(`Downloading: ${path}`));
    console.log(chalk.blue('=============================='));
    try {
        // Get file metadata
        const headCommandOptions = {
            Bucket: bucket,
            Key: path,
        };
        // Only add RequestPayer if requester pays is enabled
        if (getGlobalRequesterPays()) {
            headCommandOptions.RequestPayer = 'requester';
        }
        const metadata = await client.send(new HeadObjectCommand(headCommandOptions));
        const fileSize = metadata.ContentLength || 0;
        const fileName = options.filename || path.split('/').pop() || 'unknown';
        const outputPath = join(output, fileName);
        // Create output directory
        await mkdir(dirname(outputPath), { recursive: true });
        // Start download
        const spinner = ora('Preparing download...').start();
        const getCommandOptions = {
            Bucket: bucket,
            Key: path,
        };
        // Only add RequestPayer if requester pays is enabled
        if (getGlobalRequesterPays()) {
            getCommandOptions.RequestPayer = 'requester';
        }
        const response = await client.send(new GetObjectCommand(getCommandOptions));
        if (!response.Body) {
            throw new Error('No file content received');
        }
        spinner.succeed('Download started');
        // Create progress bar
        const progressBar = new cliProgress.SingleBar({
            format: 'Downloading |{bar}| {percentage}% | {value}/{total} bytes | Speed: {speed} | ETA: {eta}',
            barCompleteChar: '\u2588',
            barIncompleteChar: '\u2591',
            hideCursor: true,
        });
        // Seed the custom tokens so they never render as raw "{speed}"/"{eta}".
        progressBar.start(fileSize, 0, { speed: 'N/A', eta: 'N/A' });
        let downloadedBytes = 0;
        const startTime = Date.now();
        // Byte-counting pass-through that updates the progress bar.
        const progressStream = new Transform({
            transform(chunk, encoding, callback) {
                downloadedBytes += chunk.length;
                const elapsed = (Date.now() - startTime) / 1000;
                const speed = downloadedBytes / elapsed;
                const eta = speed > 0 ? (fileSize - downloadedBytes) / speed : 0;
                // BUG FIX: the format string references {speed} and {eta}, but
                // the original never passed a payload, so the computed values
                // were dead locals and the tokens never displayed meaningfully.
                progressBar.update(downloadedBytes, {
                    speed: `${formatFileSize(speed)}/s`,
                    eta: `${Math.round(eta)}s`,
                });
                callback(null, chunk);
            },
        });
        // Download file
        const writeStream = createWriteStream(outputPath);
        await pipeline(response.Body, progressStream, writeStream);
        progressBar.stop();
        console.log(chalk.green(`✓ Download completed: ${outputPath}`));
        console.log(chalk.blue(`File size: ${formatFileSize(fileSize)}`));
    }
    catch (error) {
        if (error instanceof Error) {
            // Check for specific AWS errors that indicate requester pays is needed
            if (error.message.includes('Access Denied') || error.message.includes('403')) {
                if (!getGlobalRequesterPays()) {
                    throw new Error(`Download failed: Access denied. This bucket requires requester pays for downloads. ` +
                        `Try running with --requester-pays flag or ensure your IAM role has requester pays permissions.`);
                }
                else {
                    throw new Error(`Download failed: Access denied. Check your AWS credentials and permissions.`);
                }
            }
            else if (error.message.includes('NoSuchKey')) {
                throw new Error(`Download failed: File not found in S3 bucket.`);
            }
            else if (error.message.includes('NoSuchBucket')) {
                throw new Error(`Download failed: S3 bucket not found.`);
            }
            else {
                throw new Error(`Download failed: ${error.message}`);
            }
        }
        throw error;
    }
}
|
|
108
|
+
/**
 * Formats a byte count as a human-readable string, e.g. "1.5 KB".
 * @param {number} bytes - Non-negative byte count.
 * @returns {string} value rounded to at most two decimals plus a unit.
 */
function formatFileSize(bytes) {
    if (bytes === 0)
        return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
    // Clamp the unit index: inputs >= 1024^5 previously indexed past the
    // array and rendered as "... undefined".
    const i = Math.min(Math.floor(Math.log(bytes) / Math.log(k)), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Metadata for a single MECA archive discovered in S3. */
export interface S3FileInfo {
    /** Bucket the object lives in. */
    s3Bucket: string;
    /** Full S3 object key (already the complete path). */
    s3Key: string;
    /** Object size in bytes. */
    fileSize: number;
    /** Object's LastModified timestamp (falls back to listing time if absent). */
    lastModified: Date;
    /** Batch label derived from the folder structure for the listing. */
    batch: string;
}
/** Options for {@link listMonthFiles}; at least one of `month`/`batch` is required. */
export interface ListMonthOptions {
    /** Month selector — format defined by the folder-structure helper; confirm against biorxiv-utils. */
    month?: string;
    /** Batch selector, as an alternative to `month`. */
    batch?: string;
    /** Which preprint server's bucket to query. */
    server?: 'biorxiv' | 'medrxiv';
    /** Maximum number of files to return (implementation defaults to 1000). */
    limit?: number;
}
/**
 * Lists MECA files in S3 for a specific month with pagination support
 */
export declare function listMonthFiles(options: ListMonthOptions): Promise<S3FileInfo[]>;
//# sourceMappingURL=month-lister.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"month-lister.d.ts","sourceRoot":"","sources":["../../../../src/aws/month-lister.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,IAAI,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAoGrF"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { ListObjectsV2Command } from '@aws-sdk/client-s3';
|
|
2
|
+
import { getS3Client } from './config.js';
|
|
3
|
+
import { getFolderStructure } from 'biorxiv-utils';
|
|
4
|
+
import { getDefaultServer } from '../utils/default-server.js';
|
|
5
|
+
import { getBucketName } from './bucket-explorer.js';
|
|
6
|
+
/**
|
|
7
|
+
* Lists MECA files in S3 for a specific month with pagination support
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Lists MECA files in S3 for a specific month with pagination support.
 * @param {ListMonthOptions} options - month/batch selector, server, limit.
 * @returns {Promise<S3FileInfo[]>} matching .meca objects; empty array on error.
 * @throws {Error} when neither month nor batch is specified.
 */
export async function listMonthFiles(options) {
    const { month, batch, limit = 1000, server = getDefaultServer() } = options;
    const awsBucket = getBucketName(server);
    if (!month && !batch) {
        throw new Error('Either month or batch must be specified');
    }
    const description = month ? `month: ${month}` : `batch: ${batch}`;
    console.log(`🔍 Listing files for ${description} from AWS S3 bucket: ${awsBucket}`);
    try {
        const s3Client = await getS3Client();
        // Determine folder structure based on options.
        // BUG FIX: use the resolved `server` (which honors getDefaultServer())
        // so the prefix always matches the bucket chosen above; previously
        // `options.server || 'biorxiv'` could diverge from `awsBucket`.
        const folder = getFolderStructure({ month, batch, server });
        const s3Prefix = folder.prefix;
        console.log(`🔍 Content Type: ${folder.type === 'current' ? 'Current Content' : 'Back Content'}`);
        if (folder.batch) {
            console.log(`🔍 Batch: ${folder.batch}`);
        }
        console.log(`🔍 Searching S3 prefix: ${s3Prefix}`);
        const allFiles = [];
        let continuationToken;
        let batchCount = 0;
        // Use pagination to get all files
        do {
            batchCount++;
            console.log(`📦 Fetching batch ${batchCount}...`);
            const listCommand = new ListObjectsV2Command({
                Bucket: awsBucket,
                Prefix: s3Prefix,
                MaxKeys: Math.min(1000, limit - allFiles.length), // Don't fetch more than we need
                ContinuationToken: continuationToken,
                // NOTE(review): unconditionally requester-pays here, unlike the
                // config/downloader modules which gate on getGlobalRequesterPays()
                // — confirm whether listing should honor the global flag too.
                RequestPayer: 'requester',
            });
            const response = await s3Client.send(listCommand);
            if (response.Contents) {
                for (const item of response.Contents) {
                    // BUG FIX: `!item.Size` also skipped legitimate zero-byte
                    // objects (0 is falsy); only skip when Key/Size are missing.
                    if (!item.Key || item.Size == null)
                        continue;
                    // Only process .meca files
                    if (!item.Key.endsWith('.meca'))
                        continue;
                    allFiles.push({
                        s3Bucket: awsBucket,
                        s3Key: item.Key, // This is already the full path from S3
                        fileSize: item.Size,
                        lastModified: item.LastModified || new Date(),
                        batch: folder.batch,
                    });
                    // Check if we've reached the limit
                    if (allFiles.length >= limit) {
                        console.log(`📋 Reached limit of ${limit} files`);
                        break;
                    }
                }
                console.log(`  Found ${response.Contents.length} files in this batch`);
            }
            continuationToken = response.NextContinuationToken;
            // Stop paginating once the limit is reached.
        } while (continuationToken && allFiles.length < limit);
        console.log(`📋 Found ${allFiles.length} MECA files in S3 bucket`);
        return allFiles;
    }
    catch (error) {
        if (error instanceof Error) {
            console.error(`❌ Error listing S3 files: ${error.message}`);
            if (error.message.includes('AWS credentials not configured')) {
                console.error('💡 Run "biorxiv config set-credentials" to configure AWS access');
            }
        }
        else {
            console.error('❌ Unknown error listing S3 files:', error);
        }
        return [];
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batch-process.d.ts","sourceRoot":"","sources":["../../../../src/commands/batch-process.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAU,MAAM,WAAW,CAAC;AAwC5C,eAAO,MAAM,mBAAmB,SA0Q5B,CAAC"}
|