openrxiv-cli 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/api-client.d.ts +96 -0
- package/dist/api/api-client.d.ts.map +1 -0
- package/dist/api/api-client.js +257 -0
- package/dist/aws/bucket-explorer.d.ts +26 -0
- package/dist/aws/bucket-explorer.d.ts.map +1 -0
- package/dist/aws/bucket-explorer.js +220 -0
- package/dist/aws/config.d.ts +5 -0
- package/dist/aws/config.d.ts.map +1 -0
- package/dist/aws/config.js +36 -0
- package/dist/aws/downloader.d.ts +13 -0
- package/dist/aws/downloader.d.ts.map +1 -0
- package/dist/aws/downloader.js +115 -0
- package/dist/aws/month-lister.d.ts +18 -0
- package/dist/aws/month-lister.d.ts.map +1 -0
- package/dist/aws/month-lister.js +90 -0
- package/dist/commands/batch-info.d.ts +3 -0
- package/dist/commands/batch-info.d.ts.map +1 -0
- package/dist/commands/batch-info.js +213 -0
- package/dist/commands/batch-process.d.ts +3 -0
- package/dist/commands/batch-process.d.ts.map +1 -0
- package/dist/commands/batch-process.js +557 -0
- package/dist/commands/download.d.ts +3 -0
- package/dist/commands/download.d.ts.map +1 -0
- package/dist/commands/download.js +76 -0
- package/dist/commands/index.d.ts +6 -0
- package/dist/commands/index.d.ts.map +1 -0
- package/dist/commands/index.js +5 -0
- package/dist/commands/list.d.ts +3 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +18 -0
- package/dist/commands/summary.d.ts +3 -0
- package/dist/commands/summary.d.ts.map +1 -0
- package/dist/commands/summary.js +249 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +35 -0
- package/dist/utils/batches.d.ts +9 -0
- package/dist/utils/batches.d.ts.map +1 -0
- package/dist/utils/batches.js +61 -0
- package/dist/utils/batches.test.d.ts +2 -0
- package/dist/utils/batches.test.d.ts.map +1 -0
- package/dist/utils/batches.test.js +119 -0
- package/dist/utils/default-server.d.ts +3 -0
- package/dist/utils/default-server.d.ts.map +1 -0
- package/dist/utils/default-server.js +20 -0
- package/dist/utils/index.d.ts +5 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +5 -0
- package/dist/utils/meca-processor.d.ts +28 -0
- package/dist/utils/meca-processor.d.ts.map +1 -0
- package/dist/utils/meca-processor.js +503 -0
- package/dist/utils/meca-processor.test.d.ts +2 -0
- package/dist/utils/meca-processor.test.d.ts.map +1 -0
- package/dist/utils/meca-processor.test.js +123 -0
- package/dist/utils/months.d.ts +36 -0
- package/dist/utils/months.d.ts.map +1 -0
- package/dist/utils/months.js +135 -0
- package/dist/utils/months.test.d.ts +2 -0
- package/dist/utils/months.test.d.ts.map +1 -0
- package/dist/utils/months.test.js +209 -0
- package/dist/utils/requester-pays-error.d.ts +6 -0
- package/dist/utils/requester-pays-error.d.ts.map +1 -0
- package/dist/utils/requester-pays-error.js +20 -0
- package/dist/version.d.ts +3 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +2 -0
- package/package.json +67 -0

package/dist/aws/downloader.js
@@ -0,0 +1,115 @@
+import { GetObjectCommand, HeadObjectCommand } from '@aws-sdk/client-s3';
+import { createWriteStream } from 'fs';
+import { mkdir } from 'fs/promises';
+import { dirname, join } from 'path';
+import { pipeline } from 'stream/promises';
+import { Transform } from 'stream';
+import chalk from 'chalk';
+import ora from 'ora';
+import cliProgress from 'cli-progress';
+import { getS3Client, getGlobalRequesterPays } from './config.js';
+import { getDefaultServer } from '../utils/default-server.js';
+import { getBucketName } from './bucket-explorer.js';
+export async function downloadFile(path, options) {
+    const { output = './downloads', server = getDefaultServer() } = options;
+    const bucket = getBucketName(server);
+    const client = await getS3Client();
+    console.log(chalk.blue(`Downloading: ${path}`));
+    console.log(chalk.blue('=============================='));
+    try {
+        // Get file metadata
+        const headCommandOptions = {
+            Bucket: bucket,
+            Key: path,
+        };
+        // Only add RequestPayer if requester pays is enabled
+        if (getGlobalRequesterPays()) {
+            headCommandOptions.RequestPayer = 'requester';
+        }
+        const headCommand = new HeadObjectCommand(headCommandOptions);
+        const metadata = await client.send(headCommand);
+        const fileSize = metadata.ContentLength || 0;
+        const fileName = options.filename || path.split('/').pop() || 'unknown';
+        const outputPath = join(output, fileName);
+        // Create output directory
+        await mkdir(dirname(outputPath), { recursive: true });
+        // Start download
+        const spinner = ora('Preparing download...').start();
+        const getCommandOptions = {
+            Bucket: bucket,
+            Key: path,
+        };
+        // Only add RequestPayer if requester pays is enabled
+        if (getGlobalRequesterPays()) {
+            getCommandOptions.RequestPayer = 'requester';
+        }
+        const getCommand = new GetObjectCommand(getCommandOptions);
+        const response = await client.send(getCommand);
+        if (!response.Body) {
+            throw new Error('No file content received');
+        }
+        spinner.succeed('Download started');
+        // Create progress bar
+        const progressBar = new cliProgress.SingleBar({
+            format: 'Downloading |{bar}| {percentage}% | {value}/{total} bytes | Speed: {speed} | ETA: {eta}',
+            barCompleteChar: '\u2588',
+            barIncompleteChar: '\u2591',
+            hideCursor: true,
+        });
+        progressBar.start(fileSize, 0);
+        let downloadedBytes = 0;
+        const startTime = Date.now();
+        // Create transform stream to track progress
+        const progressStream = new (class extends Transform {
+            constructor() {
+                super();
+            }
+            _transform(chunk, encoding, callback) {
+                downloadedBytes += chunk.length;
+                const elapsed = (Date.now() - startTime) / 1000;
+                const speed = downloadedBytes / elapsed;
+                const eta = (fileSize - downloadedBytes) / speed;
+                progressBar.update(downloadedBytes);
+                callback(null, chunk);
+            }
+        })();
+        // Download file
+        const writeStream = createWriteStream(outputPath);
+        await pipeline(response.Body, progressStream, writeStream);
+        progressBar.stop();
+        console.log(chalk.green(`✓ Download completed: ${outputPath}`));
+        console.log(chalk.blue(`File size: ${formatFileSize(fileSize)}`));
+    }
+    catch (error) {
+        if (error instanceof Error) {
+            // Check for specific AWS errors that indicate requester pays is needed
+            if (error.message.includes('Access Denied') || error.message.includes('403')) {
+                if (!getGlobalRequesterPays()) {
+                    throw new Error(`Download failed: Access denied. This bucket requires requester pays for downloads. ` +
+                        `Try running with --requester-pays flag or ensure your IAM role has requester pays permissions.`);
+                }
+                else {
+                    throw new Error(`Download failed: Access denied. Check your AWS credentials and permissions.`);
+                }
+            }
+            else if (error.message.includes('NoSuchKey')) {
+                throw new Error(`Download failed: File not found in S3 bucket.`);
+            }
+            else if (error.message.includes('NoSuchBucket')) {
+                throw new Error(`Download failed: S3 bucket not found.`);
+            }
+            else {
+                throw new Error(`Download failed: ${error.message}`);
+            }
+        }
+        throw error;
+    }
+}
+function formatFileSize(bytes) {
+    if (bytes === 0)
+        return '0 B';
+    const k = 1024;
+    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
+    const i = Math.floor(Math.log(bytes) / Math.log(k));
+    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
+}
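
Two editorial notes on the downloader hunk above. First, the progress-bar format string declares `{speed}` and `{eta}` tokens, but `progressBar.update()` is only ever called with the byte count, so the `speed` and `eta` values computed in `_transform` never reach the display. A minimal sketch of how cli-progress custom tokens are normally supplied, reusing the names defined in the hunk above (an editor's illustration, not code from the package):

```js
// cli-progress fills custom tokens such as {speed} from a payload object
// passed to start()/update(); the built-in {eta} token is maintained by the
// bar itself. fileSize, downloadedBytes, speed, and formatFileSize refer to
// the definitions in the hunk above.
progressBar.start(fileSize, 0, { speed: 'N/A' });
progressBar.update(downloadedBytes, { speed: `${formatFileSize(speed)}/s` });
```

Second, for orientation, a hedged sketch of calling `downloadFile` from other code; the import path, S3 key, and option values are illustrative, not taken from this package:

```js
import { downloadFile } from './dist/aws/downloader.js'; // path illustrative

// Key and options are examples: `output` defaults to './downloads',
// `server` to getDefaultServer(), and `filename` overrides the saved name.
await downloadFile('Current_Content/January_2024/example.meca', {
  output: './downloads',
  server: 'biorxiv',
});
```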

package/dist/aws/month-lister.d.ts
@@ -0,0 +1,18 @@
+export interface S3FileInfo {
+    s3Bucket: string;
+    s3Key: string;
+    fileSize: number;
+    lastModified: Date;
+    batch: string;
+}
+export interface ListMonthOptions {
+    month?: string;
+    batch?: string;
+    server?: 'biorxiv' | 'medrxiv';
+    limit?: number;
+}
+/**
+ * Lists MECA files in S3 for a specific month with pagination support
+ */
+export declare function listMonthFiles(options: ListMonthOptions): Promise<S3FileInfo[]>;
+//# sourceMappingURL=month-lister.d.ts.map

package/dist/aws/month-lister.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"month-lister.d.ts","sourceRoot":"","sources":["../../src/aws/month-lister.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,IAAI,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,gBAAgB,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAoGrF"}

package/dist/aws/month-lister.js
@@ -0,0 +1,90 @@
+import { ListObjectsV2Command } from '@aws-sdk/client-s3';
+import { getS3Client } from './config.js';
+import { getFolderStructure } from 'openrxiv-utils';
+import { getDefaultServer } from '../utils/default-server.js';
+import { getBucketName } from './bucket-explorer.js';
+/**
+ * Lists MECA files in S3 for a specific month with pagination support
+ */
+export async function listMonthFiles(options) {
+    const { month, batch, limit = 1000, server = getDefaultServer() } = options;
+    const awsBucket = getBucketName(server);
+    if (!month && !batch) {
+        throw new Error('Either month or batch must be specified');
+    }
+    const description = month ? `month: ${month}` : `batch: ${batch}`;
+    console.log(`🔍 Listing files for ${description} from AWS S3 bucket: ${awsBucket}`);
+    try {
+        const s3Client = await getS3Client();
+        // Determine folder structure based on options
+        const folder = getFolderStructure({ month, batch, server: options.server || 'biorxiv' });
+        const s3Prefix = folder.prefix;
+        console.log(`🔍 Content Type: ${folder.type === 'current' ? 'Current Content' : 'Back Content'}`);
+        if (folder.batch) {
+            console.log(`🔍 Batch: ${folder.batch}`);
+        }
+        console.log(`🔍 Searching S3 prefix: ${s3Prefix}`);
+        const allFiles = [];
+        let continuationToken;
+        let batchCount = 0;
+        // Use pagination to get all files
+        do {
+            batchCount++;
+            console.log(`📦 Fetching batch ${batchCount}...`);
+            const listCommand = new ListObjectsV2Command({
+                Bucket: awsBucket,
+                Prefix: s3Prefix,
+                MaxKeys: Math.min(1000, limit - allFiles.length), // Don't fetch more than we need
+                ContinuationToken: continuationToken,
+                RequestPayer: 'requester',
+            });
+            const response = await s3Client.send(listCommand);
+            if (response.Contents) {
+                for (const item of response.Contents) {
+                    if (!item.Key || !item.Size)
+                        continue;
+                    // Only process .meca files
+                    if (!item.Key.endsWith('.meca'))
+                        continue;
+                    // Extract S3 file information
+                    const s3Key = item.Key;
+                    const fileSize = item.Size;
+                    const lastModified = item.LastModified || new Date();
+                    const fileInfo = {
+                        s3Bucket: awsBucket,
+                        s3Key: s3Key, // This is already the full path from S3
+                        fileSize: fileSize,
+                        lastModified: lastModified,
+                        batch: folder.batch,
+                    };
+                    allFiles.push(fileInfo);
+                    // Check if we've reached the limit
+                    if (allFiles.length >= limit) {
+                        console.log(`📋 Reached limit of ${limit} files`);
+                        break;
+                    }
+                }
+                console.log(`   Found ${response.Contents.length} files in this batch`);
+            }
+            continuationToken = response.NextContinuationToken;
+            // Break if we've reached the limit
+            if (allFiles.length >= limit) {
+                break;
+            }
+        } while (continuationToken);
+        console.log(`📋 Found ${allFiles.length} MECA files in S3 bucket`);
+        return allFiles;
+    }
+    catch (error) {
+        if (error instanceof Error) {
+            console.error(`❌ Error listing S3 files: ${error.message}`);
+            if (error.message.includes('AWS credentials not configured')) {
+                console.error('💡 Run "biorxiv config set-credentials" to configure AWS access');
+            }
+        }
+        else {
+            console.error('❌ Unknown error listing S3 files:', error);
+        }
+        return [];
+    }
+}
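
For orientation, a hedged sketch of consuming `listMonthFiles` with the `ListMonthOptions` shape declared in the .d.ts hunk above; the import path and option values are examples only:

```js
import { listMonthFiles } from './dist/aws/month-lister.js'; // path illustrative

// Either `month` or `batch` must be provided; `limit` defaults to 1000
// and `server` to getDefaultServer(). Values here are examples.
const files = await listMonthFiles({
  month: 'January_2024',
  server: 'biorxiv',
  limit: 100,
});
for (const f of files) {
  console.log(`${f.s3Key} (${f.fileSize} bytes, batch: ${f.batch})`);
}
```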

package/dist/commands/batch-info.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"batch-info.d.ts","sourceRoot":"","sources":["../../src/commands/batch-info.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQpC,eAAO,MAAM,gBAAgB,SAczB,CAAC"}

package/dist/commands/batch-info.js
@@ -0,0 +1,213 @@
+import { Command } from 'commander';
+import { ListObjectsV2Command } from '@aws-sdk/client-s3';
+import chalk from 'chalk';
+import { getS3Client } from '../aws/config.js';
+import { getFolderStructure } from 'openrxiv-utils';
+import { getBucketName } from '../aws/bucket-explorer.js';
+import { getDefaultServer } from '../utils/index.js';
+export const monthInfoCommand = new Command('batch-info')
+    .description('List detailed metadata for all files in a specific month or batch from bioRxiv or medRxiv')
+    .option('-m, --month <month>', 'Month to list (e.g., "January_2024" or "2024-01")')
+    .option('-b, --batch <batch>', 'Batch to list (e.g., "1", "batch-1", "Batch_01")')
+    .option('-s, --server <server>', 'Server to use: "biorxiv" or "medrxiv"', getDefaultServer())
+    .action(async (options) => {
+    try {
+        await listMonthMetadata(options);
+    }
+    catch (error) {
+        console.error('Error listing month metadata:', error);
+        process.exit(1);
+    }
+});
+async function listMonthMetadata(options) {
+    const client = await getS3Client();
+    const { month, batch, server = getDefaultServer() } = options;
+    const bucketName = getBucketName(server);
+    if (!month && !batch) {
+        console.error('❌ Error: Either --month or --batch option must be specified');
+        process.exit(1);
+    }
+    // Determine folder structure based on options
+    const contentStructure = getFolderStructure({ month, batch, server });
+    const prefix = contentStructure.prefix;
+    const description = month ? `Month: ${month}` : `Batch: ${batch}`;
+    console.log(chalk.blue(`📅 Month/Batch Information: ${description}`));
+    console.log(chalk.blue('===================================='));
+    console.log(chalk.gray(`🔍 Content Type: ${contentStructure.type === 'current' ? 'Current Content' : 'Back Content'}`));
+    if (contentStructure.batch) {
+        console.log(chalk.gray(`🔍 Batch: ${contentStructure.batch}`));
+    }
+    console.log(chalk.gray(`🔍 Scanning S3 prefix: ${prefix}`));
+    console.log('');
+    const allFiles = [];
+    let continuationToken;
+    let batchCount = 0;
+    try {
+        // Use pagination to get all files
+        do {
+            batchCount++;
+            console.log(chalk.gray(`📦 Fetching batch ${batchCount}...`));
+            const command = new ListObjectsV2Command({
+                Bucket: bucketName,
+                Prefix: prefix,
+                MaxKeys: 1000,
+                ContinuationToken: continuationToken,
+                RequestPayer: 'requester',
+            });
+            const response = await client.send(command);
+            if (response.Contents) {
+                for (const item of response.Contents) {
+                    if (!item.Key)
+                        continue;
+                    const type = getContentType(item.Key);
+                    allFiles.push({
+                        key: item.Key,
+                        size: item.Size || 0,
+                        lastModified: item.LastModified || new Date(),
+                        type,
+                        fileName: item.Key.split('/').pop() || 'unknown',
+                        fileExtension: item.Key.split('.').pop() || 'none',
+                    });
+                }
+            }
+            continuationToken = response.NextContinuationToken;
+            if (response.Contents) {
+                console.log(chalk.gray(`   Found ${response.Contents.length} files in this batch`));
+            }
+        } while (continuationToken);
+        console.log(chalk.green(`✅ Total files found: ${allFiles.length}`));
+        console.log('');
+        displaySummary(allFiles, month || batch || 'unknown', server);
+    }
+    catch (error) {
+        if (error instanceof Error) {
+            throw new Error(`Failed to list month metadata: ${error.message}`);
+        }
+        throw error;
+    }
+}
+function getContentType(key) {
+    if (key.endsWith('.meca'))
+        return 'meca';
+    if (key.endsWith('.pdf'))
+        return 'pdf';
+    if (key.endsWith('.xml'))
+        return 'xml';
+    return 'other';
+}
+function displaySummary(files, month, server = getDefaultServer()) {
+    console.log(chalk.blue.bold('📊 Summary Statistics'));
+    console.log(chalk.blue('===================='));
+    console.log('');
+    // Show content structure info if available
+    try {
+        const contentStructure = getFolderStructure({ month, server });
+        console.log(chalk.cyan('📁 Content Structure:'));
+        console.log(`   Type: ${chalk.yellow(contentStructure.type === 'current' ? 'Current Content' : 'Back Content')}`);
+        if (contentStructure.batch) {
+            console.log(`   Batch: ${chalk.yellow(contentStructure.batch)}`);
+        }
+        console.log('');
+    }
+    catch (error) {
+        // Ignore errors in summary display
+    }
+    // File type breakdown
+    const typeCounts = files.reduce((acc, file) => {
+        acc[file.type] = (acc[file.type] || 0) + 1;
+        return acc;
+    }, {});
+    console.log(chalk.cyan('📁 File Types:'));
+    for (const [type, count] of Object.entries(typeCounts)) {
+        const percentage = ((count / files.length) * 100).toFixed(1);
+        console.log(`   ${chalk.yellow(type.toUpperCase())}: ${chalk.green(count)} (${percentage}%)`);
+    }
+    console.log('');
+    // Size statistics
+    const mecaFiles = files.filter((f) => f.type === 'meca');
+    if (mecaFiles.length > 0) {
+        const sizes = mecaFiles.map((f) => f.size);
+        const totalSize = sizes.reduce((sum, size) => sum + size, 0);
+        const avgSize = totalSize / sizes.length;
+        const minSize = Math.min(...sizes);
+        const maxSize = Math.max(...sizes);
+        console.log(chalk.cyan('📦 MECA File Sizes:'));
+        console.log(`   Total: ${chalk.green(formatFileSize(totalSize))}`);
+        console.log(`   Average: ${chalk.green(formatFileSize(avgSize))}`);
+        console.log(`   Range: ${chalk.green(formatFileSize(minSize))} - ${chalk.green(formatFileSize(maxSize))}`);
+        console.log('');
+    }
+    // Date range
+    const dates = files.map((f) => f.lastModified);
+    const earliest = new Date(Math.min(...dates.map((d) => d.getTime())));
+    const latest = new Date(Math.max(...dates.map((d) => d.getTime())));
+    console.log(chalk.cyan('📅 Upload Date Range:'));
+    console.log(`   Earliest: ${chalk.green(earliest.toLocaleDateString())}`);
+    console.log(`   Latest: ${chalk.green(latest.toLocaleDateString())}`);
+    console.log('');
+    // Upload date histogram
+    const sortedDates = displayUploadDateHistogram(files);
+    console.log('');
+    // Show batch analysis
+    console.log('');
+    analyzeBatchPatterns(sortedDates);
+}
+function displayUploadDateHistogram(files) {
+    console.log(chalk.cyan('📊 Upload Date Distribution:'));
+    console.log(chalk.cyan('============================'));
+    console.log('');
+    // Group files by date
+    const dateGroups = new Map();
+    for (const file of files) {
+        const dateKey = file.lastModified.toLocaleDateString();
+        dateGroups.set(dateKey, (dateGroups.get(dateKey) || 0) + 1);
+    }
+    // Sort dates chronologically
+    const sortedDates = Array.from(dateGroups.entries()).sort((a, b) => {
+        return new Date(a[0]).getTime() - new Date(b[0]).getTime();
+    });
+    // Find the maximum count for scaling
+    const maxCount = Math.max(...Array.from(dateGroups.values()));
+    const maxBarLength = 50; // Maximum bar length in characters
+    // Display histogram
+    for (const [date, count] of sortedDates) {
+        const barLength = Math.round((count / maxCount) * maxBarLength);
+        const bar = '█'.repeat(barLength);
+        const percentage = ((count / files.length) * 100).toFixed(1);
+        // Color code by upload volume
+        let countColor = chalk.green;
+        if (count > maxCount * 0.8) {
+            countColor = chalk.red; // High volume
+        }
+        else if (count > maxCount * 0.5) {
+            countColor = chalk.yellow; // Medium volume
+        }
+        else {
+            countColor = chalk.green; // Low volume
+        }
+        console.log(`${chalk.cyan(date.padStart(10))} ${countColor(count.toString().padStart(4))} ${chalk.gray('│')} ${chalk.blue(bar)} ${chalk.gray(`(${percentage}%)`)}`);
+    }
+    return sortedDates;
+}
+function analyzeBatchPatterns(dateGroups) {
+    console.log(chalk.cyan('🔍 Batch Analysis'));
+    console.log(chalk.cyan('================='));
+    console.log('');
+    if (dateGroups.length === 0)
+        return;
+    // Analyze upload patterns
+    const totalDays = dateGroups.length;
+    const totalFiles = dateGroups.reduce((sum, [, count]) => sum + count, 0);
+    const avgFilesPerDay = totalFiles / totalDays;
+    console.log(`   Total active days: ${chalk.green(totalDays)}`);
+    console.log(`   Average files per day: ${chalk.green(avgFilesPerDay.toFixed(1))}`);
+    console.log('');
+}
+function formatFileSize(bytes) {
+    if (bytes === 0)
+        return '0 B';
+    const k = 1024;
+    const sizes = ['B', 'KB', 'MB', 'GB'];
+    const i = Math.floor(Math.log(bytes) / Math.log(k));
+    return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
+}
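
Since `monthInfoCommand` is a Commander `Command` instance, it can also be exercised directly in code. A hedged sketch using Commander's `parseAsync` with `{ from: 'user' }` (so no `node`/script argv prefix is needed); the import path and option values are illustrative, since the CLI entry-point wiring in package/dist/index.js is not shown in this excerpt:

```js
import { monthInfoCommand } from './dist/commands/batch-info.js'; // path illustrative

// --month accepts "January_2024" or "2024-01" per the option help text above.
await monthInfoCommand.parseAsync(['--month', 'January_2024', '--server', 'biorxiv'], {
  from: 'user',
});
```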

package/dist/commands/batch-process.d.ts.map
@@ -0,0 +1 @@
+{"version":3,"file":"batch-process.d.ts","sourceRoot":"","sources":["../../src/commands/batch-process.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAU,MAAM,WAAW,CAAC;AAwC5C,eAAO,MAAM,mBAAmB,SA0Q5B,CAAC"}