git-ripper 1.4.3 → 1.4.4

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "git-ripper",
3
- "version": "1.4.3",
3
+ "version": "1.4.4",
4
4
  "description": "CLI tool that lets you download specific folders from GitHub repositories without cloning the entire repo.",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -9,8 +9,7 @@
9
9
  },
10
10
  "scripts": {
11
11
  "test": "echo \"Error: no test specified\" && exit 1",
12
- "dev": "node bin/git-ripper.js",
13
- "lint": "eslint ."
12
+ "dev": "node bin/git-ripper.js"
14
13
  },
15
14
  "keywords": [
16
15
  "git",
@@ -29,10 +28,6 @@
29
28
  ],
30
29
  "author": "sairajb",
31
30
  "license": "MIT",
32
- "imports": {
33
- "#ansi-styles": "ansi-styles",
34
- "#supports-color": "supports-color"
35
- },
36
31
  "dependencies": {
37
32
  "ansi-styles": "^6.2.1",
38
33
  "archiver": "^6.0.1",
package/src/downloader.js CHANGED
@@ -7,6 +7,7 @@ import cliProgress from "cli-progress";
7
7
  import pLimit from "p-limit";
8
8
  import chalk from "chalk";
9
9
  import prettyBytes from "pretty-bytes";
10
+ import { ResumeManager } from "./resumeManager.js";
10
11
 
11
12
  // Set concurrency limit (adjustable based on network performance)
12
13
  // Reduced from 500 to 5 to prevent GitHub API rate limiting
@@ -497,4 +498,289 @@ const downloadFolder = async (
497
498
  };
498
499
 
499
500
  // Export functions in ESM format
500
- export { downloadFolder };
501
+ export { downloadFolder, downloadFolderWithResume };
502
+
503
+ /**
504
+ * Downloads all files from a folder in a GitHub repository with resume capability
505
+ */
506
+ const downloadFolderWithResume = async (
507
+ { owner, repo, branch, folderPath },
508
+ outputDir,
509
+ options = { resume: true, forceRestart: false }
510
+ ) => {
511
+ const { resume = true, forceRestart = false } = options;
512
+
513
+ if (!resume) {
514
+ return downloadFolder({ owner, repo, branch, folderPath }, outputDir);
515
+ }
516
+
517
+ const resumeManager = new ResumeManager();
518
+ const url = `https://github.com/${owner}/${repo}/tree/${branch || "main"}/${
519
+ folderPath || ""
520
+ }`;
521
+
522
+ // Clear checkpoint if force restart is requested
523
+ if (forceRestart) {
524
+ resumeManager.cleanupCheckpoint(url, outputDir);
525
+ }
526
+
527
+ // Check for existing checkpoint
528
+ let checkpoint = resumeManager.loadCheckpoint(url, outputDir);
529
+
530
+ if (checkpoint) {
531
+ console.log(
532
+ chalk.blue(
533
+ `🔄 Found previous download from ${new Date(
534
+ checkpoint.timestamp
535
+ ).toLocaleString()}`
536
+ )
537
+ );
538
+ console.log(
539
+ chalk.blue(
540
+ `📊 Progress: ${checkpoint.downloadedFiles.length}/${checkpoint.totalFiles} files completed`
541
+ )
542
+ );
543
+
544
+ // Verify integrity of existing files
545
+ const validFiles = [];
546
+ let corruptedCount = 0;
547
+
548
+ for (const filename of checkpoint.downloadedFiles) {
549
+ const filepath = path.join(outputDir, filename);
550
+ const expectedHash = checkpoint.fileHashes[filename];
551
+
552
+ if (
553
+ expectedHash &&
554
+ resumeManager.verifyFileIntegrity(filepath, expectedHash)
555
+ ) {
556
+ validFiles.push(filename);
557
+ } else {
558
+ corruptedCount++;
559
+ }
560
+ }
561
+
562
+ checkpoint.downloadedFiles = validFiles;
563
+ if (corruptedCount > 0) {
564
+ console.log(
565
+ chalk.yellow(
566
+ `🔧 Detected ${corruptedCount} corrupted files, will re-download`
567
+ )
568
+ );
569
+ }
570
+ console.log(chalk.green(`✅ Verified ${validFiles.length} existing files`));
571
+ }
572
+
573
+ console.log(
574
+ chalk.cyan(`Analyzing repository structure for ${owner}/${repo}...`)
575
+ );
576
+
577
+ try {
578
+ const contents = await fetchFolderContents(owner, repo, branch, folderPath);
579
+
580
+ if (!contents || contents.length === 0) {
581
+ console.log(
582
+ chalk.yellow(`No files found in ${folderPath || "repository root"}`)
583
+ );
584
+ console.log(chalk.green(`Folder cloned successfully!`));
585
+ return;
586
+ }
587
+
588
+ // Filter for blob type (files)
589
+ const files = contents.filter((item) => item.type === "blob");
590
+ const totalFiles = files.length;
591
+
592
+ if (totalFiles === 0) {
593
+ console.log(
594
+ chalk.yellow(
595
+ `No files found in ${
596
+ folderPath || "repository root"
597
+ } (only directories)`
598
+ )
599
+ );
600
+ console.log(chalk.green(`Folder cloned successfully!`));
601
+ return;
602
+ }
603
+
604
+ // Create new checkpoint if none exists
605
+ if (!checkpoint) {
606
+ checkpoint = resumeManager.createNewCheckpoint(
607
+ url,
608
+ outputDir,
609
+ totalFiles
610
+ );
611
+ console.log(
612
+ chalk.cyan(
613
+ `📥 Starting download of ${totalFiles} files from ${chalk.white(
614
+ owner + "/" + repo
615
+ )}...`
616
+ )
617
+ );
618
+ } else {
619
+ // Update total files in case repository changed
620
+ checkpoint.totalFiles = totalFiles;
621
+ console.log(chalk.cyan(`📥 Resuming download...`));
622
+ }
623
+
624
+ // Get remaining files to download
625
+ const remainingFiles = files.filter((item) => {
626
+ let relativePath = item.path;
627
+ if (folderPath && folderPath.trim() !== "") {
628
+ relativePath = item.path
629
+ .substring(folderPath.length)
630
+ .replace(/^\//, "");
631
+ }
632
+ return !checkpoint.downloadedFiles.includes(relativePath);
633
+ });
634
+
635
+ if (remainingFiles.length === 0) {
636
+ console.log(chalk.green(`🎉 All files already downloaded!`));
637
+ resumeManager.cleanupCheckpoint(url, outputDir);
638
+ return;
639
+ }
640
+
641
+ console.log(
642
+ chalk.cyan(`📥 Downloading ${remainingFiles.length} remaining files...`)
643
+ );
644
+
645
+ // Setup progress bar
646
+ const progressBar = new cliProgress.SingleBar({
647
+ format: createProgressRenderer(owner, repo, folderPath),
648
+ hideCursor: true,
649
+ clearOnComplete: false,
650
+ stopOnComplete: true,
651
+ forceRedraw: true,
652
+ });
653
+
654
+ // Calculate already downloaded size
655
+ let downloadedSize = 0;
656
+ for (const filename of checkpoint.downloadedFiles) {
657
+ const filepath = path.join(outputDir, filename);
658
+ try {
659
+ downloadedSize += fs.statSync(filepath).size;
660
+ } catch {
661
+ // File might be missing, will be re-downloaded
662
+ }
663
+ }
664
+
665
+ const startTime = Date.now();
666
+ let failedFiles = [...(checkpoint.failedFiles || [])];
667
+
668
+ // Start progress bar with current progress
669
+ progressBar.start(totalFiles, checkpoint.downloadedFiles.length, {
670
+ downloadedSize,
671
+ startTime,
672
+ });
673
+
674
+ // Process remaining files
675
+ let processedCount = 0;
676
+ for (const item of remainingFiles) {
677
+ try {
678
+ let relativePath = item.path;
679
+ if (folderPath && folderPath.trim() !== "") {
680
+ relativePath = item.path
681
+ .substring(folderPath.length)
682
+ .replace(/^\//, "");
683
+ }
684
+ const outputFilePath = path.join(outputDir, relativePath);
685
+
686
+ const result = await downloadFile(
687
+ owner,
688
+ repo,
689
+ branch,
690
+ item.path,
691
+ outputFilePath
692
+ );
693
+
694
+ if (result.success) {
695
+ // Calculate file hash for integrity checking
696
+ const fileContent = fs.readFileSync(outputFilePath);
697
+ const fileHash = resumeManager.calculateHash(fileContent);
698
+
699
+ // Update checkpoint
700
+ checkpoint.downloadedFiles.push(relativePath);
701
+ checkpoint.fileHashes[relativePath] = fileHash;
702
+ downloadedSize += result.size || 0;
703
+ } else {
704
+ // Track failed files
705
+ failedFiles.push({
706
+ path: relativePath,
707
+ error: result.error,
708
+ });
709
+ checkpoint.failedFiles = failedFiles;
710
+ }
711
+
712
+ processedCount++;
713
+
714
+ // Save checkpoint every 10 files
715
+ if (processedCount % 10 === 0) {
716
+ resumeManager.saveCheckpoint(checkpoint);
717
+ }
718
+
719
+ // Update progress bar
720
+ progressBar.increment(1, { downloadedSize });
721
+ } catch (error) {
722
+ // Handle interruption gracefully
723
+ if (error.name === "SIGINT") {
724
+ resumeManager.saveCheckpoint(checkpoint);
725
+ progressBar.stop();
726
+ console.log(
727
+ chalk.blue(`\n⏸️ Download interrupted. Progress saved.`)
728
+ );
729
+ console.log(chalk.blue(`💡 Run the same command again to resume.`));
730
+ return;
731
+ }
732
+
733
+ failedFiles.push({
734
+ path: item.path,
735
+ error: error.message,
736
+ });
737
+ checkpoint.failedFiles = failedFiles;
738
+ progressBar.increment(1, { downloadedSize });
739
+ }
740
+ }
741
+
742
+ progressBar.stop();
743
+ console.log(); // Add an empty line after progress bar
744
+
745
+ // Final checkpoint save
746
+ resumeManager.saveCheckpoint(checkpoint);
747
+
748
+ // Count results
749
+ const succeeded = checkpoint.downloadedFiles.length;
750
+ const failed = failedFiles.length;
751
+
752
+ if (failed > 0) {
753
+ console.log(
754
+ chalk.yellow(
755
+ `Downloaded ${succeeded} files successfully, ${failed} files failed`
756
+ )
757
+ );
758
+
759
+ if (failed <= 5) {
760
+ console.log(chalk.yellow("Failed files:"));
761
+ failedFiles.forEach((file) => {
762
+ console.log(chalk.yellow(` - ${file.path}: ${file.error}`));
763
+ });
764
+ }
765
+
766
+ console.log(
767
+ chalk.blue(`💡 Run the same command again to retry failed downloads`)
768
+ );
769
+ } else {
770
+ console.log(
771
+ chalk.green(`🎉 All ${succeeded} files downloaded successfully!`)
772
+ );
773
+ resumeManager.cleanupCheckpoint(url, outputDir);
774
+ }
775
+
776
+ console.log(chalk.green(`Folder cloned successfully!`));
777
+ } catch (error) {
778
+ // Save checkpoint on any error
779
+ if (checkpoint) {
780
+ resumeManager.saveCheckpoint(checkpoint);
781
+ }
782
+
783
+ console.error(chalk.red(`Error downloading folder: ${error.message}`));
784
+ throw error;
785
+ }
786
+ };
package/src/index.js CHANGED
@@ -1,10 +1,12 @@
1
1
  import { program } from "commander";
2
2
  import { parseGitHubUrl } from "./parser.js";
3
- import { downloadFolder } from "./downloader.js";
3
+ import { downloadFolder, downloadFolderWithResume } from "./downloader.js";
4
4
  import { downloadAndArchive } from "./archiver.js";
5
+ import { ResumeManager } from "./resumeManager.js";
5
6
  import { fileURLToPath } from "url";
6
7
  import { dirname, join, resolve } from "path";
7
8
  import fs from "fs";
9
+ import chalk from "chalk";
8
10
 
9
11
  // Get package.json for version
10
12
  const __filename = fileURLToPath(import.meta.url);
@@ -56,11 +58,50 @@ const initializeCLI = () => {
56
58
  program
57
59
  .version(packageJson.version)
58
60
  .description("Clone specific folders from GitHub repositories")
59
- .argument("<url>", "GitHub URL of the folder to clone")
61
+ .argument("[url]", "GitHub URL of the folder to clone")
60
62
  .option("-o, --output <directory>", "Output directory", process.cwd())
61
63
  .option("--zip [filename]", "Create ZIP archive of downloaded files")
64
+ .option("--no-resume", "Disable resume functionality")
65
+ .option("--force-restart", "Ignore existing checkpoints and start fresh")
66
+ .option("--list-checkpoints", "List all existing download checkpoints")
62
67
  .action(async (url, options) => {
63
68
  try {
69
+ // Handle list checkpoints option
70
+ if (options.listCheckpoints) {
71
+ const resumeManager = new ResumeManager();
72
+ const checkpoints = resumeManager.listCheckpoints();
73
+
74
+ if (checkpoints.length === 0) {
75
+ console.log(chalk.yellow("No download checkpoints found."));
76
+ return;
77
+ }
78
+
79
+ console.log(chalk.cyan("\n📋 Download Checkpoints:"));
80
+ checkpoints.forEach((cp, index) => {
81
+ console.log(chalk.blue(`\n${index + 1}. ID: ${cp.id}`));
82
+ console.log(` URL: ${cp.url}`);
83
+ console.log(` Output: ${cp.outputDir}`);
84
+ console.log(` Progress: ${cp.progress}`);
85
+ console.log(
86
+ ` Last Updated: ${new Date(cp.timestamp).toLocaleString()}`
87
+ );
88
+ if (cp.failedFiles > 0) {
89
+ console.log(chalk.yellow(` Failed Files: ${cp.failedFiles}`));
90
+ }
91
+ });
92
+ console.log();
93
+ return;
94
+ }
95
+
96
+ // URL is required for download operations
97
+ if (!url) {
98
+ console.error(
99
+ chalk.red("Error: URL is required for download operations")
100
+ );
101
+ console.log("Use --list-checkpoints to see existing downloads");
102
+ process.exit(1);
103
+ }
104
+
64
105
  console.log(`Parsing URL: ${url}`);
65
106
  const parsedUrl = parseGitHubUrl(url);
66
107
 
@@ -76,12 +117,27 @@ const initializeCLI = () => {
76
117
  const archiveName =
77
118
  typeof options.zip === "string" ? options.zip : null;
78
119
 
120
+ // Prepare download options
121
+ const downloadOptions = {
122
+ resume: options.resume !== false, // Default to true unless --no-resume
123
+ forceRestart: options.forceRestart || false,
124
+ };
125
+
79
126
  if (createArchive) {
80
127
  console.log(`Creating ZIP archive...`);
81
128
  await downloadAndArchive(parsedUrl, options.output, archiveName);
82
129
  } else {
83
130
  console.log(`Downloading folder to: ${options.output}`);
84
- await downloadFolder(parsedUrl, options.output);
131
+
132
+ if (downloadOptions.resume) {
133
+ await downloadFolderWithResume(
134
+ parsedUrl,
135
+ options.output,
136
+ downloadOptions
137
+ );
138
+ } else {
139
+ await downloadFolder(parsedUrl, options.output);
140
+ }
85
141
  }
86
142
 
87
143
  console.log("Operation completed successfully!");
@@ -0,0 +1,210 @@
1
+ import fs from "fs";
2
+ import path from "path";
3
+ import crypto from "crypto";
4
+ import { fileURLToPath } from "url";
5
+ import { dirname } from "path";
6
+
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = dirname(__filename);
9
+
10
+ /**
11
+ * Manages download checkpoints for resuming interrupted downloads
12
+ */
13
+ export class ResumeManager {
14
+ constructor(checkpointDir = ".git-ripper-checkpoints") {
15
+ this.checkpointDir = path.resolve(checkpointDir);
16
+ this.ensureCheckpointDir();
17
+ }
18
+
19
+ /**
20
+ * Ensure checkpoint directory exists
21
+ */
22
+ ensureCheckpointDir() {
23
+ if (!fs.existsSync(this.checkpointDir)) {
24
+ fs.mkdirSync(this.checkpointDir, { recursive: true });
25
+ }
26
+ }
27
+
28
+ /**
29
+ * Create unique checkpoint ID based on URL and output directory
30
+ * @param {string} url - GitHub URL
31
+ * @param {string} outputDir - Output directory path
32
+ * @returns {string} - Unique checkpoint ID
33
+ */
34
+ createCheckpointId(url, outputDir) {
35
+ const combined = `${url}|${path.resolve(outputDir)}`;
36
+ return crypto
37
+ .createHash("md5")
38
+ .update(combined)
39
+ .digest("hex")
40
+ .substring(0, 12);
41
+ }
42
+
43
+ /**
44
+ * Save download progress to checkpoint file
45
+ * @param {Object} checkpoint - Checkpoint data
46
+ * @returns {string} - Checkpoint ID
47
+ */
48
+ saveCheckpoint(checkpoint) {
49
+ const checkpointId = this.createCheckpointId(
50
+ checkpoint.url,
51
+ checkpoint.outputDir
52
+ );
53
+ const checkpointFile = path.join(
54
+ this.checkpointDir,
55
+ `${checkpointId}.json`
56
+ );
57
+
58
+ const checkpointData = {
59
+ ...checkpoint,
60
+ timestamp: new Date().toISOString(),
61
+ checkpointId,
62
+ };
63
+
64
+ try {
65
+ fs.writeFileSync(checkpointFile, JSON.stringify(checkpointData, null, 2));
66
+ return checkpointId;
67
+ } catch (error) {
68
+ console.error(`Failed to save checkpoint: ${error.message}`);
69
+ return null;
70
+ }
71
+ }
72
+
73
+ /**
74
+ * Load existing checkpoint if available
75
+ * @param {string} url - GitHub URL
76
+ * @param {string} outputDir - Output directory path
77
+ * @returns {Object|null} - Checkpoint data or null if not found
78
+ */
79
+ loadCheckpoint(url, outputDir) {
80
+ const checkpointId = this.createCheckpointId(url, outputDir);
81
+ const checkpointFile = path.join(
82
+ this.checkpointDir,
83
+ `${checkpointId}.json`
84
+ );
85
+
86
+ if (!fs.existsSync(checkpointFile)) {
87
+ return null;
88
+ }
89
+
90
+ try {
91
+ const data = fs.readFileSync(checkpointFile, "utf8");
92
+ return JSON.parse(data);
93
+ } catch (error) {
94
+ console.error(`Error loading checkpoint: ${error.message}`);
95
+ return null;
96
+ }
97
+ }
98
+
99
+ /**
100
+ * Verify downloaded file hasn't been corrupted
101
+ * @param {string} filepath - Path to the file
102
+ * @param {string} expectedHash - Expected MD5 hash
103
+ * @returns {boolean} - True if file is valid
104
+ */
105
+ verifyFileIntegrity(filepath, expectedHash) {
106
+ if (!fs.existsSync(filepath)) {
107
+ return false;
108
+ }
109
+
110
+ try {
111
+ const fileContent = fs.readFileSync(filepath);
112
+ const actualHash = crypto
113
+ .createHash("md5")
114
+ .update(fileContent)
115
+ .digest("hex");
116
+ return actualHash === expectedHash;
117
+ } catch (error) {
118
+ return false;
119
+ }
120
+ }
121
+
122
+ /**
123
+ * Calculate MD5 hash of file content
124
+ * @param {Buffer} content - File content
125
+ * @returns {string} - MD5 hash
126
+ */
127
+ calculateHash(content) {
128
+ return crypto.createHash("md5").update(content).digest("hex");
129
+ }
130
+
131
+ /**
132
+ * Remove checkpoint file after successful completion
133
+ * @param {string} url - GitHub URL
134
+ * @param {string} outputDir - Output directory path
135
+ */
136
+ cleanupCheckpoint(url, outputDir) {
137
+ const checkpointId = this.createCheckpointId(url, outputDir);
138
+ const checkpointFile = path.join(
139
+ this.checkpointDir,
140
+ `${checkpointId}.json`
141
+ );
142
+
143
+ if (fs.existsSync(checkpointFile)) {
144
+ try {
145
+ fs.unlinkSync(checkpointFile);
146
+ } catch (error) {
147
+ console.error(`Failed to cleanup checkpoint: ${error.message}`);
148
+ }
149
+ }
150
+ }
151
+
152
+ /**
153
+ * List all existing checkpoints
154
+ * @returns {Array} - Array of checkpoint information
155
+ */
156
+ listCheckpoints() {
157
+ if (!fs.existsSync(this.checkpointDir)) {
158
+ return [];
159
+ }
160
+
161
+ try {
162
+ const files = fs.readdirSync(this.checkpointDir);
163
+ const checkpoints = [];
164
+
165
+ for (const file of files) {
166
+ if (file.endsWith(".json")) {
167
+ try {
168
+ const filepath = path.join(this.checkpointDir, file);
169
+ const data = JSON.parse(fs.readFileSync(filepath, "utf8"));
170
+ checkpoints.push({
171
+ id: data.checkpointId,
172
+ url: data.url,
173
+ outputDir: data.outputDir,
174
+ timestamp: data.timestamp,
175
+ progress: `${data.downloadedFiles.length}/${data.totalFiles}`,
176
+ failedFiles: data.failedFiles.length,
177
+ });
178
+ } catch (error) {
179
+ // Skip corrupted checkpoint files
180
+ continue;
181
+ }
182
+ }
183
+ }
184
+
185
+ return checkpoints;
186
+ } catch (error) {
187
+ console.error(`Failed to list checkpoints: ${error.message}`);
188
+ return [];
189
+ }
190
+ }
191
+
192
+ /**
193
+ * Create a new checkpoint object
194
+ * @param {string} url - GitHub URL
195
+ * @param {string} outputDir - Output directory
196
+ * @param {number} totalFiles - Total number of files to download
197
+ * @returns {Object} - New checkpoint object
198
+ */
199
+ createNewCheckpoint(url, outputDir, totalFiles) {
200
+ return {
201
+ url,
202
+ outputDir: path.resolve(outputDir),
203
+ totalFiles,
204
+ downloadedFiles: [],
205
+ failedFiles: [],
206
+ fileHashes: {},
207
+ timestamp: new Date().toISOString(),
208
+ };
209
+ }
210
+ }