git-ripper 1.4.3 → 1.4.4

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "git-ripper",
3
- "version": "1.4.3",
3
+ "version": "1.4.4",
4
4
  "description": "CLI tool that lets you download specific folders from GitHub repositories without cloning the entire repo.",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -9,8 +9,7 @@
9
9
  },
10
10
  "scripts": {
11
11
  "test": "echo \"Error: no test specified\" && exit 1",
12
- "dev": "node bin/git-ripper.js",
13
- "lint": "eslint ."
12
+ "dev": "node bin/git-ripper.js"
14
13
  },
15
14
  "keywords": [
16
15
  "git",
@@ -29,10 +28,6 @@
29
28
  ],
30
29
  "author": "sairajb",
31
30
  "license": "MIT",
32
- "imports": {
33
- "#ansi-styles": "ansi-styles",
34
- "#supports-color": "supports-color"
35
- },
36
31
  "dependencies": {
37
32
  "ansi-styles": "^6.2.1",
38
33
  "archiver": "^6.0.1",
package/src/downloader.js CHANGED
@@ -7,6 +7,7 @@ import cliProgress from "cli-progress";
7
7
  import pLimit from "p-limit";
8
8
  import chalk from "chalk";
9
9
  import prettyBytes from "pretty-bytes";
10
+ import { ResumeManager } from "./resumeManager.js";
10
11
 
11
12
  // Set concurrency limit (adjustable based on network performance)
12
13
  // Reduced from 500 to 5 to prevent GitHub API rate limiting
@@ -497,4 +498,289 @@ const downloadFolder = async (
497
498
  };
498
499
 
499
500
  // Export functions in ESM format
500
- export { downloadFolder };
501
+ export { downloadFolder, downloadFolderWithResume };
502
+
503
+ /**
504
+ * Downloads all files from a folder in a GitHub repository with resume capability
505
+ */
506
+ const downloadFolderWithResume = async (
507
+ { owner, repo, branch, folderPath },
508
+ outputDir,
509
+ options = { resume: true, forceRestart: false }
510
+ ) => {
511
+ const { resume = true, forceRestart = false } = options;
512
+
513
+ if (!resume) {
514
+ return downloadFolder({ owner, repo, branch, folderPath }, outputDir);
515
+ }
516
+
517
+ const resumeManager = new ResumeManager();
518
+ const url = `https://github.com/${owner}/${repo}/tree/${branch || "main"}/${
519
+ folderPath || ""
520
+ }`;
521
+
522
+ // Clear checkpoint if force restart is requested
523
+ if (forceRestart) {
524
+ resumeManager.cleanupCheckpoint(url, outputDir);
525
+ }
526
+
527
+ // Check for existing checkpoint
528
+ let checkpoint = resumeManager.loadCheckpoint(url, outputDir);
529
+
530
+ if (checkpoint) {
531
+ console.log(
532
+ chalk.blue(
533
+ `🔄 Found previous download from ${new Date(
534
+ checkpoint.timestamp
535
+ ).toLocaleString()}`
536
+ )
537
+ );
538
+ console.log(
539
+ chalk.blue(
540
+ `📊 Progress: ${checkpoint.downloadedFiles.length}/${checkpoint.totalFiles} files completed`
541
+ )
542
+ );
543
+
544
+ // Verify integrity of existing files
545
+ const validFiles = [];
546
+ let corruptedCount = 0;
547
+
548
+ for (const filename of checkpoint.downloadedFiles) {
549
+ const filepath = path.join(outputDir, filename);
550
+ const expectedHash = checkpoint.fileHashes[filename];
551
+
552
+ if (
553
+ expectedHash &&
554
+ resumeManager.verifyFileIntegrity(filepath, expectedHash)
555
+ ) {
556
+ validFiles.push(filename);
557
+ } else {
558
+ corruptedCount++;
559
+ }
560
+ }
561
+
562
+ checkpoint.downloadedFiles = validFiles;
563
+ if (corruptedCount > 0) {
564
+ console.log(
565
+ chalk.yellow(
566
+ `🔧 Detected ${corruptedCount} corrupted files, will re-download`
567
+ )
568
+ );
569
+ }
570
+ console.log(chalk.green(`✅ Verified ${validFiles.length} existing files`));
571
+ }
572
+
573
+ console.log(
574
+ chalk.cyan(`Analyzing repository structure for ${owner}/${repo}...`)
575
+ );
576
+
577
+ try {
578
+ const contents = await fetchFolderContents(owner, repo, branch, folderPath);
579
+
580
+ if (!contents || contents.length === 0) {
581
+ console.log(
582
+ chalk.yellow(`No files found in ${folderPath || "repository root"}`)
583
+ );
584
+ console.log(chalk.green(`Folder cloned successfully!`));
585
+ return;
586
+ }
587
+
588
+ // Filter for blob type (files)
589
+ const files = contents.filter((item) => item.type === "blob");
590
+ const totalFiles = files.length;
591
+
592
+ if (totalFiles === 0) {
593
+ console.log(
594
+ chalk.yellow(
595
+ `No files found in ${
596
+ folderPath || "repository root"
597
+ } (only directories)`
598
+ )
599
+ );
600
+ console.log(chalk.green(`Folder cloned successfully!`));
601
+ return;
602
+ }
603
+
604
+ // Create new checkpoint if none exists
605
+ if (!checkpoint) {
606
+ checkpoint = resumeManager.createNewCheckpoint(
607
+ url,
608
+ outputDir,
609
+ totalFiles
610
+ );
611
+ console.log(
612
+ chalk.cyan(
613
+ `📥 Starting download of ${totalFiles} files from ${chalk.white(
614
+ owner + "/" + repo
615
+ )}...`
616
+ )
617
+ );
618
+ } else {
619
+ // Update total files in case repository changed
620
+ checkpoint.totalFiles = totalFiles;
621
+ console.log(chalk.cyan(`📥 Resuming download...`));
622
+ }
623
+
624
+ // Get remaining files to download
625
+ const remainingFiles = files.filter((item) => {
626
+ let relativePath = item.path;
627
+ if (folderPath && folderPath.trim() !== "") {
628
+ relativePath = item.path
629
+ .substring(folderPath.length)
630
+ .replace(/^\//, "");
631
+ }
632
+ return !checkpoint.downloadedFiles.includes(relativePath);
633
+ });
634
+
635
+ if (remainingFiles.length === 0) {
636
+ console.log(chalk.green(`🎉 All files already downloaded!`));
637
+ resumeManager.cleanupCheckpoint(url, outputDir);
638
+ return;
639
+ }
640
+
641
+ console.log(
642
+ chalk.cyan(`📥 Downloading ${remainingFiles.length} remaining files...`)
643
+ );
644
+
645
+ // Setup progress bar
646
+ const progressBar = new cliProgress.SingleBar({
647
+ format: createProgressRenderer(owner, repo, folderPath),
648
+ hideCursor: true,
649
+ clearOnComplete: false,
650
+ stopOnComplete: true,
651
+ forceRedraw: true,
652
+ });
653
+
654
+ // Calculate already downloaded size
655
+ let downloadedSize = 0;
656
+ for (const filename of checkpoint.downloadedFiles) {
657
+ const filepath = path.join(outputDir, filename);
658
+ try {
659
+ downloadedSize += fs.statSync(filepath).size;
660
+ } catch {
661
+ // File might be missing, will be re-downloaded
662
+ }
663
+ }
664
+
665
+ const startTime = Date.now();
666
+ let failedFiles = [...(checkpoint.failedFiles || [])];
667
+
668
+ // Start progress bar with current progress
669
+ progressBar.start(totalFiles, checkpoint.downloadedFiles.length, {
670
+ downloadedSize,
671
+ startTime,
672
+ });
673
+
674
+ // Process remaining files
675
+ let processedCount = 0;
676
+ for (const item of remainingFiles) {
677
+ try {
678
+ let relativePath = item.path;
679
+ if (folderPath && folderPath.trim() !== "") {
680
+ relativePath = item.path
681
+ .substring(folderPath.length)
682
+ .replace(/^\//, "");
683
+ }
684
+ const outputFilePath = path.join(outputDir, relativePath);
685
+
686
+ const result = await downloadFile(
687
+ owner,
688
+ repo,
689
+ branch,
690
+ item.path,
691
+ outputFilePath
692
+ );
693
+
694
+ if (result.success) {
695
+ // Calculate file hash for integrity checking
696
+ const fileContent = fs.readFileSync(outputFilePath);
697
+ const fileHash = resumeManager.calculateHash(fileContent);
698
+
699
+ // Update checkpoint
700
+ checkpoint.downloadedFiles.push(relativePath);
701
+ checkpoint.fileHashes[relativePath] = fileHash;
702
+ downloadedSize += result.size || 0;
703
+ } else {
704
+ // Track failed files
705
+ failedFiles.push({
706
+ path: relativePath,
707
+ error: result.error,
708
+ });
709
+ checkpoint.failedFiles = failedFiles;
710
+ }
711
+
712
+ processedCount++;
713
+
714
+ // Save checkpoint every 10 files
715
+ if (processedCount % 10 === 0) {
716
+ resumeManager.saveCheckpoint(checkpoint);
717
+ }
718
+
719
+ // Update progress bar
720
+ progressBar.increment(1, { downloadedSize });
721
+ } catch (error) {
722
+ // Handle interruption gracefully
723
+ if (error.name === "SIGINT") {
724
+ resumeManager.saveCheckpoint(checkpoint);
725
+ progressBar.stop();
726
+ console.log(
727
+ chalk.blue(`\n⏸️ Download interrupted. Progress saved.`)
728
+ );
729
+ console.log(chalk.blue(`💡 Run the same command again to resume.`));
730
+ return;
731
+ }
732
+
733
+ failedFiles.push({
734
+ path: item.path,
735
+ error: error.message,
736
+ });
737
+ checkpoint.failedFiles = failedFiles;
738
+ progressBar.increment(1, { downloadedSize });
739
+ }
740
+ }
741
+
742
+ progressBar.stop();
743
+ console.log(); // Add an empty line after progress bar
744
+
745
+ // Final checkpoint save
746
+ resumeManager.saveCheckpoint(checkpoint);
747
+
748
+ // Count results
749
+ const succeeded = checkpoint.downloadedFiles.length;
750
+ const failed = failedFiles.length;
751
+
752
+ if (failed > 0) {
753
+ console.log(
754
+ chalk.yellow(
755
+ `Downloaded ${succeeded} files successfully, ${failed} files failed`
756
+ )
757
+ );
758
+
759
+ if (failed <= 5) {
760
+ console.log(chalk.yellow("Failed files:"));
761
+ failedFiles.forEach((file) => {
762
+ console.log(chalk.yellow(` - ${file.path}: ${file.error}`));
763
+ });
764
+ }
765
+
766
+ console.log(
767
+ chalk.blue(`💡 Run the same command again to retry failed downloads`)
768
+ );
769
+ } else {
770
+ console.log(
771
+ chalk.green(`🎉 All ${succeeded} files downloaded successfully!`)
772
+ );
773
+ resumeManager.cleanupCheckpoint(url, outputDir);
774
+ }
775
+
776
+ console.log(chalk.green(`Folder cloned successfully!`));
777
+ } catch (error) {
778
+ // Save checkpoint on any error
779
+ if (checkpoint) {
780
+ resumeManager.saveCheckpoint(checkpoint);
781
+ }
782
+
783
+ console.error(chalk.red(`Error downloading folder: ${error.message}`));
784
+ throw error;
785
+ }
786
+ };
package/src/index.js CHANGED
@@ -1,10 +1,12 @@
1
1
  import { program } from "commander";
2
2
  import { parseGitHubUrl } from "./parser.js";
3
- import { downloadFolder } from "./downloader.js";
3
+ import { downloadFolder, downloadFolderWithResume } from "./downloader.js";
4
4
  import { downloadAndArchive } from "./archiver.js";
5
+ import { ResumeManager } from "./resumeManager.js";
5
6
  import { fileURLToPath } from "url";
6
7
  import { dirname, join, resolve } from "path";
7
8
  import fs from "fs";
9
+ import chalk from "chalk";
8
10
 
9
11
  // Get package.json for version
10
12
  const __filename = fileURLToPath(import.meta.url);
@@ -56,11 +58,50 @@ const initializeCLI = () => {
56
58
  program
57
59
  .version(packageJson.version)
58
60
  .description("Clone specific folders from GitHub repositories")
59
- .argument("<url>", "GitHub URL of the folder to clone")
61
+ .argument("[url]", "GitHub URL of the folder to clone")
60
62
  .option("-o, --output <directory>", "Output directory", process.cwd())
61
63
  .option("--zip [filename]", "Create ZIP archive of downloaded files")
64
+ .option("--no-resume", "Disable resume functionality")
65
+ .option("--force-restart", "Ignore existing checkpoints and start fresh")
66
+ .option("--list-checkpoints", "List all existing download checkpoints")
62
67
  .action(async (url, options) => {
63
68
  try {
69
+ // Handle list checkpoints option
70
+ if (options.listCheckpoints) {
71
+ const resumeManager = new ResumeManager();
72
+ const checkpoints = resumeManager.listCheckpoints();
73
+
74
+ if (checkpoints.length === 0) {
75
+ console.log(chalk.yellow("No download checkpoints found."));
76
+ return;
77
+ }
78
+
79
+ console.log(chalk.cyan("\n📋 Download Checkpoints:"));
80
+ checkpoints.forEach((cp, index) => {
81
+ console.log(chalk.blue(`\n${index + 1}. ID: ${cp.id}`));
82
+ console.log(` URL: ${cp.url}`);
83
+ console.log(` Output: ${cp.outputDir}`);
84
+ console.log(` Progress: ${cp.progress}`);
85
+ console.log(
86
+ ` Last Updated: ${new Date(cp.timestamp).toLocaleString()}`
87
+ );
88
+ if (cp.failedFiles > 0) {
89
+ console.log(chalk.yellow(` Failed Files: ${cp.failedFiles}`));
90
+ }
91
+ });
92
+ console.log();
93
+ return;
94
+ }
95
+
96
+ // URL is required for download operations
97
+ if (!url) {
98
+ console.error(
99
+ chalk.red("Error: URL is required for download operations")
100
+ );
101
+ console.log("Use --list-checkpoints to see existing downloads");
102
+ process.exit(1);
103
+ }
104
+
64
105
  console.log(`Parsing URL: ${url}`);
65
106
  const parsedUrl = parseGitHubUrl(url);
66
107
 
@@ -76,12 +117,27 @@ const initializeCLI = () => {
76
117
  const archiveName =
77
118
  typeof options.zip === "string" ? options.zip : null;
78
119
 
120
+ // Prepare download options
121
+ const downloadOptions = {
122
+ resume: options.resume !== false, // Default to true unless --no-resume
123
+ forceRestart: options.forceRestart || false,
124
+ };
125
+
79
126
  if (createArchive) {
80
127
  console.log(`Creating ZIP archive...`);
81
128
  await downloadAndArchive(parsedUrl, options.output, archiveName);
82
129
  } else {
83
130
  console.log(`Downloading folder to: ${options.output}`);
84
- await downloadFolder(parsedUrl, options.output);
131
+
132
+ if (downloadOptions.resume) {
133
+ await downloadFolderWithResume(
134
+ parsedUrl,
135
+ options.output,
136
+ downloadOptions
137
+ );
138
+ } else {
139
+ await downloadFolder(parsedUrl, options.output);
140
+ }
85
141
  }
86
142
 
87
143
  console.log("Operation completed successfully!");
@@ -0,0 +1,210 @@
1
+ import fs from "fs";
2
+ import path from "path";
3
+ import crypto from "crypto";
4
+ import { fileURLToPath } from "url";
5
+ import { dirname } from "path";
6
+
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = dirname(__filename);
9
+
10
+ /**
11
+ * Manages download checkpoints for resuming interrupted downloads
12
+ */
13
+ export class ResumeManager {
14
+ constructor(checkpointDir = ".git-ripper-checkpoints") {
15
+ this.checkpointDir = path.resolve(checkpointDir);
16
+ this.ensureCheckpointDir();
17
+ }
18
+
19
+ /**
20
+ * Ensure checkpoint directory exists
21
+ */
22
+ ensureCheckpointDir() {
23
+ if (!fs.existsSync(this.checkpointDir)) {
24
+ fs.mkdirSync(this.checkpointDir, { recursive: true });
25
+ }
26
+ }
27
+
28
+ /**
29
+ * Create unique checkpoint ID based on URL and output directory
30
+ * @param {string} url - GitHub URL
31
+ * @param {string} outputDir - Output directory path
32
+ * @returns {string} - Unique checkpoint ID
33
+ */
34
+ createCheckpointId(url, outputDir) {
35
+ const combined = `${url}|${path.resolve(outputDir)}`;
36
+ return crypto
37
+ .createHash("md5")
38
+ .update(combined)
39
+ .digest("hex")
40
+ .substring(0, 12);
41
+ }
42
+
43
+ /**
44
+ * Save download progress to checkpoint file
45
+ * @param {Object} checkpoint - Checkpoint data
46
+ * @returns {string} - Checkpoint ID
47
+ */
48
+ saveCheckpoint(checkpoint) {
49
+ const checkpointId = this.createCheckpointId(
50
+ checkpoint.url,
51
+ checkpoint.outputDir
52
+ );
53
+ const checkpointFile = path.join(
54
+ this.checkpointDir,
55
+ `${checkpointId}.json`
56
+ );
57
+
58
+ const checkpointData = {
59
+ ...checkpoint,
60
+ timestamp: new Date().toISOString(),
61
+ checkpointId,
62
+ };
63
+
64
+ try {
65
+ fs.writeFileSync(checkpointFile, JSON.stringify(checkpointData, null, 2));
66
+ return checkpointId;
67
+ } catch (error) {
68
+ console.error(`Failed to save checkpoint: ${error.message}`);
69
+ return null;
70
+ }
71
+ }
72
+
73
+ /**
74
+ * Load existing checkpoint if available
75
+ * @param {string} url - GitHub URL
76
+ * @param {string} outputDir - Output directory path
77
+ * @returns {Object|null} - Checkpoint data or null if not found
78
+ */
79
+ loadCheckpoint(url, outputDir) {
80
+ const checkpointId = this.createCheckpointId(url, outputDir);
81
+ const checkpointFile = path.join(
82
+ this.checkpointDir,
83
+ `${checkpointId}.json`
84
+ );
85
+
86
+ if (!fs.existsSync(checkpointFile)) {
87
+ return null;
88
+ }
89
+
90
+ try {
91
+ const data = fs.readFileSync(checkpointFile, "utf8");
92
+ return JSON.parse(data);
93
+ } catch (error) {
94
+ console.error(`Error loading checkpoint: ${error.message}`);
95
+ return null;
96
+ }
97
+ }
98
+
99
+ /**
100
+ * Verify downloaded file hasn't been corrupted
101
+ * @param {string} filepath - Path to the file
102
+ * @param {string} expectedHash - Expected MD5 hash
103
+ * @returns {boolean} - True if file is valid
104
+ */
105
+ verifyFileIntegrity(filepath, expectedHash) {
106
+ if (!fs.existsSync(filepath)) {
107
+ return false;
108
+ }
109
+
110
+ try {
111
+ const fileContent = fs.readFileSync(filepath);
112
+ const actualHash = crypto
113
+ .createHash("md5")
114
+ .update(fileContent)
115
+ .digest("hex");
116
+ return actualHash === expectedHash;
117
+ } catch (error) {
118
+ return false;
119
+ }
120
+ }
121
+
122
+ /**
123
+ * Calculate MD5 hash of file content
124
+ * @param {Buffer} content - File content
125
+ * @returns {string} - MD5 hash
126
+ */
127
+ calculateHash(content) {
128
+ return crypto.createHash("md5").update(content).digest("hex");
129
+ }
130
+
131
+ /**
132
+ * Remove checkpoint file after successful completion
133
+ * @param {string} url - GitHub URL
134
+ * @param {string} outputDir - Output directory path
135
+ */
136
+ cleanupCheckpoint(url, outputDir) {
137
+ const checkpointId = this.createCheckpointId(url, outputDir);
138
+ const checkpointFile = path.join(
139
+ this.checkpointDir,
140
+ `${checkpointId}.json`
141
+ );
142
+
143
+ if (fs.existsSync(checkpointFile)) {
144
+ try {
145
+ fs.unlinkSync(checkpointFile);
146
+ } catch (error) {
147
+ console.error(`Failed to cleanup checkpoint: ${error.message}`);
148
+ }
149
+ }
150
+ }
151
+
152
+ /**
153
+ * List all existing checkpoints
154
+ * @returns {Array} - Array of checkpoint information
155
+ */
156
+ listCheckpoints() {
157
+ if (!fs.existsSync(this.checkpointDir)) {
158
+ return [];
159
+ }
160
+
161
+ try {
162
+ const files = fs.readdirSync(this.checkpointDir);
163
+ const checkpoints = [];
164
+
165
+ for (const file of files) {
166
+ if (file.endsWith(".json")) {
167
+ try {
168
+ const filepath = path.join(this.checkpointDir, file);
169
+ const data = JSON.parse(fs.readFileSync(filepath, "utf8"));
170
+ checkpoints.push({
171
+ id: data.checkpointId,
172
+ url: data.url,
173
+ outputDir: data.outputDir,
174
+ timestamp: data.timestamp,
175
+ progress: `${data.downloadedFiles.length}/${data.totalFiles}`,
176
+ failedFiles: data.failedFiles.length,
177
+ });
178
+ } catch (error) {
179
+ // Skip corrupted checkpoint files
180
+ continue;
181
+ }
182
+ }
183
+ }
184
+
185
+ return checkpoints;
186
+ } catch (error) {
187
+ console.error(`Failed to list checkpoints: ${error.message}`);
188
+ return [];
189
+ }
190
+ }
191
+
192
+ /**
193
+ * Create a new checkpoint object
194
+ * @param {string} url - GitHub URL
195
+ * @param {string} outputDir - Output directory
196
+ * @param {number} totalFiles - Total number of files to download
197
+ * @returns {Object} - New checkpoint object
198
+ */
199
+ createNewCheckpoint(url, outputDir, totalFiles) {
200
+ return {
201
+ url,
202
+ outputDir: path.resolve(outputDir),
203
+ totalFiles,
204
+ downloadedFiles: [],
205
+ failedFiles: [],
206
+ fileHashes: {},
207
+ timestamp: new Date().toISOString(),
208
+ };
209
+ }
210
+ }