muaddib-scanner 2.2.6 → 2.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/muaddib.js +10 -1
- package/datasets/benign/packages-npm.txt +576 -77
- package/datasets/benign/packages-pypi.txt +146 -31
- package/datasets/ground-truth/README.md +54 -0
- package/datasets/ground-truth/known-malware.json +622 -0
- package/package.json +1 -1
- package/src/commands/evaluate.js +191 -31
- package/src/index.js +48 -0
- package/src/scanner/typosquat.js +13 -1
- package/tmp-test-pack.js +66 -0
package/src/commands/evaluate.js
CHANGED
|
@@ -3,11 +3,17 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Measures TPR (Ground Truth), FPR (Benign), and ADR (Adversarial).
|
|
5
5
|
* Saves versioned metrics to metrics/v{version}.json.
|
|
6
|
+
*
|
|
7
|
+
* Benign FPR: downloads real npm tarballs and scans actual source code
|
|
8
|
+
* with all 13+ scanners (AST, dataflow, obfuscation, entropy, etc.).
|
|
9
|
+
* Tarballs are cached in .muaddib-cache/benign-tarballs/ to avoid
|
|
10
|
+
* re-downloading on every run.
|
|
6
11
|
*/
|
|
7
12
|
|
|
8
13
|
const fs = require('fs');
|
|
9
14
|
const path = require('path');
|
|
10
|
-
const
|
|
15
|
+
const zlib = require('zlib');
|
|
16
|
+
const { execSync } = require('child_process');
|
|
11
17
|
const { run } = require('../index.js');
|
|
12
18
|
|
|
13
19
|
const ROOT = path.join(__dirname, '..', '..');
|
|
@@ -15,9 +21,11 @@ const GT_DIR = path.join(ROOT, 'tests', 'ground-truth');
|
|
|
15
21
|
const BENIGN_DIR = path.join(ROOT, 'datasets', 'benign');
|
|
16
22
|
const ADVERSARIAL_DIR = path.join(ROOT, 'datasets', 'adversarial');
|
|
17
23
|
const METRICS_DIR = path.join(ROOT, 'metrics');
|
|
24
|
+
const CACHE_DIR = path.join(ROOT, '.muaddib-cache', 'benign-tarballs');
|
|
18
25
|
|
|
19
26
|
const GT_THRESHOLD = 3;
|
|
20
27
|
const BENIGN_THRESHOLD = 20;
|
|
28
|
+
const PACK_TIMEOUT_MS = 30000;
|
|
21
29
|
|
|
22
30
|
const ADVERSARIAL_THRESHOLDS = {
|
|
23
31
|
// Vague 1 (20 samples)
|
|
@@ -102,45 +110,187 @@ async function evaluateGroundTruth() {
|
|
|
102
110
|
return { detected, total, tpr, details };
|
|
103
111
|
}
|
|
104
112
|
|
|
113
|
+
// =========================================================================
|
|
114
|
+
// 2. Benign — download real tarballs and scan actual source code
|
|
115
|
+
// =========================================================================
|
|
116
|
+
|
|
117
|
+
/**
 * Convert a package name (or npm spec) to a safe cache directory name.
 *   @scoped/pkg → _scoped_pkg
 *
 * Every character outside [A-Za-z0-9._-] becomes '_', so specs containing
 * characters that are illegal in directory names on some platforms
 * (e.g. < > : " | ? * on Windows) or path separators cannot break or
 * redirect the cache path. Ordinary and scoped package names map to the same
 * directory names as before.
 *
 * @param {string} pkg - Package name or spec as listed in the benign dataset.
 * @returns {string} A single path segment usable as a directory name.
 */
function pkgToCacheName(pkg) {
  const safe = String(pkg).replace(/[^A-Za-z0-9._-]/g, '_');
  // '', '.' and '..' would resolve to the cache root or its parent, which the
  // caller may later delete recursively — never return them as-is.
  return /^\.*$/.test(safe) ? `_${safe}` : safe;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Extract a .tgz file using Node.js built-in zlib + minimal tar parser.
 * Only extracts regular files (type '0' or NUL), including empty ones.
 *
 * Safety and format notes:
 *  - Entry paths are confined to destDir; an entry whose normalized path
 *    would land outside it (e.g. "../x") aborts extraction with an Error.
 *  - The POSIX ustar `prefix` field is honoured, so paths longer than 100
 *    bytes that were split across prefix/name are restored.
 *  - pax extended headers and GNU long-name entries are skipped, not
 *    interpreted.
 *
 * @param {string} tgzPath - Path to the gzip-compressed tarball.
 * @param {string} destDir - Directory to extract into.
 * @throws {Error} If the file cannot be read/decompressed, the archive is
 *   truncated, or an entry would be written outside destDir.
 */
function extractTgz(tgzPath, destDir) {
  const BLOCK_SIZE = 512;
  const tarData = zlib.gunzipSync(fs.readFileSync(tgzPath));
  const destRoot = path.resolve(destDir);
  const rootPrefix = destRoot.endsWith(path.sep) ? destRoot : destRoot + path.sep;

  // Header fields are NUL-terminated strings inside fixed-width slots.
  const readField = (header, start, length) => {
    const field = header.subarray(start, start + length);
    const nul = field.indexOf(0);
    return field.toString('utf8', 0, nul === -1 ? field.length : nul);
  };

  let offset = 0;
  while (offset + BLOCK_SIZE <= tarData.length) {
    const header = tarData.subarray(offset, offset + BLOCK_SIZE);

    // An all-zero block marks the end of the archive
    if (header.every(b => b === 0)) break;

    let name = readField(header, 0, 100);
    const parsedSize = parseInt(readField(header, 124, 12).trim(), 8);
    const size = parsedSize > 0 ? parsedSize : 0;
    const typeFlag = String.fromCharCode(header[156]);

    // POSIX ustar magic is "ustar\0"; GNU tar writes "ustar " and reuses the
    // prefix area for other data, so only trust the prefix for the former.
    if (readField(header, 257, 6) === 'ustar') {
      const prefix = readField(header, 345, 155);
      if (prefix) name = `${prefix}/${name}`;
    }

    offset += BLOCK_SIZE; // move past header

    const isRegularFile = typeFlag === '0' || typeFlag === '\0';
    // Old-style archives mark directories with a trailing slash and a NUL type
    if (name && isRegularFile && !name.endsWith('/')) {
      if (offset + size > tarData.length) {
        throw new Error(`Truncated tar archive: "${name}" declares ${size} bytes past end of data`);
      }

      // path.join normalizes ".." segments; anything that no longer sits
      // under destRoot is a traversal attempt.
      const filePath = path.join(destRoot, name);
      if (!filePath.startsWith(rootPrefix)) {
        throw new Error(`Refusing to extract "${name}": path resolves outside ${destRoot}`);
      }

      fs.mkdirSync(path.dirname(filePath), { recursive: true });
      fs.writeFileSync(filePath, tarData.subarray(offset, offset + size));
    }

    // Advance past data blocks (512-byte aligned)
    offset += Math.ceil(size / BLOCK_SIZE) * BLOCK_SIZE;
  }
}
|
|
160
|
+
|
|
105
161
|
/**
 * Download a package tarball via `npm pack` and extract with native Node.js.
 * Returns the path to the extracted package directory, or null on failure.
 * Uses a persistent cache to avoid re-downloading.
 *
 * The package spec is interpolated into a shell command, so it is validated
 * first: only `name`, `@scope/name`, optionally followed by `@version-or-tag`,
 * built from a conservative character set, is accepted. Anything else is
 * treated as a failed download instead of being handed to the shell.
 *
 * @param {string} pkg - Package name/spec to fetch.
 * @param {object} [options]
 * @param {boolean} [options.refreshBenign] - Ignore the cache and re-download.
 * @returns {string|null} Path to the extracted `package` directory, or null.
 */
function downloadAndExtract(pkg, options = {}) {
  const SAFE_SPEC_RE =
    /^(?:@[A-Za-z0-9][A-Za-z0-9._~-]*\/)?[A-Za-z0-9][A-Za-z0-9._~-]*(?:@[A-Za-z0-9._+~^-]+)?$/;

  const debug = (message) => {
    if (process.env.MUADDIB_DEBUG) {
      console.error(`\n [DEBUG] ${message}`);
    }
  };

  if (typeof pkg !== 'string' || !SAFE_SPEC_RE.test(pkg)) {
    debug(`npm pack ${pkg} failed: ${'unsupported package spec'}`);
    return null;
  }

  const pkgCacheDir = path.join(CACHE_DIR, pkgToCacheName(pkg));
  const extractedDir = path.join(pkgCacheDir, 'package');

  // Drop whatever is in this package's cache slot and report failure.
  const discard = () => {
    fs.rmSync(pkgCacheDir, { recursive: true, force: true });
    return null;
  };

  // Check cache first (unless refreshing)
  if (!options.refreshBenign && fs.existsSync(extractedDir)) {
    return extractedDir;
  }

  // Download via npm pack (cwd approach avoids Windows path issues)
  let tgzPath;
  try {
    // Start from an empty directory so a refresh (or a half-finished earlier
    // run) cannot leave stale files that would be scanned alongside the
    // freshly extracted ones.
    fs.rmSync(pkgCacheDir, { recursive: true, force: true });
    fs.mkdirSync(pkgCacheDir, { recursive: true });

    const output = execSync(`npm pack ${pkg}`, {
      cwd: pkgCacheDir,
      encoding: 'utf8',
      timeout: PACK_TIMEOUT_MS,
      stdio: ['pipe', 'pipe', 'pipe']
    });

    // npm prints the tarball filename on the last line; basename() keeps the
    // resulting path inside pkgCacheDir whatever that line contains.
    const tgzFilename = path.basename(output.trim().split('\n').pop().trim());
    if (!tgzFilename) {
      return discard();
    }
    tgzPath = path.join(pkgCacheDir, tgzFilename);
  } catch (err) {
    debug(`npm pack ${pkg} failed: ${(err.stderr || err.message || '').slice(0, 200)}`);
    return discard();
  }

  if (!fs.existsSync(tgzPath)) {
    return discard();
  }

  // Extract tarball using native Node.js (no shell tar dependency)
  try {
    extractTgz(tgzPath, pkgCacheDir);
  } catch (err) {
    debug(`extract ${pkg} failed: ${(err.message || '').slice(0, 200)}`);
    return discard();
  }

  // Clean up tarball to save space (best effort: a leftover .tgz is harmless)
  try { fs.unlinkSync(tgzPath); } catch { /* ignore */ }

  // npm tarballs are expected to unpack into a top-level "package" directory
  if (!fs.existsSync(extractedDir)) {
    return discard();
  }

  return extractedDir;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Measure the false-positive rate on the benign npm package list by
 * downloading each package's real source and scanning it.
 *
 * Packages that cannot be downloaded are recorded as skipped and excluded
 * from the FPR denominator.
 *
 * @param {object} [options]
 * @param {boolean} [options.json] - Suppress the progress line.
 * @param {number} [options.benignLimit] - Only test the first N packages.
 * @param {boolean} [options.refreshBenign] - Passed through to the downloader.
 * @returns {Promise<{flagged: number, total: number, scanned: number,
 *   skipped: number, fpr: number, details: object[]}>}
 */
async function evaluateBenign(options = {}) {
  const showProgress = () => !options.json && process.stdout.isTTY;

  const listed = fs
    .readFileSync(path.join(BENIGN_DIR, 'packages-npm.txt'), 'utf8')
    .split('\n')
    .map(line => line.trim())
    .filter(line => line && !line.startsWith('#'));

  // Apply limit if specified
  const limit = options.benignLimit || 0;
  const packages = limit > 0 ? listed.slice(0, limit) : listed;
  const total = packages.length;

  fs.mkdirSync(CACHE_DIR, { recursive: true });

  const details = [];

  for (const [index, pkg] of packages.entries()) {
    const progress = `[${index + 1}/${total}]`;

    // Progress indicator (overwrite line)
    if (showProgress()) {
      process.stdout.write(`\r [2/3] Benign ${progress} ${pkg}${''.padEnd(40)}`);
    }

    const extractedDir = downloadAndExtract(pkg, options);
    if (!extractedDir) {
      details.push({ name: pkg, score: 0, flagged: false, skipped: true, error: 'download failed' });
      continue;
    }

    const result = await silentScan(extractedDir);
    const score = result.summary.riskScore;
    const entry = { name: pkg, score, flagged: score > BENIGN_THRESHOLD };

    // Include threat details for flagged packages (for debugging FPs)
    if (entry.flagged && result.threats) {
      entry.threats = result.threats.map(threat => ({
        type: threat.type,
        severity: threat.severity,
        message: threat.message,
        file: threat.file
      }));
    }

    details.push(entry);
  }

  // Clear progress line
  if (showProgress()) {
    process.stdout.write('\r' + ''.padEnd(80) + '\r');
  }

  // Counters are derived from the per-package records collected above
  const flagged = details.filter(d => d.flagged).length;
  const skipped = details.filter(d => d.skipped).length;
  const scanned = total - skipped;
  const fpr = scanned > 0 ? flagged / scanned : 0;

  return { flagged, total, scanned, skipped, fpr, details };
}
|
|
145
295
|
|
|
146
296
|
/**
|
|
@@ -186,6 +336,11 @@ function saveMetrics(report) {
|
|
|
186
336
|
|
|
187
337
|
/**
|
|
188
338
|
* Main evaluate function
|
|
339
|
+
*
|
|
340
|
+
* Options:
|
|
341
|
+
* json — JSON output mode
|
|
342
|
+
* benignLimit — Only test first N benign packages
|
|
343
|
+
* refreshBenign — Force re-download of all tarballs
|
|
189
344
|
*/
|
|
190
345
|
async function evaluate(options = {}) {
|
|
191
346
|
const version = require('../../package.json').version;
|
|
@@ -198,9 +353,9 @@ async function evaluate(options = {}) {
|
|
|
198
353
|
const groundTruth = await evaluateGroundTruth();
|
|
199
354
|
|
|
200
355
|
if (!jsonMode) {
|
|
201
|
-
console.log(` [2/3] Benign packages...`);
|
|
356
|
+
console.log(` [2/3] Benign packages (real source code)...`);
|
|
202
357
|
}
|
|
203
|
-
const benign = await evaluateBenign();
|
|
358
|
+
const benign = await evaluateBenign(options);
|
|
204
359
|
|
|
205
360
|
if (!jsonMode) {
|
|
206
361
|
console.log(` [3/3] Adversarial samples...`);
|
|
@@ -226,7 +381,7 @@ async function evaluate(options = {}) {
|
|
|
226
381
|
|
|
227
382
|
console.log('');
|
|
228
383
|
console.log(` Ground Truth (TPR): ${groundTruth.detected}/${groundTruth.total} ${tprPct}%`);
|
|
229
|
-
console.log(` Benign (FPR): ${benign.flagged}/${benign.
|
|
384
|
+
console.log(` Benign (FPR): ${benign.flagged}/${benign.scanned} ${fprPct}% (${benign.skipped} skipped)`);
|
|
230
385
|
console.log(` Adversarial (ADR): ${adversarial.detected}/${adversarial.total} ${adrPct}%`);
|
|
231
386
|
console.log('');
|
|
232
387
|
|
|
@@ -240,12 +395,17 @@ async function evaluate(options = {}) {
|
|
|
240
395
|
console.log('');
|
|
241
396
|
}
|
|
242
397
|
|
|
243
|
-
// Show false positives
|
|
398
|
+
// Show false positives with threat details
|
|
244
399
|
const fps = benign.details.filter(d => d.flagged);
|
|
245
400
|
if (fps.length > 0) {
|
|
246
401
|
console.log(' False positives:');
|
|
247
402
|
for (const fp of fps) {
|
|
248
403
|
console.log(` ${fp.name}: score ${fp.score}`);
|
|
404
|
+
if (fp.threats) {
|
|
405
|
+
for (const t of fp.threats) {
|
|
406
|
+
console.log(` [${t.severity}] ${t.type}: ${t.message}${t.file ? ' (' + t.file + ')' : ''}`);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
249
409
|
}
|
|
250
410
|
console.log('');
|
|
251
411
|
}
|
package/src/index.js
CHANGED
|
@@ -64,6 +64,50 @@ const MAX_RISK_SCORE = 100;
|
|
|
64
64
|
|
|
65
65
|
const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
|
|
66
66
|
|
|
67
|
+
// ============================================
// FP REDUCTION POST-PROCESSING
// ============================================
// Legitimate frameworks produce high volumes of certain threat types that
// malware never does. This function downgrades severity when the count
// exceeds thresholds only seen in legitimate codebases.
const FP_COUNT_THRESHOLDS = {
  dynamic_require: { maxCount: 10, from: 'HIGH', to: 'LOW' },
  dangerous_call_function: { maxCount: 5, from: 'MEDIUM', to: 'LOW' },
  require_cache_poison: { maxCount: 3, from: 'CRITICAL', to: 'LOW' }
};

// Custom class prototypes that HTTP frameworks legitimately extend.
// Distinguished from dangerous core Node.js prototype hooks.
const FRAMEWORK_PROTOTYPES = ['Request', 'Response', 'App', 'Router'];
const FRAMEWORK_PROTO_RE = new RegExp(
  '^(' + FRAMEWORK_PROTOTYPES.join('|') + ')\\.prototype\\.'
);

/**
 * Downgrade, in place, the severity of threats that match known
 * false-positive patterns.
 *
 *  1. Count-based: when a threat type listed in FP_COUNT_THRESHOLDS occurs
 *     more than `maxCount` times in total, every occurrence whose severity
 *     equals the rule's `from` level is set to the rule's `to` level.
 *  2. prototype_hook threats at HIGH whose message starts with
 *     "<Request|Response|App|Router>.prototype." are set to MEDIUM.
 *
 * @param {Array<{type: string, severity: string, message?: string}>} threats
 *   Threat objects; mutated in place.
 * @returns {void}
 */
function applyFPReductions(threats) {
  // Count occurrences of each threat type (package-level, across all files).
  // A Map (rather than a plain object) keeps types such as "constructor" or
  // "toString" from colliding with inherited Object.prototype members.
  const typeCounts = new Map();
  for (const t of threats) {
    typeCounts.set(t.type, (typeCounts.get(t.type) || 0) + 1);
  }

  const hasRule = (type) => Object.prototype.hasOwnProperty.call(FP_COUNT_THRESHOLDS, type);

  for (const t of threats) {
    // Count-based downgrade: if a threat type appears too many times,
    // it's a framework/plugin system, not malware
    const rule = hasRule(t.type) ? FP_COUNT_THRESHOLDS[t.type] : undefined;
    if (rule && typeCounts.get(t.type) > rule.maxCount && t.severity === rule.from) {
      t.severity = rule.to;
    }

    // Prototype hook: framework class prototypes → MEDIUM
    // Core Node.js prototypes (http.IncomingMessage, net.Socket) stay CRITICAL
    // Browser/native APIs (globalThis.fetch, XMLHttpRequest) stay HIGH
    if (t.type === 'prototype_hook' && t.severity === 'HIGH' &&
        FRAMEWORK_PROTO_RE.test(t.message)) {
      t.severity = 'MEDIUM';
    }
  }
}
|
|
110
|
+
|
|
67
111
|
// Paranoid mode scanner
|
|
68
112
|
function scanParanoid(targetPath) {
|
|
69
113
|
const threats = [];
|
|
@@ -563,6 +607,10 @@ async function run(targetPath, options = {}) {
|
|
|
563
607
|
}
|
|
564
608
|
}
|
|
565
609
|
|
|
610
|
+
// FP reduction: legitimate frameworks produce high volumes of certain threat types.
|
|
611
|
+
// A malware package typically has 1-3 occurrences, not dozens.
|
|
612
|
+
applyFPReductions(deduped);
|
|
613
|
+
|
|
566
614
|
// Enrich each threat with rules
|
|
567
615
|
const enrichedThreats = deduped.map(t => {
|
|
568
616
|
const rule = getRule(t.type);
|
package/src/scanner/typosquat.js
CHANGED
|
@@ -72,7 +72,19 @@ const WHITELIST = new Set([
|
|
|
72
72
|
'eslint-config-prettier', 'eslint-plugin-prettier',
|
|
73
73
|
'eslint-scope', 'eslint-visitor-keys',
|
|
74
74
|
'esbuild-register',
|
|
75
|
-
'neo-async'
|
|
75
|
+
'neo-async',
|
|
76
|
+
|
|
77
|
+
// Packages with names close to other popular packages (not typosquats)
|
|
78
|
+
'chai', // resembles chalk (missing_char)
|
|
79
|
+
'pino', // resembles sinon (missing_char)
|
|
80
|
+
'ioredis', // resembles redis (extra prefix)
|
|
81
|
+
'bcryptjs', // resembles bcrypt (suffix)
|
|
82
|
+
'recast', // resembles react (extra_char)
|
|
83
|
+
'asyncdi', // resembles async (suffix)
|
|
84
|
+
'redux', // resembles redis (wrong_char)
|
|
85
|
+
'args', // resembles yargs (missing_char)
|
|
86
|
+
'oxlint', // resembles eslint (wrong_char)
|
|
87
|
+
'vasync' // resembles async (extra prefix)
|
|
76
88
|
]);
|
|
77
89
|
|
|
78
90
|
|
package/tmp-test-pack.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
const { execSync } = require('child_process');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const zlib = require('zlib');
|
|
5
|
+
|
|
6
|
+
// Scratch directory inside the benign-tarball cache used only by this
// manual smoke test; it is removed again at the end of the script.
const testDir = path.join(__dirname, '.muaddib-cache', 'benign-tarballs', '_test_express');
fs.mkdirSync(testDir, { recursive: true });

// Step 1: npm pack with cwd
console.log('Step 1: npm pack express (cwd)...');
try {
  const packOutput = execSync('npm pack express', {
    cwd: testDir,
    encoding: 'utf8',
    timeout: 30000
  });
  // npm prints the tarball filename on the last line of its output
  const outputLines = packOutput.trim().split('\n');
  const tarballName = outputLines[outputLines.length - 1].trim();
  console.log(' OK:', tarballName);

  // Step 2: extract with native Node.js
  console.log('Step 2: native extraction...');
  extractTgz(path.join(testDir, tarballName), testDir);

  const extractedNames = fs.readdirSync(path.join(testDir, 'package'));
  console.log(' Extracted files:', extractedNames.join(', '));
  console.log(' SUCCESS');
} catch (e) {
  console.log(' FAIL:', e.message.slice(0, 300));
}

// Cleanup
fs.rmSync(testDir, { recursive: true, force: true });
|
|
31
|
+
|
|
32
|
+
/**
 * Extract a .tgz file using Node.js built-in zlib + minimal tar parser.
 * Only extracts regular files (type '0' or NUL), including empty ones.
 *
 * Safety and format notes:
 *  - Entry paths are confined to destDir; an entry whose normalized path
 *    would land outside it (e.g. "../x") aborts extraction with an Error.
 *  - The POSIX ustar `prefix` field is honoured, so paths longer than 100
 *    bytes that were split across prefix/name are restored.
 *  - pax extended headers and GNU long-name entries are skipped, not
 *    interpreted.
 *
 * @param {string} tgzPath - Path to the gzip-compressed tarball.
 * @param {string} destDir - Directory to extract into.
 * @throws {Error} If the file cannot be read/decompressed, the archive is
 *   truncated, or an entry would be written outside destDir.
 */
function extractTgz(tgzPath, destDir) {
  const BLOCK_SIZE = 512;
  const tarData = zlib.gunzipSync(fs.readFileSync(tgzPath));
  const destRoot = path.resolve(destDir);
  const rootPrefix = destRoot.endsWith(path.sep) ? destRoot : destRoot + path.sep;

  // Header fields are NUL-terminated strings inside fixed-width slots.
  const readField = (header, start, length) => {
    const field = header.subarray(start, start + length);
    const nul = field.indexOf(0);
    return field.toString('utf8', 0, nul === -1 ? field.length : nul);
  };

  let offset = 0;
  while (offset + BLOCK_SIZE <= tarData.length) {
    const header = tarData.subarray(offset, offset + BLOCK_SIZE);

    // An all-zero block marks the end of the archive
    if (header.every(b => b === 0)) break;

    let name = readField(header, 0, 100);
    const parsedSize = parseInt(readField(header, 124, 12).trim(), 8);
    const size = parsedSize > 0 ? parsedSize : 0;
    const typeFlag = String.fromCharCode(header[156]);

    // POSIX ustar magic is "ustar\0"; GNU tar writes "ustar " and reuses the
    // prefix area for other data, so only trust the prefix for the former.
    if (readField(header, 257, 6) === 'ustar') {
      const prefix = readField(header, 345, 155);
      if (prefix) name = `${prefix}/${name}`;
    }

    offset += BLOCK_SIZE; // move past header

    const isRegularFile = typeFlag === '0' || typeFlag === '\0';
    // Old-style archives mark directories with a trailing slash and a NUL type
    if (name && isRegularFile && !name.endsWith('/')) {
      if (offset + size > tarData.length) {
        throw new Error(`Truncated tar archive: "${name}" declares ${size} bytes past end of data`);
      }

      // path.join normalizes ".." segments; anything that no longer sits
      // under destRoot is a traversal attempt.
      const filePath = path.join(destRoot, name);
      if (!filePath.startsWith(rootPrefix)) {
        throw new Error(`Refusing to extract "${name}": path resolves outside ${destRoot}`);
      }

      fs.mkdirSync(path.dirname(filePath), { recursive: true });
      fs.writeFileSync(filePath, tarData.subarray(offset, offset + size));
    }

    // Advance past data blocks (512-byte aligned)
    offset += Math.ceil(size / BLOCK_SIZE) * BLOCK_SIZE;
  }
}
|