muaddib-scanner 2.10.64 → 2.10.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/muaddib.js +30 -0
- package/package.json +1 -1
- package/src/ml/classifier.js +12 -5
- package/src/monitor/auto-labeler.js +344 -0
- package/src/monitor/daemon.js +13 -0
- package/src/monitor/queue.js +36 -10
package/bin/muaddib.js
CHANGED
|
@@ -687,6 +687,36 @@ if (command === 'version' || command === '--version' || command === '-v') {
|
|
|
687
687
|
console.log('Usage: muaddib report --now | --status');
|
|
688
688
|
process.exit(1);
|
|
689
689
|
}
|
|
690
|
+
} else if (command === 'relabel') {
|
|
691
|
+
if (wantHelp) {
|
|
692
|
+
console.log('Usage: muaddib relabel [--input <path>] [--output <path>] [--dry-run]');
|
|
693
|
+
console.log('');
|
|
694
|
+
console.log('Auto-relabel ML training data by checking registry takedown status.');
|
|
695
|
+
console.log('Verifies each package against npm/PyPI registries:');
|
|
696
|
+
console.log(' - npm 0.0.1-security → confirmed_malicious');
|
|
697
|
+
console.log(' - HTTP 404 + score >= 50 → confirmed_malicious');
|
|
698
|
+
console.log(' - Alive > 30 days + score < 20 → confirmed_benign');
|
|
699
|
+
console.log('');
|
|
700
|
+
console.log('Options:');
|
|
701
|
+
console.log(' --input <path> Input JSONL file (default: data/ml-training.jsonl)');
|
|
702
|
+
console.log(' --output <path> Output JSONL file (default: data/ml-training-relabeled.jsonl)');
|
|
703
|
+
console.log(' --dry-run Log changes without writing');
|
|
704
|
+
process.exit(0);
|
|
705
|
+
}
|
|
706
|
+
const { relabelDataset } = require('../src/monitor/auto-labeler.js');
|
|
707
|
+
let inputPath, outputPath;
|
|
708
|
+
for (let i = 0; i < options.length; i++) {
|
|
709
|
+
if (options[i] === '--input' && options[i + 1]) { inputPath = options[++i]; }
|
|
710
|
+
else if (options[i] === '--output' && options[i + 1]) { outputPath = options[++i]; }
|
|
711
|
+
}
|
|
712
|
+
const dryRun = options.includes('--dry-run');
|
|
713
|
+
relabelDataset({ input: inputPath, output: outputPath, dryRun }).then(summary => {
|
|
714
|
+
console.log(JSON.stringify(summary, null, 2));
|
|
715
|
+
process.exit(0);
|
|
716
|
+
}).catch(err => {
|
|
717
|
+
console.error('[ERROR]', err.message);
|
|
718
|
+
process.exit(1);
|
|
719
|
+
});
|
|
690
720
|
} else if (command === 'help') {
|
|
691
721
|
// muaddib help <command> — show per-command help
|
|
692
722
|
const helpCmd = options.filter(o => !o.startsWith('-'))[0];
|
package/package.json
CHANGED
package/src/ml/classifier.js
CHANGED
|
@@ -326,21 +326,28 @@ function classifyPackage(result, meta) {
|
|
|
326
326
|
return { prediction: 'bypass', probability: 1, reason: 'high_confidence_threat' };
|
|
327
327
|
}
|
|
328
328
|
|
|
329
|
-
// Guard rail 2b: bundler model
|
|
329
|
+
// Guard rail 2b: bundler model — LOG-ONLY mode
|
|
330
|
+
// DISABLED (2026-04-08): Model semi-collapsed — gives p≈0.37 for both bundler FPs
|
|
331
|
+
// and real malware (identical output despite 11/19 features diverging). Cannot
|
|
332
|
+
// discriminate. Safe (nothing filtered at threshold 0.1) but useless.
|
|
333
|
+
// Disabled until retrained alongside ML1 on corrected JSONL data.
|
|
330
334
|
if (isBundlerModelAvailable()) {
|
|
331
335
|
const bundlerVec = buildBundlerFeatureVector(result, meta);
|
|
332
336
|
const bundlerResult = predictBundler(bundlerVec);
|
|
333
|
-
|
|
337
|
+
// Log-only: record prediction for retraining validation
|
|
338
|
+
const roundedP = Math.round(bundlerResult.probability * 1000) / 1000;
|
|
339
|
+
// When retrained and validated, remove the 'false &&' guard below.
|
|
340
|
+
if (false && bundlerResult.prediction === 'clean') {
|
|
334
341
|
return {
|
|
335
342
|
prediction: 'fp_bundler',
|
|
336
|
-
probability:
|
|
343
|
+
probability: roundedP,
|
|
337
344
|
reason: 'ml_bundler_clean'
|
|
338
345
|
};
|
|
339
346
|
}
|
|
340
347
|
return {
|
|
341
348
|
prediction: 'bypass',
|
|
342
|
-
probability:
|
|
343
|
-
reason: 'ml_bundler_malicious'
|
|
349
|
+
probability: roundedP,
|
|
350
|
+
reason: bundlerResult.prediction === 'clean' ? 'ml_bundler_clean_disabled' : 'ml_bundler_malicious'
|
|
344
351
|
};
|
|
345
352
|
}
|
|
346
353
|
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Auto-labeler — registry takedown-based ML training label correction.
|
|
5
|
+
*
|
|
6
|
+
* Verifies packages in the JSONL training dataset against npm/PyPI registries:
|
|
7
|
+
* - npm `0.0.1-security` replacement → confirmed_malicious (npm Security takedown)
|
|
8
|
+
* - HTTP 404 + high score → confirmed_malicious (removed, high conviction)
|
|
9
|
+
* - HTTP 404 + low score → removed_unlabeled (removed, unknown intent)
|
|
10
|
+
* - Alive > 30 days + low score → confirmed_benign (survival heuristic)
|
|
11
|
+
* - Alive > 30 days + moderate score → likely_benign
|
|
12
|
+
*
|
|
13
|
+
* Never modifies the input JSONL — writes a new file.
|
|
14
|
+
* Reuses the shared HTTP semaphore to avoid starving monitor scans.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const fs = require('fs');
|
|
18
|
+
const path = require('path');
|
|
19
|
+
const https = require('https');
|
|
20
|
+
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
21
|
+
const { atomicWriteFileSync } = require('./state.js');
|
|
22
|
+
|
|
23
|
+
const DEFAULT_INPUT = path.join(__dirname, '..', '..', 'data', 'ml-training.jsonl');
|
|
24
|
+
const DEFAULT_OUTPUT = path.join(__dirname, '..', '..', 'data', 'ml-training-relabeled.jsonl');
|
|
25
|
+
const DEFAULT_DELAY_MS = 200; // 5 req/s max — gentle on registries
|
|
26
|
+
const SURVIVAL_DAYS = 30;
|
|
27
|
+
|
|
28
|
+
// Labels eligible for auto-relabeling
|
|
29
|
+
const RELABELABLE = new Set(['suspect', 'ml_clean', 'unconfirmed', 'clean']);
|
|
30
|
+
|
|
31
|
+
// --- HTTP helper (minimal, avoids circular deps with ingestion.js) ---
|
|
32
|
+
|
|
33
|
+
/**
 * GET a URL over HTTPS and parse the response body as JSON.
 *
 * Outcomes:
 *  - HTTP 404 resolves with the sentinel `{ _httpStatus: 404 }` so callers can
 *    distinguish "resource is gone" from a transport failure.
 *  - HTTP 301/302/303/307/308 carrying a Location header is followed, for
 *    HTTPS targets only, up to `maxRedirects` hops. Previously every 3xx was
 *    reported as an error, so a package that the registry serves behind a
 *    redirect could never be checked.
 *  - Any other non-2xx status, a socket error, a timeout, or a body that is
 *    not valid JSON rejects with an Error.
 *
 * @param {string} url - absolute https:// URL
 * @param {number} [timeoutMs=15000] - socket inactivity timeout in ms
 * @param {number} [maxRedirects=3] - remaining redirect hops allowed
 * @returns {Promise<Object>} parsed JSON body, or `{ _httpStatus: 404 }`
 */
function httpsGetJson(url, timeoutMs = 15000, maxRedirects = 3) {
  return new Promise((resolve, reject) => {
    const req = https.get(url, { timeout: timeoutMs }, (res) => {
      const status = res.statusCode;

      if (status === 404) {
        res.resume();
        return resolve({ _httpStatus: 404 });
      }

      const isRedirect = status === 301 || status === 302 || status === 303 ||
        status === 307 || status === 308;
      if (isRedirect && res.headers && res.headers.location) {
        res.resume();
        if (maxRedirects <= 0) {
          return reject(new Error(`Too many redirects for ${url}`));
        }
        let target;
        try {
          // Location may be relative; resolve it against the current URL.
          target = new URL(res.headers.location, url);
        } catch (err) {
          return reject(new Error(`Invalid redirect for ${url}: ${err.message}`));
        }
        if (target.protocol !== 'https:') {
          // Never downgrade to plain HTTP when following a registry redirect.
          return reject(new Error(`Refusing non-HTTPS redirect for ${url}`));
        }
        return resolve(httpsGetJson(target.href, timeoutMs, maxRedirects - 1));
      }

      if (status < 200 || status >= 300) {
        res.resume();
        return reject(new Error(`HTTP ${status} for ${url}`));
      }

      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('end', () => {
        try {
          const body = Buffer.concat(chunks).toString('utf8');
          resolve(JSON.parse(body));
        } catch (err) {
          reject(new Error(`JSON parse error for ${url}: ${err.message}`));
        }
      });
      res.on('error', reject);
    });

    req.on('error', reject);
    req.on('timeout', () => {
      // destroy(err) tears the socket down and surfaces err through the
      // 'error' listener above, so the promise is rejected exactly once.
      req.destroy(new Error(`Timeout for ${url}`));
    });
  });
}
|
|
63
|
+
|
|
64
|
+
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - delay before the returned promise settles
 * @returns {Promise<void>} resolves (with no value) once the timer fires
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
67
|
+
|
|
68
|
+
// --- Registry status checks ---
|
|
69
|
+
|
|
70
|
+
/**
 * Check npm registry status for a package.
 *
 * Holds one shared registry slot for the duration of the request and always
 * releases it. Never rejects because of a failed request: transport and
 * parse failures are reported as `{ status: 'error', detail }`.
 *
 * Status values:
 *  - 'removed'           — registry answered 404, or the document carries a
 *                          `time.unpublished` entry and no `latest` dist-tag
 *                          (the package was unpublished but the registry still
 *                          serves a stub document for it)
 *  - 'security_takedown' — `dist-tags.latest` is `0.0.1-security`
 *  - 'alive'             — anything else that parsed successfully
 *  - 'error'             — the lookup itself failed
 *
 * @param {string} name - package name
 * @returns {Promise<{status: string, latestVersion?: string, detail?: string}>}
 */
async function checkNpmStatus(name) {
  await acquireRegistrySlot();
  try {
    const data = await httpsGetJson(`https://registry.npmjs.org/${encodeURIComponent(name)}`);

    if (data._httpStatus === 404) {
      return { status: 'removed' };
    }

    const latest = data['dist-tags'] && data['dist-tags'].latest;
    if (latest === '0.0.1-security') {
      return { status: 'security_takedown', latestVersion: latest };
    }

    // An unpublished package must not count as "alive": the survival
    // heuristic would otherwise promote it to confirmed_benign after 30 days.
    if (!latest && data.time && data.time.unpublished) {
      return { status: 'removed' };
    }

    return { status: 'alive', latestVersion: latest || 'unknown' };
  } catch (err) {
    return { status: 'error', detail: err.message };
  } finally {
    releaseRegistrySlot();
  }
}
|
|
96
|
+
|
|
97
|
+
/**
 * Check PyPI registry status for a package.
 *
 * Takes a shared registry slot for the duration of the request, exactly like
 * checkNpmStatus, so PyPI lookups are throttled together with every other
 * registry request instead of bypassing the limiter. The slot is always
 * released, including when the request fails.
 *
 * Status values:
 *  - 'removed' — registry answered 404
 *  - 'alive'   — any other successfully parsed response
 *  - 'error'   — the lookup itself failed (see `detail`)
 *
 * @param {string} name - package name
 * @returns {Promise<{status: string, detail?: string}>}
 */
async function checkPyPIStatus(name) {
  await acquireRegistrySlot();
  try {
    const data = await httpsGetJson(`https://pypi.org/pypi/${encodeURIComponent(name)}/json`);

    if (data._httpStatus === 404) {
      return { status: 'removed' };
    }

    return { status: 'alive' };
  } catch (err) {
    return { status: 'error', detail: err.message };
  } finally {
    releaseRegistrySlot();
  }
}
|
|
115
|
+
|
|
116
|
+
// --- Label computation ---
|
|
117
|
+
|
|
118
|
+
/**
 * Decide which label, if any, a training record should receive given what
 * the registry currently reports for its package.
 *
 * Decision table:
 *   record already carries a final label               → null (left alone)
 *   status 'security_takedown'                         → confirmed_malicious
 *   status 'removed', score >= 50                      → confirmed_malicious
 *   status 'removed', score <  50                      → removed_unlabeled
 *   status 'alive', age >= SURVIVAL_DAYS, score <  20  → confirmed_benign
 *   status 'alive', age >= SURVIVAL_DAYS, score 20-34  → likely_benign
 *   status 'alive', anything else                      → null (too early, or sleeper risk)
 *   any other status                                   → null
 *
 * A missing score counts as 0 and a missing timestamp as age 0.
 *
 * @param {Object} record - JSONL training record (reads: score, timestamp, label)
 * @param {{status: string}} registryStatus - from checkNpmStatus/checkPyPIStatus
 * @returns {{label: string, source: string} | null} new label or null if no change
 */
function computeNewLabel(record, registryStatus) {
  const registryState = registryStatus.status;
  const riskScore = record.score || 0;

  // Labels that are already final are never overwritten.
  const finalLabels = ['confirmed_malicious', 'confirmed_benign', 'fp', 'confirmed'];
  if (finalLabels.includes(record.label)) {
    return null;
  }

  switch (registryState) {
    case 'security_takedown':
      return { label: 'confirmed_malicious', source: 'npm_security_takedown' };

    case 'removed':
      return riskScore >= 50
        ? { label: 'confirmed_malicious', source: 'registry_removed_high_score' }
        : { label: 'removed_unlabeled', source: 'registry_removed_low_score' };

    case 'alive': {
      const msPerDay = 24 * 60 * 60 * 1000;
      let ageDays = 0;
      if (record.timestamp) {
        ageDays = (Date.now() - new Date(record.timestamp).getTime()) / msPerDay;
      }

      // Written as a negated >= so an unparseable timestamp (NaN age) also bails out.
      if (!(ageDays >= SURVIVAL_DAYS)) {
        return null;
      }
      if (riskScore < 20) {
        return { label: 'confirmed_benign', source: 'survival_30d' };
      }
      if (riskScore < 35) {
        return { label: 'likely_benign', source: 'survival_30d_moderate' };
      }
      // Score of 35 or more: keep the current label (sleeper risk).
      return null;
    }

    default:
      return null;
  }
}
|
|
175
|
+
|
|
176
|
+
// --- Dataset relabeling ---
|
|
177
|
+
|
|
178
|
+
/**
 * Read a JSONL training file, check each unique relabelable package against
 * its registry, and write a relabeled copy. The input file is never modified.
 *
 * Steps:
 *  1. Parse the input line by line. Blank lines are dropped; lines that are
 *     not valid JSON are kept and written back verbatim.
 *  2. Group records whose label is in RELABELABLE by `ecosystem/name`
 *     (ecosystem defaults to 'npm'), keeping the highest score and earliest
 *     timestamp seen per package. Records without a non-empty string `name`
 *     are skipped: they cannot be looked up, and keying them as
 *     "npm/undefined" would query an unrelated package and relabel them from
 *     its status.
 *  3. Query the registry once per package ('npm' and 'pypi' only; other
 *     ecosystems count as unchanged) and compute the new label.
 *  4. Apply each package's new label to all of its relabelable records,
 *     stamping `relabel_source` and `relabel_timestamp`.
 *  5. Unless `dryRun` is set, write the result atomically to the output path.
 *
 * @param {Object} [options]
 * @param {string} [options.input] - input JSONL path
 * @param {string} [options.output] - output JSONL path
 * @param {boolean} [options.dryRun] - log changes without writing
 * @param {number} [options.delayMs] - ms between registry requests
 * @returns {Promise<Object>} summary stats (checked, relabeled_*, removed_unlabeled,
 *   unchanged, errors, records_updated)
 * @throws {Error} if the input file does not exist
 */
async function relabelDataset(options = {}) {
  const inputPath = options.input || DEFAULT_INPUT;
  const outputPath = options.output || DEFAULT_OUTPUT;
  const dryRun = options.dryRun || false;
  const delayMs = options.delayMs != null ? options.delayMs : DEFAULT_DELAY_MS;

  // Package identity for a parsed record, or null when it has no usable name.
  const packageKeyOf = (data) => (
    typeof data.name === 'string' && data.name !== ''
      ? `${data.ecosystem || 'npm'}/${data.name}`
      : null
  );

  // 1. Read records
  if (!fs.existsSync(inputPath)) {
    throw new Error(`Input file not found: ${inputPath}`);
  }
  const lines = fs.readFileSync(inputPath, 'utf8').split('\n');
  const records = [];
  for (let i = 0; i < lines.length; i++) {
    if (!lines[i].trim()) continue;
    let data = null;
    try {
      data = JSON.parse(lines[i].trim());
    } catch {
      // Unparseable line: keep data null so it is passed through untouched.
    }
    records.push({ idx: i, data, raw: lines[i] });
  }

  // 2. Extract unique packages eligible for relabeling
  const packageMap = new Map(); // key → { name, ecosystem, score, timestamp, indices[] }
  for (const rec of records) {
    if (!rec.data) continue;
    if (!RELABELABLE.has(rec.data.label)) continue;
    const key = packageKeyOf(rec.data);
    if (key === null) continue;

    let pkg = packageMap.get(key);
    if (!pkg) {
      pkg = {
        name: rec.data.name,
        ecosystem: rec.data.ecosystem || 'npm',
        score: rec.data.score || 0,
        timestamp: rec.data.timestamp,
        indices: []
      };
      packageMap.set(key, pkg);
    }
    pkg.indices.push(rec.idx);
    // Use highest score seen for this package
    if ((rec.data.score || 0) > pkg.score) {
      pkg.score = rec.data.score;
    }
    // Use earliest timestamp
    if (rec.data.timestamp && (!pkg.timestamp || rec.data.timestamp < pkg.timestamp)) {
      pkg.timestamp = rec.data.timestamp;
    }
  }

  console.log(`[RELABEL] ${records.length} records, ${packageMap.size} unique packages to check`);

  // 3. Check each package against registry
  const summary = {
    checked: 0,
    relabeled_malicious: 0,
    relabeled_benign: 0,
    relabeled_likely_benign: 0,
    removed_unlabeled: 0,
    unchanged: 0,
    errors: 0,
    records_updated: 0
  };

  const labelChanges = new Map(); // packageKey → { label, source }

  for (const [key, pkg] of packageMap) {
    let registryStatus;
    try {
      if (pkg.ecosystem === 'npm') {
        registryStatus = await checkNpmStatus(pkg.name);
      } else if (pkg.ecosystem === 'pypi') {
        registryStatus = await checkPyPIStatus(pkg.name);
      } else {
        // No registry check exists for this ecosystem.
        summary.unchanged++;
        summary.checked++;
        continue;
      }
    } catch (err) {
      summary.errors++;
      summary.checked++;
      continue;
    }

    if (registryStatus.status === 'error') {
      summary.errors++;
      summary.checked++;
      if (delayMs > 0) await sleep(delayMs);
      continue;
    }

    const newLabel = computeNewLabel(pkg, registryStatus);
    summary.checked++;

    if (newLabel) {
      labelChanges.set(key, newLabel);
      if (newLabel.label === 'confirmed_malicious') summary.relabeled_malicious++;
      else if (newLabel.label === 'confirmed_benign') summary.relabeled_benign++;
      else if (newLabel.label === 'likely_benign') summary.relabeled_likely_benign++;
      else if (newLabel.label === 'removed_unlabeled') summary.removed_unlabeled++;

      if (dryRun) {
        console.log(`[RELABEL] DRY-RUN: ${key} → ${newLabel.label} (${newLabel.source}, score=${pkg.score}, status=${registryStatus.status})`);
      }
    } else {
      summary.unchanged++;
    }

    if (delayMs > 0) await sleep(delayMs);
  }

  // 4. Apply label changes to records
  const relabelTimestamp = new Date().toISOString(); // one stamp for the whole run
  const outputLines = [];
  for (const rec of records) {
    if (!rec.data) {
      outputLines.push(rec.raw);
      continue;
    }
    const key = packageKeyOf(rec.data);
    const change = key === null ? undefined : labelChanges.get(key);
    if (change && RELABELABLE.has(rec.data.label)) {
      rec.data.label = change.label;
      rec.data.relabel_source = change.source;
      rec.data.relabel_timestamp = relabelTimestamp;
      outputLines.push(JSON.stringify(rec.data));
      summary.records_updated++;
    } else {
      outputLines.push(rec.raw);
    }
  }

  // 5. Write output
  if (!dryRun) {
    const dir = path.dirname(outputPath);
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    atomicWriteFileSync(outputPath, outputLines.join('\n'));
    console.log(`[RELABEL] Written ${outputLines.length} records to ${path.basename(outputPath)} (${summary.records_updated} updated)`);
  } else {
    console.log(`[RELABEL] DRY-RUN complete: ${summary.records_updated} records would be updated`);
  }

  console.log(`[RELABEL] Summary: ${summary.relabeled_malicious} malicious, ${summary.relabeled_benign} benign, ${summary.relabeled_likely_benign} likely_benign, ${summary.removed_unlabeled} removed_unlabeled, ${summary.unchanged} unchanged, ${summary.errors} errors`);

  return summary;
}
|
|
332
|
+
|
|
333
|
+
module.exports = {
|
|
334
|
+
checkNpmStatus,
|
|
335
|
+
checkPyPIStatus,
|
|
336
|
+
computeNewLabel,
|
|
337
|
+
relabelDataset,
|
|
338
|
+
// Constants (for testing)
|
|
339
|
+
RELABELABLE,
|
|
340
|
+
SURVIVAL_DAYS,
|
|
341
|
+
DEFAULT_INPUT,
|
|
342
|
+
DEFAULT_OUTPUT,
|
|
343
|
+
DEFAULT_DELAY_MS
|
|
344
|
+
};
|
package/src/monitor/daemon.js
CHANGED
|
@@ -558,6 +558,19 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
558
558
|
// Daily webhook report at 08:00 Paris time
|
|
559
559
|
if (isDailyReportDue(stats)) {
|
|
560
560
|
await sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCache);
|
|
561
|
+
// Auto-relabel JSONL training data after daily report (once per day).
|
|
562
|
+
// Checks registry takedown status for unconfirmed packages.
|
|
563
|
+
try {
|
|
564
|
+
const { relabelDataset } = require('./auto-labeler.js');
|
|
565
|
+
const summary = await relabelDataset({});
|
|
566
|
+
const totalRelabeled = summary.relabeled_malicious + summary.relabeled_benign + summary.relabeled_likely_benign;
|
|
567
|
+
if (totalRelabeled > 0) {
|
|
568
|
+
console.log(`[MONITOR] Auto-relabel: ${summary.relabeled_malicious} malicious, ${summary.relabeled_benign} benign, ${summary.relabeled_likely_benign} likely_benign (${summary.checked} checked)`);
|
|
569
|
+
}
|
|
570
|
+
} catch (err) {
|
|
571
|
+
// Non-fatal: relabel failure must never crash the monitor
|
|
572
|
+
console.error(`[MONITOR] Auto-relabel failed: ${err.message}`);
|
|
573
|
+
}
|
|
561
574
|
}
|
|
562
575
|
|
|
563
576
|
// Short pause before re-checking queue — yields event loop for poll interval
|
package/src/monitor/queue.js
CHANGED
|
@@ -413,11 +413,15 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
413
413
|
// First-publish detection: used for sandbox priority below
|
|
414
414
|
const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
|
|
415
415
|
|
|
416
|
-
//
|
|
417
|
-
//
|
|
418
|
-
//
|
|
416
|
+
// Fetch npm registry metadata for ALL npm packages (not just those with findings).
|
|
417
|
+
// Needed for: (1) isFirstPublishHighRisk decision, (2) ML classifier features,
|
|
418
|
+
// (3) JSONL training records — clean packages MUST have metadata to prevent
|
|
419
|
+
// data leakage (model learning "metadata=0 → clean" instead of behavioral signals).
|
|
420
|
+
// Cost: near-zero for npm packages because temporal checks (line ~1014) already
|
|
421
|
+
// pre-fetch registry metadata into temporal-analysis._metadataCache, and
|
|
422
|
+
// getPackageMetadata() reads this cache first (npm-registry.js:87-95).
|
|
419
423
|
let npmRegistryMeta = null;
|
|
420
|
-
if (
|
|
424
|
+
if (ecosystem === 'npm') {
|
|
421
425
|
try {
|
|
422
426
|
const { getPackageMetadata } = require('../scanner/npm-registry.js');
|
|
423
427
|
npmRegistryMeta = await getPackageMetadata(name);
|
|
@@ -589,19 +593,43 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
589
593
|
console.log(`[MONITOR] FINDINGS: ${name}@${version} → ${formatFindings(result)}`);
|
|
590
594
|
|
|
591
595
|
// ML Phase 2: classifier filter for T1 zone (score 20-34)
|
|
592
|
-
// Reduces FP webhook noise by filtering clean packages before sandbox/webhook.
|
|
593
596
|
// Guard rails in classifyPackage() ensure HC types and high-score packages are never suppressed.
|
|
594
597
|
// Hoisted so trySendWebhook can use ML result to prevent suppression (p >= 0.90).
|
|
595
|
-
//
|
|
598
|
+
//
|
|
599
|
+
// DISABLED (2026-04-08): Model has collapsed — predicts p≈0.002 for ALL inputs (always "clean"),
|
|
600
|
+
// including clearly malicious patterns (lifecycle+exec+staged_payload). This suppresses real
|
|
601
|
+
// threats as ml_clean (false negatives). Disabled until model is retrained on corrected JSONL
|
|
602
|
+
// data with balanced labels. The classifier still runs in LOG-ONLY mode to collect data for
|
|
603
|
+
// retraining validation, but its prediction is never used for filtering.
|
|
604
|
+
//
|
|
605
|
+
// Guards added: ecosystem === 'npm' (PyPI has no npm registry metadata),
|
|
606
|
+
// npmRegistryMeta fallback fetch (ensure metadata is never null for ML features).
|
|
596
607
|
let mlResult = null;
|
|
597
608
|
const riskScore = result.summary.riskScore || 0;
|
|
598
|
-
if ((tier === '1a' || tier === '1b') && riskScore >= 20 && riskScore < 35) {
|
|
609
|
+
if ((tier === '1a' || tier === '1b') && riskScore >= 20 && riskScore < 35 && ecosystem === 'npm') {
|
|
599
610
|
try {
|
|
600
611
|
const { classifyPackage, isModelAvailable } = require('../ml/classifier.js');
|
|
601
612
|
if (isModelAvailable()) {
|
|
613
|
+
// Defensive: ensure npmRegistryMeta is fetched (should already be from line ~420,
|
|
614
|
+
// but network failures can silently leave it null)
|
|
615
|
+
if (!npmRegistryMeta) {
|
|
616
|
+
try {
|
|
617
|
+
const { getPackageMetadata } = require('../scanner/npm-registry.js');
|
|
618
|
+
npmRegistryMeta = await getPackageMetadata(name);
|
|
619
|
+
if (!npmRegistryMeta) {
|
|
620
|
+
console.warn(`[ML] Registry metadata unavailable for ${name} — ML features will be zero-filled`);
|
|
621
|
+
}
|
|
622
|
+
} catch (fetchErr) {
|
|
623
|
+
console.warn(`[ML] Registry metadata fetch failed for ${name}: ${fetchErr.message}`);
|
|
624
|
+
}
|
|
625
|
+
}
|
|
602
626
|
const enrichedMeta = { npmRegistryMeta, fileCountTotal, hasTests, unpackedSize: meta.unpackedSize, registryMeta: meta };
|
|
603
627
|
mlResult = classifyPackage(result, enrichedMeta);
|
|
604
|
-
|
|
628
|
+
// LOG-ONLY: record ML prediction for retraining data but do NOT filter.
|
|
629
|
+
// When model is retrained and validated, remove the 'false &&' guard below.
|
|
630
|
+
console.log(`[MONITOR] ML LOG-ONLY: ${name}@${version} (prediction=${mlResult.prediction}, p=${mlResult.probability}, score=${riskScore})`);
|
|
631
|
+
if (false && mlResult.prediction === 'clean') {
|
|
632
|
+
// DISABLED: model collapsed (p≈0.002 for all inputs). Re-enable after retrain.
|
|
605
633
|
console.log(`[MONITOR] ML CLEAN: ${name}@${version} (p=${mlResult.probability}, score=${riskScore})`);
|
|
606
634
|
stats.mlFiltered++;
|
|
607
635
|
stats.scanned++;
|
|
@@ -612,8 +640,6 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
612
640
|
recordTrainingSample(result, { name, version, ecosystem, label: 'ml_clean', tier, registryMeta: meta, unpackedSize: meta.unpackedSize, npmRegistryMeta, fileCountTotal, hasTests });
|
|
613
641
|
return { sandboxResult: null, mlFiltered: true, tier };
|
|
614
642
|
}
|
|
615
|
-
// Not clean — proceed normally
|
|
616
|
-
console.log(`[MONITOR] ML SUSPECT: ${name}@${version} (p=${mlResult.probability}, reason=${mlResult.reason})`);
|
|
617
643
|
}
|
|
618
644
|
} catch (err) {
|
|
619
645
|
// Non-fatal: ML failure must never block the scan pipeline
|