muaddib-scanner 2.11.2 → 2.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/monitor/daemon.js +6 -0
- package/src/monitor/tarball-archive.js +102 -1
package/package.json
CHANGED
package/src/monitor/daemon.js
CHANGED
|
@@ -13,6 +13,7 @@ const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTarg
|
|
|
13
13
|
const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
|
|
14
14
|
const { startHealthcheck } = require('./healthcheck.js');
|
|
15
15
|
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
|
|
16
|
+
const { cleanupOldArchives, getRetentionDays } = require('./tarball-archive.js');
|
|
16
17
|
const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
|
|
17
18
|
// Caches not previously cleared by handleMemoryPressure (OOM fix). These live
|
|
18
19
|
// in the main thread and are populated by temporal-ast-diff and the typosquat
|
|
@@ -498,6 +499,11 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
498
499
|
cleanupRunscOrphans();
|
|
499
500
|
// Layer 3: Purge expired cached tarballs on startup
|
|
500
501
|
purgeTarballCache();
|
|
502
|
+
// Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 30).
|
|
503
|
+
// Runs in-process at startup so no external cron is required.
|
|
504
|
+
try { cleanupOldArchives(getRetentionDays()); } catch (err) {
|
|
505
|
+
console.warn(`[Archive] Startup cleanup failed: ${err.message}`);
|
|
506
|
+
}
|
|
501
507
|
|
|
502
508
|
console.log(`
|
|
503
509
|
╔════════════════════════════════════════════╗
|
|
@@ -19,6 +19,17 @@ const { downloadToFile } = require('../shared/download.js');
|
|
|
19
19
|
const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
|
|
20
20
|
const ARCHIVE_TIMEOUT_MS = 10_000;
|
|
21
21
|
|
|
22
|
+
// Retention window for archived tarballs. Anything older is purged on startup.
// Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 30.
const DEFAULT_RETENTION_DAYS = 30;

/**
 * Resolve the archive retention window from MUADDIB_ARCHIVE_RETENTION_DAYS.
 * @returns {number} Whole days in [1, 365]; 30 when unset, empty, non-numeric,
 *   or outside that range.
 */
function getRetentionDays() {
  const configured = process.env.MUADDIB_ARCHIVE_RETENTION_DAYS;
  if (configured === undefined || configured === '') {
    return DEFAULT_RETENTION_DAYS;
  }
  const days = parseInt(configured, 10);
  // parseInt yields NaN for non-numeric input; NaN fails the range test below.
  const inRange = Number.isFinite(days) && days >= 1 && days <= 365;
  return inRange ? days : DEFAULT_RETENTION_DAYS;
}
|
|
32
|
+
|
|
22
33
|
/**
|
|
23
34
|
* Get the date string in YYYY-MM-DD format (Paris timezone, consistent with monitor).
|
|
24
35
|
* Falls back to UTC if Intl is unavailable.
|
|
@@ -70,6 +81,16 @@ function sha256File(filePath) {
|
|
|
70
81
|
async function archiveSuspectTarball(packageName, version, tarballUrl, scanResult) {
|
|
71
82
|
if (!tarballUrl || !packageName || !version) return false;
|
|
72
83
|
|
|
84
|
+
// Defense-in-depth: never archive packages that are statically clean.
|
|
85
|
+
// Callers in the pipeline already gate on tier 1a/1b/2 classification, but a
|
|
86
|
+
// numeric score of 0 with no triggered rules is unambiguously CLEAN — those
|
|
87
|
+
// dominated archive volume in production.
|
|
88
|
+
const score = (scanResult && typeof scanResult.score === 'number') ? scanResult.score : 0;
|
|
89
|
+
const rules = (scanResult && Array.isArray(scanResult.rulesTriggered)) ? scanResult.rulesTriggered : [];
|
|
90
|
+
if (score === 0 && rules.length === 0) {
|
|
91
|
+
return false;
|
|
92
|
+
}
|
|
93
|
+
|
|
73
94
|
const dateStr = getArchiveDateString();
|
|
74
95
|
const dayDir = path.join(ARCHIVE_DIR, dateStr);
|
|
75
96
|
const safeName = sanitizeForFilename(packageName);
|
|
@@ -110,11 +131,91 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
|
|
|
110
131
|
return true;
|
|
111
132
|
}
|
|
112
133
|
|
|
134
|
+
/**
 * Parse a YYYY-MM-DD directory name into a UTC midnight Date.
 * Returns null for malformed names (so we never delete an unrelated directory).
 *
 * @param {string} name - Candidate directory basename, e.g. "2024-06-01".
 * @returns {Date|null} UTC midnight of that calendar day, or null if the name
 *   is not a real date.
 */
function parseArchiveDayDir(name) {
  const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(name);
  if (!m) return null;
  const y = Number(m[1]);
  const mo = Number(m[2]);
  const d = Number(m[3]);
  if (mo < 1 || mo > 12 || d < 1 || d > 31) return null;
  const date = new Date(Date.UTC(y, mo - 1, d));
  if (Number.isNaN(date.getTime())) return null;
  // Date.UTC silently rolls impossible dates forward (e.g. 2024-02-31 becomes
  // Mar 2) instead of producing NaN, so the check above never catches them.
  // Since this function gates deletion, reject any name that doesn't
  // round-trip to the exact same calendar day.
  if (date.getUTCFullYear() !== y || date.getUTCMonth() !== mo - 1 || date.getUTCDate() !== d) {
    return null;
  }
  return date;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Recursively delete a directory, swallowing per-file errors so one bad file
 * doesn't abort the cleanup of the rest of the archive.
 *
 * @param {string} dirPath - Path of the directory to remove.
 * @returns {boolean} true when the removal call completed, false when it threw
 *   (the error is logged, never propagated).
 */
function rmDirRecursiveSafe(dirPath) {
  let removed = true;
  try {
    // force:true turns a missing path into a no-op instead of an error.
    fs.rmSync(dirPath, { recursive: true, force: true });
  } catch (err) {
    console.warn(`[Archive] Failed to remove ${dirPath}: ${err.message}`);
    removed = false;
  }
  return removed;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Purge archived tarballs older than the retention window. Runs at monitor
 * startup so no external cron is needed.
 *
 * Streams stats: { kept, purged, freedBytes }. Errors are logged, never thrown.
 *
 * @param {number} [retentionDays] - Window in days; defaults to the
 *   env-configured value from getRetentionDays().
 * @returns {{kept: number, purged: number, freedBytes: number}} Counts of day
 *   directories kept/purged and an approximate byte total reclaimed.
 */
function cleanupOldArchives(retentionDays = getRetentionDays()) {
  const stats = { kept: 0, purged: 0, freedBytes: 0 };
  if (!fs.existsSync(ARCHIVE_DIR)) return stats;

  const cutoff = Date.now() - retentionDays * 24 * 60 * 60 * 1000;
  let entries;
  try {
    entries = fs.readdirSync(ARCHIVE_DIR, { withFileTypes: true });
  } catch (err) {
    console.warn(`[Archive] Cannot read ${ARCHIVE_DIR}: ${err.message}`);
    return stats;
  }

  for (const entry of entries) {
    if (!entry.isDirectory()) continue;

    // Only touch directories whose names are valid YYYY-MM-DD dates; anything
    // else in ARCHIVE_DIR is left alone.
    const dayDate = parseArchiveDayDir(entry.name);
    if (dayDate === null) continue;

    if (dayDate.getTime() >= cutoff) {
      stats.kept += 1;
      continue;
    }

    // Tally sizes before deletion so freedBytes can be reported. This is
    // best-effort: stat failures on individual files are counted as zero.
    const dayPath = path.join(ARCHIVE_DIR, entry.name);
    let dayBytes = 0;
    try {
      for (const fileName of fs.readdirSync(dayPath)) {
        try {
          dayBytes += fs.statSync(path.join(dayPath, fileName)).size;
        } catch { /* ignore */ }
      }
    } catch { /* ignore */ }

    if (rmDirRecursiveSafe(dayPath)) {
      stats.purged += 1;
      stats.freedBytes += dayBytes;
    }
  }

  if (stats.purged > 0) {
    const mb = (stats.freedBytes / 1024 / 1024).toFixed(0);
    console.log(`[Archive] Purged ${stats.purged} day(s) older than ${retentionDays}d (~${mb}MB freed). Kept ${stats.kept}.`);
  }
  return stats;
}
|
|
210
|
+
|
|
113
211
|
module.exports = {
|
|
114
212
|
archiveSuspectTarball,
|
|
213
|
+
cleanupOldArchives,
|
|
115
214
|
ARCHIVE_DIR,
|
|
116
215
|
// Exported for testing
|
|
117
216
|
sanitizeForFilename,
|
|
118
217
|
sha256File,
|
|
119
|
-
getArchiveDateString
|
|
218
|
+
getArchiveDateString,
|
|
219
|
+
getRetentionDays,
|
|
220
|
+
parseArchiveDayDir
|
|
120
221
|
};
|