muaddib-scanner 2.11.2 → 2.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.2",
3
+ "version": "2.11.4",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -13,6 +13,7 @@ const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTarg
13
13
  const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
14
14
  const { startHealthcheck } = require('./healthcheck.js');
15
15
  const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
16
+ const { cleanupOldArchives, getRetentionDays } = require('./tarball-archive.js');
16
17
  const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
17
18
  // Caches not previously cleared by handleMemoryPressure (OOM fix). These live
18
19
  // in the main thread and are populated by temporal-ast-diff and the typosquat
@@ -498,6 +499,11 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
498
499
  cleanupRunscOrphans();
499
500
  // Layer 3: Purge expired cached tarballs on startup
500
501
  purgeTarballCache();
502
+ // Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 30).
503
+ // Runs in-process at startup so no external cron is required.
504
+ try { cleanupOldArchives(getRetentionDays()); } catch (err) {
505
+ console.warn(`[Archive] Startup cleanup failed: ${err.message}`);
506
+ }
501
507
 
502
508
  console.log(`
503
509
  ╔════════════════════════════════════════════╗
@@ -19,6 +19,17 @@ const { downloadToFile } = require('../shared/download.js');
19
19
  const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
20
20
  const ARCHIVE_TIMEOUT_MS = 10_000;
21
21
 
22
// Retention window for archived tarballs. Anything older is purged on startup.
// Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 30.
const DEFAULT_RETENTION_DAYS = 30;

/**
 * Resolve the archive retention window, in days, from the environment.
 * Reads MUADDIB_ARCHIVE_RETENTION_DAYS; unset, empty, non-numeric or
 * out-of-range values all fall back to DEFAULT_RETENTION_DAYS.
 *
 * @returns {number} whole number of days in [1, 365]
 */
function getRetentionDays() {
  const configured = process.env.MUADDIB_ARCHIVE_RETENTION_DAYS;
  if (configured === undefined || configured === '') {
    return DEFAULT_RETENTION_DAYS;
  }
  const days = parseInt(configured, 10);
  const inRange = Number.isFinite(days) && days >= 1 && days <= 365;
  return inRange ? days : DEFAULT_RETENTION_DAYS;
}
32
+
22
33
  /**
23
34
  * Get the date string in YYYY-MM-DD format (Paris timezone, consistent with monitor).
24
35
  * Falls back to UTC if Intl is unavailable.
@@ -70,6 +81,16 @@ function sha256File(filePath) {
70
81
  async function archiveSuspectTarball(packageName, version, tarballUrl, scanResult) {
71
82
  if (!tarballUrl || !packageName || !version) return false;
72
83
 
84
+ // Defense-in-depth: never archive packages that are statically clean.
85
+ // Callers in the pipeline already gate on tier 1a/1b/2 classification, but a
86
+ // numeric score of 0 with no triggered rules is unambiguously CLEAN — those
87
+ // dominated archive volume in production.
88
+ const score = (scanResult && typeof scanResult.score === 'number') ? scanResult.score : 0;
89
+ const rules = (scanResult && Array.isArray(scanResult.rulesTriggered)) ? scanResult.rulesTriggered : [];
90
+ if (score === 0 && rules.length === 0) {
91
+ return false;
92
+ }
93
+
73
94
  const dateStr = getArchiveDateString();
74
95
  const dayDir = path.join(ARCHIVE_DIR, dateStr);
75
96
  const safeName = sanitizeForFilename(packageName);
@@ -110,11 +131,91 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
110
131
  return true;
111
132
  }
112
133
 
134
/**
 * Parse a YYYY-MM-DD directory name into a UTC midnight Date.
 * Returns null for malformed names (so we never delete an unrelated directory).
 *
 * @param {string} name - directory basename, e.g. "2024-05-31"
 * @returns {Date|null} UTC midnight of that calendar day, or null when the
 *   name is not a real calendar date.
 */
function parseArchiveDayDir(name) {
  const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(name);
  if (!m) return null;
  const y = Number(m[1]);
  const mo = Number(m[2]);
  const d = Number(m[3]);
  if (mo < 1 || mo > 12 || d < 1 || d > 31) return null;
  const date = new Date(Date.UTC(y, mo - 1, d));
  if (Number.isNaN(date.getTime())) return null;
  // Date.UTC silently rolls invalid days over (e.g. "2024-02-31" becomes a
  // Date in early March). Require an exact round-trip so such names are
  // rejected instead of being treated as a real — but wrong — archive day.
  if (date.getUTCFullYear() !== y || date.getUTCMonth() !== mo - 1 || date.getUTCDate() !== d) {
    return null;
  }
  return date;
}
149
+
150
/**
 * Recursively delete a directory, swallowing per-file errors so one bad file
 * doesn't abort the cleanup of the rest of the archive.
 *
 * @param {string} dirPath - path of the directory to remove
 * @returns {boolean} true on success (force: true means an already-absent
 *   path also counts as success); false when removal failed, with a warning
 *   logged.
 */
function rmDirRecursiveSafe(dirPath) {
  let removed = true;
  try {
    fs.rmSync(dirPath, { recursive: true, force: true });
  } catch (err) {
    console.warn(`[Archive] Failed to remove ${dirPath}: ${err.message}`);
    removed = false;
  }
  return removed;
}
163
+
164
/**
 * Purge archived tarballs older than the retention window. Runs at monitor
 * startup so no external cron is needed.
 *
 * @param {number} [retentionDays] - days to keep; defaults to getRetentionDays()
 * @returns {{kept: number, purged: number, freedBytes: number}} summary of
 *   the sweep. Errors are logged, never thrown.
 */
function cleanupOldArchives(retentionDays = getRetentionDays()) {
  const stats = { kept: 0, purged: 0, freedBytes: 0 };
  if (!fs.existsSync(ARCHIVE_DIR)) return stats;

  let dirents;
  try {
    dirents = fs.readdirSync(ARCHIVE_DIR, { withFileTypes: true });
  } catch (err) {
    console.warn(`[Archive] Cannot read ${ARCHIVE_DIR}: ${err.message}`);
    return stats;
  }

  const cutoffMs = Date.now() - retentionDays * 24 * 60 * 60 * 1000;

  for (const dirent of dirents) {
    if (!dirent.isDirectory()) continue;

    const dayDate = parseArchiveDayDir(dirent.name);
    if (dayDate === null) continue; // ignore unrelated subdirs

    if (dayDate.getTime() >= cutoffMs) {
      stats.kept += 1;
      continue;
    }

    // Tally the day directory's (top-level) file sizes before deleting so the
    // summary can report reclaimed space. Best-effort: a stat failure only
    // under-counts freedBytes, it never blocks the purge.
    const dayPath = path.join(ARCHIVE_DIR, dirent.name);
    let dirBytes = 0;
    try {
      for (const fileName of fs.readdirSync(dayPath)) {
        try {
          dirBytes += fs.statSync(path.join(dayPath, fileName)).size;
        } catch {
          /* ignore */
        }
      }
    } catch {
      /* ignore */
    }

    if (rmDirRecursiveSafe(dayPath)) {
      stats.purged += 1;
      stats.freedBytes += dirBytes;
    }
  }

  if (stats.purged > 0) {
    const mb = (stats.freedBytes / 1024 / 1024).toFixed(0);
    console.log(`[Archive] Purged ${stats.purged} day(s) older than ${retentionDays}d (~${mb}MB freed). Kept ${stats.kept}.`);
  }
  return stats;
}
210
+
113
211
  module.exports = {
114
212
  archiveSuspectTarball,
213
+ cleanupOldArchives,
115
214
  ARCHIVE_DIR,
116
215
  // Exported for testing
117
216
  sanitizeForFilename,
118
217
  sha256File,
119
- getArchiveDateString
218
+ getArchiveDateString,
219
+ getRetentionDays,
220
+ parseArchiveDayDir
120
221
  };