muaddib-scanner 2.11.14 → 2.11.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/muaddib.js +2 -0
- package/package.json +2 -2
- package/src/monitor/ingestion.js +358 -11
- package/src/monitor/queue.js +1 -1
- package/src/monitor/state.js +58 -2
- package/src/response/playbooks.js +8 -0
- package/src/rules/index.js +32 -0
- package/src/scanner/dataflow.js +25 -5
- package/src/scanner/reachability.js +20 -2
- package/src/scanner/typosquat.js +165 -3
- package/src/scoring.js +15 -2
package/bin/muaddib.js
CHANGED
|
@@ -297,6 +297,7 @@ if (command === 'version' || command === '--version' || command === '-v') {
|
|
|
297
297
|
if (wantHelp) showHelp('watch');
|
|
298
298
|
watch(target);
|
|
299
299
|
} else if (command === 'update') {
|
|
300
|
+
if (wantHelp) showHelp('update');
|
|
300
301
|
updateIOCs().then(() => {
|
|
301
302
|
process.exit(0);
|
|
302
303
|
}).catch(err => {
|
|
@@ -304,6 +305,7 @@ if (command === 'version' || command === '--version' || command === '-v') {
|
|
|
304
305
|
process.exit(1);
|
|
305
306
|
});
|
|
306
307
|
} else if (command === 'scrape') {
|
|
308
|
+
if (wantHelp) showHelp('scrape');
|
|
307
309
|
runScraper().then(result => {
|
|
308
310
|
console.log(`[OK] ${result.added} new IOCs (total: ${result.total})`);
|
|
309
311
|
process.exit(0);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "muaddib-scanner",
|
|
3
|
-
"version": "2.11.14",
|
|
3
|
+
"version": "2.11.17",
|
|
4
4
|
"description": "Supply-chain threat detection & response for npm & PyPI/Python",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
"node": ">=18.0.0"
|
|
47
47
|
},
|
|
48
48
|
"dependencies": {
|
|
49
|
-
"@inquirer/prompts": "8.4.
|
|
49
|
+
"@inquirer/prompts": "8.4.3",
|
|
50
50
|
"acorn": "8.16.0",
|
|
51
51
|
"acorn-walk": "8.3.5",
|
|
52
52
|
"adm-zip": "0.5.17",
|
package/src/monitor/ingestion.js
CHANGED
|
@@ -10,7 +10,10 @@
|
|
|
10
10
|
const https = require('https');
|
|
11
11
|
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
12
12
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
13
|
-
const {
|
|
13
|
+
const {
|
|
14
|
+
loadNpmSeq, saveNpmSeq, CHANGES_STREAM_URL, CHANGES_LIMIT, CHANGES_CATCHUP_MAX,
|
|
15
|
+
savePypiSerial, PYPI_XMLRPC_URL, PYPI_CATCHUP_MAX
|
|
16
|
+
} = require('./state.js');
|
|
14
17
|
const { sendIOCPreAlert } = require('./webhook.js');
|
|
15
18
|
const { evaluateCacheTrigger, POPULAR_THRESHOLD, downloadsCache, DOWNLOADS_CACHE_TTL } = require('./classify.js');
|
|
16
19
|
|
|
@@ -22,6 +25,14 @@ const POLL_MAX_BACKOFF = 960_000; // 16 minutes max backoff
|
|
|
22
25
|
// --- Mutable state ---
|
|
23
26
|
let consecutivePollErrors = 0;
|
|
24
27
|
|
|
28
|
+
// Test seam: code paths that need to be stubbed in tests call these through
|
|
29
|
+
// `_deps` instead of the bare module-local name, so a test can swap
|
|
30
|
+
// `ingestion._deps.httpsPost = fakePost` and have it take effect inside
|
|
31
|
+
// pollPyPIChangelog. Kept tiny on purpose — only network I/O lives here.
|
|
32
|
+
const _deps = {
|
|
33
|
+
httpsPost: null // populated below once httpsPost is defined
|
|
34
|
+
};
|
|
35
|
+
|
|
25
36
|
function getConsecutivePollErrors() {
|
|
26
37
|
return consecutivePollErrors;
|
|
27
38
|
}
|
|
@@ -64,6 +75,47 @@ function httpsGet(url, timeoutMs = 30_000) {
|
|
|
64
75
|
});
|
|
65
76
|
}
|
|
66
77
|
|
|
78
|
+
/**
|
|
79
|
+
* Minimal HTTPS POST. Used for PyPI XML-RPC; kept inside the ingestion module
|
|
80
|
+
* (rather than pulled into shared/) because XML-RPC is its only consumer today.
|
|
81
|
+
*/
|
|
82
|
+
function httpsPost(url, body, headers = {}, timeoutMs = 30_000) {
  // Everything (including URL parsing) happens inside the executor so that any
  // synchronous failure surfaces as a rejected promise rather than a throw.
  return new Promise((resolve, reject) => {
    const target = new URL(url);

    // Caller-supplied headers are spread last and therefore win over the defaults.
    const mergedHeaders = {
      'Content-Type': 'text/xml',
      'Content-Length': Buffer.byteLength(body),
      ...headers
    };

    const handleResponse = (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the socket so it can be reused/closed, then report the status.
        res.resume();
        reject(new Error(`HTTP ${res.statusCode} for POST ${url}`));
        return;
      }
      const received = [];
      res.on('data', (piece) => {
        received.push(piece);
      });
      res.on('end', () => {
        resolve(Buffer.concat(received).toString('utf8'));
      });
      res.on('error', reject);
    };

    const req = https.request(
      {
        method: 'POST',
        hostname: target.hostname,
        port: target.port || 443,
        path: target.pathname + (target.search || ''),
        timeout: timeoutMs,
        headers: mergedHeaders
      },
      handleResponse
    );

    req.on('error', reject);
    req.on('timeout', () => {
      // The 'timeout' event only signals idleness; the request must be torn down manually.
      req.destroy();
      reject(new Error(`Timeout for POST ${url}`));
    });

    req.write(body);
    req.end();
  });
}
|
|
116
|
+
|
|
117
|
+
_deps.httpsPost = httpsPost;
|
|
118
|
+
|
|
67
119
|
async function getWeeklyDownloads(packageName) {
|
|
68
120
|
const cached = downloadsCache.get(packageName);
|
|
69
121
|
if (cached && (Date.now() - cached.fetchedAt) < DOWNLOADS_CACHE_TTL) {
|
|
@@ -186,8 +238,13 @@ function getNpmTarballUrl(pkgData) {
|
|
|
186
238
|
return (pkgData.dist && pkgData.dist.tarball) || null;
|
|
187
239
|
}
|
|
188
240
|
|
|
189
|
-
async function getPyPITarballUrl(packageName) {
|
|
190
|
-
|
|
241
|
+
async function getPyPITarballUrl(packageName, packageVersion = '') {
|
|
242
|
+
// Per-version endpoint when we know the version (e.g. from the XML-RPC changelog) —
|
|
243
|
+
// guarantees we scan the artifact that just landed, not whatever became "latest"
|
|
244
|
+
// between event detection and scan. Falls back to /pypi/<name>/json (latest) otherwise.
|
|
245
|
+
const url = packageVersion
|
|
246
|
+
? `https://pypi.org/pypi/${encodeURIComponent(packageName)}/${encodeURIComponent(packageVersion)}/json`
|
|
247
|
+
: `https://pypi.org/pypi/${encodeURIComponent(packageName)}/json`;
|
|
191
248
|
const body = await httpsGet(url);
|
|
192
249
|
let data;
|
|
193
250
|
try {
|
|
@@ -195,7 +252,7 @@ async function getPyPITarballUrl(packageName) {
|
|
|
195
252
|
} catch (e) {
|
|
196
253
|
throw new Error(`Invalid JSON from PyPI for ${packageName}: ${e.message}`);
|
|
197
254
|
}
|
|
198
|
-
const version = (data.info && data.info.version) || '';
|
|
255
|
+
const version = (data.info && data.info.version) || packageVersion || '';
|
|
199
256
|
const urls = data.urls || [];
|
|
200
257
|
// Prefer sdist (.tar.gz)
|
|
201
258
|
const sdist = urls.find(u => u.packagetype === 'sdist' && u.url);
|
|
@@ -386,7 +443,10 @@ async function pollNpmChanges(state, scanQueue, stats) {
|
|
|
386
443
|
const currentSeq = currentSeqData.update_seq;
|
|
387
444
|
if (typeof currentSeq === 'number' && typeof data.last_seq === 'number' &&
|
|
388
445
|
(currentSeq - data.last_seq) > CHANGES_CATCHUP_MAX) {
|
|
389
|
-
|
|
446
|
+
const gap = currentSeq - lastSeq;
|
|
447
|
+
console.warn(`[MONITOR] Changes stream too far behind (${gap} changes) — skipping to current`);
|
|
448
|
+
stats.npmCatchupSkips = (stats.npmCatchupSkips || 0) + 1;
|
|
449
|
+
stats.npmCatchupSkippedSeqs = (stats.npmCatchupSkippedSeqs || 0) + gap;
|
|
390
450
|
state.npmLastSeq = currentSeq;
|
|
391
451
|
saveNpmSeq(currentSeq);
|
|
392
452
|
return 0;
|
|
@@ -590,13 +650,271 @@ async function pollNpm(state, scanQueue, stats) {
|
|
|
590
650
|
|
|
591
651
|
// --- PyPI polling ---
|
|
592
652
|
|
|
653
|
+
const PYPI_USER_AGENT = `${SELF_PACKAGE_NAME} (security-monitor; +https://github.com/DNSZLSK/muaddib)`;
|
|
654
|
+
|
|
593
655
|
/**
|
|
594
|
-
*
|
|
656
|
+
* Build an XML-RPC methodCall envelope. PyPI accepts only <int> and <string>
|
|
657
|
+
* params for the methods we use (changelog_last_serial, changelog_since_serial),
|
|
658
|
+
* so this builder is deliberately minimal.
|
|
659
|
+
*/
|
|
660
|
+
function buildXmlRpcCall(method, params) {
  // Serialises each param as <int> (integers) or <string> (strings); any other
  // type is rejected with an Error so a malformed call never reaches the wire.
  const paramXml = params.map((p) => {
    if (typeof p === 'number' && Number.isInteger(p)) {
      return `<param><value><int>${p}</int></value></param>`;
    }
    if (typeof p === 'string') {
      // Strings are entity-escaped so markup characters cannot break the envelope.
      return `<param><value><string>${escapeXmlRpcText(p)}</string></value></param>`;
    }
    throw new Error(`Unsupported XML-RPC param type: ${typeof p}`);
  }).join('');
  // The method name is escaped as well; for the plain identifiers used today this is a no-op.
  return `<?xml version="1.0"?><methodCall><methodName>${escapeXmlRpcText(String(method))}</methodName><params>${paramXml}</params></methodCall>`;
}

// Escape the three characters that are significant inside XML text content.
// '&' must be handled first so the entities produced for '<' and '>' are not re-escaped.
function escapeXmlRpcText(text) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
|
|
674
|
+
|
|
675
|
+
/**
|
|
676
|
+
* Parse a PyPI changelog_since_serial response.
|
|
677
|
+
*
|
|
678
|
+
* Response shape (per https://warehouse.pypa.io/api-reference/xml-rpc.html):
|
|
679
|
+
* <array><data>
|
|
680
|
+
* <value><array><data>
|
|
681
|
+
* <value><string>NAME</string></value> <!-- index 0 -->
|
|
682
|
+
* <value><string>VERSION</string></value> <!-- index 1, may be empty -->
|
|
683
|
+
* <value><int>TIMESTAMP</int></value> <!-- index 2 -->
|
|
684
|
+
* <value><string>ACTION</string></value> <!-- index 3 -->
|
|
685
|
+
* <value><int>SERIAL</int></value> <!-- index 4 -->
|
|
686
|
+
* </data></array></value>
|
|
687
|
+
* ...
|
|
688
|
+
* </data></array>
|
|
689
|
+
*
|
|
690
|
+
* Returns array of { name, version, timestamp, action, serial }. Invalid tuples
|
|
691
|
+
* are skipped silently — partial data is better than dropping the whole batch.
|
|
692
|
+
*/
|
|
693
|
+
function parseXmlRpcChangelog(xml) {
  // Returns [] for non-string input, a missing <methodResponse> envelope, or a <fault>.
  const out = [];
  if (typeof xml !== 'string' || !xml.includes('<methodResponse>')) return out;
  if (xml.includes('<fault>')) return out; // PyPI fault → caller should treat as failure

  // The response is a nested array: outer <array><data>...inner tuples...</data></array>.
  // Strip the outer wrapper first so the inner-tuple regex cannot match across
  // the outer boundary (which would swallow the first tuple).
  const outerArrayStart = xml.indexOf('<array>');
  if (outerArrayStart === -1) return out;
  const outerDataStart = xml.indexOf('<data>', outerArrayStart);
  if (outerDataStart === -1) return out;
  const outerDataEnd = xml.lastIndexOf('</data>');
  if (outerDataEnd === -1 || outerDataEnd <= outerDataStart) return out;
  const body = xml.slice(outerDataStart + '<data>'.length, outerDataEnd);

  // Single-pass entity decoding: each entity is decoded exactly once, so an
  // encoded ampersand followed by "lt;" stays literal text instead of becoming '<'.
  const entityMap = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };
  const decodeText = (text) => text.replace(/&(lt|gt|amp|quot|apos);/g, (_, name) => entityMap[name]);

  // Each tuple inside `body` is exactly: <value><array><data>...</data></array></value>
  const tupleRegex = /<value>\s*<array>\s*<data>([\s\S]*?)<\/data>\s*<\/array>\s*<\/value>/g;
  // Scalar forms accepted: <string>…</string>, <int>/<i4> (equivalent per the XML-RPC spec),
  // self-closing <string/> (empty string) and <nil/> (null, e.g. an absent version).
  const valueRegex = /<value>\s*(?:<string>([\s\S]*?)<\/string>|<(?:int|i4)>\s*(-?\d+)\s*<\/(?:int|i4)>|(<string\s*\/>)|(<nil\s*\/>))\s*<\/value>/g;

  // matchAll works on a clone of the regex, so no lastIndex state leaks between tuples.
  for (const tuple of body.matchAll(tupleRegex)) {
    const values = [];
    for (const v of tuple[1].matchAll(valueRegex)) {
      if (v[1] !== undefined) {
        values.push(decodeText(v[1]));
      } else if (v[2] !== undefined) {
        values.push(Number.parseInt(v[2], 10));
      } else if (v[3] !== undefined) {
        values.push('');
      } else {
        values.push(null);
      }
    }
    // Invalid tuples are skipped silently — partial data beats dropping the whole batch.
    if (values.length !== 5) continue;
    const [name, version, timestamp, action, serial] = values;
    if (typeof name !== 'string' || typeof action !== 'string' ||
        typeof timestamp !== 'number' || typeof serial !== 'number') continue;
    // A null (or otherwise non-string) version is normalised to '' for callers.
    out.push({ name, version: typeof version === 'string' ? version : '', timestamp, action, serial });
  }
  return out;
}
|
|
733
|
+
|
|
734
|
+
/**
|
|
735
|
+
* Parse a changelog_last_serial response. Returns the integer or null.
|
|
736
|
+
*/
|
|
737
|
+
function parseXmlRpcInt(xml) {
  // Non-string input and XML-RPC faults both mean "no usable integer".
  if (typeof xml !== 'string' || xml.includes('<fault>')) return null;
  // <int> and <i4> are equivalent spellings in the XML-RPC spec; the back-reference
  // makes sure the closing tag matches whichever one opened.
  const found = xml.match(/<value>\s*<(int|i4)>\s*(-?\d+)\s*<\/\1>\s*<\/value>/);
  return found ? Number.parseInt(found[2], 10) : null;
}
|
|
742
|
+
|
|
743
|
+
/**
|
|
744
|
+
* Decide whether a changelog event introduces scannable content.
|
|
745
|
+
*
|
|
746
|
+
* KEEP (something new was published, scan the release):
|
|
747
|
+
* - "new release" → version metadata created
|
|
748
|
+
* - "add source file …" → sdist uploaded
|
|
749
|
+
* - "add py3 file …" / "add cp… file …" / "add … file …" → wheel uploaded
|
|
750
|
+
*
|
|
751
|
+
* SKIP (no new artifact to scan):
|
|
752
|
+
* - "remove …", "yank release", "unyank release" → removal, not a new threat
|
|
753
|
+
* - "create" → package shell, no version yet
|
|
754
|
+
* - "add Owner", "remove Owner", "accepted Owner" → ACL changes
|
|
755
|
+
* - empty version → administrative event at the package level
|
|
756
|
+
*/
|
|
757
|
+
function isPypiScannableAction(action, version) {
  // Events without a version are package-level and never scannable;
  // a non-string action cannot be classified at all.
  const hasVersion = Boolean(version);
  if (!hasVersion || typeof action !== 'string') {
    return false;
  }
  // "add <kind> file <filename>" covers sdist and wheel uploads alike.
  const isFileUpload = action.startsWith('add ') && action.includes(' file ');
  return action === 'new release' || isFileUpload;
}
|
|
764
|
+
|
|
765
|
+
/**
|
|
766
|
+
* Poll PyPI changelog via XML-RPC (primary path).
|
|
767
|
+
* Equivalent of pollNpmChanges: strictly monotonic serial, lossless resume.
|
|
768
|
+
*
|
|
769
|
+
* @param {Object} state - Monitor state (pypiLastSerial)
|
|
770
|
+
* @param {Array} scanQueue - Mutable scan queue array
|
|
771
|
+
* @param {Object} stats - Mutable stats object
|
|
772
|
+
* @returns {Promise<number>} Number of packages queued, or -1 on error
|
|
773
|
+
*/
|
|
774
|
+
async function pollPyPIChangelog(state, scanQueue, stats) {
  // Flow: (1) on first run, anchor to the current serial and queue nothing;
  // (2) otherwise fetch every event since state.pypiLastSerial, (3) bail out to
  // "now" if the backlog is too large, (4) queue one scan per (name, version),
  // (5) persist the new serial. Any failure returns -1 so the caller can fall back.
  try {
    const lastSerial = state.pypiLastSerial;

    // First run: anchor to "now" rather than replaying months of history
    if (lastSerial == null) {
      await acquireRegistrySlot();
      let initBody;
      try {
        initBody = await _deps.httpsPost(
          PYPI_XMLRPC_URL,
          buildXmlRpcCall('changelog_last_serial', []),
          { 'User-Agent': PYPI_USER_AGENT },
          10_000
        );
      } finally {
        // Always hand the slot back, even when the request throws.
        releaseRegistrySlot();
      }
      const current = parseXmlRpcInt(initBody);
      if (current == null) {
        console.warn('[MONITOR] PyPI changelog init: no serial in response');
        return -1;
      }
      state.pypiLastSerial = current;
      savePypiSerial(current);
      console.log(`[MONITOR] PyPI changelog initialized at serial ${current}`);
      return 0;
    }

    await acquireRegistrySlot();
    let body;
    try {
      body = await _deps.httpsPost(
        PYPI_XMLRPC_URL,
        buildXmlRpcCall('changelog_since_serial', [lastSerial]),
        { 'User-Agent': PYPI_USER_AGENT },
        60_000
      );
    } finally {
      releaseRegistrySlot();
    }

    const events = parseXmlRpcChangelog(body);
    if (events.length === 0) {
      // An empty result is only "nothing happened" when the body is a well-formed,
      // non-fault XML-RPC response. Anything else (HTML error page, empty body…)
      // is a failure and must not be mistaken for a quiet period.
      if (typeof body !== 'string' || !body.includes('<methodResponse>')) {
        console.error('[MONITOR] PyPI changelog returned a non-XML-RPC response — falling back to RSS');
        return -1;
      }
      if (body.includes('<fault>')) {
        console.error('[MONITOR] PyPI changelog returned XML-RPC fault — falling back to RSS');
        return -1;
      }
      return 0;
    }

    // Highest serial in the batch, never lower than where we started. Computed
    // explicitly instead of trusting the last array element to be the newest.
    const maxSerial = events.reduce(
      (highest, ev) => (ev.serial > highest ? ev.serial : highest),
      lastSerial
    );

    // Catch-up protection: if events span more than PYPI_CATCHUP_MAX serials,
    // skip to the latest serial to avoid an avalanche after long downtime.
    const gap = maxSerial - lastSerial;
    if (gap > PYPI_CATCHUP_MAX) {
      console.warn(`[MONITOR] PyPI changelog too far behind (${gap} events) — skipping to current`);
      stats.pypiCatchupSkips = (stats.pypiCatchupSkips || 0) + 1;
      stats.pypiCatchupSkippedEvents = (stats.pypiCatchupSkippedEvents || 0) + gap;
      state.pypiLastSerial = maxSerial;
      savePypiSerial(maxSerial);
      return 0;
    }

    // Dedupe (name, version) within the batch: a single release usually emits
    // multiple events (new release + add source file + add wheel files…), but
    // there's only one thing to scan.
    const seen = new Set();
    let queued = 0;

    for (const ev of events) {
      if (!isPypiScannableAction(ev.action, ev.version)) continue;

      const key = `${ev.name}@${ev.version}`;
      if (seen.has(key)) continue;
      seen.add(key);

      // Skip self (mirror of the npm path — defensive even though we don't publish to PyPI)
      if (ev.name === SELF_PACKAGE_NAME) continue;

      // IOC pre-alert for known-malicious PyPI packages
      let isKnownIOC = false;
      try {
        const iocs = loadCachedIOCs();
        // PyPI IOCs are namespaced "pypi:<name>" in the wildcardPackages set
        const pypiKey = `pypi:${ev.name}`;
        const wildcards = iocs.wildcardPackages;
        // Coerce to a strict boolean so isIOCMatch is never undefined/null downstream.
        isKnownIOC = Boolean(wildcards && (wildcards.has(pypiKey) || wildcards.has(ev.name)));
        if (isKnownIOC) {
          console.log(`[MONITOR] IOC PRE-ALERT (pypi): ${ev.name} — known malicious package`);
          stats.iocPreAlerts = (stats.iocPreAlerts || 0) + 1;
          // Fire-and-forget: a webhook failure must not block queuing.
          sendIOCPreAlert(ev.name).catch(err => {
            console.error(`[MONITOR] IOC pre-alert webhook failed for ${ev.name}: ${err.message}`);
          });
        }
      } catch { /* IOC load failure is non-fatal: the package is still queued for a full scan */ }

      scanQueue.push({
        name: ev.name,
        version: ev.version,
        ecosystem: 'pypi',
        tarballUrl: null, // resolved lazily via getPyPITarballUrl()
        isIOCMatch: isKnownIOC
      });
      queued++;
    }

    // Persist the serial both in memory and on disk before returning, so a crash
    // after this point replays at most one poll's worth of events.
    state.pypiLastSerial = maxSerial;
    if (maxSerial !== lastSerial) {
      savePypiSerial(maxSerial);
    }

    if (queued > 0) {
      console.log(`[MONITOR] PyPI changelog: ${queued} packages queued (serial ${lastSerial} → ${maxSerial}, ${events.length} events)`);
    }
    stats.pypiChangelogPackages = (stats.pypiChangelogPackages || 0) + queued;
    stats.pypiChangelogEvents = (stats.pypiChangelogEvents || 0) + events.length;

    return queued;
  } catch (err) {
    console.error(`[MONITOR] PyPI changelog error: ${err.message} — falling back to RSS`);
    return -1;
  }
}
|
|
908
|
+
|
|
909
|
+
/**
|
|
910
|
+
* Poll PyPI RSS feed (legacy fallback).
|
|
911
|
+
* Only covers newly-registered packages (first-ever publish) and is capped at ~40 items —
|
|
912
|
+
* a single burst can silently lose events. Used only when the XML-RPC changelog fails.
|
|
595
913
|
*
|
|
596
914
|
* @param {Object} state - Monitor state object (pypiLastPackage)
|
|
597
915
|
* @param {Array} scanQueue - Mutable scan queue array
|
|
598
916
|
*/
|
|
599
|
-
async function
|
|
917
|
+
async function pollPyPIRss(state, scanQueue) {
|
|
600
918
|
const url = 'https://pypi.org/rss/packages.xml';
|
|
601
919
|
|
|
602
920
|
try {
|
|
@@ -620,7 +938,7 @@ async function pollPyPI(state, scanQueue) {
|
|
|
620
938
|
}
|
|
621
939
|
|
|
622
940
|
for (const name of newPackages) {
|
|
623
|
-
console.log(`[MONITOR] New pypi: ${name}`);
|
|
941
|
+
console.log(`[MONITOR] New pypi (rss): ${name}`);
|
|
624
942
|
// Queue PyPI packages — tarball URL resolved during scan
|
|
625
943
|
scanQueue.push({
|
|
626
944
|
name,
|
|
@@ -637,11 +955,28 @@ async function pollPyPI(state, scanQueue) {
|
|
|
637
955
|
|
|
638
956
|
return newPackages.length;
|
|
639
957
|
} catch (err) {
|
|
640
|
-
console.error(`[MONITOR] PyPI poll error: ${err.message}`);
|
|
958
|
+
console.error(`[MONITOR] PyPI RSS poll error: ${err.message}`);
|
|
641
959
|
return -1;
|
|
642
960
|
}
|
|
643
961
|
}
|
|
644
962
|
|
|
963
|
+
/**
|
|
964
|
+
* Poll PyPI for new packages and versions.
|
|
965
|
+
* Primary: XML-RPC changelog_since_serial (lossless, captures new versions).
|
|
966
|
+
* Fallback: RSS feed (new registrations only, lossy on bursts).
|
|
967
|
+
*
|
|
968
|
+
* @param {Object} state - Monitor state object
|
|
969
|
+
* @param {Array} scanQueue - Mutable scan queue array
|
|
970
|
+
* @param {Object} stats - Mutable stats object
|
|
971
|
+
*/
|
|
972
|
+
async function pollPyPI(state, scanQueue, stats = {}) {
  // Try the changelog path first; a non-negative result is the queued count.
  const changelogResult = await pollPyPIChangelog(state, scanQueue, stats);
  const changelogSucceeded = changelogResult >= 0;

  if (!changelogSucceeded) {
    // Changelog failed: record the fallback and delegate to the RSS poller.
    console.log('[MONITOR] Using RSS fallback for PyPI');
    stats.pypiRssFallbackCount = (stats.pypiRssFallbackCount || 0) + 1;
    return pollPyPIRss(state, scanQueue);
  }

  return changelogResult;
}
|
|
979
|
+
|
|
645
980
|
// --- Main poll orchestrator ---
|
|
646
981
|
|
|
647
982
|
/**
|
|
@@ -686,7 +1021,7 @@ async function poll(state, scanQueue, stats) {
|
|
|
686
1021
|
|
|
687
1022
|
const [npmCount, pypiCount] = await Promise.all([
|
|
688
1023
|
pollNpm(state, scanQueue, stats),
|
|
689
|
-
pollPyPI(state, scanQueue)
|
|
1024
|
+
pollPyPI(state, scanQueue, stats)
|
|
690
1025
|
]);
|
|
691
1026
|
|
|
692
1027
|
// Track consecutive poll failures for backoff
|
|
@@ -718,6 +1053,7 @@ module.exports = {
|
|
|
718
1053
|
|
|
719
1054
|
// HTTP helpers
|
|
720
1055
|
httpsGet,
|
|
1056
|
+
httpsPost,
|
|
721
1057
|
getWeeklyDownloads,
|
|
722
1058
|
checkTrustedDepDiff,
|
|
723
1059
|
TRUSTED_DEP_AGE_THRESHOLD_MS,
|
|
@@ -731,6 +1067,12 @@ module.exports = {
|
|
|
731
1067
|
parseNpmRss,
|
|
732
1068
|
parsePyPIRss,
|
|
733
1069
|
|
|
1070
|
+
// XML-RPC (PyPI changelog)
|
|
1071
|
+
buildXmlRpcCall,
|
|
1072
|
+
parseXmlRpcChangelog,
|
|
1073
|
+
parseXmlRpcInt,
|
|
1074
|
+
isPypiScannableAction,
|
|
1075
|
+
|
|
734
1076
|
// CouchDB doc extraction
|
|
735
1077
|
extractTarballFromDoc,
|
|
736
1078
|
|
|
@@ -738,6 +1080,11 @@ module.exports = {
|
|
|
738
1080
|
pollNpmChanges,
|
|
739
1081
|
pollNpmRss,
|
|
740
1082
|
pollNpm,
|
|
1083
|
+
pollPyPIChangelog,
|
|
1084
|
+
pollPyPIRss,
|
|
741
1085
|
pollPyPI,
|
|
742
|
-
poll
|
|
1086
|
+
poll,
|
|
1087
|
+
|
|
1088
|
+
// Test seam — see _deps definition near the top of this file.
|
|
1089
|
+
_deps
|
|
743
1090
|
};
|
package/src/monitor/queue.js
CHANGED
|
@@ -1138,7 +1138,7 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
|
|
|
1138
1138
|
}
|
|
1139
1139
|
if (item.ecosystem === 'pypi' && !item.tarballUrl) {
|
|
1140
1140
|
try {
|
|
1141
|
-
const pypiInfo = await getPyPITarballUrl(item.name);
|
|
1141
|
+
const pypiInfo = await getPyPITarballUrl(item.name, item.version || '');
|
|
1142
1142
|
if (!pypiInfo.url) {
|
|
1143
1143
|
console.log(`[MONITOR] SKIP: ${item.name} — no tarball URL found on PyPI`);
|
|
1144
1144
|
return;
|
package/src/monitor/state.js
CHANGED
|
@@ -76,6 +76,20 @@ const CHANGES_STREAM_URL = 'https://replicate.npmjs.com/registry/_changes';
|
|
|
76
76
|
const CHANGES_LIMIT = 1000;
|
|
77
77
|
const CHANGES_CATCHUP_MAX = 500000; // If behind by more than 500k seqs, skip to "now"
|
|
78
78
|
|
|
79
|
+
// --- PyPI serial constants ---
|
|
80
|
+
//
|
|
81
|
+
// PyPI's XML-RPC changelog endpoint is the canonical equivalent of npm's CouchDB
|
|
82
|
+
// `_changes` stream: every package event (release, file upload, removal, owner
|
|
83
|
+
// change…) gets a strictly monotonic integer "serial". `changelog_since_serial(n)`
|
|
84
|
+
// returns every event with serial > n, letting us resume losslessly across restarts.
|
|
85
|
+
//
|
|
86
|
+
// PYPI_CATCHUP_MAX is the staleness cap: if we are behind by more than this many
|
|
87
|
+
// serials (≈ days of activity at ~30k events/day in 2026), skip to "now" rather
|
|
88
|
+
// than fetch a monster batch. Mirrors CHANGES_CATCHUP_MAX for npm.
|
|
89
|
+
const PYPI_SERIAL_FILE = path.join(__dirname, '..', '..', 'data', 'pypi-serial.json');
|
|
90
|
+
const PYPI_XMLRPC_URL = 'https://pypi.org/pypi';
|
|
91
|
+
const PYPI_CATCHUP_MAX = 100000;
|
|
92
|
+
|
|
79
93
|
// --- Scan memory constants ---
|
|
80
94
|
|
|
81
95
|
const SCAN_MEMORY_FILE = path.join(__dirname, '..', '..', 'data', 'scan-memory.json');
|
|
@@ -191,6 +205,37 @@ function saveNpmSeq(seq) {
|
|
|
191
205
|
atomicWriteFileSync(NPM_SEQ_FILE, JSON.stringify({ lastSeq: seq, updatedAt: new Date().toISOString() }, null, 2));
|
|
192
206
|
}
|
|
193
207
|
|
|
208
|
+
// --- PyPI serial persistence ---
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Load the last processed PyPI changelog serial from the dedicated file.
|
|
212
|
+
* Returns null if no file exists or file is invalid (triggers "now" initialization).
|
|
213
|
+
*/
|
|
214
|
+
function loadPypiSerial() {
  try {
    // No file yet → nothing persisted.
    if (!fs.existsSync(PYPI_SERIAL_FILE)) {
      return null;
    }
    const raw = fs.readFileSync(PYPI_SERIAL_FILE, 'utf8');
    const parsed = JSON.parse(raw);
    const candidate = parsed.lastSerial;
    // Only a finite number is accepted; anything else is treated as absent.
    const isUsable = typeof candidate === 'number' && Number.isFinite(candidate);
    return isUsable ? candidate : null;
  } catch (err) {
    // Unreadable or corrupt file: warn and behave as if nothing was persisted.
    console.warn(`[MONITOR] Failed to load PyPI serial: ${err.message}`);
    return null;
  }
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* Persist the last processed PyPI changelog serial to a dedicated file.
|
|
230
|
+
* Atomic write (crash-safe). Also mirrored in monitor-state.json via saveState().
|
|
231
|
+
*/
|
|
232
|
+
function savePypiSerial(serial) {
  // Record the serial together with the wall-clock time of the write.
  const record = { lastSerial: serial, updatedAt: new Date().toISOString() };
  const payload = JSON.stringify(record, null, 2);
  atomicWriteFileSync(PYPI_SERIAL_FILE, payload);
}
|
|
238
|
+
|
|
194
239
|
// --- C3: Scan Memory Management ---
|
|
195
240
|
|
|
196
241
|
/**
|
|
@@ -649,10 +694,16 @@ function loadState(stats) {
|
|
|
649
694
|
return {
|
|
650
695
|
npmLastPackage: typeof state.npmLastPackage === 'string' ? state.npmLastPackage : '',
|
|
651
696
|
pypiLastPackage: typeof state.pypiLastPackage === 'string' ? state.pypiLastPackage : '',
|
|
652
|
-
npmLastSeq: state.npmLastSeq != null ? state.npmLastSeq : loadNpmSeq()
|
|
697
|
+
npmLastSeq: state.npmLastSeq != null ? state.npmLastSeq : loadNpmSeq(),
|
|
698
|
+
pypiLastSerial: state.pypiLastSerial != null ? state.pypiLastSerial : loadPypiSerial()
|
|
653
699
|
};
|
|
654
700
|
} catch {
|
|
655
|
-
return {
|
|
701
|
+
return {
|
|
702
|
+
npmLastPackage: '',
|
|
703
|
+
pypiLastPackage: '',
|
|
704
|
+
npmLastSeq: loadNpmSeq(),
|
|
705
|
+
pypiLastSerial: loadPypiSerial()
|
|
706
|
+
};
|
|
656
707
|
}
|
|
657
708
|
}
|
|
658
709
|
|
|
@@ -1180,6 +1231,9 @@ module.exports = {
|
|
|
1180
1231
|
CHANGES_STREAM_URL,
|
|
1181
1232
|
CHANGES_LIMIT,
|
|
1182
1233
|
CHANGES_CATCHUP_MAX,
|
|
1234
|
+
PYPI_SERIAL_FILE,
|
|
1235
|
+
PYPI_XMLRPC_URL,
|
|
1236
|
+
PYPI_CATCHUP_MAX,
|
|
1183
1237
|
SCAN_MEMORY_FILE,
|
|
1184
1238
|
SCAN_MEMORY_EXPIRY_MS,
|
|
1185
1239
|
MAX_MEMORY_ENTRIES,
|
|
@@ -1211,6 +1265,8 @@ module.exports = {
|
|
|
1211
1265
|
atomicWriteFileSync,
|
|
1212
1266
|
loadNpmSeq,
|
|
1213
1267
|
saveNpmSeq,
|
|
1268
|
+
loadPypiSerial,
|
|
1269
|
+
savePypiSerial,
|
|
1214
1270
|
loadScanMemory,
|
|
1215
1271
|
saveScanMemory,
|
|
1216
1272
|
recordScanMemory,
|
package/src/response/playbooks.js
CHANGED
|
@@ -161,6 +161,14 @@ const PLAYBOOKS = {
|
|
|
161
161
|
typosquat_detected:
|
|
162
162
|
'ATTENTION: Ce package a un nom tres similaire a un package populaire. Verifier que c\'est bien le bon package. Si erreur de frappe, corriger immediatement.',
|
|
163
163
|
|
|
164
|
+
// RT-C1: dependency boundary-squat (Axios UNC1069 March 2026)
|
|
165
|
+
dependency_typosquat:
|
|
166
|
+
'Une dependance declaree ressemble a un package populaire avec un prefixe/suffixe suspect. Verifier le nom exact dans package.json et confirmer avec npm view <package>. Si erreur de frappe, corriger immediatement.',
|
|
167
|
+
dependency_typosquat_used:
|
|
168
|
+
'Le code charge cette dep typosquattee via require/import. Si ce n\'est pas intentionnel, supprimer la dep et la reference, puis reinstaller avec --ignore-scripts.',
|
|
169
|
+
dependency_typosquat_require:
|
|
170
|
+
'CRITIQUE — pattern Axios UNC1069 detecte: dep typosquattee declaree ET chargee dans le code. Le wrapper apparent est probablement legitime mais sa dep contient le payload. Bloquer l\'install (--ignore-scripts), supprimer la dep, auditer le history de modifications.',
|
|
171
|
+
|
|
164
172
|
dangerous_call_function:
|
|
165
173
|
'Appel new Function() detecte. Equivalent a eval(). Verifier la source des donnees.',
|
|
166
174
|
|
package/src/rules/index.js
CHANGED
|
@@ -335,6 +335,38 @@ const RULES = {
|
|
|
335
335
|
mitre: 'T1195.002'
|
|
336
336
|
},
|
|
337
337
|
|
|
338
|
+
// RT-C1: Dependency boundary-squat (Axios UNC1069 March 2026)
|
|
339
|
+
dependency_typosquat: {
|
|
340
|
+
id: 'MUADDIB-TYPO-002',
|
|
341
|
+
name: 'Dependency Boundary-Squat',
|
|
342
|
+
severity: 'HIGH',
|
|
343
|
+
confidence: 'high',
|
|
344
|
+
description: 'Une dependance declaree porte le nom d\'un package populaire prefixe/suffixe d\'un token suspect (Axios UNC1069, mars 2026). Le wrapper innocent declare un sub-dep malveillant.',
|
|
345
|
+
references: [
|
|
346
|
+
'https://snyk.io/blog/typosquatting-attacks/',
|
|
347
|
+
'https://attack.mitre.org/techniques/T1195/002/'
|
|
348
|
+
],
|
|
349
|
+
mitre: 'T1195.002'
|
|
350
|
+
},
|
|
351
|
+
dependency_typosquat_used: {
|
|
352
|
+
id: 'MUADDIB-TYPO-003',
|
|
353
|
+
name: 'Boundary-Squat Dependency Used in Code',
|
|
354
|
+
severity: 'MEDIUM',
|
|
355
|
+
confidence: 'high',
|
|
356
|
+
description: 'Le code du package require/import un nom de dependance identifie comme boundary-squat. Signal fort que la dep typosquattee est intentionnellement chargee.',
|
|
357
|
+
references: ['https://attack.mitre.org/techniques/T1195/002/'],
|
|
358
|
+
mitre: 'T1195.002'
|
|
359
|
+
},
|
|
360
|
+
dependency_typosquat_require: {
|
|
361
|
+
id: 'MUADDIB-COMPOUND-013',
|
|
362
|
+
name: 'Boundary-Squat Dep Required at Runtime',
|
|
363
|
+
severity: 'CRITICAL',
|
|
364
|
+
confidence: 'high',
|
|
365
|
+
description: 'Dependance boundary-squat declaree ET chargee via require/import dans le code: pattern Axios UNC1069 (sub-dep injection avec wrapper innocent).',
|
|
366
|
+
references: ['https://attack.mitre.org/techniques/T1195/002/'],
|
|
367
|
+
mitre: 'T1195.002'
|
|
368
|
+
},
|
|
369
|
+
|
|
338
370
|
// Package.json script patterns
|
|
339
371
|
curl_pipe_sh: {
|
|
340
372
|
id: 'MUADDIB-PKG-002',
|
package/src/scanner/dataflow.js
CHANGED
|
@@ -24,12 +24,13 @@ const MODULE_SOURCE_METHODS = {
|
|
|
24
24
|
},
|
|
25
25
|
child_process: {
|
|
26
26
|
exec: 'command_output', execSync: 'command_output',
|
|
27
|
-
spawn: 'command_output', spawnSync: 'command_output'
|
|
27
|
+
spawn: 'command_output', spawnSync: 'command_output',
|
|
28
|
+
execFile: 'command_output', execFileSync: 'command_output', fork: 'command_output'
|
|
28
29
|
}
|
|
29
30
|
};
|
|
30
31
|
|
|
31
32
|
const MODULE_SINK_METHODS = {
|
|
32
|
-
child_process: { exec: 'exec_sink', execSync: 'exec_sink', spawn: 'exec_sink' },
|
|
33
|
+
child_process: { exec: 'exec_sink', execSync: 'exec_sink', spawn: 'exec_sink', execFile: 'exec_sink', execFileSync: 'exec_sink', fork: 'exec_sink' },
|
|
33
34
|
http: { request: 'network_send', get: 'network_send' },
|
|
34
35
|
https: { request: 'network_send', get: 'network_send' },
|
|
35
36
|
net: { connect: 'network_send', createConnection: 'network_send' },
|
|
@@ -49,7 +50,7 @@ const TRACKED_MODULES = new Set([
|
|
|
49
50
|
]);
|
|
50
51
|
|
|
51
52
|
// Methods that execute commands — used for exec result capture detection
|
|
52
|
-
const EXEC_METHODS = new Set(['exec', 'execSync', 'spawn', 'spawnSync']);
|
|
53
|
+
const EXEC_METHODS = new Set(['exec', 'execSync', 'spawn', 'spawnSync', 'execFile', 'execFileSync', 'fork']);
|
|
53
54
|
|
|
54
55
|
/**
|
|
55
56
|
* Pre-pass: builds a taint map from require() assignments.
|
|
@@ -60,6 +61,24 @@ function buildTaintMap(ast) {
|
|
|
60
61
|
const taintMap = new Map();
|
|
61
62
|
|
|
62
63
|
walk.simple(ast, {
|
|
64
|
+
// ESM: import fs from 'fs' / import * as fs from 'fs' / import { exec } from 'child_process'
|
|
65
|
+
// Mirrors module-graph/annotate-tainted.js so ESM and CJS produce symmetric taint maps.
|
|
66
|
+
ImportDeclaration(node) {
|
|
67
|
+
if (!node.source || typeof node.source.value !== 'string') return;
|
|
68
|
+
const modName = node.source.value;
|
|
69
|
+
if (!TRACKED_MODULES.has(modName)) return;
|
|
70
|
+
for (const spec of node.specifiers) {
|
|
71
|
+
if (!spec.local || spec.local.type !== 'Identifier') continue;
|
|
72
|
+
if (spec.type === 'ImportDefaultSpecifier' || spec.type === 'ImportNamespaceSpecifier') {
|
|
73
|
+
taintMap.set(spec.local.name, { source: modName, detail: modName });
|
|
74
|
+
} else if (spec.type === 'ImportSpecifier') {
|
|
75
|
+
const imported = spec.imported && spec.imported.type === 'Identifier'
|
|
76
|
+
? spec.imported.name
|
|
77
|
+
: (spec.imported && spec.imported.value ? spec.imported.value : spec.local.name);
|
|
78
|
+
taintMap.set(spec.local.name, { source: modName, detail: `${modName}.${imported}` });
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
},
|
|
63
82
|
VariableDeclarator(node) {
|
|
64
83
|
if (!node.init) return;
|
|
65
84
|
let init = node.init;
|
|
@@ -471,7 +490,9 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
471
490
|
|
|
472
491
|
}
|
|
473
492
|
|
|
474
|
-
|
|
493
|
+
// DF-H2: extend exec_network classification to all EXEC_METHODS
|
|
494
|
+
// (execFile/execFileSync/fork were previously missed — trivial evasion vector).
|
|
495
|
+
if (EXEC_METHODS.has(callName)) {
|
|
475
496
|
const arg = node.arguments[0];
|
|
476
497
|
if (arg && arg.type === 'Literal' && typeof arg.value === 'string') {
|
|
477
498
|
if (arg.value.includes('curl') || arg.value.includes('wget')) {
|
|
@@ -480,7 +501,6 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
480
501
|
name: callName,
|
|
481
502
|
line: node.loc?.start?.line
|
|
482
503
|
});
|
|
483
|
-
|
|
484
504
|
}
|
|
485
505
|
}
|
|
486
506
|
}
|
package/src/scanner/reachability.js
CHANGED
|
@@ -142,8 +142,12 @@ function getEntryPoints(packagePath) {
|
|
|
142
142
|
candidates.push(pkg.module);
|
|
143
143
|
}
|
|
144
144
|
|
|
145
|
-
// Lifecycle scripts: extract .js files from
|
|
146
|
-
const lifecycleKeys = [
|
|
145
|
+
// Lifecycle scripts: extract .js files from npm lifecycle hooks
|
|
146
|
+
const lifecycleKeys = [
|
|
147
|
+
'preinstall', 'install', 'postinstall', 'prepare',
|
|
148
|
+
'prepack', 'postpack', 'prepublishOnly', 'prepublish',
|
|
149
|
+
'preuninstall', 'uninstall', 'postuninstall'
|
|
150
|
+
];
|
|
147
151
|
if (pkg.scripts) {
|
|
148
152
|
for (const key of lifecycleKeys) {
|
|
149
153
|
if (typeof pkg.scripts[key] === 'string') {
|
|
@@ -210,6 +214,20 @@ function walkForSpawnTargets(node, fileDir, packagePath, targets) {
|
|
|
210
214
|
}
|
|
211
215
|
}
|
|
212
216
|
|
|
217
|
+
// RC-C3: new Worker('./worker.js') / new w.Worker(...) — worker_threads spawn.
|
|
218
|
+
// Stable since Node 12 (2019). Resolves only when first arg points to a real .js/.mjs/.cjs.
|
|
219
|
+
if (node.type === 'NewExpression' && node.callee && node.arguments && node.arguments.length >= 1) {
|
|
220
|
+
const ctorName = node.callee.type === 'Identifier'
|
|
221
|
+
? node.callee.name
|
|
222
|
+
: (node.callee.type === 'MemberExpression' && node.callee.property
|
|
223
|
+
? (node.callee.property.name || node.callee.property.value || '')
|
|
224
|
+
: '');
|
|
225
|
+
if (ctorName === 'Worker') {
|
|
226
|
+
const target = resolvePathArg(node.arguments[0], fileDir, packagePath);
|
|
227
|
+
if (target) targets.push(target);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
213
231
|
for (const key of Object.keys(node)) {
|
|
214
232
|
if (key === 'type') continue;
|
|
215
233
|
const child = node[key];
|
package/src/scanner/typosquat.js
CHANGED
|
@@ -24,9 +24,30 @@ const POPULAR_PACKAGES = [
|
|
|
24
24
|
'mobx', 'redux', 'zustand', 'formik', 'yup', 'ajv', 'validator',
|
|
25
25
|
'date-fns', 'dayjs', 'luxon', 'numeral', 'accounting', 'currency.js',
|
|
26
26
|
'lodash-es', 'core-js', 'regenerator-runtime', 'tslib', 'classnames',
|
|
27
|
-
'prop-types', 'cross-env', 'node-fetch', 'got'
|
|
27
|
+
'prop-types', 'cross-env', 'node-fetch', 'got',
|
|
28
|
+
// RT-C1 (Axios UNC1069 March 2026): crypto-js missing — wrapper packages declared
|
|
29
|
+
// `plain-crypto-js` as sub-dep. Added so dependency boundary-squat catches it.
|
|
30
|
+
'crypto-js'
|
|
28
31
|
];
|
|
29
32
|
|
|
33
|
+
// RT-C1: Hyphen tokens that legitimately PREFIX or SUFFIX popular package names.
|
|
34
|
+
// `<token>-<popular>` or `<popular>-<token>` is considered benign when the extra
|
|
35
|
+
// token is in this set (ecosystem qualifiers, framework prefixes, official scopes).
|
|
36
|
+
const LEGIT_BOUNDARY_TOKENS = new Set([
|
|
37
|
+
// Frameworks / build tools (also common official sub-packages)
|
|
38
|
+
'react', 'vue', 'angular', 'svelte', 'next', 'nuxt', 'gatsby', 'expo',
|
|
39
|
+
'eslint', 'babel', 'webpack', 'rollup', 'vite', 'parcel', 'esbuild',
|
|
40
|
+
'jest', 'mocha', 'vitest', 'karma', 'cypress', 'playwright',
|
|
41
|
+
'typescript', 'ts', 'tsdx', 'koa', 'fastify', 'express', 'nest',
|
|
42
|
+
'redux', 'mobx', 'apollo', 'graphql', 'rxjs',
|
|
43
|
+
// Build / runtime variants
|
|
44
|
+
'cli', 'core', 'utils', 'plugin', 'loader', 'preset', 'config',
|
|
45
|
+
'common', 'browser', 'node', 'native', 'web', 'mobile',
|
|
46
|
+
'esm', 'cjs', 'umd', 'es', 'types', 'typings',
|
|
47
|
+
// Versions / channels
|
|
48
|
+
'v2', 'v3', 'v4', 'next', 'latest', 'stable', 'lts', 'legacy', 'beta', 'alpha'
|
|
49
|
+
]);
|
|
50
|
+
|
|
30
51
|
// Packages legitimes courts ou qui ressemblent a des populaires
|
|
31
52
|
const WHITELIST = new Set([
|
|
32
53
|
// Packages tres courts legitimes
|
|
@@ -275,8 +296,6 @@ async function scanTyposquatting(targetPath) {
|
|
|
275
296
|
}
|
|
276
297
|
}
|
|
277
298
|
|
|
278
|
-
if (candidates.length === 0) return threats;
|
|
279
|
-
|
|
280
299
|
// Phase 2: API enrichment (batched to avoid socket exhaustion)
|
|
281
300
|
const BATCH_SIZE = 10;
|
|
282
301
|
const metadataResults = [];
|
|
@@ -333,9 +352,152 @@ async function scanTyposquatting(targetPath) {
|
|
|
333
352
|
});
|
|
334
353
|
}
|
|
335
354
|
|
|
355
|
+
// ============================================
|
|
356
|
+
// RT-C1: dependency boundary-squat detection (Axios UNC1069 March 2026)
|
|
357
|
+
// ============================================
|
|
358
|
+
// Runs on deps that did NOT match Levenshtein (length filter excludes them).
|
|
359
|
+
// Catches `<prefix>-<popular>` / `<popular>-<suffix>` injections in package.json
|
|
360
|
+
// deps, plus require/import usage cross-check inside the package source.
|
|
361
|
+
const levenshteinMatches = new Set(candidates.map(c => c.depName));
|
|
362
|
+
const RT_C1_MAX_DEPS = 50;
|
|
363
|
+
let depsEvaluated = 0;
|
|
364
|
+
for (const depName of Object.keys(dependencies)) {
|
|
365
|
+
if (depsEvaluated >= RT_C1_MAX_DEPS) break;
|
|
366
|
+
if (levenshteinMatches.has(depName)) continue; // already flagged by Levenshtein path
|
|
367
|
+
const bMatch = findDependencyBoundarySquat(depName);
|
|
368
|
+
if (!bMatch) continue;
|
|
369
|
+
depsEvaluated++;
|
|
370
|
+
|
|
371
|
+
const declMsg = 'Dependency "' + depName + '" looks like a boundary-squat of "'
|
|
372
|
+
+ bMatch.original + '" (extra token: "' + bMatch.extra + '"). Axios UNC1069 pattern.';
|
|
373
|
+
threats.push({
|
|
374
|
+
type: 'dependency_typosquat',
|
|
375
|
+
severity: 'HIGH',
|
|
376
|
+
message: declMsg,
|
|
377
|
+
file: 'package.json',
|
|
378
|
+
details: {
|
|
379
|
+
suspicious: depName,
|
|
380
|
+
legitimate: bMatch.original,
|
|
381
|
+
technique: bMatch.type,
|
|
382
|
+
extra: bMatch.extra,
|
|
383
|
+
distance: bMatch.distance
|
|
384
|
+
}
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
// Cross-check: scan package source for require/import of this dep name
|
|
388
|
+
const usages = findDependencyUsages(targetPath, depName);
|
|
389
|
+
for (const u of usages) {
|
|
390
|
+
threats.push({
|
|
391
|
+
type: 'dependency_typosquat_used',
|
|
392
|
+
severity: 'MEDIUM',
|
|
393
|
+
message: 'Boundary-squat dep "' + depName + '" is require()/import()d in source code',
|
|
394
|
+
file: u.file,
|
|
395
|
+
line: u.line,
|
|
396
|
+
details: {
|
|
397
|
+
suspicious: depName,
|
|
398
|
+
legitimate: bMatch.original
|
|
399
|
+
}
|
|
400
|
+
});
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
336
404
|
return threats;
|
|
337
405
|
}
|
|
338
406
|
|
|
407
|
+
/**
|
|
408
|
+
* RT-C1: Detects "boundary squat" patterns: <prefix>-<popular> or <popular>-<suffix>
|
|
409
|
+
* where the hyphenated tokens fully contain a popular package name and the extra
|
|
410
|
+
* material is NOT in LEGIT_BOUNDARY_TOKENS. This catches Axios UNC1069-style
|
|
411
|
+
* sub-dependency injection like `plain-crypto-js` (resembles `crypto-js`) that
|
|
412
|
+
* the Levenshtein matcher misses because length-diff is too large.
|
|
413
|
+
*/
|
|
414
|
+
function findDependencyBoundarySquat(name) {
|
|
415
|
+
const lower = name.toLowerCase();
|
|
416
|
+
if (!lower || lower.startsWith('@')) return null; // skip scoped
|
|
417
|
+
if (lower.length < MIN_PACKAGE_LENGTH) return null;
|
|
418
|
+
if (WHITELIST.has(lower)) return null;
|
|
419
|
+
if (isLegitimateVariant(lower)) return null;
|
|
420
|
+
if (!lower.includes('-')) return null; // need a boundary
|
|
421
|
+
// If it's an exact match to a popular package, not a squat
|
|
422
|
+
if (POPULAR_PACKAGES_LOWER.indexOf(lower) !== -1) return null;
|
|
423
|
+
|
|
424
|
+
for (let i = 0; i < POPULAR_PACKAGES.length; i++) {
|
|
425
|
+
const popular = POPULAR_PACKAGES_LOWER[i];
|
|
426
|
+
if (popular.length < MIN_PACKAGE_LENGTH) continue;
|
|
427
|
+
if (lower === popular) continue;
|
|
428
|
+
|
|
429
|
+
if (popular.includes('-')) {
|
|
430
|
+
// Multi-token popular (e.g. crypto-js): match prefix or suffix at hyphen boundary
|
|
431
|
+
let extra = null;
|
|
432
|
+
if (lower.endsWith('-' + popular)) {
|
|
433
|
+
extra = lower.slice(0, lower.length - popular.length - 1);
|
|
434
|
+
} else if (lower.startsWith(popular + '-')) {
|
|
435
|
+
extra = lower.slice(popular.length + 1);
|
|
436
|
+
}
|
|
437
|
+
if (extra === null || extra.length === 0) continue;
|
|
438
|
+
// Reject if extra is a legit boundary token (single token only)
|
|
439
|
+
if (!extra.includes('-') && LEGIT_BOUNDARY_TOKENS.has(extra)) continue;
|
|
440
|
+
return { original: POPULAR_PACKAGES[i], type: 'boundary_squat', distance: extra.length, extra };
|
|
441
|
+
} else {
|
|
442
|
+
// Single-token popular: must appear as a full hyphen-bounded token in name
|
|
443
|
+
const tokens = lower.split('-');
|
|
444
|
+
const idx = tokens.indexOf(popular);
|
|
445
|
+
if (idx === -1) continue;
|
|
446
|
+
if (tokens.length === 1) continue;
|
|
447
|
+
const siblings = tokens.filter((_, j) => j !== idx);
|
|
448
|
+
// If all siblings are legit boundary tokens → benign variant (e.g. react-router)
|
|
449
|
+
if (siblings.every(t => LEGIT_BOUNDARY_TOKENS.has(t) || isLegitimateVariant(t))) continue;
|
|
450
|
+
const extra = siblings.join('-');
|
|
451
|
+
return { original: POPULAR_PACKAGES[i], type: 'boundary_squat', distance: extra.length, extra };
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
return null;
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
// RT-C1: Bounded scan of the package source for require/import of a given dep name.
|
|
458
|
+
// Returns array of { file, line } usage sites. Bounds: 200 files, 256KB per file.
|
|
459
|
+
const _DEP_USE_MAX_FILES = 200;
|
|
460
|
+
const _DEP_USE_MAX_FILE_BYTES = 256 * 1024;
|
|
461
|
+
function findDependencyUsages(targetPath, depName) {
|
|
462
|
+
const out = [];
|
|
463
|
+
if (!depName) return out;
|
|
464
|
+
|
|
465
|
+
let files;
|
|
466
|
+
try {
|
|
467
|
+
// Use a local require to avoid a circular import surface — utils.js is stable.
|
|
468
|
+
const { findFiles } = require('../utils.js');
|
|
469
|
+
files = findFiles(targetPath, { extensions: ['.js', '.mjs', '.cjs'], maxFiles: _DEP_USE_MAX_FILES });
|
|
470
|
+
} catch {
|
|
471
|
+
return out;
|
|
472
|
+
}
|
|
473
|
+
if (!files || files.length === 0) return out;
|
|
474
|
+
|
|
475
|
+
// Pre-build matchers — escape regex metacharacters in dep name
|
|
476
|
+
const escaped = depName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
477
|
+
const reRequire = new RegExp(`require\\s*\\(\\s*['\"]${escaped}['\"]\\s*\\)`);
|
|
478
|
+
const reFrom = new RegExp(`from\\s+['\"]${escaped}['\"]`);
|
|
479
|
+
const reDynamic = new RegExp(`import\\s*\\(\\s*['\"]${escaped}['\"]\\s*\\)`);
|
|
480
|
+
|
|
481
|
+
for (const abs of files) {
|
|
482
|
+
let stat;
|
|
483
|
+
try { stat = fs.statSync(abs); } catch { continue; }
|
|
484
|
+
if (!stat.isFile() || stat.size > _DEP_USE_MAX_FILE_BYTES) continue;
|
|
485
|
+
let content;
|
|
486
|
+
try { content = fs.readFileSync(abs, 'utf8'); } catch { continue; }
|
|
487
|
+
// Fast-path early bail
|
|
488
|
+
if (!content.includes(depName)) continue;
|
|
489
|
+
const lines = content.split(/\r?\n/);
|
|
490
|
+
for (let i = 0; i < lines.length; i++) {
|
|
491
|
+
const ln = lines[i];
|
|
492
|
+
if (reRequire.test(ln) || reFrom.test(ln) || reDynamic.test(ln)) {
|
|
493
|
+
out.push({ file: path.relative(targetPath, abs), line: i + 1 });
|
|
494
|
+
break; // one match per file is enough — keeps signal density honest
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
return out;
|
|
499
|
+
}
|
|
500
|
+
|
|
339
501
|
function findTyposquatMatch(name) {
|
|
340
502
|
const nameLower = name.toLowerCase();
|
|
341
503
|
|
package/src/scoring.js
CHANGED
|
@@ -112,6 +112,8 @@ const PACKAGE_LEVEL_TYPES = new Set([
|
|
|
112
112
|
// Compound scoring rules — package-level co-occurrences
|
|
113
113
|
'lifecycle_typosquat', 'lifecycle_inline_exec', 'lifecycle_remote_require',
|
|
114
114
|
'lifecycle_dataflow', 'lifecycle_dangerous_exec', 'obfuscated_lifecycle_env',
|
|
115
|
+
// RT-C1: dependency boundary-squat family (Axios UNC1069 March 2026)
|
|
116
|
+
'dependency_typosquat', 'dependency_typosquat_require',
|
|
115
117
|
// Blue Team v8: package-level boost signals
|
|
116
118
|
'isolated_suspicious_file', 'deep_suspicious_file',
|
|
117
119
|
// Blue Team v8b: phantom lifecycle scripts
|
|
@@ -380,7 +382,9 @@ const DIST_EXEMPT_TYPES = new Set([
|
|
|
380
382
|
'crypto_staged_payload', 'lifecycle_typosquat',
|
|
381
383
|
'lifecycle_inline_exec', 'lifecycle_remote_require',
|
|
382
384
|
'lifecycle_file_exec', // B6: lifecycle → malicious file compound
|
|
383
|
-
'lifecycle_dataflow', 'lifecycle_dangerous_exec', 'obfuscated_lifecycle_env'
|
|
385
|
+
'lifecycle_dataflow', 'lifecycle_dangerous_exec', 'obfuscated_lifecycle_env',
|
|
386
|
+
// RT-C1: Boundary-squat compound is never coincidental (dep declared AND require()d)
|
|
387
|
+
'dependency_typosquat_require'
|
|
384
388
|
// P6: remote_code_load and proxy_data_intercept removed — in bundled dist/ files,
|
|
385
389
|
// fetch + eval co-occurrence is coincidental (bundler combines HTTP client + template compilation).
|
|
386
390
|
// fetch_decrypt_exec (fetch+decrypt+eval triple) remains exempt — never coincidental.
|
|
@@ -488,6 +492,15 @@ const SCORING_COMPOUNDS = [
|
|
|
488
492
|
message: 'Lifecycle hook on typosquat package — dependency confusion attack vector (scoring compound).',
|
|
489
493
|
fileFrom: 'typosquat_detected'
|
|
490
494
|
},
|
|
495
|
+
{
|
|
496
|
+
// RT-C1: Boundary-squat dep declared AND require()d in code → CRITICAL.
|
|
497
|
+
// Pattern Axios UNC1069 (March 2026): wrapper looks benign, payload is in the dep.
|
|
498
|
+
type: 'dependency_typosquat_require',
|
|
499
|
+
requires: ['dependency_typosquat', 'dependency_typosquat_used'],
|
|
500
|
+
severity: 'CRITICAL',
|
|
501
|
+
message: 'Boundary-squat dependency declared AND require()d in code — Axios UNC1069 pattern (scoring compound).',
|
|
502
|
+
fileFrom: 'dependency_typosquat_used'
|
|
503
|
+
},
|
|
491
504
|
{
|
|
492
505
|
type: 'lifecycle_inline_exec',
|
|
493
506
|
requires: ['lifecycle_script', 'node_inline_exec'],
|
|
@@ -1186,7 +1199,7 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps, re
|
|
|
1186
1199
|
// MUST run AFTER benign_lifecycle reduction to correctly detect LOW lifecycle_script.
|
|
1187
1200
|
const LIFECYCLE_GUARD_TYPES = new Set([
|
|
1188
1201
|
'obfuscation_detected', 'dynamic_require', 'dangerous_call_function',
|
|
1189
|
-
'dangerous_call_eval', 'staged_payload'
|
|
1202
|
+
'dangerous_call_eval', 'staged_payload', 'env_access'
|
|
1190
1203
|
]);
|
|
1191
1204
|
|
|
1192
1205
|
const lifecycleThreats = threats.filter(t => t.type === 'lifecycle_script');
|