npxconfuse 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +462 -0
- package/bin/cli.js +280 -0
- package/package.json +47 -0
- package/src/analyzer.js +167 -0
- package/src/extractors/js-bundle.js +147 -0
- package/src/extractors/package-json.js +162 -0
- package/src/formatters/csv.js +39 -0
- package/src/formatters/json.js +11 -0
- package/src/formatters/table.js +144 -0
- package/src/registries/npm.js +185 -0
- package/src/sources/github.js +142 -0
- package/src/sources/local.js +117 -0
- package/src/sources/web.js +182 -0
- package/src/utils/constants.js +181 -0
- package/src/utils/http.js +179 -0
- package/src/utils/logger.js +83 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { FINDING_TYPE } from '../utils/constants.js';
|
|
2
|
+
import logger from '../utils/logger.js';
|
|
3
|
+
|
|
4
|
+
/** Well-known public npm scopes that the dependency check skips entirely. */
const PUBLIC_SCOPES = new Set([
  '@types', '@babel', '@rollup', '@eslint', '@typescript-eslint',
  '@testing-library', '@jest', '@angular', '@vue', '@react-native',
  '@emotion', '@chakra-ui', '@mui', '@headlessui', '@radix-ui',
  '@tanstack', '@trpc', '@prisma', '@nestjs', '@nuxt', '@svelte',
  '@storybook', '@aws-sdk', '@azure', '@google-cloud', '@octokit',
  '@grpc', '@smithy', '@firebase', '@sentry', '@datadog',
]);

/** Manifest fields whose keys are inspected as dependency names. */
const DEPENDENCY_FIELDS = ['dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies'];

/** Version-spec prefixes that mark a dependency as a local reference. */
const LOCAL_PROTOCOLS = ['file:', 'link:', 'workspace:'];

/**
 * npx options that consume the NEXT token as their value (when not written
 * as `--flag=value`). Every other flag is treated as a boolean switch.
 */
const NPX_VALUE_FLAGS = new Set([
  '-p', '--package', '-c', '--call', '-w', '--workspace',
  '--cache', '--userconfig', '--registry', '-n', '--node-arg',
  '--shell', '--npm',
]);

/** Shell tokens that end the current command. */
const SHELL_OPERATORS = new Set(['&&', '||', '|', ';', '&']);

/**
 * Turn a raw command token into a bare package/binary name.
 *
 * @param {string|null|undefined} token - Token found in command position
 * @returns {string|null} The cleaned name, or null when the token is not a
 *   registry-resolvable name (empty, a flag, a path, or a shell variable)
 */
function normalizeNpxCommand(token) {
  if (!token) return null;

  // Drop shell punctuation glued to the token (`foo;`, `foo)`), then quotes.
  let name = token.replace(/[;&|)]+$/, '').replace(/^["']+|["']+$/g, '');

  // Strip a version/tag specifier: foo@latest -> foo, @s/pkg@1.2.3 -> @s/pkg.
  // Searching from index 1 skips the leading "@" of a scoped name.
  const versionAt = name.indexOf('@', 1);
  if (versionAt > 0) name = name.slice(0, versionAt);

  if (name === '' || /^[-./$]/.test(name)) return null;
  return name;
}

/**
 * Find the command name of every `npx` invocation inside a script string.
 *
 * Tokens after `npx` are walked left to right: flags are skipped (together
 * with their value for the options in NPX_VALUE_FLAGS) and the first
 * remaining token is the command. Anything after the command is ignored, so
 * `npx -y create-foo my-app` yields `create-foo`, not `my-app`.
 *
 * @param {string} script - A single package.json script value
 * @returns {string[]} Command names in order of appearance
 */
function findNpxCommands(script) {
  const commands = [];
  const invocation = /\bnpx\s+/g;

  while (invocation.exec(script) !== null) {
    const tokens = script.slice(invocation.lastIndex).match(/"[^"]*"|'[^']*'|\S+/g) ?? [];
    let command = null;

    for (let i = 0; i < tokens.length; i++) {
      const token = tokens[i];
      if (SHELL_OPERATORS.has(token)) break;

      // A bare `--` ends option parsing; the next token is the command.
      if (token === '--') {
        command = tokens[i + 1] ?? null;
        break;
      }

      if (token.startsWith('-')) {
        if (NPX_VALUE_FLAGS.has(token)) i++; // also skip the flag's value
        continue;
      }

      command = token;
      break;
    }

    const name = normalizeNpxCommand(command);
    if (name) commands.push(name);
  }

  return commands;
}

/**
 * Extract package/binary name candidates from a package.json file.
 * This is the PRIMARY extractor for npx confusion vulnerabilities.
 *
 * Three sources are inspected, in this order:
 *   1. `scripts`  - the command of every `npx` invocation
 *   2. `bin`      - unscoped binary names exposed by a scoped package
 *   3. dependency fields - scoped names for which `isPrivateScope` returns
 *      true, and any dependency declared with a file:/link:/workspace: spec
 *
 * Unparseable content, or JSON that is not an object, yields an empty array.
 *
 * NOTE(review): the command name is still reported when `--package` is
 * given, as before; whether the command or the `--package` value is what
 * gets fetched in that case should be confirmed against the npx docs.
 *
 * @param {string} content - Raw JSON content of the package.json
 * @param {string} filepath - Path to the file (for reporting)
 * @returns {Array<{name: string, type: string, source: string, context: string}>}
 */
export function extractFromPackageJson(content, filepath) {
  const results = [];

  let pkg;
  try {
    pkg = JSON.parse(content);
  } catch (err) {
    logger.debug(`Failed to parse ${filepath}: ${err.message}`);
    return results;
  }

  // `null`, numbers, strings and arrays are valid JSON but not a manifest.
  if (pkg === null || typeof pkg !== 'object' || Array.isArray(pkg)) {
    logger.debug(`Skipping ${filepath}: content is not a JSON object`);
    return results;
  }

  const packageName = typeof pkg.name === 'string' ? pkg.name : '';
  const isScoped = packageName.startsWith('@');
  const repoUrl = pkg.repository?.url || pkg.repository || '';

  // ── 1. npx invocations in scripts (PRIMARY — the core npx confusion vector) ──
  if (pkg.scripts && typeof pkg.scripts === 'object') {
    for (const [scriptName, scriptValue] of Object.entries(pkg.scripts)) {
      if (typeof scriptValue !== 'string') continue;

      for (const binaryName of findNpxCommands(scriptValue)) {
        results.push({
          name: binaryName,
          type: FINDING_TYPE.NPX_CONFUSION,
          source: filepath,
          context: `scripts.${scriptName}: npx ${binaryName}`,
        });
      }
    }
  }

  // ── 2. bin field — binary name ↔ package name mismatch ──
  if (pkg.bin) {
    // A string `bin` is keyed by the package name without its scope.
    const binEntries = typeof pkg.bin === 'string'
      ? { [packageName.split('/').pop() || packageName]: pkg.bin }
      : pkg.bin;

    for (const binName of Object.keys(binEntries)) {
      // Scoped package exposing an unscoped binary: the binary name differs
      // from the package name.
      if (isScoped && !binName.includes('/')) {
        results.push({
          name: binName,
          type: FINDING_TYPE.BIN_MISMATCH,
          source: filepath,
          context: `bin field: "${binName}" (package: ${packageName})`,
        });
      }
    }
  }

  // ── 3. Dependencies — potential dependency confusion targets ──
  for (const field of DEPENDENCY_FIELDS) {
    const deps = pkg[field];
    if (!deps || typeof deps !== 'object') continue;

    for (const [depName, depValue] of Object.entries(deps)) {
      if (depName.startsWith('@')) {
        const scope = depName.split('/')[0];

        // Public scopes are skipped completely, including the local-reference
        // check below.
        if (PUBLIC_SCOPES.has(scope)) continue;

        if (isPrivateScope(scope, repoUrl)) {
          results.push({
            name: depName,
            type: FINDING_TYPE.DEPENDENCY_CONFUSION,
            source: filepath,
            context: `${field}: ${depName} (scope "${scope}" looks private)`,
          });
        }
      }

      // file:, link: and workspace: specs point at local code.
      if (typeof depValue === 'string' &&
          LOCAL_PROTOCOLS.some((protocol) => depValue.startsWith(protocol))) {
        results.push({
          name: depName,
          type: FINDING_TYPE.DEPENDENCY_CONFUSION,
          source: filepath,
          context: `${field}: ${depName} (local reference: ${depValue})`,
        });
      }
    }
  }

  return results;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Heuristic guess at whether an npm scope is private/internal.
 *
 * Two signals are checked, in order:
 *   1. the repository URL (only when it is a string) matches one of the
 *      self-hosted / internal host patterns below;
 *   2. the scope itself matches one of the "internal"-style naming patterns.
 *
 * @param {string} scope - Scope including the leading "@", e.g. "@acme-internal"
 * @param {*} repoUrl - Repository URL; non-string values skip signal 1
 * @returns {boolean} true when either signal matches
 */
function isPrivateScope(scope, repoUrl) {
  if (typeof repoUrl === 'string') {
    const hostHints = [
      /github\.[a-z]+\.(com|net|org|io)/i,     // github.company.com
      /gitlab\.[a-z]+\.(com|net|org|io)/i,     // gitlab.company.com
      /bitbucket\.[a-z]+\.(com|net|org|io)/i,  // bitbucket.company.com
      /\.internal\b/i,                          // *.internal
      /\.corp\b/i,                              // *.corp
      /\.local\b/i,                             // *.local
    ];

    for (const hint of hostHints) {
      if (hint.test(repoUrl)) return true;
    }
  }

  const scopeHints = [
    /^@[a-z]+-internal$/i,
    /^@[a-z]+-private$/i,
    /^@[a-z]+-corp$/i,
    /^@internal-/i,
    /^@private-/i,
  ];

  for (const hint of scopeHints) {
    if (hint.test(scope)) return true;
  }
  return false;
}
|
|
161
|
+
|
|
162
|
+
export default extractFromPackageJson;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Render scan results as CSV text for spreadsheets and reports.
 *
 * The first line is the column header; each finding becomes one line, with
 * every cell passed through `escapeField`. A finding's `sources` list is
 * collapsed into a single "; "-separated cell. Lines are joined with "\n".
 *
 * @param {{findings: object[], summary: object}} results
 * @returns {string}
 */
export function formatCsv(results) {
  const columns = ['severity', 'name', 'type', 'registry', 'status', 'owner', 'details', 'sources'];

  const toLine = (finding) => {
    const cells = [
      finding.severity,
      finding.name,
      finding.type,
      finding.registry,
      finding.status,
      finding.owner || '',
      finding.details || '',
      (finding.sources || []).join('; '),
    ];
    return cells.map((cell) => escapeField(cell)).join(',');
  };

  const output = [columns.join(',')];
  for (const finding of results.findings) {
    output.push(toLine(finding));
  }
  return output.join('\n');
}
|
|
27
|
+
|
|
28
|
+
/**
 * Make a single value safe to place in a CSV cell.
 *
 * null/undefined become the empty string; everything else is stringified.
 * A value containing a comma, a double quote, or a line break is wrapped in
 * double quotes, with embedded quotes doubled. Other values pass through.
 *
 * @param {*} value
 * @returns {string}
 */
function escapeField(value) {
  const text = String(value ?? '');
  const needsQuoting = /[",\n\r]/.test(text);
  if (!needsQuoting) return text;

  const doubled = text.split('"').join('""');
  return `"${doubled}"`;
}
|
|
38
|
+
|
|
39
|
+
export default formatCsv;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Serialize scan results as pretty-printed JSON (2-space indent), suitable
 * for piping into other tools.
 *
 * @param {{findings: object[], summary: object}} results
 * @returns {string}
 */
export function formatJson(results) {
  const indentWidth = 2;
  const serialized = JSON.stringify(results, null, indentWidth);
  return serialized;
}
|
|
10
|
+
|
|
11
|
+
export default formatJson;
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import Table from "cli-table3";
|
|
3
|
+
|
|
4
|
+
/**
 * Apply the chalk style associated with a severity label.
 *
 * CRITICAL is additionally padded with one space on each side. A value that
 * is not one of the five known labels is returned unchanged.
 *
 * @param {string} severity
 * @returns {string}
 */
function colorSeverity(severity) {
  // Painters are functions so a chalk style is only touched when selected.
  const painters = new Map([
    ["CRITICAL", (label) => chalk.bgRed.white.bold(` ${label} `)],
    ["HIGH", (label) => chalk.red.bold(label)],
    ["MEDIUM", (label) => chalk.yellow(label)],
    ["LOW", (label) => chalk.blue(label)],
    ["INFO", (label) => chalk.gray(label)],
  ]);

  const paint = painters.get(severity);
  return paint ? paint(severity) : severity;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Turn a registry status into its coloured, symbol-prefixed display label.
 * A status other than the four known ones is returned unchanged.
 *
 * @param {string} status - "unclaimed", "claimed", "private" or "error"
 * @returns {string}
 */
function colorStatus(status) {
  if (status === "unclaimed") return chalk.red.bold("⬤ UNCLAIMED");
  if (status === "claimed") return chalk.yellow("● claimed");
  if (status === "private") return chalk.gray("○ private");
  if (status === "error") return chalk.gray("✖ error");
  return status;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Produce the coloured display label for a finding type.
 *
 * "dependency-confusion" is shortened to "dep-confusion"; the other known
 * types keep their text. An unknown type is returned unchanged.
 *
 * @param {string} type
 * @returns {string}
 */
function formatType(type) {
  // Renderers are functions so a chalk style is only touched when selected.
  const renderers = new Map([
    ["npx-confusion", () => chalk.magenta("npx-confusion")],
    ["bin-mismatch", () => chalk.magenta("bin-mismatch")],
    ["dependency-confusion", () => chalk.cyan("dep-confusion")],
    ["name-clash", () => chalk.yellow("name-clash")],
  ]);

  const render = renderers.get(type);
  if (render === undefined) return type;
  return render();
}
|
|
59
|
+
|
|
60
|
+
/**
 * Shorten a string so it is at most `maxLen` characters long.
 *
 * A falsy input gives "". A string that already fits is returned as is;
 * otherwise the first `maxLen - 1` characters are kept and "…" is appended.
 *
 * @param {string} str
 * @param {number} [maxLen=40]
 * @returns {string}
 */
function truncate(str, maxLen = 40) {
  if (!str) return "";

  const fits = str.length <= maxLen;
  return fits ? str : str.slice(0, maxLen - 1) + "…";
}
|
|
68
|
+
|
|
69
|
+
/**
 * Format results as a rich CLI table.
 *
 * With no findings, a short "no vulnerabilities" message is returned.
 * Otherwise one table row is emitted per finding, followed by a summary line
 * listing the non-zero severity counts.
 *
 * Cell text is truncated BEFORE it is coloured. Truncating a string that
 * already contains colour escape sequences counts those sequences toward the
 * length limit and can cut off the closing reset codes.
 *
 * @param {{findings: object[], summary: object}} results
 * @returns {string}
 */
export function formatTable(results) {
  const { findings, summary } = results;
  const lines = [];

  if (findings.length === 0) {
    lines.push("");
    lines.push(chalk.green.bold(" ✔ No vulnerabilities found!"));
    lines.push("");
    return lines.join("\n");
  }

  // Build table
  const table = new Table({
    head: [
      chalk.bold("Severity"),
      chalk.bold("Package"),
      chalk.bold("Type"),
      chalk.bold("Registry"),
      chalk.bold("Status"),
      chalk.bold("Details"),
      chalk.bold("Source"),
    ],
    colWidths: [12, 30, 18, 10, 16, 35, 40],
    wordWrap: true,
    style: {
      head: [],
      border: ["gray"],
    },
  });

  for (const f of findings) {
    // Unclaimed names get a fixed call-to-action; everything else shows the
    // finding's details, falling back to the owner.
    const details =
      f.status === "unclaimed"
        ? chalk.red.bold(truncate("CLAIMABLE — register this name!", 33))
        : truncate(f.details || f.owner || "", 33);

    table.push([
      colorSeverity(f.severity),
      chalk.white.bold(f.name),
      formatType(f.type),
      f.registry,
      colorStatus(f.status),
      details,
      truncate(f.sources?.join(", ") || "", 38),
    ]);
  }

  lines.push("");
  lines.push(table.toString());
  lines.push("");

  // Summary line: only severities with a positive count are listed.
  const parts = [];
  if (summary.critical > 0)
    parts.push(chalk.red.bold(`${summary.critical} critical`));
  if (summary.high > 0) parts.push(chalk.red(`${summary.high} high`));
  if (summary.medium > 0) parts.push(chalk.yellow(`${summary.medium} medium`));
  if (summary.low > 0) parts.push(chalk.blue(`${summary.low} low`));
  if (summary.info > 0) parts.push(chalk.gray(`${summary.info} info`));

  lines.push(
    ` Found ${chalk.bold(summary.total)} issue(s): ${parts.join(", ")}`,
  );
  lines.push(` Registry checked: npm`);
  lines.push("");

  return lines.join("\n");
}
|
|
143
|
+
|
|
144
|
+
export default formatTable;
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NPM Registry Checker
|
|
3
|
+
*
|
|
4
|
+
* Queries the public npm registry to determine whether a package name is
|
|
5
|
+
* unclaimed, claimed by someone else, or private. Used for detecting
|
|
6
|
+
* npx-confusion and dependency-confusion attack surface.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import pLimit from 'p-limit';
|
|
10
|
+
import { httpGet } from '../utils/http.js';
|
|
11
|
+
import logger from '../utils/logger.js';
|
|
12
|
+
import { SEVERITY, REGISTRIES, DEFAULT_CONCURRENCY } from '../utils/constants.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Encode a package name for safe use as a single segment of a registry URL.
 *
 * A scoped name `@scope/pkg` is emitted as `%40scope%2fpkg`. The scope and
 * the package part are each percent-encoded separately, so characters with a
 * meaning in URLs (`?`, `#`, further `/`, spaces, ...) can never leak into
 * the request path or query. Names are taken from scanned files and must be
 * treated as untrusted. Unscoped names are passed through
 * `encodeURIComponent`.
 *
 * @param {string} name — raw package name
 * @returns {string} — URL-safe encoded name
 */
function encodePackageName(name) {
  if (!name.startsWith('@')) {
    return encodeURIComponent(name);
  }

  const slash = name.indexOf('/');
  if (slash === -1) {
    // "@scope" with no package part: encode it as one opaque segment.
    return encodeURIComponent(name);
  }

  const scope = name.slice(1, slash);
  const pkg = name.slice(slash + 1);
  return `%40${encodeURIComponent(scope)}%2f${encodeURIComponent(pkg)}`;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Look up last week's download count for a package via the npm downloads
 * endpoint (one retry, 8 s timeout).
 *
 * Never throws: a failed request, a non-200 status, or a payload without a
 * numeric `downloads` field all produce 0. Request failures are logged at
 * debug level.
 *
 * @param {string} name — package name (unencoded)
 * @returns {Promise<number>}
 */
async function fetchWeeklyDownloads(name) {
  try {
    const endpoint = `https://api.npmjs.org/downloads/point/last-week/${encodePackageName(name)}`;
    const response = await httpGet(endpoint, { retries: 1, timeout: 8000 });

    const count =
      response.status === 200 && response.data ? response.data.downloads : undefined;
    return typeof count === 'number' ? count : 0;
  } catch (err) {
    logger.debug(`Failed to fetch weekly downloads for "${name}": ${err.message}`);
    return 0;
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Check a single package name against the npm registry.
 *
 * The (trimmed) name is requested from `REGISTRIES.npm.registry` and the
 * HTTP outcome is mapped to a result object:
 *   - request throws      → status "error"     (severity INFO)
 *   - 404                 → status "unclaimed" (severity CRITICAL)
 *   - 401 / 403           → status "private"   (severity INFO)
 *   - 200 with a body     → status "claimed"   (severity MEDIUM), including
 *                           owner, description, timestamps, weekly downloads
 *   - anything else       → status "error"     (severity INFO)
 *
 * @param {string} packageName — the npm package name to check
 * @returns {Promise<object>} — result object describing the package status
 */
export async function checkNpm(packageName) {
  const name = packageName.trim();
  const registryUrl = `${REGISTRIES.npm.registry}/${encodePackageName(name)}`;

  // Fields shared by every result shape; spread first to keep key order.
  const identity = { name, registry: 'npm' };

  logger.debug(`Checking npm registry for "${name}"`);

  let status;
  let data;
  try {
    ({ status, data } = await httpGet(registryUrl));
  } catch (err) {
    logger.debug(`npm registry request failed for "${name}": ${err.message}`);
    return {
      ...identity,
      status: 'error',
      severity: SEVERITY.INFO,
      details: `Could not reach registry: ${err.message}`,
    };
  }

  switch (status) {
    // Package does not exist on npm.
    case 404:
      return {
        ...identity,
        status: 'unclaimed',
        severity: SEVERITY.CRITICAL,
        details: 'Package name is not registered on npmjs.com',
      };

    // Registry refuses to show the package.
    case 401:
    case 403:
      return {
        ...identity,
        status: 'private',
        severity: SEVERITY.INFO,
        details: 'Scoped package exists but is private',
      };

    default:
      break;
  }

  // Unexpected status, or a 200 without a usable body.
  if (status !== 200 || !data) {
    return {
      ...identity,
      status: 'error',
      severity: SEVERITY.INFO,
      details: `Could not reach registry (HTTP ${status || 0})`,
    };
  }

  // Owner: first maintainer's name, else the author (object or plain value).
  const { maintainers, author } = data;
  const leadMaintainer =
    Array.isArray(maintainers) && maintainers.length > 0 ? maintainers[0].name : null;
  const owner =
    leadMaintainer ||
    (author && typeof author === 'object' ? author.name : author) ||
    'unknown';

  const timeMap = data.time || {};
  const weeklyDownloads = await fetchWeeklyDownloads(name);

  return {
    ...identity,
    status: 'claimed',
    owner,
    description: data.description || '',
    lastPublish: timeMap.modified || null,
    createdAt: timeMap.created || null,
    weeklyDownloads,
    severity: SEVERITY.MEDIUM,
  };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Run `checkNpm` over a list of package names with bounded concurrency.
 *
 * An empty or missing list resolves to an empty array without logging.
 * Results are returned in the same order as `names`. Progress is logged at
 * debug level after every 50th completion and after the last one.
 *
 * @param {string[]} names — array of package names to check
 * @param {number} [concurrency=DEFAULT_CONCURRENCY] — maximum concurrent requests
 * @returns {Promise<object[]>} — array of result objects from checkNpm()
 */
export async function checkNpmBatch(names, concurrency = DEFAULT_CONCURRENCY) {
  if (!names || names.length === 0) {
    return [];
  }

  const schedule = pLimit(concurrency);
  const total = names.length;
  let finished = 0;

  logger.info(`Checking ${total} package(s) against npm registry (concurrency: ${concurrency})`);

  const checkOne = async (name) => {
    const outcome = await checkNpm(name);
    finished += 1;

    // Report only at intervals so large batches do not flood the log.
    const atMilestone = finished % 50 === 0 || finished === total;
    if (atMilestone) {
      logger.debug(`npm progress: ${finished}/${total}`);
    }

    return outcome;
  };

  const pending = names.map((name) => schedule(() => checkOne(name)));
  const results = await Promise.all(pending);

  logger.success(`Completed npm registry checks: ${total} package(s)`);

  return results;
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import { Octokit } from "@octokit/rest";
|
|
2
|
+
import pLimit from "p-limit";
|
|
3
|
+
import logger from "../utils/logger.js";
|
|
4
|
+
import { GITHUB_DEFAULTS } from "../utils/constants.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Scan a GitHub organization for package.json manifests.
 *
 * Repositories are listed page by page (most recently updated first) until
 * `maxRepos` have been collected or the listing ends; each selected
 * repository is then passed to `scanRepo` with bounded concurrency. A
 * repository whose scan throws is logged at debug level and skipped.
 *
 * When listing hits the GitHub rate limit (HTTP 403 with
 * `x-ratelimit-remaining: 0`), the function sleeps until the advertised
 * reset time and retries the same page.
 *
 * @param {string} org - GitHub organization name
 * @param {object} options
 * @param {number} options.maxRepos - Max repos to scan (default GITHUB_DEFAULTS.maxRepos)
 * @param {number} options.concurrency - Parallel repo scans (default 5)
 * @param {string} options.githubEnterprise - Base URL for GHE; "/api/v3" is appended
 * @returns {Promise<Array<{filepath: string, content: string, type: string}>>}
 * @throws {Error} When GITHUB_TOKEN is not set, or listing repositories fails
 *   for a reason other than rate limiting
 */
export async function scanGitHub(org, options = {}) {
  const token = process.env.GITHUB_TOKEN;
  if (!token) {
    throw new Error(
      "GITHUB_TOKEN environment variable is required for GitHub scanning.\n" +
        'Create one at https://github.com/settings/tokens with "repo" or "public_repo" scope.\n' +
        "Usage: GITHUB_TOKEN=ghp_xxx npxconfuse github <org>",
    );
  }

  const octokitOpts = { auth: token };
  if (options.githubEnterprise) {
    // Drop trailing slashes so the base never becomes "https://host//api/v3".
    const enterpriseRoot = String(options.githubEnterprise).replace(/\/+$/, "");
    octokitOpts.baseUrl = `${enterpriseRoot}/api/v3`;
  }

  const octokit = new Octokit(octokitOpts);
  const maxRepos = options.maxRepos || GITHUB_DEFAULTS.maxRepos;
  const concurrency = options.concurrency || 5;
  const limit = pLimit(concurrency);

  // Wait used when the rate-limit reset header is missing or unparseable.
  const fallbackWaitMs = 60000;

  logger.info(`Scanning GitHub organization: ${org}`);

  // ── 1. List all repos ──
  const listed = [];
  let page = 1;

  while (listed.length < maxRepos) {
    try {
      const { data } = await octokit.repos.listForOrg({
        org,
        per_page: GITHUB_DEFAULTS.perPage,
        page,
        sort: "updated",
      });

      if (data.length === 0) break;
      listed.push(...data);
      page++;

      // A short page means there is nothing further to fetch.
      if (data.length < GITHUB_DEFAULTS.perPage) break;
    } catch (err) {
      if (
        err.status === 403 &&
        err.response?.headers?.["x-ratelimit-remaining"] === "0"
      ) {
        // Without this guard a missing/invalid header gives a NaN delay,
        // which setTimeout treats as "fire immediately" → tight retry loop.
        const resetSeconds = Number.parseInt(
          err.response.headers["x-ratelimit-reset"],
          10,
        );
        const untilReset = Number.isNaN(resetSeconds)
          ? fallbackWaitMs
          : resetSeconds * 1000 - Date.now();
        const waitMs = Math.max(untilReset, 1000);
        logger.warn(
          `GitHub rate limit hit. Waiting ${Math.ceil(waitMs / 1000)}s...`,
        );
        await new Promise((resolve) => setTimeout(resolve, waitMs));
        continue;
      }
      throw new Error(`Failed to list repos for "${org}": ${err.message}`, {
        cause: err,
      });
    }
  }

  // Pages are fetched whole, so the list can overshoot maxRepos. Trim once
  // here so every count below refers to the repos that are actually scanned.
  const repos = listed.slice(0, maxRepos);

  logger.info(`Found ${repos.length} repositories in ${org}`);

  // ── 2. Search each repo for relevant files ──
  const results = [];
  let processed = 0;

  const tasks = repos.map((repo) =>
    limit(async () => {
      try {
        const repoFiles = await scanRepo(octokit, org, repo.name);
        results.push(...repoFiles);
      } catch (err) {
        logger.debug(`Error scanning ${repo.name}: ${err.message}`);
      }

      processed++;
      if (processed % 10 === 0 || processed === repos.length) {
        logger.info(
          `Progress: ${processed}/${repos.length} repos scanned (${results.length} files found)`,
        );
      }
    }),
  );

  await Promise.all(tasks);
  logger.success(
    `GitHub scan complete: ${results.length} files from ${repos.length} repos`,
  );

  return results;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Fetch the root `package.json` of one repository through the contents API.
 *
 * The file is returned only when the response describes a file with a
 * non-empty `content` field, which is decoded from base64 to UTF-8 text.
 * A 404 is treated as "this repo has no such file" and ignored silently;
 * any other failure is logged at debug level and also skipped.
 *
 * @param {object} octokit - Client exposing `repos.getContent`
 * @param {string} org - Owner of the repository
 * @param {string} repoName - Repository name
 * @returns {Promise<Array<{filepath: string, content: string, type: string}>>}
 */
async function scanRepo(octokit, org, repoName) {
  const found = [];
  const targets = [{ path: "package.json", type: "package-json" }];

  for (const target of targets) {
    const location = `${org}/${repoName}/${target.path}`;

    try {
      const response = await octokit.repos.getContent({
        owner: org,
        repo: repoName,
        path: target.path,
      });
      const entry = response.data;

      if (entry.type !== "file" || !entry.content) continue;

      found.push({
        filepath: location,
        content: Buffer.from(entry.content, "base64").toString("utf-8"),
        type: target.type,
      });
    } catch (err) {
      // 404 is expected — file doesn't exist in this repo
      if (err.status === 404) continue;

      logger.debug(
        `Error fetching ${location}: ${err.message}`,
      );
    }
  }

  return found;
}
|
|
141
|
+
|
|
142
|
+
export default scanGitHub;
|