npxconfuse 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ import { readFile, stat } from "node:fs/promises";
2
+ import { resolve, basename, extname } from "node:path";
3
+ import { glob } from "glob";
4
+ import logger from "../utils/logger.js";
5
+
6
/**
 * Collect the files under a local path that the scanner knows how to analyze.
 *
 * A single file is read and classified directly. A directory is walked with
 * glob: `package.json` manifests are always collected, and JavaScript sources
 * (`.js`, `.mjs`, `.cjs`) are added when `options.deep` is set. Dependency,
 * VCS, build-output and coverage directories are excluded from the walk.
 *
 * @param {string} targetPath - File or directory to scan
 * @param {object} [options]
 * @param {boolean} [options.deep] - Also collect JS files (slower)
 * @returns {Promise<Array<{filepath: string, content: string, type: string}>>}
 * @throws {Error} If the path cannot be stat'ed, or is neither a file nor a directory
 */
export async function scanLocal(targetPath, options = {}) {
  const absPath = resolve(targetPath);

  const stats = await stat(absPath).catch((err) => {
    throw new Error(`Cannot access "${absPath}": ${err.message}`);
  });

  // Single-file target: zero or one result, no directory walk.
  if (stats.isFile()) {
    const single = await readSingleFile(absPath);
    return single ? [single] : [];
  }

  if (!stats.isDirectory()) {
    throw new Error(`"${absPath}" is not a file or directory`);
  }

  logger.info(`Scanning directory: ${absPath}`);

  // Directories that are never worth descending into.
  const ignore = [
    "node_modules",
    ".git",
    "dist",
    "build",
    ".next",
    "coverage",
    ".nyc_output",
  ].map((dir) => `**/${dir}/**`);

  const patterns = ["**/package.json"];
  if (options.deep) {
    patterns.push(...["js", "mjs", "cjs"].map((ext) => `**/*.${ext}`));
    logger.info(
      "Deep scan enabled — also scanning JS bundles (this may take a while)",
    );
  }

  const matches = await glob(patterns, {
    cwd: absPath,
    ignore,
    nodir: true,
    absolute: true,
  });

  logger.info(`Found ${matches.length} files to analyze`);

  // Files are read one at a time; an unreadable file is logged and skipped.
  const collected = [];
  for (const filepath of matches) {
    try {
      const content = await readFile(filepath, "utf-8");
      const type = classifyFile(filepath);
      if (!type) continue;
      collected.push({ filepath, content, type });
    } catch (err) {
      logger.warn(`Skipping unreadable file: ${filepath} (${err.message})`);
    }
  }

  return collected;
}
83
+
84
/**
 * Load one file as UTF-8 text and tag it with its scanner type.
 *
 * @param {string} filepath - Path of the file to read
 * @returns {Promise<{filepath: string, content: string, type: string} | null>}
 *   The file record, or `null` when the file is unreadable (logged as an
 *   error) or its type is not recognized (logged as a warning).
 */
async function readSingleFile(filepath) {
  try {
    const content = await readFile(filepath, "utf-8");
    const type = classifyFile(filepath);
    if (type) {
      return { filepath, content, type };
    }
    logger.warn(`Unrecognized file type: ${filepath}`);
  } catch (err) {
    logger.error(`Cannot read file: ${filepath} (${err.message})`);
  }
  return null;
}
101
+
102
/**
 * Map a file path to the scanner's file type, based on its name or extension.
 * Matching is case-insensitive.
 *
 * @param {string} filepath
 * @returns {"package-json" | "js-bundle" | null} `null` when the file is not scannable
 */
function classifyFile(filepath) {
  // package-lock.json is treated like a manifest: it may contain useful data.
  const manifestNames = new Set(["package.json", "package-lock.json"]);
  const bundleExtensions = new Set([".js", ".mjs", ".cjs"]);

  if (manifestNames.has(basename(filepath).toLowerCase())) {
    return "package-json";
  }
  return bundleExtensions.has(extname(filepath).toLowerCase())
    ? "js-bundle"
    : null;
}
116
+
117
+ export default scanLocal;
@@ -0,0 +1,182 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import pLimit from 'p-limit';
3
+ import { httpGet, httpGetText } from '../utils/http.js';
4
+ import { WEB_PROBE_PATHS, SKIP_EXTENSIONS } from '../utils/constants.js';
5
+ import logger from '../utils/logger.js';
6
+
7
/**
 * Scan every target listed in a domains file for exposed package manifests
 * and JS bundles.
 *
 * The file holds one domain or URL per line; blank lines and lines starting
 * with `#` are ignored. Targets are scanned in parallel up to the configured
 * concurrency, and a failure on one target never aborts the others.
 *
 * @param {string} domainsFile - Path to file with one domain/URL per line
 * @param {object} [options]
 * @param {number} [options.concurrency] - Parallel domain scans (default 10)
 * @param {number} [options.timeout] - HTTP timeout in ms
 * @returns {Promise<Array<{filepath: string, content: string, type: string}>>}
 * @throws {Error} If the file cannot be read or contains no targets
 */
export async function scanWeb(domainsFile, options = {}) {
  const raw = await readFile(domainsFile, 'utf-8');

  const domains = [];
  for (const rawLine of raw.split('\n')) {
    const line = rawLine.trim();
    if (line === '' || line.startsWith('#')) continue;
    domains.push(line);
  }

  const total = domains.length;
  if (total === 0) {
    throw new Error(`No domains found in ${domainsFile}`);
  }

  logger.info(`Loaded ${total} domains from ${domainsFile}`);

  const limit = pLimit(options.concurrency || 10);
  const results = [];
  let processed = 0;

  // Scan one target, append its findings, then report progress.
  const scanOne = async (domain) => {
    try {
      const found = await scanDomain(normalizeDomain(domain), options);
      results.push(...found);
    } catch (err) {
      logger.debug(`Error scanning ${domain}: ${err.message}`);
    }

    processed += 1;
    const isMilestone = processed % 20 === 0;
    const isLast = processed === total;
    if (isMilestone || isLast) {
      logger.info(`Progress: ${processed}/${total} domains scanned (${results.length} files found)`);
    }
  };

  await Promise.all(domains.map((domain) => limit(() => scanOne(domain))));
  logger.success(`Web scan complete: ${results.length} files from ${total} domains`);

  return results;
}
55
+
56
/**
 * Normalize a domain string to a base URL without trailing slashes.
 *
 * An explicit `http://` or `https://` scheme (matched case-insensitively) is
 * kept; anything else is assumed to be a bare host and gets `https://`.
 * Trailing slashes are removed in both cases, because callers build probe
 * URLs by appending paths that start with `/` — leaving the slash in place
 * would produce `https://host//package.json`.
 *
 * @param {string} domain - Bare host (`example.com`) or full URL
 * @returns {string} Base URL with no trailing slash
 */
function normalizeDomain(domain) {
  const hasScheme = /^https?:\/\//i.test(domain);
  const withScheme = hasScheme ? domain : `https://${domain}`;
  return withScheme.replace(/\/+$/, '');
}
65
+
66
/**
 * Scan a single web target for exposed package files and JS bundles.
 *
 * Step 1 requests every path in WEB_PROBE_PATHS and keeps each 200 response
 * whose body looks like JSON. All probe hits are reported as `package-json`,
 * which is also how the local scanner classifies `package-lock.json`.
 * Step 2 fetches the root page, extracts its `<script src>` URLs, and
 * downloads up to 10 of them as `js-bundle` results.
 *
 * Network failures are expected for most targets, so they are logged at
 * debug level and never thrown.
 *
 * @param {string} baseUrl - Base URL without trailing slash
 * @param {object} [options]
 * @param {number} [options.timeout] - HTTP timeout in ms
 * @returns {Promise<Array<{filepath: string, content: string, type: string}>>}
 */
async function scanDomain(baseUrl, options = {}) {
  const results = [];

  // ── 1. Probe known paths ──
  for (const probePath of WEB_PROBE_PATHS) {
    const url = `${baseUrl}${probePath}`;
    try {
      const response = await httpGet(url, {
        timeout: options.timeout,
        retries: 1,
      });

      if (response.status !== 200 || !response.data) continue;

      // httpGet may already have parsed JSON; downstream wants the raw text.
      const content = typeof response.data === 'string'
        ? response.data
        : JSON.stringify(response.data);

      // Reject HTML error pages served with status 200. The check applies to
      // every probe path: the previous `includes('package.json')` guard
      // silently dropped package-lock.json and npm-shrinkwrap.json hits.
      if (!isLikelyJson(content)) continue;

      results.push({
        filepath: url,
        content,
        type: 'package-json',
      });
      logger.debug(`Found exposed ${probePath} at ${baseUrl}`);
    } catch (err) {
      // Expected for most domains
      logger.debug(`Probe failed for ${url}: ${err.message}`);
    }
  }

  // ── 2. Fetch root page and extract JS bundle URLs ──
  try {
    const rootResponse = await httpGetText(`${baseUrl}/`, {
      timeout: options.timeout,
      retries: 1,
    });

    if (rootResponse.status === 200 && typeof rootResponse.data === 'string') {
      const scriptUrls = extractScriptUrls(rootResponse.data, baseUrl);
      const maxBundles = 10;

      for (const scriptUrl of scriptUrls.slice(0, maxBundles)) {
        // Skip non-JS assets (extension taken before any query string/fragment)
        const ext = scriptUrl.split('?')[0].split('#')[0].split('.').pop()?.toLowerCase();
        if (SKIP_EXTENSIONS.has(`.${ext}`)) continue;

        try {
          const bundleResponse = await httpGetText(scriptUrl, {
            timeout: options.timeout,
            retries: 0,
          });

          const body = bundleResponse.data;
          if (bundleResponse.status !== 200 || typeof body !== 'string') continue;

          // Only keep bodies that look like JS (not tiny stubs or HTML error pages)
          if (body.length > 100 && !body.startsWith('<!')) {
            results.push({
              filepath: scriptUrl,
              content: body,
              type: 'js-bundle',
            });
          }
        } catch (err) {
          // Skip failed bundle fetches
          logger.debug(`Bundle fetch failed for ${scriptUrl}: ${err.message}`);
        }
      }
    }
  } catch (err) {
    // Root page fetch failed — that's fine
    logger.debug(`Root page fetch failed for ${baseUrl}: ${err.message}`);
  }

  return results;
}
144
+
145
/**
 * Extract the absolute http(s) URLs of all `<script src="...">` tags in an
 * HTML document.
 *
 * Each `src` is resolved against `baseUrl` with the WHATWG URL parser, so
 * absolute, root-relative (`/a.js`), document-relative (`a.js`, `./a.js`,
 * `../a.js`) and protocol-relative (`//cdn/a.js`, which inherits the base
 * scheme) references are all handled the same way a browser would. Only a
 * real `src` attribute is matched (not `data-src`), `&amp;` is decoded, and
 * non-http(s) schemes (`data:`, `blob:`, `javascript:` …) as well as
 * unparseable values are skipped. Duplicates are removed; document order is
 * preserved.
 *
 * @param {string} html - HTML source of the page
 * @param {string} baseUrl - Base URL of the page (with or without trailing slash)
 * @returns {string[]} Absolute script URLs; empty when `baseUrl` is not a valid URL
 */
function extractScriptUrls(html, baseUrl) {
  // Group 1 is the opening quote, group 2 the attribute value.
  const scriptSrc = /<script\b[^>]*?\ssrc\s*=\s*(["'])(.*?)\1/gi;

  let base;
  try {
    // The trailing slash makes the base a directory, so "a.js" resolves below it.
    base = new URL(`${baseUrl.replace(/\/+$/, '')}/`);
  } catch {
    return [];
  }

  const found = new Set();
  for (const match of html.matchAll(scriptSrc)) {
    const raw = match[2].trim().replaceAll('&amp;', '&');
    if (raw === '') continue;

    let resolved;
    try {
      resolved = new URL(raw, base);
    } catch {
      continue; // malformed reference
    }

    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') continue;
    found.add(resolved.href);
  }

  return [...found];
}
173
+
174
/**
 * Cheap sniff test for JSON: true when the first non-whitespace character
 * opens an object or an array. Used to tell a JSON body apart from an HTML
 * page; it does not validate the document.
 *
 * @param {string} content
 * @returns {boolean}
 */
function isLikelyJson(content) {
  const first = content.trim().charAt(0);
  return first === '{' || first === '[';
}
181
+
182
+ export default scanWeb;
@@ -0,0 +1,181 @@
1
+ /**
2
+ * Constants used throughout npxconfuse
3
+ */
4
+
5
// Built-in module specifiers as they are written WITHOUT the `node:` scheme.
const UNPREFIXED_BUILTINS = [
  "_http_agent",
  "_http_client",
  "_http_common",
  "_http_incoming",
  "_http_outgoing",
  "_http_server",
  "_stream_duplex",
  "_stream_passthrough",
  "_stream_readable",
  "_stream_transform",
  "_stream_wrap",
  "_stream_writable",
  "_tls_common",
  "_tls_wrap",
  "assert",
  "assert/strict",
  "async_hooks",
  "buffer",
  "child_process",
  "cluster",
  "console",
  "constants",
  "crypto",
  "dgram",
  "diagnostics_channel",
  "dns",
  "dns/promises",
  "domain",
  "events",
  "fs",
  "fs/promises",
  "http",
  "http2",
  "https",
  "inspector",
  "inspector/promises",
  "module",
  "net",
  "os",
  "path",
  "path/posix",
  "path/win32",
  "perf_hooks",
  "process",
  "punycode",
  "querystring",
  "readline",
  "readline/promises",
  "repl",
  "stream",
  "stream/consumers",
  "stream/promises",
  "stream/web",
  "string_decoder",
  "sys",
  "timers",
  "timers/promises",
  "tls",
  "trace_events",
  "tty",
  "url",
  "util",
  "util/types",
  "v8",
  "vm",
  "wasi",
  "worker_threads",
  "zlib",
];

// Built-ins that can ONLY be loaded with the `node:` scheme. Their bare names
// (e.g. "test") are ordinary registry package names and must not be filtered.
const PREFIX_ONLY_BUILTINS = ["sea", "sqlite", "test", "test/reporters"];

// Node.js built-in modules to filter out during extraction.
// The `node:`-prefixed forms are derived from the base list instead of being
// maintained by hand, so subpath specifiers such as `node:fs/promises` and
// modules such as `node:async_hooks` cannot be forgotten.
export const NODE_BUILTINS = new Set([
  ...UNPREFIXED_BUILTINS,
  ...UNPREFIXED_BUILTINS.map((name) => `node:${name}`),
  ...PREFIX_ONLY_BUILTINS.map((name) => `node:${name}`),
]);
118
+
119
// Registry endpoints, keyed by ecosystem.
const NPM_REGISTRY_URL = "https://registry.npmjs.org";
const NPM_DOWNLOADS_URL = "https://api.npmjs.org/downloads/point/last-month";

export const REGISTRIES = {
  npm: { registry: NPM_REGISTRY_URL, downloads: NPM_DOWNLOADS_URL },
};

// Default number of HTTP requests allowed to run in parallel.
export const DEFAULT_CONCURRENCY = 20;

// Default HTTP timeout, in milliseconds (10 seconds).
export const DEFAULT_TIMEOUT = 10 * 1000;

// Severity levels, from most to least severe. Each key maps to its own name.
export const SEVERITY = Object.fromEntries(
  ["CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO"].map((level) => [level, level]),
);

// Identifiers for the kinds of findings the scanner can report.
export const FINDING_TYPE = {
  NPX_CONFUSION: "npx-confusion",
  DEPENDENCY_CONFUSION: "dependency-confusion",
  BIN_MISMATCH: "bin-mismatch",
  NAME_CLASH: "name-clash",
};

// File extensions (with leading dot) that web scraping should skip.
export const SKIP_EXTENSIONS = new Set(
  [
    "png", "jpg", "jpeg", "gif", "svg", "ico",
    "woff", "woff2", "ttf", "eot",
    "mp4", "webm", "mp3",
    "pdf", "zip", "tar",
  ].map((ext) => `.${ext}`),
);

// Root-relative paths to probe on web targets.
export const WEB_PROBE_PATHS = [
  "package.json",
  "package-lock.json",
  "npm-shrinkwrap.json",
].map((file) => `/${file}`);

// GitHub API defaults.
export const GITHUB_DEFAULTS = { perPage: 100, maxRepos: 1000 };
@@ -0,0 +1,179 @@
1
+ import { DEFAULT_TIMEOUT } from "./constants.js";
2
+ import logger from "./logger.js";
3
+
4
+ /**
5
+ * HTTP client wrapper with retry, backoff, and rate limiting.
6
+ * Uses Node.js built-in fetch (available in Node 18+).
7
+ */
8
+
9
/**
 * Simple token-bucket rate limiter.
 *
 * The bucket holds up to `maxTokens` tokens and refills continuously at
 * `refillRate` tokens per second. Every `acquire()` consumes exactly one
 * whole token, waiting for the refill when none is available. The balance
 * never drops below zero, even when many callers wait at the same time.
 */
class RateLimiter {
  /**
   * @param {number} maxTokens - Bucket capacity (burst size). Values below 1
   *   are raised to 1, otherwise a whole token could never accumulate.
   * @param {number} refillRate - Tokens added per second. A value that is not
   *   a positive number disables limiting.
   */
  constructor(maxTokens, refillRate) {
    this.maxTokens = maxTokens >= 1 ? maxTokens : 1;
    this.tokens = this.maxTokens;
    this.refillRate = refillRate; // tokens per second
    this.lastRefill = Date.now();
  }

  /**
   * Take one token, sleeping until one is available.
   * @returns {Promise<void>}
   */
  async acquire() {
    // Without a positive refill rate the bucket could never refill and the
    // loop below would spin forever, so treat it as "no limit".
    if (!(this.refillRate > 0)) return;

    for (;;) {
      this._refill();
      if (this.tokens >= 1) {
        this.tokens -= 1;
        return;
      }

      // Sleep just long enough for the missing fraction to refill, then
      // re-check: another waiter may have taken the token in the meantime.
      const missing = 1 - this.tokens;
      const waitMs = Math.max(1, Math.ceil((missing / this.refillRate) * 1000));
      await new Promise((resolve) => setTimeout(resolve, waitMs));
    }
  }

  /** Credit the tokens earned since the last refill, capped at capacity. */
  _refill() {
    const now = Date.now();
    const elapsed = (now - this.lastRefill) / 1000;
    this.tokens = Math.min(
      this.maxTokens,
      this.tokens + elapsed * this.refillRate,
    );
    this.lastRefill = now;
  }
}
41
+
42
// One rate limiter per hostname, created on first use.
const limiters = new Map();

/**
 * Return the rate limiter for a host, creating it on first request.
 * The rate only takes effect when the limiter is created; later calls for
 * the same host return the existing limiter regardless of `rate`.
 *
 * @param {string} hostname
 * @param {number} [rate=30] - Requests per second, also used as burst capacity
 * @returns {RateLimiter}
 */
function getLimiter(hostname, rate = 30) {
  if (limiters.has(hostname)) {
    return limiters.get(hostname);
  }
  const created = new RateLimiter(rate, rate);
  limiters.set(hostname, created);
  return created;
}
51
+
52
/**
 * Convert a `Retry-After` header value into a delay in milliseconds.
 *
 * The header is either a number of seconds or an HTTP date. The result is
 * clamped to the largest delay `setTimeout` accepts, because larger values
 * overflow to a 1 ms timer.
 *
 * @param {string | null} value - Raw header value
 * @returns {number | null} Delay in ms, or `null` when absent or unparseable
 */
function parseRetryAfterMs(value) {
  if (!value) return null;

  const MAX_TIMER_MS = 2 ** 31 - 1;
  const text = value.trim();

  if (/^\d+$/.test(text)) {
    return Math.min(Number(text) * 1000, MAX_TIMER_MS);
  }

  const dateMs = Date.parse(text);
  if (Number.isNaN(dateMs)) return null;
  return Math.min(Math.max(0, dateMs - Date.now()), MAX_TIMER_MS);
}

/**
 * Fetch with retry and exponential backoff.
 *
 * 4xx responses other than 429 are returned immediately with `data: null`.
 * 429 and 5xx responses are retried, honoring `Retry-After` when it can be
 * parsed; network errors and timeouts are retried with exponential backoff.
 * The timeout covers the whole attempt, including reading the body. The
 * function does not reject for request failures: when every attempt fails
 * it resolves with `status: 0` and an `error` message.
 *
 * @param {string} url
 * @param {object} [options]
 * @param {number} [options.timeout] - ms (default DEFAULT_TIMEOUT)
 * @param {number} [options.retries] - max retry count (default 3; values below 0 count as 0)
 * @param {number} [options.rateLimit] - requests per second per host (default 30)
 * @param {object} [options.headers] - additional headers
 * @returns {Promise<{status: number, data: any, headers: Headers | null, error?: string}>}
 * @throws {TypeError} If `url` is not a valid absolute URL
 */
export async function httpGet(url, options = {}) {
  const {
    timeout = DEFAULT_TIMEOUT,
    retries = 3,
    rateLimit = 30,
    headers = {},
  } = options;

  const parsedUrl = new URL(url);
  const limiter = getLimiter(parsedUrl.hostname, rateLimit);
  // Always make at least one attempt so the function never resolves to undefined.
  const maxRetries = retries > 0 ? retries : 0;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    await limiter.acquire();

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);
    const isLastAttempt = attempt >= maxRetries;
    let retryDelayMs;

    try {
      logger.debug(`HTTP GET ${url} (attempt ${attempt + 1})`);

      const response = await fetch(url, {
        signal: controller.signal,
        headers: {
          "User-Agent": "npxconfuse/1.0 (security-scanner)",
          Accept: "application/json",
          ...headers,
        },
      });

      const { status } = response;
      const isRetryable = status === 429 || status >= 500;

      if (isRetryable || status >= 400) {
        // The body is not needed; cancel it so the connection can be released.
        response.body?.cancel().catch(() => {});

        // Don't retry 4xx (except 429), and stop once retries are exhausted.
        if (!isRetryable || isLastAttempt) {
          return { status, data: null, headers: response.headers };
        }

        retryDelayMs =
          parseRetryAfterMs(response.headers.get("retry-after")) ??
          Math.min(1000 * Math.pow(2, attempt), 30000);

        logger.debug(
          `Rate limited or server error (${status}), waiting ${retryDelayMs}ms...`,
        );
      } else {
        // Parse JSON or text
        const contentType = response.headers.get("content-type") || "";
        const data = contentType.includes("application/json")
          ? await response.json()
          : await response.text();

        return { status, data, headers: response.headers };
      }
    } catch (err) {
      if (err.name === "AbortError") {
        logger.debug(`Request timed out: ${url}`);
      } else {
        logger.debug(`Request failed: ${url} — ${err.message}`);
      }

      if (isLastAttempt) {
        return {
          status: 0,
          data: null,
          headers: null,
          error: err.message,
        };
      }

      retryDelayMs = Math.min(1000 * Math.pow(2, attempt), 15000);
    } finally {
      // Runs after the body has been read, so the timeout bounds the whole attempt.
      clearTimeout(timer);
    }

    await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
  }
}
167
+
168
/**
 * `httpGet` variant for web scraping: same options and result, but the
 * default `Accept` header asks for HTML/text instead of JSON only.
 * Caller-supplied headers, including `Accept`, take precedence.
 *
 * @param {string} url
 * @param {object} [options] - Same options as `httpGet`
 * @returns {Promise<{status: number, data: any, headers: Headers}>}
 */
export async function httpGetText(url, options = {}) {
  const headers = Object.assign(
    { Accept: "text/html, application/json, text/plain, */*" },
    options.headers,
  );
  return httpGet(url, Object.assign({}, options, { headers }));
}