pkgxray 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/audit.js CHANGED
@@ -49,6 +49,8 @@ function parseArgs(argv) {
49
49
  options.sourceScan = false;
50
50
  } else if (arg === "--no-vulnerability-check") {
51
51
  options.vulnerabilityCheck = false;
52
+ } else if (arg === "--no-github") {
53
+ options.githubMetadata = false;
52
54
  } else {
53
55
  throw new Error(`Unknown argument: ${arg}`);
54
56
  }
package/bin/mcp-server.js CHANGED
@@ -102,6 +102,11 @@ function guardToolDefinition() {
102
102
  default: true,
103
103
  description: "Set false to skip OSV vulnerability intelligence checks."
104
104
  },
105
+ githubMetadata: {
106
+ type: "boolean",
107
+ default: true,
108
+ description: "Set false to skip the GitHub provenance cross-check."
109
+ },
105
110
  outputFormat: {
106
111
  type: "string",
107
112
  enum: ["markdown", "json"],
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pkgxray",
3
- "version": "0.4.0",
4
- "description": "Zero-dep local CLI and MCP server that scans npm packages and AI-agent extensions for supply-chain risk. OSV vuln pre-check, sandboxed quarantine, tarball-integrity verification, calibrated static heuristics.",
3
+ "version": "0.6.0",
4
+ "description": "Zero-dep local CLI and MCP server that scans npm packages for supply-chain risk. OSV vuln pre-check, sandboxed quarantine, tarball-integrity verification, calibrated static heuristics, GitHub provenance cross-check.",
5
5
  "license": "MIT",
6
6
  "author": "Jack Adams-Lovell",
7
7
  "type": "commonjs",
package/src/auditor.js CHANGED
@@ -57,19 +57,54 @@ const PERSISTENCE_REGEXES = [
57
57
  const EXEC_REGEX = /\b(?:child_process\.(?:exec|execSync|spawn|spawnSync|fork)|require\(['"]child_process['"]\)|os\.system\(|subprocess\.(?:Popen|run|call|check_output)|Runtime\.getRuntime\(\)\.exec)/;
58
58
  const DYNAMIC_EVAL_REGEX = /\b(?:eval\s*\(|new\s+Function\s*\(|vm\.runIn[A-Za-z]+Context\b)/;
59
59
 
60
- const NETWORK_REGEX = /\b(?:fetch\s*\(|axios\.[a-z]+\s*\(|got\s*\(|node-fetch|undici|https?\.request\s*\(|XMLHttpRequest|new\s+WebSocket|requests\.[a-z]+\s*\(|urllib(?:\.request)?|net\/http)/i;
60
+ const NETWORK_REGEX = /\b(?:fetch\s*\(|axios\.[a-z]+\s*\(|got\s*\(|node-fetch|undici|https?\.(?:request|get|post|put|delete)\s*\(|XMLHttpRequest|new\s+WebSocket|requests\.[a-z]+\s*\(|urllib(?:\.request)?|net\/http|httpx\.[a-z]+\s*\()/i;
61
61
  const SHELL_NETWORK_REGEX = /(?:^|[\s;&|`$(])(?:curl|wget|Invoke-WebRequest)\s/m;
62
62
 
63
- const URL_SHORTENER_PATTERNS = [
63
+ // Domains that are almost never legitimate destinations from production code.
64
+ // Three buckets: URL shorteners (data hiding), paste/webhook services
65
+ // (drop sites), and OAST/tunneling services (Burp Collaborator-style
66
+ // out-of-band callbacks used in dependency-confusion PoCs and credential
67
+ // staging). A real library would not call any of these.
68
+ const EXFIL_AND_CALLBACK_DOMAINS = [
69
+ // URL shorteners
64
70
  "bit.ly",
65
71
  "tinyurl.com",
66
72
  "t.co/",
67
73
  "goo.gl",
74
+ "is.gd",
75
+ "ow.ly",
76
+ // Paste / drop sites
68
77
  "pastebin.com",
69
78
  "hastebin",
79
+ "transfer.sh",
80
+ // Webhooks
70
81
  "webhook.site",
71
82
  "discord.com/api/webhooks",
72
- "hooks.slack.com"
83
+ "hooks.slack.com",
84
+ "discordapp.com/api/webhooks",
85
+ // OAST / collaborator services (Burp, Caido, ProjectDiscovery)
86
+ "oast.live",
87
+ "oast.fun",
88
+ "oast.online",
89
+ "oast.pro",
90
+ "oast.me",
91
+ "oast.site",
92
+ "oastify.com",
93
+ "interact.sh",
94
+ "burpcollaborator.net",
95
+ // Pipe / request inspector services
96
+ "requestbin.com",
97
+ "requestbin.net",
98
+ "pipedream.net",
99
+ "pipedream.com",
100
+ "rce.ee",
101
+ // Tunneling / reverse proxies
102
+ "ngrok-free.app",
103
+ "ngrok.io",
104
+ "serveo.net",
105
+ "lhr.life",
106
+ "loca.lt",
107
+ "trycloudflare.com"
73
108
  ];
74
109
 
75
110
  // Directive phrases targeting an LLM / auditor. Kept narrow on purpose — generic
@@ -204,7 +239,12 @@ const BAND_DEFINITIONS = [
204
239
  { band: "bulk-env", label: "bulk-env-access", categories: ["environment-access"], rationale: "Reads the entire process environment in bulk; risky paired with network." },
205
240
  { band: "clipboard", label: "clipboard-access", categories: ["data-access"], rationale: "Reads or writes the system clipboard — can expose copied secrets." },
206
241
  { band: "incomplete-evidence", label: "incomplete-evidence", categories: ["missing-evidence", "missing-package-json", "package-metadata"], rationale: "Source or package.json was missing or unparseable — cannot rule the package safe." },
207
- { band: "missing-metadata", label: "missing-metadata", categories: ["missing-metadata", "supply-chain-signal"], rationale: "Provenance metadata (npm registry / GitHub) absent or weak; cross-checks skipped." }
242
+ { band: "missing-metadata", label: "missing-metadata", categories: ["missing-metadata", "supply-chain-signal", "github-fetch"], rationale: "Provenance metadata (npm registry / GitHub) absent or weak; cross-checks skipped." },
243
+ { band: "github-mismatch", label: "github-mismatch", categories: ["github-mismatch"], rationale: "package.json points at a GitHub repo that doesn't exist or doesn't match — strong typosquat / impersonation signal." },
244
+ { band: "github-archived", label: "github-archived", categories: ["github-archived"], rationale: "Linked repository is archived or disabled — no maintenance, security issues will not be fixed." },
245
+ { band: "github-young", label: "github-young", categories: ["github-young"], rationale: "Linked repository was created within the last 30 days — common slopsquat shape." },
246
+ { band: "github-lonely", label: "github-lonely", categories: ["github-lonely"], rationale: "0 stars + 0 forks + low watcher count on a young repo. Low community signal." },
247
+ { band: "github-stale", label: "github-stale", categories: ["github-stale"], rationale: "Repository hasn't been pushed to in over two years and isn't formally archived." }
208
248
  ];
209
249
 
210
250
  const SEVERITY_RANK = { info: 0, low: 1, medium: 2, high: 3 };
@@ -250,10 +290,113 @@ function auditMetadata(evidence, findings) {
250
290
  }
251
291
 
252
292
  inspectMetadataObject("NPM_METADATA", evidence.npmMetadata, findings);
253
- inspectMetadataObject("GITHUB_METADATA", evidence.githubMetadata, findings);
293
+ inspectGithubMetadata(evidence, findings);
254
294
  inspectKnownVulnerabilities(evidence.knownVulnerabilities, findings);
255
295
  }
256
296
 
297
+ const YOUNG_REPO_DAYS = 30;
298
+ const STALE_REPO_DAYS = 365 * 2;
299
+
300
/**
 * Number of whole days elapsed between the given timestamp and now.
 *
 * @param {string|null|undefined} iso - Date string (GitHub returns ISO 8601).
 * @returns {number|null} Whole days elapsed (negative for future dates), or
 *   null when the input is missing or cannot be parsed as a date.
 */
function daysAgo(iso) {
  if (!iso) return null;
  const timestamp = new Date(iso).getTime();
  // An unparseable string yields NaN; report it as "unknown" (null) so callers
  // that guard with `!== null` do not treat NaN as a real age.
  if (Number.isNaN(timestamp)) return null;
  return Math.floor((Date.now() - timestamp) / 86400000);
}
305
+
306
/**
 * Turns the GitHub provenance record attached to the evidence into findings.
 *
 * Appends to `findings` in place; returns nothing. Possible findings:
 *   - missing-metadata (info): no metadata object was supplied
 *   - github-mismatch (high): the linked repository returned "not-found"
 *   - github-fetch (info): lookup failed for any other reason
 *   - github-archived (medium): repository is archived and/or disabled
 *   - github-young (medium): repository is younger than YOUNG_REPO_DAYS
 *   - github-lonely (low): no stars/forks, at most one watcher, and the repo
 *     is under 90 days old or of unknown age
 *   - github-stale (info): last push older than STALE_REPO_DAYS, not archived
 * A `found: false` record with reason "not-github" produces no finding.
 *
 * @param {{ githubMetadata?: object|null }} evidence - Audit evidence bundle.
 * @param {Array<object>} findings - Accumulator that receives new findings.
 */
function inspectGithubMetadata(evidence, findings) {
  const meta = evidence.githubMetadata;
  if (!meta || typeof meta !== "object") {
    findings.push({
      severity: "info",
      category: "missing-metadata",
      file: "GITHUB_METADATA",
      snippet: "GITHUB_METADATA was not provided.",
      rationale: "Supply-chain reputation and repository consistency could not be checked."
    });
    return;
  }

  if (meta.found === false) {
    const where = meta.owner && meta.repo ? `${meta.owner}/${meta.repo}` : "linked URL";
    if (meta.reason === "not-found") {
      findings.push({
        severity: "high",
        category: "github-mismatch",
        file: "GITHUB_METADATA",
        snippet: `Repository ${where} 404s on GitHub`,
        rationale:
          "package.json points at a GitHub repository that does not exist. Strong typosquat / impersonation signal."
      });
    } else if (meta.reason === "not-github") {
      // Not a GitHub URL at all — skip silently.
    } else {
      findings.push({
        severity: "info",
        category: "github-fetch",
        file: "GITHUB_METADATA",
        snippet: meta.message || "Could not reach GitHub API",
        rationale: "Provenance metadata could not be fetched; cross-checks skipped."
      });
    }
    return;
  }

  // full_name can be absent (partial API payload or hand-built metadata).
  // Fall back to owner/repo so snippets never read "undefined is archived".
  const repoLabel =
    meta.full_name || (meta.owner && meta.repo ? `${meta.owner}/${meta.repo}` : "linked repository");

  if (meta.archived) {
    findings.push({
      severity: "medium",
      category: "github-archived",
      file: "GITHUB_METADATA",
      snippet: `${repoLabel} is archived (read-only)`,
      rationale: "Archived repos receive no maintenance; security issues will not be fixed."
    });
  }

  if (meta.disabled) {
    findings.push({
      severity: "medium",
      category: "github-archived",
      file: "GITHUB_METADATA",
      snippet: `${repoLabel} is disabled`,
      rationale: "Disabled repos cannot be updated; maintainer access may be revoked."
    });
  }

  const ageDays = daysAgo(meta.created_at);
  if (ageDays !== null && ageDays < YOUNG_REPO_DAYS) {
    findings.push({
      severity: "medium",
      category: "github-young",
      file: "GITHUB_METADATA",
      snippet: `${repoLabel} created ${ageDays} days ago`,
      rationale:
        "Brand-new repository combined with an npm package using a popular-sounding name is a classic slopsquat / impersonation shape."
    });
  }

  const lonelySignal = (meta.stars || 0) === 0 && (meta.forks || 0) === 0 && (meta.watchers || 0) <= 1;
  // An unknown creation date is treated like a young repo for this check.
  if (lonelySignal && (ageDays === null || ageDays < 90)) {
    findings.push({
      severity: "low",
      category: "github-lonely",
      file: "GITHUB_METADATA",
      snippet: `${repoLabel} has 0 stars, 0 forks, ${ageDays !== null ? `${ageDays} days old` : "unknown age"}`,
      rationale:
        "Very low community signal. Common for new tools, but compounds the slopsquat risk on similarly-named popular packages."
    });
  }

  const pushedDaysAgo = daysAgo(meta.pushed_at);
  // Archived repos already produced a stronger finding above; don't double-report.
  if (pushedDaysAgo !== null && pushedDaysAgo > STALE_REPO_DAYS && !meta.archived) {
    findings.push({
      severity: "info",
      category: "github-stale",
      file: "GITHUB_METADATA",
      snippet: `${repoLabel} last push ${pushedDaysAgo} days ago`,
      rationale: "Repo has not seen a push in over two years; consider whether it's still maintained."
    });
  }
}
399
+
257
400
  function inspectKnownVulnerabilities(vulnerabilities, findings) {
258
401
  if (!Array.isArray(vulnerabilities) || vulnerabilities.length === 0) {
259
402
  return;
@@ -538,7 +681,7 @@ function inspectExecNetworkCombinations(file, content, lower, findings) {
538
681
  const hasDynamicEval = DYNAMIC_EVAL_REGEX.test(content);
539
682
  const hasNetwork = NETWORK_REGEX.test(content) || SHELL_NETWORK_REGEX.test(content);
540
683
  const hardcodedIp = findPublicIpInCode(content);
541
- const shortener = URL_SHORTENER_PATTERNS.find((pattern) => lower.includes(pattern));
684
+ const shortener = EXFIL_AND_CALLBACK_DOMAINS.find((pattern) => lower.includes(pattern));
542
685
  const hasBulkEnv = BULK_ENV_REGEXES.some((re) => re.test(content));
543
686
 
544
687
  // HIGH: real exfil/loader signal — execution OR network plus a hardcoded IP /
package/src/github.js ADDED
@@ -0,0 +1,166 @@
1
+ "use strict";
2
+
3
+ const fsp = require("node:fs/promises");
4
+ const os = require("node:os");
5
+ const path = require("node:path");
6
+ const https = require("node:https");
7
+
8
+ const USER_AGENT = "pkgxray/0.6.0";
9
+ const CACHE_DIR = path.join(os.homedir(), ".cache", "pkgxray", "github");
10
+ const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
11
+ const FETCH_TIMEOUT_MS = 3000;
12
+
13
/**
 * Looks up a cached GitHub metadata entry.
 *
 * @param {string} key - Cache key; URI-encoded to form the file name.
 * @returns {Promise<object|null>} The parsed entry, or null when the file is
 *   missing, older than CACHE_TTL_MS (judged by mtime), unreadable, or not
 *   valid JSON. Never rejects.
 */
async function readCache(key) {
  try {
    const cachePath = path.join(CACHE_DIR, encodeURIComponent(key) + ".json");
    const { mtimeMs } = await fsp.stat(cachePath);
    const isExpired = Date.now() - mtimeMs > CACHE_TTL_MS;
    if (isExpired) {
      return null;
    }
    const raw = await fsp.readFile(cachePath, "utf8");
    return JSON.parse(raw);
  } catch {
    // Any failure is treated as a cache miss.
    return null;
  }
}
23
+
24
/**
 * Stores a GitHub metadata entry in the on-disk cache.
 *
 * Creates CACHE_DIR (mode 0700) if needed and writes the JSON-serialized
 * value to a file named after the URI-encoded key (mode 0600).
 *
 * @param {string} key - Cache key; URI-encoded to form the file name.
 * @param {*} value - JSON-serializable value to store.
 * @returns {Promise<void>} Always resolves; failures are deliberately ignored.
 */
async function writeCache(key, value) {
  try {
    await fsp.mkdir(CACHE_DIR, { mode: 0o700, recursive: true });
    const fileName = encodeURIComponent(key) + ".json";
    const serialized = JSON.stringify(value);
    await fsp.writeFile(path.join(CACHE_DIR, fileName), serialized, { mode: 0o600 });
  } catch {
    // The cache is best-effort: a failed write must never fail the audit.
  }
}
33
+
34
/**
 * Extracts the GitHub owner and repository name from a package.json
 * `repository` value. Recognized URL shapes:
 *   git+https://github.com/owner/repo.git
 *   https://github.com/owner/repo
 *   git@github.com:owner/repo.git
 *   github:owner/repo
 *   git+ssh://git@github.com/owner/repo.git
 * Anything after the repository segment (sub-path, `?query`, `#ref`) is ignored.
 *
 * @param {string|{url?: string}|null|undefined} repository - The raw
 *   `repository` field: either a URL string or an object with a `url` string.
 * @returns {{owner: string, repo: string}|null} The parsed pair, or null when
 *   the value is missing, is not a recognized GitHub URL, or contains
 *   characters outside letters, digits, `.`, `_` and `-`.
 */
function parseGithubRepo(repository) {
  if (!repository) return null;
  const url = typeof repository === "string" ? repository : repository.url;
  if (!url || typeof url !== "string") return null;
  const cleaned = url.trim().replace(/^git\+/, "").replace(/\.git$/, "");
  const patterns = [
    // The repo group stops at "/", "?" or "#" so `github:owner/repo#main`
    // yields "repo" rather than "repo#main".
    /^github:([^/]+)\/([^/?#]+)/,
    /^(?:https?|git):\/\/github\.com\/([^/]+)\/([^/?#]+)/,
    /^git@github\.com:([^/]+)\/([^/?#]+)/,
    /^ssh:\/\/git@github\.com\/([^/]+)\/([^/?#]+)/
  ];
  // The value comes from untrusted package metadata and ends up in an API
  // request path, so accept only a conservative character set and reject
  // dot-segments.
  const isSafeSegment = (segment) =>
    /^[A-Za-z0-9_.-]+$/.test(segment) && segment !== "." && segment !== "..";
  for (const pattern of patterns) {
    const match = cleaned.match(pattern);
    if (!match) continue;
    const owner = match[1];
    const repo = match[2].replace(/\.git$/, "");
    if (!isSafeSegment(owner) || !isSafeSegment(repo)) return null;
    return { owner, repo };
  }
  return null;
}
59
+
60
/**
 * Picks the GitHub API token from the environment.
 *
 * GITHUB_TOKEN wins over PKGXRAY_GITHUB_TOKEN; an empty value counts as unset.
 * With a token the API allows 5000 req/hr; without one, calls are
 * unauthenticated (60 req/hr — fine for occasional use). Shelling out to
 * `gh auth token` is deliberately avoided: it adds ~150ms on cold runs and
 * speed is a goal.
 *
 * @returns {string|null} The token, or null when neither variable is set.
 */
function loadToken() {
  const { GITHUB_TOKEN, PKGXRAY_GITHUB_TOKEN } = process.env;
  if (GITHUB_TOKEN) {
    return GITHUB_TOKEN;
  }
  if (PKGXRAY_GITHUB_TOKEN) {
    return PKGXRAY_GITHUB_TOKEN;
  }
  return null;
}
67
+
68
/**
 * Performs a GET against api.github.com and resolves with the parsed JSON body.
 *
 * @param {string} urlPath - Request path including any query string,
 *   e.g. "/repos/owner/repo".
 * @param {string|null} [token] - Bearer token; omitted from the request when falsy.
 * @param {number} [hops=0] - Redirects already followed (internal recursion counter).
 * @returns {Promise<*>} Parsed JSON body of a 2xx response.
 *   Rejects with an Error when: more than 3 redirects are followed; a redirect
 *   has a malformed Location or targets a host other than api.github.com; the
 *   status is 404 (the error carries `statusCode = 404`); the status is any
 *   other non-2xx; the body is not valid JSON; the request errors or times
 *   out; or the response stream fails or closes before completion.
 */
function githubApiGet(urlPath, token, hops = 0) {
  return new Promise((resolve, reject) => {
    if (hops > 3) return reject(new Error("Too many GitHub redirects"));
    const headers = {
      "user-agent": USER_AGENT,
      accept: "application/vnd.github+json",
      "x-github-api-version": "2022-11-28"
    };
    if (token) headers.authorization = `Bearer ${token}`;
    const request = https.get(
      { hostname: "api.github.com", path: urlPath, headers, timeout: FETCH_TIMEOUT_MS },
      (response) => {
        // Follow GitHub's 301 redirects (repo transferred / renamed)
        if ([301, 302, 307, 308].includes(response.statusCode) && response.headers.location) {
          response.resume();
          let nextUrl;
          try {
            nextUrl = new URL(response.headers.location, `https://api.github.com${urlPath}`);
          } catch (urlError) {
            // This callback runs outside the promise executor, so a throw here
            // would surface as an uncaught exception instead of a rejection.
            return reject(urlError);
          }
          // The follow-up request is always sent to api.github.com. Replaying a
          // foreign host's path there would query an unrelated resource, so
          // refuse cross-host redirects outright.
          if (nextUrl.hostname !== "api.github.com") {
            return reject(new Error(`Refusing GitHub redirect to unexpected host: ${nextUrl.hostname}`));
          }
          return githubApiGet(nextUrl.pathname + nextUrl.search, token, hops + 1).then(resolve, reject);
        }
        let body = "";
        response.setEncoding("utf8");
        response.on("data", (chunk) => {
          body += chunk;
        });
        // A connection dropped mid-body never emits "end"; without these two
        // handlers the promise would stay pending forever.
        response.on("error", reject);
        response.on("close", () => {
          // No-op after a normal "end": the promise has already settled.
          if (!response.complete) {
            reject(new Error("GitHub response closed before completion"));
          }
        });
        response.on("end", () => {
          if (response.statusCode === 404) {
            const error = new Error(`GitHub 404: ${urlPath}`);
            error.statusCode = 404;
            return reject(error);
          }
          if (response.statusCode < 200 || response.statusCode >= 300) {
            return reject(new Error(`GitHub HTTP ${response.statusCode}: ${body.slice(0, 120)}`));
          }
          try {
            resolve(JSON.parse(body));
          } catch (parseError) {
            reject(parseError);
          }
        });
      }
    );
    request.on("error", reject);
    request.on("timeout", () => {
      // "timeout" only signals socket inactivity; the request must be torn
      // down explicitly, which then surfaces through the "error" handler.
      request.destroy(new Error("GitHub request timed out"));
    });
  });
}
114
+
115
/**
 * Resolves GitHub provenance metadata for a package's `repository` field.
 *
 * Flow: parse owner/repo, consult the on-disk cache (unless
 * `options.useCache === false`), otherwise query the GitHub API. Successful
 * lookups and 404s are written to the cache; other failures are not, so the
 * next call retries.
 *
 * @param {string|{url?: string}|null|undefined} repository - Raw `repository` value.
 * @param {{useCache?: boolean, token?: string|null}} [options] - `token`
 *   overrides the environment token when it is anything other than undefined.
 * @returns {Promise<object>} One of:
 *   - `{ found: false, reason: "not-github" }` when the URL is not a GitHub repo
 *   - `{ found: true, ...fields }` with the repository summary
 *   - `{ found: false, reason: "not-found", owner, repo }` on a 404
 *   - `{ found: false, reason: "fetch-error", message, owner, repo }` otherwise
 *   Cache hits carry an extra `fromCache: true`.
 */
async function fetchRepoMetadata(repository, options = {}) {
  const target = parseGithubRepo(repository);
  if (!target) {
    return { found: false, reason: "not-github" };
  }

  const { owner, repo: repoName } = target;
  const cacheKey = `${owner}/${repoName}`;

  const cacheEnabled = options.useCache !== false;
  if (cacheEnabled) {
    const hit = await readCache(cacheKey);
    if (hit) {
      return Object.assign({}, hit, { fromCache: true });
    }
  }

  // An explicit null/empty token forces an unauthenticated call; only
  // undefined falls back to the environment.
  let token = options.token;
  if (token === undefined) {
    token = loadToken();
  }

  try {
    const payload = await githubApiGet(`/repos/${owner}/${repoName}`, token);
    const summary = {
      found: true,
      owner,
      repo: repoName,
      full_name: payload.full_name,
      description: payload.description,
      archived: Boolean(payload.archived),
      disabled: Boolean(payload.disabled),
      fork: Boolean(payload.fork),
      stars: payload.stargazers_count || 0,
      forks: payload.forks_count || 0,
      open_issues: payload.open_issues_count || 0,
      watchers: payload.watchers_count || 0,
      created_at: payload.created_at,
      updated_at: payload.updated_at,
      pushed_at: payload.pushed_at,
      default_branch: payload.default_branch,
      html_url: payload.html_url,
      license: payload.license && payload.license.spdx_id,
      owner_type: payload.owner && payload.owner.type
    };
    await writeCache(cacheKey, summary);
    return summary;
  } catch (error) {
    if (error.statusCode !== 404) {
      // Don't cache transient errors — next call should retry.
      return { found: false, reason: "fetch-error", message: error.message, owner, repo: repoName };
    }
    const missing = { found: false, reason: "not-found", owner, repo: repoName };
    await writeCache(cacheKey, missing);
    return missing;
  }
}
162
+
163
+ module.exports = {
164
+ parseGithubRepo,
165
+ fetchRepoMetadata
166
+ };
package/src/quarantine.js CHANGED
@@ -8,6 +8,7 @@ const os = require("node:os");
8
8
  const path = require("node:path");
9
9
  const { spawn } = require("node:child_process");
10
10
  const { auditEvidence } = require("./auditor");
11
+ const { fetchRepoMetadata } = require("./github");
11
12
 
12
13
  const DEFAULT_MAX_FILE_BYTES = 256 * 1024;
13
14
  const DEFAULT_MAX_FILES = 600;
@@ -65,6 +66,15 @@ async function guardExtension(reference, options = {}) {
65
66
  const resolved = await stageReference(reference, stagedPath, options);
66
67
  timings.stageMs = elapsed(stageStart);
67
68
 
69
+ // Start the GitHub metadata fetch the moment we have npm metadata. It runs
70
+ // concurrently with vuln-check and tarball download so it only adds latency
71
+ // if it's slower than everything else combined (rare — usually <250ms).
72
+ const githubStart = now();
73
+ const githubMetadataPromise = options.githubMetadata === false
74
+ ? Promise.resolve(null)
75
+ : fetchRepoMetadata(resolved.npmMetadata && resolved.npmMetadata.repository)
76
+ .catch(() => null);
77
+
68
78
  const vulnerabilityStart = now();
69
79
  const vulnerabilities =
70
80
  options.vulnerabilityCheck === false
@@ -89,10 +99,15 @@ async function guardExtension(reference, options = {}) {
89
99
  timings.sourceCollectionMs = 0;
90
100
  }
91
101
 
102
+ // By now the GitHub fetch is either done or has been running concurrently
103
+ // with everything above; await whatever remains.
104
+ const githubMetadata = await githubMetadataPromise;
105
+ timings.githubMetadataMs = elapsed(githubStart);
106
+
92
107
  const evidence = {
93
108
  packageName: resolved.packageName || reference,
94
109
  npmMetadata: resolved.npmMetadata || null,
95
- githubMetadata: null,
110
+ githubMetadata,
96
111
  webPresence: null,
97
112
  knownVulnerabilities: vulnerabilities,
98
113
  sourceFiles
@@ -107,6 +122,7 @@ async function guardExtension(reference, options = {}) {
107
122
  reference,
108
123
  resolved,
109
124
  sourceFiles,
125
+ githubMetadata,
110
126
  vulnerabilityPrecheck: {
111
127
  enabled: options.vulnerabilityCheck !== false,
112
128
  database: "OSV",