npm - @lateos/npm-scan - Versions diffs - 0.1.0 → 0.2.1 - Mend

@lateos/npm-scan 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/.github/workflows/ci.yml +1 -0
package/.github/workflows/scan.yml +1 -0
package/AGENTS.md +1 -0
package/CONTRIBUTING.md +1 -0
package/LICENSING.md +1 -0
package/backend/db/schema.sql +1 -0
package/backend/db.js +1 -0
package/backend/detectors/atk-001-lifecycle.js +1 -0
package/backend/detectors/atk-002-obfusc.js +1 -0
package/backend/detectors/atk-003-creds.js +1 -0
package/backend/detectors/atk-004-persist.js +1 -0
package/backend/detectors/atk-005-exfil.js +1 -0
package/backend/detectors/atk-006-depconf.js +1 -0
package/backend/detectors/atk-007-typosquat.js +1 -0
package/backend/detectors/index.js +1 -0
package/backend/detectors.test.js +1 -0
package/backend/fetch.js +1 -0
package/backend/license.js +1 -0
package/backend/sbom.js +1 -0
package/cli/cli.js +1 -0
package/docker/Dockerfile.cli +1 -0
package/docker/docker-compose.yml +1 -0
package/docs/attack-taxonomy.md +1 -0
package/docs/project-plan.md +372 -0
package/package.json +17 -4
package/tests/corpus/malicious/shai-hulud.tgz +0 -0
package/tests/corpus/run.js +27 -0
package/cli.js +0 -4
/package/{index.js → backend/index.js} +0 -0

package/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1 @@

# Continuous integration: install, lint, test and build on pushes and PRs to main.
name: CI

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
      - run: npm ci
      - run: npm run lint
      - run: npm run test
      - run: npm run build
      # Self-scan stub
      - run: echo 'Self-scan: npm run scan package.json' # Phase 1+

package/.github/workflows/scan.yml ADDED Viewed

	@@ -0,0 +1 @@
# Example workflow: run the npm-scan lockfile scan on every pull request.
name: npm-scan

on: [pull_request]

jobs:
  scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          # Quoted to match ci.yml and avoid YAML numeric coercion
          node-version: '20'
      - run: npm ci
      - run: npx @lateos/npm-scan@latest scan-lockfile
      # Or: npm-scan scan-lockfile (if global)
      # Fail PR on high/critical

package/AGENTS.md ADDED Viewed

@@ -0,0 +1 @@

# AGENTS.md

## Project
ESM Node.js CLI monorepo for the npm-scan supply chain scanner.

## Verification
No lint/test deps yet. Scripts are stubbed in package.json.
Run `npm run lint && npm test && npm run build`.

## Architecture
- `cli/`: Commander.js entrypoints (Phase 1)
- `backend/`: Core logic, license.js, db/schema.sql
- `docker/`: Multi-arch images (cli, pipeline)
- `docs/`: project-plan.md, attack-taxonomy.md

Follow the phases and ATK entries in project-plan.md.

## Conventions
- No deps—verify package.json before libs.
- License: Apache-2.0 + Commons Clause (LICENSING.md).
- Local git (no remote).
- Phase 0 complete: foundation stubs ready for Phase 1 detectors.

package/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1 @@

# CONTRIBUTING.md

Thank you for contributing to npm-scan!

## Development Workflow

1. Fork the repo and create a feature branch `feat/atk-xxx-description`.
2. Run `npm run lint && npm test`.
3. Update CHANGELOG.md.
4. Open a PR with a self-review.

## New ATK Entry / Detector

ATK changes require:
- PoC malicious package sample.
- Detection rule/code.
- False positive analysis (test corpus).
- NIST 800-161 mapping.

Update docs/attack-taxonomy.md; bump version.

## Licensing

See [LICENSING.md](LICENSING.md). Core Apache-2.0; premium Commons Clause.

## No-Go
- ML/telemetry until Phase 4.
- Secrets/keys in code/PR.
- Sandbox changes without threat model update.

## Test Corpus

Add to `tests/corpus/clean/` and `malicious/` for CI.

PRs are reviewed within 48 hours.

package/LICENSING.md ADDED Viewed

@@ -0,0 +1 @@

# LICENSING.md

## Model: Apache-2.0 core + Commons Clause premium

### Core (Apache-2.0):
- Static analysis engine, ATK-001–007 detectors, CLI, lockfile scanner, SBOM output (CycloneDX), GitHub Action, Docker images, JSON output, SQLite-backed local storage, basic HTML report.

### Premium (Apache-2.0 + Commons Clause):
- Dynamic sandbox (ATK-008+), advanced compliance reports (PDF, regulatory templates), SIEM connectors, reachability analysis, team dashboard, SSO, audit logs, API/webhooks, on-prem/air-gapped licenses, priority support.

## Commons Clause
The Commons Clause applies to the premium components only: it prohibits selling them, including offering them as a service. The core remains plain Apache-2.0. See https://commonsclause.com/ for details.

## Feature Flags
Premium features are gated by a license key validated at runtime. Keys are issued per-seat for the CLI and per-org for hosted use.

Full Apache-2.0 license in LICENSE file (TBD).

Licensing boundaries are defined before external contributions are accepted. Changes require a PR updating this file.

package/backend/db/schema.sql ADDED Viewed

@@ -0,0 +1 @@

-- SQLite schema for local CLI mode (free tier)
-- Tables: scans, findings (ATK-linked)
--
-- Every statement is idempotent (IF NOT EXISTS) because backend/db.js
-- executes this whole file each time the database is opened.

CREATE TABLE IF NOT EXISTS scans (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  package_name TEXT NOT NULL,
  version TEXT,
  scanned_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  status TEXT DEFAULT 'completed',
  sbom_json TEXT
);

CREATE TABLE IF NOT EXISTS findings (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  scan_id INTEGER NOT NULL,
  -- ATK taxonomy ID such as 'ATK-001'. Deliberately not a foreign key: no
  -- attack_taxonomy table is defined, and a dangling REFERENCES clause makes
  -- every INSERT fail once foreign-key enforcement is on.
  atk_id TEXT NOT NULL,
  severity TEXT CHECK (severity IN ('info', 'low', 'medium', 'high', 'critical')),
  description TEXT,
  evidence TEXT,
  mitigation TEXT,
  FOREIGN KEY (scan_id) REFERENCES scans(id) ON DELETE CASCADE
);

-- View for reports (columns listed explicitly so the two `id` columns do not collide)
CREATE VIEW IF NOT EXISTS scan_findings AS
SELECT
  s.id AS scan_id,
  s.package_name,
  s.version,
  s.scanned_at,
  s.status,
  s.sbom_json,
  f.id AS finding_id,
  f.atk_id,
  f.severity,
  f.description,
  f.evidence,
  f.mitigation
FROM scans s
JOIN findings f ON s.id = f.scan_id;

-- Indexes
CREATE INDEX IF NOT EXISTS idx_scans_package ON scans(package_name);
CREATE INDEX IF NOT EXISTS idx_findings_scan ON findings(scan_id);
CREATE INDEX IF NOT EXISTS idx_findings_atk ON findings(atk_id);
CREATE INDEX IF NOT EXISTS idx_findings_severity ON findings(severity);

package/backend/db.js ADDED Viewed

@@ -0,0 +1 @@

import Database from 'better-sqlite3';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// Database file is created in the current working directory.
const DB_PATH = 'npm-scan.db';

// Resolve the schema next to this module rather than from process.cwd(),
// so the CLI also works when invoked outside the repository root.
const SCHEMA_PATH = path.join(path.dirname(fileURLToPath(import.meta.url)), 'db', 'schema.sql');

let db;

/** Opens the SQLite database and applies the schema file. */
function init() {
  db = new Database(DB_PATH);
  const schema = fs.readFileSync(SCHEMA_PATH, 'utf8');
  db.exec(schema);
}

init();

/**
 * Persists one scan and its findings.
 *
 * @param {string} pkgName - Scanned package name.
 * @param {string} [version='latest'] - Scanned version label.
 * @param {Array<object>} [findings=[]] - Detector findings (`id`, `severity`, `title`/`description`, `evidence`).
 * @returns {number|bigint} Row id of the inserted scan.
 */
export function saveScan(pkgName, version = 'latest', findings = []) {
  const scanStmt = db.prepare('INSERT INTO scans (package_name, version) VALUES (?, ?)');
  const findStmt = db.prepare('INSERT INTO findings (scan_id, atk_id, severity, description, evidence) VALUES (?, ?, ?, ?, ?)');

  // One transaction: a failing finding insert must not leave a scan row
  // with only part of its findings.
  const persist = db.transaction(() => {
    const scanId = scanStmt.run(pkgName, version).lastInsertRowid;
    for (const f of findings) {
      findStmt.run(scanId, f.id, f.severity, f.title || f.description, f.evidence || '');
    }
    return scanId;
  });

  return persist();
}

/**
 * @param {number} [limit=10] - Maximum number of rows.
 * @returns {Array<object>} Most recent scans, newest first.
 */
export function getRecentScans(limit = 10) {
  return db.prepare('SELECT * FROM scans ORDER BY scanned_at DESC LIMIT ?').all(limit);
}

/**
 * @param {number|string} scanId - Id of the scan.
 * @returns {Array<object>} Findings recorded for that scan.
 */
export function getFindings(scanId) {
  return db.prepare('SELECT * FROM findings WHERE scan_id = ?').all(scanId);
}

/** Closes the database handle. */
export function close() {
  db.close();
}

package/backend/detectors/atk-001-lifecycle.js ADDED Viewed

@@ -0,0 +1 @@

// Exactly the install-time hooks: preinstall, install, postinstall.
// Anchored so that unrelated scripts such as "pretest" or "prettier" do not match.
const INSTALL_HOOK = /^(?:pre|post)?install$/i;

/**
 * ATK-001: flags packages that declare install-time lifecycle scripts.
 *
 * @param {object} pkgJson - Parsed package.json.
 * @param {Array<object>} [files=[]] - Unused; kept for the common detector signature.
 * @returns {Promise<Array<object>>} Zero or one finding listing the hook names.
 */
export async function scan(pkgJson, files = []) {
  const findings = [];
  const scripts = pkgJson?.scripts ?? {};
  const suspicious = Object.keys(scripts).filter((name) => INSTALL_HOOK.test(name));
  if (suspicious.length) {
    findings.push({
      id: 'ATK-001',
      severity: 'high',
      title: 'Malicious lifecycle scripts',
      description: 'Suspicious install hooks',
      evidence: suspicious.join(', ')
    });
  }
  return findings;
}

package/backend/detectors/atk-002-obfusc.js ADDED Viewed

	@@ -0,0 +1 @@
// Calls to eval/atob/Buffer.from. Word boundaries stop identifiers such as
// "retrieval(" from matching; the dot in Buffer.from is literal.
const OBFUSCATION_CALL = /\b(?:eval|atob)\s*\(|\bBuffer\.from\s*\(/;

/**
 * ATK-002: flags source that uses eval or base64/hex decoding calls.
 *
 * @param {object} pkgJson - Parsed package.json (unused).
 * @param {Array<{content: string}>} [files=[]] - Source files to inspect.
 * @returns {Promise<Array<object>>} Zero or one finding.
 */
export async function scan(pkgJson, files = []) {
  const findings = [];
  const code = files.map((f) => f.content).join('\n');
  if (OBFUSCATION_CALL.test(code)) {
    findings.push({
      id: 'ATK-002',
      severity: 'medium',
      title: 'Obfuscated payload',
      description: 'Eval/base64/hex patterns',
      evidence: 'eval/atob detected'
    });
  }
  return findings;
}

package/backend/detectors/atk-003-creds.js ADDED Viewed

	@@ -0,0 +1 @@
// process.env.NPM… / process.env['AWS…'] style reads, or any mention of .npmrc.
// Dots are escaped so arbitrary separator characters no longer match.
const CREDENTIAL_ACCESS = /process\.env(?:\.|\[\s*['"`])(?:NPM|GIT|AWS|SSH)|\.npmrc/;

/**
 * ATK-003: flags source that reads credential-bearing env vars or .npmrc.
 *
 * @param {object} pkgJson - Parsed package.json (unused).
 * @param {Array<{content: string}>} [files=[]] - Source files to inspect.
 * @returns {Promise<Array<object>>} Zero or one finding.
 */
export async function scan(pkgJson, files = []) {
  const findings = [];
  const code = files.map((f) => f.content).join('\n');
  if (CREDENTIAL_ACCESS.test(code)) {
    findings.push({
      id: 'ATK-003',
      severity: 'high',
      title: 'Credential harvesting',
      description: 'Env/ .npmrc access',
      evidence: 'NPM_TOKEN/.npmrc match'
    });
  }
  return findings;
}

package/backend/detectors/atk-004-persist.js ADDED Viewed

	@@ -0,0 +1 @@
// A mkdir call followed on the same line by an editor/agent config directory.
const EDITOR_DIR_MKDIR = /mkdir.*(\.vscode|\.claude|\.cursor)/;

/**
 * ATK-004: flags source that creates editor/tool config directories.
 *
 * @param {object} pkgJson - Parsed package.json (unused).
 * @param {Array<{content: string}>} [files=[]] - Source files to inspect.
 * @returns {Promise<Array<object>>} Zero or one finding naming the matched directory.
 */
export async function scan(pkgJson, files = []) {
  const findings = [];
  const code = files.map((f) => f.content).join('\n');
  const match = EDITOR_DIR_MKDIR.exec(code);
  if (match) {
    findings.push({
      id: 'ATK-004',
      severity: 'high',
      title: 'Persistence via editor configs',
      // The other detectors supply a description, and the SBOM export reads it.
      description: 'Creates editor/tool config directory',
      // Report the directory that actually matched rather than always ".vscode".
      evidence: `${match[1]} mkdir match`
    });
  }
  return findings;
}

package/backend/detectors/atk-005-exfil.js ADDED Viewed

	@@ -0,0 +1 @@
// A fetch/curl call that targets a suspicious host on the same line, or a
// "post<sep>data" token. The original alternation (`fetch|curl.(…)|post.data`)
// matched a bare "fetch", so nearly every package produced a critical finding.
// NOTE(review): grouping reflects the presumed intent — confirm against the corpus.
const EXFIL_PATTERN = /\b(?:fetch|curl)\b.*(?:github|pastebin|c2)|post.data/;

/**
 * ATK-005: flags source that appears to send data to an external endpoint.
 *
 * @param {object} pkgJson - Parsed package.json (unused).
 * @param {Array<{content: string}>} [files=[]] - Source files to inspect.
 * @returns {Promise<Array<object>>} Zero or one finding.
 */
export async function scan(pkgJson, files = []) {
  const findings = [];
  const code = files.map((f) => f.content).join('\n');
  if (EXFIL_PATTERN.test(code)) {
    findings.push({
      id: 'ATK-005',
      severity: 'critical',
      title: 'Network exfiltration',
      // The other detectors supply a description, and the SBOM export reads it.
      description: 'Network call to suspicious endpoint',
      evidence: 'curl/fetch C2'
    });
  }
  return findings;
}

package/backend/detectors/atk-006-depconf.js ADDED Viewed

	@@ -0,0 +1 @@
// Keyword heuristic on dependency names (case-insensitive).
const SUSPICIOUS_NAME = /squat|confuse|typo/i;

/**
 * ATK-006: flags dependencies whose names contain confusion/squatting keywords.
 *
 * @param {object} pkgJson - Parsed package.json.
 * @returns {Promise<Array<object>>} Zero or one finding listing the matching names.
 */
export async function scan(pkgJson) {
  const findings = [];
  const deps = { ...pkgJson?.dependencies, ...pkgJson?.devDependencies };
  // The /i flag already ignores case, so no lower-casing is needed.
  const squat = Object.keys(deps).filter((name) => SUSPICIOUS_NAME.test(name));
  if (squat.length) {
    findings.push({
      id: 'ATK-006',
      severity: 'medium',
      title: 'Dependency confusion',
      // The other detectors supply a description, and the SBOM export reads it.
      description: 'Dependency names matching confusion/squatting keywords',
      evidence: squat.join(', ')
    });
  }
  return findings;
}

package/backend/detectors/atk-007-typosquat.js ADDED Viewed

@@ -0,0 +1 @@

// Small built-in reference list. Names in this set are never flagged, and
// every other dependency is compared against it.
const POPULAR_PACKAGES = new Set([
  'lodash', 'express', 'react', 'preact', 'react-dom', 'axios', 'chalk',
  'commander', 'debug', 'moment', 'request', 'async', 'webpack', 'typescript',
  'eslint', 'prettier', 'jquery', 'underscore', 'yargs', 'dotenv', 'rimraf',
  'mocha', 'babel-core', 'socket.io', 'mongoose', 'redux', 'angular'
]);

/**
 * True when `a` and `b` differ by exactly one insertion, deletion,
 * substitution, or swap of two adjacent characters.
 */
function isOneEditApart(a, b) {
  if (a === b) return false;
  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a];
  if (longer.length - shorter.length > 1) return false;

  // Skip the common prefix; `i` is the first differing position.
  let i = 0;
  while (i < shorter.length && shorter[i] === longer[i]) i += 1;

  if (shorter.length === longer.length) {
    if (shorter.slice(i + 1) === longer.slice(i + 1)) return true; // substitution
    return (
      shorter[i] === longer[i + 1] &&
      shorter[i + 1] === longer[i] &&
      shorter.slice(i + 2) === longer.slice(i + 2)
    ); // adjacent transposition
  }
  return shorter.slice(i) === longer.slice(i + 1); // single insertion/deletion
}

/**
 * ATK-007: flags dependencies whose name is one edit away from a popular
 * package (e.g. lodash → lodashh). Deterministic: the same package.json
 * always yields the same findings.
 *
 * @param {object} pkgJson - Parsed package.json.
 * @returns {Promise<Array<object>>} Zero or one finding listing the suspects.
 */
export async function scan(pkgJson) {
  const findings = [];
  const deps = { ...pkgJson?.dependencies, ...pkgJson?.devDependencies };
  const suspects = [];
  for (const name of Object.keys(deps)) {
    const candidate = name.toLowerCase();
    // Very short names are skipped: one edit covers too much of the name.
    if (candidate.length <= 4 || POPULAR_PACKAGES.has(candidate)) continue;
    for (const popular of POPULAR_PACKAGES) {
      if (isOneEditApart(candidate, popular)) {
        suspects.push(`${name} (resembles ${popular})`);
        break;
      }
    }
  }
  if (suspects.length) {
    findings.push({
      id: 'ATK-007',
      severity: 'low',
      title: 'Typosquatting suspects',
      // The other detectors supply a description, and the SBOM export reads it.
      description: 'Dependency names one edit away from a popular package',
      evidence: suspects.join(', ')
    });
  }
  return findings;
}

package/backend/detectors/index.js ADDED Viewed

@@ -0,0 +1 @@

// backend/detectors/index.js

import * as atk001 from './atk-001-lifecycle.js';
import * as atk002 from './atk-002-obfusc.js';
import * as atk003 from './atk-003-creds.js';
import * as atk004 from './atk-004-persist.js';
import * as atk005 from './atk-005-exfil.js';
import * as atk006 from './atk-006-depconf.js';
import * as atk007 from './atk-007-typosquat.js';

// Detectors in taxonomy order; findings of equal severity keep this order.
const DETECTORS = [atk001, atk002, atk003, atk004, atk005, atk006, atk007];

// Numeric ranks: comparing the severity strings alphabetically would put
// "medium" first and "critical" last.
const SEVERITY_RANK = { critical: 4, high: 3, medium: 2, low: 1, info: 0 };

/**
 * Runs every ATK detector and returns the combined findings, most severe first.
 *
 * @param {object} pkgJson - Parsed package.json of the scanned package.
 * @param {Array<{content: string}>} [files=[]] - Source files of the package.
 * @returns {Promise<Array<object>>} Findings sorted critical → info.
 */
export async function runAll(pkgJson, files = []) {
  const findings = [];
  for (const detector of DETECTORS) {
    findings.push(...await detector.scan(pkgJson, files));
  }
  // Unknown or missing severities sort last instead of throwing.
  const rank = (finding) => SEVERITY_RANK[finding.severity] ?? -1;
  return findings.sort((a, b) => rank(b) - rank(a));
}

package/backend/detectors.test.js ADDED Viewed

@@ -0,0 +1 @@

import { test } from 'node:test';
import assert from 'node:assert/strict';

// This file lives in backend/, so the detectors directory is a sibling.
import * as detectors from './detectors/index.js';

test('detectors runAll empty', async () => {
  const findings = await detectors.runAll({});
  assert.equal(findings.length, 0);
});

test('ATK-001 detects preinstall', async () => {
  const pkg = { scripts: { preinstall: 'malicious' } };
  const findings = await detectors.runAll(pkg);
  // Findings carry the taxonomy ID in `id`; there is no `atk` property.
  assert(findings.some(f => f.id === 'ATK-001'));
});

package/backend/fetch.js ADDED Viewed

@@ -0,0 +1 @@

import fetch from 'node-fetch';
import { globSync } from 'glob';
import fs from 'fs';
import os from 'os';
import path from 'path';
import zlib from 'zlib';

// Cap applied to both the downloaded and the decompressed tarball.
const MAX_TARBALL_BYTES = 500 * 1024 * 1024;
const TAR_BLOCK = 512;

/** Reads a NUL-terminated text field from a tar header block. */
function readField(header, start, length) {
  const raw = header.toString('utf8', start, start + length);
  const nul = raw.indexOf('\0');
  return nul === -1 ? raw : raw.slice(0, nul);
}

/**
 * Extracts a gzip-compressed tar archive into destDir.
 *
 * Only regular files and directories are written. Links and extended
 * headers are skipped, and entries that would resolve outside destDir
 * are ignored, because the archive is untrusted input.
 * NOTE(review): pax long-name headers are not interpreted, so paths longer
 * than the ustar name+prefix fields may be truncated.
 */
function extractTarGz(gzBuffer, destDir) {
  const tar = zlib.gunzipSync(gzBuffer, { maxOutputLength: MAX_TARBALL_BYTES });
  const root = path.resolve(destDir);
  let offset = 0;

  while (offset + TAR_BLOCK <= tar.length) {
    const header = tar.subarray(offset, offset + TAR_BLOCK);
    offset += TAR_BLOCK;
    // An all-zero block marks the end of the archive.
    if (header.every((byte) => byte === 0)) break;

    const size = Number.parseInt(readField(header, 124, 12).trim() || '0', 8);
    if (!Number.isFinite(size) || size < 0 || offset + size > tar.length) {
      throw new Error('Corrupt tarball');
    }

    const name = readField(header, 0, 100);
    const prefix = readField(header, 345, 155);
    const type = String.fromCharCode(header[156]);
    const target = path.resolve(root, prefix ? `${prefix}/${name}` : name);
    const inside = target === root || target.startsWith(root + path.sep);

    if (inside && (type === '0' || type === '\0')) {
      fs.mkdirSync(path.dirname(target), { recursive: true });
      fs.writeFileSync(target, tar.subarray(offset, offset + size));
    } else if (inside && type === '5') {
      fs.mkdirSync(target, { recursive: true });
    }

    // File data is padded to a whole number of 512-byte blocks.
    offset += Math.ceil(size / TAR_BLOCK) * TAR_BLOCK;
  }
}

/**
 * Downloads the latest version of a package from the npm registry and
 * unpacks it into a fresh temporary directory.
 *
 * @param {string} target - Package name.
 * @returns {Promise<{pkgJson: object, jsFiles: Array<{path: string, content: string}>, tmpDir: string}>}
 *   Parsed package.json, the package's .js files, and the temp directory
 *   (the caller must pass it to cleanup()).
 * @throws {Error} On registry/download failure, oversized or corrupt tarball.
 */
export async function fetchPackage(target) {
  const metaRes = await fetch(`https://registry.npmjs.org/${target}/latest`);
  if (!metaRes.ok) {
    throw new Error(`Registry lookup failed for ${target}: HTTP ${metaRes.status}`);
  }
  const meta = await metaRes.json();
  const tarUrl = meta?.dist?.tarball;
  if (!tarUrl) throw new Error(`No tarball URL in registry metadata for ${target}`);

  const tarRes = await fetch(tarUrl);
  if (!tarRes.ok) {
    throw new Error(`Tarball download failed for ${target}: HTTP ${tarRes.status}`);
  }
  const buffer = Buffer.from(await tarRes.arrayBuffer());
  if (buffer.length > MAX_TARBALL_BYTES) throw new Error('Tarball too large');

  // mkdtemp yields a unique, unpredictable directory (Date.now() does not).
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'npm-scan-'));
  try {
    // npm tarballs are gzip-compressed tar archives, not zip files.
    extractTarGz(buffer, tmpDir);
    const pkgDir = path.join(tmpDir, 'package');
    const pkgJson = JSON.parse(fs.readFileSync(path.join(pkgDir, 'package.json'), 'utf8'));
    const jsFiles = globSync('**/*.js', { cwd: pkgDir, absolute: true, nodir: true }).map((p) => ({
      path: p,
      content: fs.readFileSync(p, 'utf8')
    }));
    return { pkgJson, jsFiles, tmpDir };
  } catch (err) {
    // Do not leave extracted (possibly malicious) files behind on failure.
    cleanup(tmpDir);
    throw err;
  }
}

/**
 * Removes a temporary directory created by fetchPackage.
 *
 * @param {string} tmpDir - Directory to delete recursively.
 */
export function cleanup(tmpDir) {
  // `require` is not defined in ES modules; use the imported fs.
  fs.rmSync(tmpDir, { recursive: true, force: true });
}

package/backend/license.js ADDED Viewed

@@ -0,0 +1 @@

/** @module license */

const PREMIUM_KEY_PREFIX = 'npm-scan-premium-';

/**
 * Validates a premium license key (stub: prefix check only).
 *
 * @param {string} key - License key.
 * @param {string} [feature='*'] - Feature name, used in the error message.
 * @returns {true} When the key is accepted.
 * @throws {Error} When the key is missing, not a string, or lacks the expected prefix.
 */
export function validateLicense(key, feature = '*') {
  // Stub: runtime validation (env var or file)
  // The type check keeps non-string keys on the documented error path
  // instead of raising a TypeError from startsWith.
  if (typeof key !== 'string' || !key.startsWith(PREMIUM_KEY_PREFIX)) {
    throw new Error(`Invalid license for feature: ${feature}`);
  }
  // TODO: crypto verify signature, expiry, seats
  return true;
}

/**
 * Non-throwing wrapper around validateLicense.
 *
 * @param {string} feature - Feature name.
 * @param {string} [licenseKey=process.env.NPM_SCAN_LICENSE_KEY] - License key.
 * @returns {boolean} True when the key validates, false otherwise.
 */
export function isFeatureEnabled(feature, licenseKey = process.env.NPM_SCAN_LICENSE_KEY) {
  try {
    return validateLicense(licenseKey, feature);
  } catch {
    // Deliberate best-effort: an invalid or missing key means "feature off".
    return false;
  }
}

// Usage: if (!isFeatureEnabled('dynamic-sandbox')) { console.warn('Upgrade for sandbox'); }

package/backend/sbom.js ADDED Viewed

@@ -0,0 +1 @@

// NOTE(review): confirm that 'cyclonedx-node' exposes this CycloneDX class
// and the addComponent/addVulnerability/toJson methods used below.
import { CycloneDX } from 'cyclonedx-node';

/**
 * Builds a CycloneDX document for a scanned package, embedding the
 * findings as vulnerabilities.
 *
 * @param {object} pkgJson - Object with `name` and optional `version`.
 * @param {Array<object>} findings - Findings (`id`, `title`, `severity`, `description`, `mitigation`).
 * @param {string} [format='json'] - 'xml' for XML output, anything else for JSON.
 * @returns {*} Serialized SBOM as produced by the CycloneDX object.
 */
export function generateSBOM(pkgJson, findings, format = 'json') {
  const sbom = new CycloneDX({specVersion: '1.5'});
  // Use one fallback for both fields so the purl never contains "@undefined".
  const version = pkgJson.version || 'unknown';
  // Components
  sbom.addComponent({
    name: pkgJson.name,
    version,
    type: 'library',
    purl: `pkg:npm/${pkgJson.name}@${version}`
  });
  // Vulnerabilities from findings
  for (const f of findings) {
    sbom.addVulnerability({
      id: f.id,
      title: f.title,
      // Severity is nullable in the findings table; do not crash on it.
      severity: String(f.severity ?? 'unknown').toUpperCase(),
      description: f.description,
      recommendation: f.mitigation || 'Review evidence'
    });
  }
  // NOTE(review): `toJsonXml` looks like a typo for an XML serializer — confirm the method name.
  return format === 'xml' ? sbom.toJsonXml() : sbom.toJson();
}

package/cli/cli.js ADDED Viewed

@@ -0,0 +1 @@

#!/usr/bin/env node

import { Command } from 'commander';
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Take the version from package.json so `--version` cannot drift from the
// published release (the literal previously here was out of date).
const { version } = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf8'));

const program = new Command()
  .name('npm-scan')
  .description('npm supply chain security scanner')
  .version(version);

program
  .command('scan')
  .description('Scan package')
  .argument('<target>', 'package name')
  .option('-l, --license-key <key>', 'Premium license')
  .action(async (target, options) => {
    let cleanup;
    let tmpDir;
    try {
      const fetcher = await import('../backend/fetch.js');
      cleanup = fetcher.cleanup;
      const fetched = await fetcher.fetchPackage(target);
      tmpDir = fetched.tmpDir;

      const { runAll } = await import('../backend/detectors/index.js');
      const findings = await runAll(fetched.pkgJson, fetched.jsFiles);

      const { saveScan } = await import('../backend/db.js');
      // Record the version that was actually fetched when it is known.
      const scanId = saveScan(target, fetched.pkgJson.version ?? 'latest', findings);
      console.log(JSON.stringify({scanId, findings}, null, 2));
    } catch (e) {
      console.error(e.message);
      // Signal failure to CI instead of exiting 0.
      process.exitCode = 1;
    } finally {
      // Always remove the extracted package, including after a failed scan.
      if (tmpDir) cleanup(tmpDir);
    }
  });

program
  .command('scan-lockfile')
  .description('Scan package-lock.json')
  .action(() => {
    console.log('Scanning lockfile...');
  });

program
  .command('report')
  .description('Generate report')
  .option('-i, --id <id>', 'Scan ID')
  // No default value: with a default, the flag is always set and the
  // plain-JSON branch below could never run.
  .option('--sbom [format]', 'CycloneDX SBOM (json/xml)')
  .action(async (options) => {
    const { getRecentScans, getFindings } = await import('../backend/db.js');
    if (options.id) {
      const findings = getFindings(options.id);
      if (options.sbom) {
        // A bare `--sbom` is parsed as `true`; treat it as JSON.
        const format = options.sbom === true ? 'json' : options.sbom;
        const pkg = { name: 'scanned-pkg', version: 'unknown' }; // from scan
        const { generateSBOM } = await import('../backend/sbom.js');
        const sbom = generateSBOM(pkg, findings, format);
        console.log(sbom);
      } else {
        console.log(JSON.stringify(findings, null, 2));
      }
    } else {
      const scans = getRecentScans();
      console.log('Recent scans:', JSON.stringify(scans, null, 2));
    }
  });

// The action handlers are async, so wait for them to settle.
await program.parseAsync();

package/docker/Dockerfile.cli ADDED Viewed

	@@ -0,0 +1 @@
# CLI image for npm-scan. Build context is the repository root.
FROM node:20-alpine AS cli

WORKDIR /app
# `npm ci` needs the lockfile next to package.json
COPY package.json package-lock.json ./
# --omit=dev replaces the deprecated --only=production
RUN npm ci --omit=dev
COPY . .

ENTRYPOINT ["node", "cli/cli.js"]

# Multi-arch build: docker buildx build --platform linux/amd64,linux/arm64 -f docker/Dockerfile.cli -t ghcr.io/lateos/npm-scan:cli .

package/docker/docker-compose.yml ADDED Viewed

@@ -0,0 +1 @@

# Compose file for the npm-scan CLI image plus pipeline service stubs.
version: '3.8'
services:
  cli:
    build:
      context: ..
      dockerfile: docker/Dockerfile.cli
    image: ghcr.io/lateos/npm-scan:cli

  # Full pipeline stubs (Phase 1+)
  enumerator:
    image: ghcr.io/lateos/npm-scan:enumerator
  fetcher:
    image: ghcr.io/lateos/npm-scan:fetcher
    depends_on: [redis]
  analyzer-static:
    image: ghcr.io/lateos/npm-scan:analyzer
  # ...

  redis:
    image: redis:alpine

# Usage: docker compose up cli

package/docs/attack-taxonomy.md ADDED Viewed

@@ -0,0 +1 @@

# npm Attack Taxonomy (ATK)

Versioned anchor for detectors, PRs, and reports. Each entry lists: attack class, detection surface, evasion surface, NIST 800-161 mapping.

## ATK Table

| ID      | Class                                         | Detection Surface | Evasion Surface      | NIST 800-161 | Status  |
|---------|-----------------------------------------------|-------------------|----------------------|--------------|---------|
| ATK-001 | Malicious lifecycle scripts (pre/postinstall) | Static            | Obfuscation          | SR-3.1       | Phase 1 |
| ATK-002 | Obfuscated payload (hex/base64/eval)          | Static            | Polyglots            | SR-4.2       | Phase 1 |
| ATK-003 | Credential harvesting (.npmrc/SSH/env)        | Static+Dynamic    | Conditional triggers | SR-5.3       | Phase 1 |
| ATK-004 | Persistence (.vscode/.claude/.cursor)         | Static            | Hidden files         | SR-6.4       | Phase 1 |
| ATK-005 | Network exfiltration (GitHub/DNS/HTTP C2)     | Static+Dynamic    | Encrypted payloads   | SR-7.5       | Phase 1 |
| ATK-006 | Dependency confusion/namespace squatting      | Static (lock)     | Typosquatting        | SR-2.2       | Phase 1 |
| ATK-007 | Typosquatting (edit-distance top-N)           | Static            | Homoglyphs           | SR-2.1       | Phase 1 |
| ATK-008 | Tarball tampering (tarball ≠ repo)            | Static (diff)     | Mirror repos         | SR-8.1       | Phase 2 |
| ATK-009 | Conditional triggers (CI/time)                | Dynamic           | Env probes           | SR-9.2       | Phase 2 |
| ATK-010 | Sandbox evasion                               | Dynamic           | Anti-analysis        | SR-10.3      | Phase 2 |
| ATK-011 | Transitive propagation (worm)                 | Dynamic           | Peer deps            | SR-11.4      | Phase 3 |

## Governance
A new ATK entry requires a PR with: PoC sample, detection rule, FP analysis, NIST mapping. Entries are published here and referenced in reports.

Every change to the taxonomy bumps the version of this file.

package/docs/project-plan.md ADDED Viewed

@@ -0,0 +1,372 @@
+# Project Plan: npm-scan
+## Enhanced Open-Core npm Supply Chain Security Scanner
+### (Successor to / Evolution of Package-Inferno)
+**Date:** May 2026
+**Version:** 1.1
+**Author:** Lateos (lateos.ai)
+---
+## 1. Project Vision & Objectives
+Build a best-in-class, developer-friendly npm supply chain security tool that detects sophisticated attacks like Mini Shai-Hulud (and future variants) through behavioral, static, and hybrid analysis.
+### Core Goals
+- Detect malicious patterns: preinstall hooks, obfuscation, credential harvesting, persistence via `.claude`/`.vscode`, GitHub exfiltration, and emerging variants
+- Provide enterprise-grade compliance reporting and SIEM integrations
+- Follow open-core model: generous free tier + gated premium features
+- Distribute as both npm CLI and Docker images
+- Be easy to run locally, in CI, and at scale
+### Differentiation
+Hybrid analysis + a formal, versioned attack taxonomy + a strong compliance/SIEM story + excellent UX.
+---
+## 2. Attack Taxonomy (ATK Series) — The Moat
+Before writing detection code, publish and maintain a versioned **npm Attack Taxonomy (ATK)**. This is modeled on the IPI taxonomy pattern and serves as the anchor for all detector development, contributor PRs, and marketing claims.
+Each entry defines: attack class, detection surface (static/dynamic/both), evasion surface, and mapping to NIST 800-161 controls.
+| ID | Class | Detection Surface | Status |
+|----|-------|-------------------|--------|
+| ATK-001 | Malicious lifecycle scripts (`preinstall`, `postinstall`, `install`) | Static | Phase 1 |
+| ATK-002 | Obfuscated payload delivery (hex encoding, base64, `eval`) | Static | Phase 1 |
+| ATK-003 | Credential harvesting (env var scraping, `.npmrc`, SSH key access) | Static + Dynamic | Phase 1 |
+| ATK-004 | Persistence via editor/tool config dirs (`.vscode`, `.claude`, `.cursor`) | Static | Phase 1 |
+| ATK-005 | Network exfiltration (GitHub API, DNS tunneling, HTTP POST to C2) | Static + Dynamic | Phase 1 |
+| ATK-006 | Dependency confusion / namespace squatting | Static (lockfile) | Phase 1 |
+| ATK-007 | Typosquatting (edit-distance matching against top-N packages) | Static | Phase 1 |
+| ATK-008 | Tarball tampering (published tarball ≠ source repo) | Static (diff) | Phase 2 |
+| ATK-009 | Conditional/dormant triggers (CI env detection, time-based activation) | Dynamic | Phase 2 |
+| ATK-010 | Sandbox evasion detection (anti-analysis behaviors) | Dynamic | Phase 2 |
+| ATK-011 | Transitive supply chain propagation (worm-style lateral spread) | Dynamic | Phase 3 |
+> **Governance:** ATK entries are versioned. New entries require a PR with: proof-of-concept sample, detection rule, false positive analysis, and NIST 800-161 control mapping. The taxonomy is published at `docs/attack-taxonomy.md` and referenced in all scan reports.
+---
+## 3. Licensing — Decided Before the First PR
+Licensing boundaries must be defined in `LICENSING.md` before accepting any external contributions.
+### Model: Apache-2.0 core + Commons Clause premium
+- **Core (Apache-2.0):** Static analysis engine, ATK-001–007 detectors, CLI, lockfile scanner, SBOM output (CycloneDX), GitHub Action, Docker images, JSON output, SQLite-backed local storage, basic HTML report.
+- **Premium (Apache-2.0 + Commons Clause):** Dynamic sandbox (ATK-008+), advanced compliance reports (PDF, regulatory templates), SIEM connectors, reachability analysis, team dashboard, SSO, audit logs, API/webhooks, on-prem/air-gapped licenses, priority support.
+> **Why Commons Clause over BSL:** Commons Clause is lighter-weight, avoids the community friction HashiCorp and Elasticsearch created with BSL transitions, and the boundary ("you may not sell this software as a service") is unambiguous. BSL is a fallback only if legal counsel recommends it.
+### Feature Flags
+Premium features are gated by a license key validated at runtime. The key system skeleton ships in Phase 0. Keys are issued per-seat for CLI, per-org for hosted.
+---
+## 4. Core Requirements
+### Free / Open-Source Tier
+- Static analysis (ATK-001–007): obfuscation, credential patterns, lifecycle scripts, YARA
+- CLI: `scan`, `scan-lockfile`, `report` commands
+- Docker-based full pipeline (Enumerator → Fetcher → Analyzer → Dashboard)
+- SBOM output: CycloneDX (Phase 1), SPDX (Phase 2)
+- Basic HTML report (Phase 1); PDF report is premium (Phase 2)
+- GitHub Action
+- Policy-as-code engine (YAML config, free)
+- SQLite for local/CLI mode — zero external dependencies
+### Premium Features (license key or hosted SaaS)
+- Dynamic sandbox / hybrid analysis (ATK-008–011, safe hook execution with syscall monitoring)
+- Advanced compliance reports (PDF/HTML, regulatory mapping: NIST 800-161, EU CRA, SOC 2, ISO 27001, DORA)
+- Reachability analysis (parse call graphs, surface only reachable findings)
+- Full SIEM connectors (Splunk TA, Microsoft Sentinel Solution, Elastic integration, QRadar)
+- Team dashboard, SSO, audit logs, high-scale orchestration
+- Priority support and on-prem/air-gapped licenses
+- OPA/Rego policy engine (the free tier ships the YAML policy engine instead)
+- PostgreSQL backend (team/hosted tier)
+> **Note on ML-assisted false-positive reduction:** Explicitly deferred until real scan telemetry exists. Will not appear in roadmap until Phase 4+ with data to justify it.
+---
+## 5. Tech Stack
+| Layer | Technology | Notes |
+|-------|------------|-------|
+| CLI | Node.js + Commander.js | Global `npm install -g npm-scan` |
+| Enumerator / Fetcher | Node.js | Extends Package-Inferno structure |
+| Analyzer (static) | Node.js + Python 3.12+ | YARA via `yara-python` |
+| Dynamic Sandbox | gVisor (runsc) | See §6.1 — not vm2/isolated-vm |
+| Local storage (free) | SQLite | Zero-setup, file-based |
+| Team/hosted storage | PostgreSQL | SaaS and enterprise tier only |
+| Dashboard | Streamlit (MVP stub) | FastAPI + React when first enterprise customer requires it |
+| SBOM | cyclonedx-node + cyclonedx-python | CycloneDX 1.5 |
+| Reports (free) | Jinja2 → HTML | PDF is premium |
+| Reports (premium) | Jinja2 → WeasyPrint → PDF | With NIST 800-161 templates |
+| Policy Engine (free) | YAML | Shipped in core |
+| Policy Engine (premium) | OPA/Rego | Full enterprise policy-as-code |
+| Containerization | Docker + Docker Compose + GHCR | Multi-arch images |
+| Observability | OpenTelemetry + structured JSON logging | Opt-in telemetry only |
+| Licensing | Feature flags via license key validation | Skeleton in Phase 0 |
+---
+## 6. Architecture
+```
+CLI Layer (npm-scan command)
+    ↓ lightweight mode (static-only, SQLite, no Docker required)
+    ↓ full mode (delegates to Docker Compose pipeline)
+Docker Compose Pipeline:
+  [enumerator] → [Redis queue] → [fetcher] → [analyzer-static]
+                                                    ↓
+                                          [analyzer-sandbox]  ← premium, gVisor
+                                                    ↓
+                                          [report-generator]
+                                                    ↓
+                                    [api-service (FastAPI)] + [streamlit-dashboard]
+Storage:
+  SQLite (local / free tier)
+  PostgreSQL (team / hosted tier)
+  S3-compatible (tarball cache, optional)
+Output Formats:
+  JSON (structured findings, machine-readable)
+  CycloneDX SBOM (Phase 1) + SPDX (Phase 2)
+  HTML report (free)
+  PDF report with regulatory mappings (premium)
+  SIEM formats: OCSF, CEF, ECS (premium, Phase 3)
+```
+### 6.1 Dynamic Sandbox Architecture — Security-First
+> The sandbox executes malicious code by design. One escape on a user's machine destroys the tool's reputation. This section is non-negotiable.
+**Selected isolation stack: gVisor (runsc)**
+- Kernel-level syscall interception without a full VM — Docker-compatible, production-hardened
+- Firecracker microVMs as an optional upgrade for highest-assurance environments
+- **Explicitly not used:** vm2 (repeated escapes), isolated-vm (Node-based, insufficient for hostile payloads)
+**Sandbox threat model (required before Phase 2 ships):**
+| Threat | Mitigation |
+|--------|------------|
+| Syscall escape | gVisor intercepts every syscall in its own user-space kernel, so the package never calls the host kernel directly |
+| Network exfiltration during analysis | Network namespace isolation; egress blocked except to monitored sink |
+| Filesystem escape | Read-only bind mounts; package extracted to ephemeral tmpfs |
+| Resource exhaustion (CPU/memory bomb) | cgroup limits: 1 CPU, 512MB RAM, 30s timeout |
+| Sandbox detection by malware | Randomized env vars, realistic process tree, no obvious sandbox markers |
+| Tarball extraction bomb | Size limits enforced before extraction (uncompressed cap: 500MB) |
+**Anti-sandbox-evasion (ATK-010):** The analyzer checks for behaviors that indicate the package probes its environment before acting (hostname checks, `CI` env var detection, timing attacks). Detection of evasion attempts is itself a high-severity finding.
+---
+## 7. Adversarial Posture
+npm-scan is a security tool that will be actively studied by the people it's designed to catch. The plan explicitly addresses this.
+### Evasion Resistance
+- YARA rules and behavioral signatures are versioned and updated on a defined cadence
+- ATK taxonomy includes known evasion techniques per detector (documented in `docs/attack-taxonomy.md`)
+- Obfuscation detection uses AST-level analysis, not regex — resistant to trivial reformatting
+- Conditional trigger detection (ATK-009) specifically targets code that behaves differently in sandbox vs. production
+### Supply Chain Integrity of npm-scan Itself
+> npm-scan must not be a supply chain attack vector.
+- All releases signed with `npm provenance` (Sigstore) from day one
+- npm-scan's own `package.json` is scanned by npm-scan in CI (self-attestation)
+- Lockfile committed and integrity-checked in CI
+- SBOM generated and published with every release
+- Dependency update PRs gated on passing scan results
+### Known Evasion Vectors (documented, not hidden)
+The `docs/evasion-known.md` file catalogues known evasion techniques so contributors know what to harden against. Transparency about limitations builds trust; hiding them does not.
+---
+## 8. Compliance Strategy — NIST 800-161 First
+> Attempting to map five frameworks simultaneously produces five shallow implementations. One framework done properly is worth more than five done poorly.
+### Phase 2: NIST SP 800-161r1 (Cybersecurity Supply Chain Risk Management)
+**Why NIST 800-161 first:**
+- Supply-chain-specific (directly relevant to the tool's purpose)
+- Maps to CMMC Level 2 — existing domain expertise from SecureStack/DockerShield work
+- US government and defense contractor buyers have budget and mandate
+- NIST → FedRAMP → CMMC creates a coherent enterprise sales story
+**Phase 2 deliverable:** A compliance report template that maps each npm-scan finding to the relevant NIST 800-161 practice (SR-series controls). PDF output with finding → control traceability matrix.
+### Phase 3+: Additional Frameworks (in order)
+1. EU Cyber Resilience Act (CRA) — EU enterprise buyers
+2. SOC 2 Type II (CC6.x supply chain controls) — SaaS buyers
+3. ISO 27001:2022 (A.15 supplier relationships) — global enterprise
+4. DORA — financial sector EU buyers
+Each framework addition is a versioned template, not a rewrite of the report engine.
+---
+## 9. Phased Roadmap
+### Phase 0: Foundation (Week 1)
+**Exit criteria:** npm name claimed, repo structured, licensing documented, license key skeleton wired, Docker images published.
+- [ ] Claim `npm-scan` on npm (publish stub immediately)
+- [ ] Fork/rename Package-Inferno repo → monorepo structure (`/cli`, `/backend`, `/docker`, `/docs`)
+- [ ] Write `LICENSING.md` — define Apache-2.0 + Commons Clause boundary explicitly
+- [ ] Write `docs/attack-taxonomy.md` — ATK-001–007 initial entries with NIST 800-161 mappings
+- [ ] License key feature-flag skeleton (runtime validation, graceful degradation)
+- [ ] SQLite schema for local mode (replaces PostgreSQL for free tier)
+- [ ] `CONTRIBUTING.md` referencing ATK taxonomy governance process
+- [ ] Initial Docker images published to GHCR (multi-arch)
+- [ ] Basic CI/CD pipeline (GitHub Actions): lint, test, image build
+### Phase 1: MVP — Production Ready Scanner (Weeks 2–4)
+**Exit criteria:** Static scanner runs on any package in <30s; false positive rate <2% on top-500 npm packages; GitHub Action published; SBOM output validated.
+**Priority sequence:**
+1. **GitHub Action first** — highest-leverage distribution channel
+2. **ATK-001–007 static detectors** hardened against Shai-Hulud patterns
+3. **CLI:** `npm-scan scan <package>`, `npm-scan scan-lockfile`, `npm-scan report`
+4. **SQLite-backed scan history** for local users (no external dependencies)
+5. **CycloneDX SBOM output** with findings embedded as vulnerabilities
+6. **Basic HTML report** (Jinja2 template; PDF deferred to Phase 2)
+7. **YAML policy engine** (allowlists, severity overrides, block-on-severity)
+8. **Docker Compose improvements** — one-command start, health checks
+9. **Test corpus:** 50+ clean packages + 20+ malicious samples including Shai-Hulud variants
+**Not in Phase 1:** PDF reports, PostgreSQL, sandbox, ML.
+### Phase 2: Hybrid Analysis & Compliance (Weeks 5–7)
+**Exit criteria:** Sandbox threat model documented and reviewed; NIST 800-161 report template covers SR-series controls; dynamic analysis catches ATK-008/009 patterns.
+- Dynamic sandbox service (gVisor-based) — full threat model shipped before first user
+- ATK-008–010 behavioral detectors
+- NIST 800-161r1 compliance report template (PDF via WeasyPrint)
+- SPDX SBOM output (complement to CycloneDX)
+- Reachability analysis (lockfile call graph parsing — surfaces only reachable findings)
+- Dashboard: "Compliance" tab with NIST control mapping
+- ATK taxonomy updated with sandbox-derived evasion findings
+- Self-scan of npm-scan in CI using sandbox tier
+### Phase 3: Enterprise & Integrations (Weeks 8–11)
+**Exit criteria:** At least one Splunkbase listing live; one paying customer.
+- SIEM exporters: Splunk TA, Microsoft Sentinel Solution, Elastic integration, QRadar
+- FastAPI-based REST API + webhooks
+- Team features: multi-user, RBAC, audit logs
+- Feature-flag enforcement (license key hard gating)
+- EU CRA compliance report template
+- PostgreSQL backend for hosted/team tier
+- Kubernetes / Helm chart
+- Publish to Splunkbase + Azure Marketplace (content packs)
+- ATK-011 transitive propagation detector
+### Phase 4: Polish & Scale (Ongoing)
+- VS Code extension (surfaces findings inline)
+- SOC 2 + ISO 27001 compliance templates
+- DORA template (financial sector)
+- Hosted SaaS option with usage-based billing
+- Opt-in telemetry (aggregate false positive rates feed detector improvement)
+- Marketing site + pricing page
+- ML-assisted scoring — **only after telemetry data justifies it**
+---
+## 10. Distribution & Packaging
+| Channel | Details |
+|---------|---------|
+| npm (`npm-scan`) | Global CLI + lightweight static-only mode |
+| Docker images (GHCR) | Focused images per service + all-in-one + Compose file |
+| GitHub Releases | Binaries via `pkg` for offline/air-gapped use |
+| GitHub Action | `lateos/npm-scan-action` — Phase 1 priority |
+| Splunkbase | Splunk TA for SIEM integration — Phase 3 |
+| Azure Marketplace | Sentinel Solution content pack — Phase 3 |
+---
+## 11. Success Metrics — Operational, Not Vanity
+Lagging indicators (stars, press) are tracked but not used for go/no-go decisions. Leading indicators drive phase gates:
+| Metric | Target | Phase Gate |
+|--------|--------|------------|
+| False positive rate on top-500 npm packages | < 2% | Phase 1 exit |
+| Static scan time (average package) | < 30 seconds | Phase 1 exit |
+| Dynamic scan time (average package) | < 5 minutes | Phase 2 exit |
+| ATK taxonomy entries with passing detector tests | 100% | Every phase |
+| NIST 800-161 control coverage in compliance report | SR-series complete | Phase 2 exit |
+| Paying customers | ≥ 1 | Phase 3 exit |
+| Splunkbase listing live | Yes | Phase 3 exit |
+| npm-scan self-scan passing in CI | Always green | Ongoing |
+---
+## 12. Risks & Mitigations
+| Risk | Likelihood | Mitigation |
+|------|-----------|------------|
+| Sandbox escape damages user environment | Low but catastrophic | gVisor isolation; full threat model before Phase 2 ships; security advisory process |
+| False positives erode trust | Medium | Strict test corpus; allowlist support; reachability analysis reduces noise |
+| Maintenance burden | High (solo) | Modular design; ATK taxonomy governance keeps contributions structured |
+| Legal / license friction | Low | Commons Clause is cleaner than BSL; `LICENSING.md` published Day 1 |
+| npm registry rate limiting | Medium | Exponential backoff + S3 tarball cache |
+| Adversarial evasion of detectors | High (over time) | ATK taxonomy documents evasion surface; AST-level analysis over regex; evasion findings = high severity |
+| npm-scan itself becomes a target | Medium | Sigstore provenance; self-scan in CI; SBOM published with every release |
+| Feature scope creep (ML, etc.) | High | ML explicitly gated behind telemetry prerequisite; roadmap is sequential not parallel |
+---
+## 13. Deliverables Per Phase
+Each phase ships:
+- Working code + Docker Compose (validated one-command start)
+- Comprehensive README with real examples
+- Test corpus additions (clean + malicious packages)
+- Updated `docs/attack-taxonomy.md`
+- CI/CD pipeline passing (including self-scan for Phase 2+)
+- `CHANGELOG.md` entry referencing ATK IDs addressed
+---
+## 14. Immediate Next Steps (Phase 0, Day 1)
+Ordered by dependency and leverage:
+1. `npm publish npm-scan` — claim the name before someone else does
+2. Write `LICENSING.md` — unblocks external contributions
+3. Write `docs/attack-taxonomy.md` — ATK-001–007, NIST mappings — unblocks detector PRs
+4. Wire license key skeleton — unblocks premium feature development
+5. SQLite schema for local mode — removes PostgreSQL dependency from free tier
+6. Monorepo structure (`/cli`, `/backend`, `/docker`, `/docs`)
+7. Initial Docker images to GHCR
+8. GitHub Action stub — distribution channel, even before full detection logic
+---
+*This document is the canonical project plan for npm-scan v1.x. Changes require updating both this document and the affected ATK taxonomy entries.*

package/package.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "name": "@lateos/npm-scan",
-  "version": "0.1.0",
+  "version": "0.2.1",
   "description": "Powerful npm supply chain security scanner - detects malicious packages (Shai-Hulud style), behavioral analysis, SBOM, and compliance reporting.",
-  "main": "index.js",
+  "main": "backend/index.js",
   "bin": {
-    "npm-scan": "./cli.js"
+    "npm-scan": "cli/cli.js"
   },
   "type": "module",
-  "license": "MIT",
+  "license": "Apache-2.0",
   "repository": {
     "type": "git",
     "url": "https://github.com/YOUR_GITHUB_USERNAME/npm-scan.git"
@@ -22,7 +22,20 @@
     "sbom",
     "compliance"
   ],
+  "scripts": {
+    "dev": "node cli/cli.js",
+    "lint": "echo 'Lint stub'",
+    "test": "node --test",
+    "build": "echo 'Build stub'",
+    "corpus": "node tests/corpus/run.js"
+  },
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "acorn": "^8.16.0",
+    "adm-zip": "^0.5.17",
+    "commander": "^14.0.3",
+    "node-fetch": "^3.3.2"
   }
 }

package/tests/corpus/malicious/shai-hulud.tgz ADDED Viewed

Binary file

package/tests/corpus/run.js ADDED Viewed

@@ -0,0 +1,27 @@
+import assert from 'assert/strict'; // Corpus check script: for each clean/malicious corpus tarball, calls fetchPackage + runAll and asserts on the returned findings.
+import { globSync } from 'glob'; // NOTE(review): 'glob' is not among the dependencies this diff adds to package.json — confirm it is declared elsewhere.
+import { fetchPackage } from '../../backend/fetch.js';
+import { runAll } from '../../backend/detectors/index.js';
+const cleanTarballs = globSync('tests/corpus/clean/*.tgz'); // samples that must yield no 'high' severity findings; pattern is relative, so presumably run from the package root — confirm
+const malTarballs = globSync('tests/corpus/malicious/*.tgz'); // samples that must yield at least one finding
+for (const tar of cleanTarballs) {
+  const pkgName = tar.split('/').pop().replace('.tgz', ''); // last '/'-separated path segment with the first '.tgz' occurrence removed
+  const { pkgJson, jsFiles } = await fetchPackage(pkgName); // NOTE(review): only the derived name is passed, not the tarball path — confirm fetchPackage resolves it to the local corpus file
+  const findings = await runAll(pkgJson, jsFiles);
+  const highFP = findings.filter(f => f.severity === 'high'); // keep only findings whose severity is exactly 'high'
+  assert(highFP.length === 0, `High FP in clean ${pkgName}: ${highFP.map(f => f.title).join(', ')}`); // throws on the first clean sample with any 'high' finding; lower severities are tolerated
+}
+console.log(`Clean corpus pass (${cleanTarballs.length} pkgs)`); // also reached when the glob matched zero files
+for (const tar of malTarballs) {
+  const pkgName = tar.split('/').pop().replace('.tgz', ''); // same name derivation as in the clean loop
+  const { pkgJson, jsFiles } = await fetchPackage(pkgName);
+  const findings = await runAll(pkgJson, jsFiles);
+  assert(findings.length > 0, `No findings in malicious ${pkgName}`); // any finding counts, regardless of severity
+}
+console.log(`Malicious corpus pass (${malTarballs.length} pkgs)`); // also reached when the glob matched zero files
+console.log('Test corpus FP <2% ✓'); // NOTE(review): printed unconditionally; this script never computes a false-positive rate

package/cli.js DELETED Viewed

@@ -1,4 +0,0 @@
-#!/usr/bin/env node
-console.log('🚀 npm-scan v0.1.0'); // banner line; the version is a hard-coded string literal
-console.log('npm supply chain security scanner'); // one-line product description
-console.log('Run "npm-scan --help" for usage\n'); // usage hint only; these four lines parse no arguments

/package/{index.js → backend/index.js} RENAMED Viewed

File without changes