hackmyagent 0.11.5 → 0.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,6 +106,11 @@ const CHECK_PROJECT_TYPES = {
106
106
  'SKILL-MEM-': ['openclaw', 'mcp'],
107
107
  // NemoClaw/sandbox static analysis checks
108
108
  'NEMO-': ['all'],
109
+ // AI infrastructure exposure checks (research gap coverage)
110
+ 'LLM-': ['all'], // LLM inference endpoint exposure
111
+ 'AITOOL-': ['all'], // AI tooling exposure (Jupyter, Gradio, etc.)
112
+ 'A2A-': ['all'], // A2A protocol exposure
113
+ 'WEBCRED-': ['all'], // Credentials in web-served files
109
114
  };
110
115
  // Patterns for detecting exposed credentials
111
116
  // Each pattern is carefully tuned to minimize false positives
@@ -237,9 +242,41 @@ class HardeningScanner {
237
242
  const normalizedDir = path.resolve(directory);
238
243
  return normalizedFile.startsWith(normalizedDir + path.sep) || normalizedFile === normalizedDir;
239
244
  }
245
+ /**
246
+ * Load .hmaignore file from target directory. Returns list of path prefixes to exclude.
247
+ */
248
+ async loadHmaIgnore(targetDir) {
249
+ const ignorePath = path.join(targetDir, '.hmaignore');
250
+ try {
251
+ const content = await fs.readFile(ignorePath, 'utf-8');
252
+ return content
253
+ .split('\n')
254
+ .map(line => line.trim())
255
+ .filter(line => line && !line.startsWith('#'));
256
+ }
257
+ catch {
258
+ return [];
259
+ }
260
+ }
261
+ /**
262
+ * Check if a file path matches any .hmaignore pattern.
263
+ */
264
+ isPathIgnored(filePath, ignoredPaths) {
265
+ if (!filePath || ignoredPaths.length === 0)
266
+ return false;
267
+ const normalized = filePath.replace(/\\/g, '/');
268
+ return ignoredPaths.some(pattern => {
269
+ const normalizedPattern = pattern.replace(/\\/g, '/').replace(/\/$/, '');
270
+ return normalized.startsWith(normalizedPattern + '/') || normalized === normalizedPattern;
271
+ });
272
+ }
240
273
  async scan(options) {
241
274
  const { targetDir, autoFix = false, dryRun = false, ignore = [], cliName = 'hackmyagent' } = options;
242
275
  this.cliName = cliName;
276
+ // Load .hmaignore for path-based exclusions
277
+ const hmaIgnorePaths = await this.loadHmaIgnore(targetDir);
278
+ // Merge with any programmatic ignorePaths
279
+ const allIgnoredPaths = [...hmaIgnorePaths, ...(options.ignorePaths || [])];
243
280
  // Normalize ignore list to uppercase for case-insensitive matching
244
281
  const ignoredChecks = new Set(ignore.map((id) => id.toUpperCase()));
245
282
  // In dry-run mode, we detect what would be fixed but don't modify anything
@@ -388,6 +425,17 @@ class HardeningScanner {
388
425
  // NemoClaw codebase pattern checks
389
426
  const nemoFindings = await this.checkNemoClawPatterns(targetDir, shouldFix);
390
427
  findings.push(...nemoFindings);
428
+ // AI infrastructure exposure checks (research gap coverage)
429
+ const llmFindings = await this.checkLLMExposure(targetDir, shouldFix);
430
+ findings.push(...llmFindings);
431
+ const aiToolFindings = await this.checkAIToolExposure(targetDir, shouldFix);
432
+ findings.push(...aiToolFindings);
433
+ const a2aFindings = await this.checkA2AExposure(targetDir, shouldFix);
434
+ findings.push(...a2aFindings);
435
+ const mcpDiscoveryFindings = await this.checkMCPDiscovery(targetDir, shouldFix);
436
+ findings.push(...mcpDiscoveryFindings);
437
+ const webCredFindings = await this.checkWebServedCredentials(targetDir, shouldFix);
438
+ findings.push(...webCredFindings);
391
439
  // Enrich findings with attack taxonomy mapping
392
440
  (0, taxonomy_1.enrichWithTaxonomy)(findings);
393
441
  // Layer 2: Structural analysis (always on)
@@ -425,6 +473,36 @@ class HardeningScanner {
425
473
  // LLM analysis failure is non-fatal — fall back to Layer 2 only
426
474
  }
427
475
  }
476
+ // Verify fixes: re-scan fixed files to confirm issues are actually resolved
477
+ if (shouldFix) {
478
+ const fixedFindings = findings.filter(f => f.fixed && f.file);
479
+ if (fixedFindings.length > 0) {
480
+ // Re-run a targeted scan (no fix, just detect) to verify
481
+ const verifyScanner = new HardeningScanner();
482
+ const verifyResult = await verifyScanner.scan({
483
+ targetDir,
484
+ autoFix: false,
485
+ ignore: ignoredChecks.size > 0 ? [...ignoredChecks] : [],
486
+ cliName: this.cliName,
487
+ });
488
+ // For each fixed finding, check if the same checkId still appears as failed
489
+ const stillFailing = new Set(verifyResult.findings
490
+ .filter(f => !f.passed && !f.fixed)
491
+ .map(f => `${f.checkId}:${f.file}`));
492
+ for (const finding of fixedFindings) {
493
+ const key = `${finding.checkId}:${finding.file}`;
494
+ finding.fixVerified = !stillFailing.has(key);
495
+ if (!finding.fixVerified) {
496
+ finding.fixMessage = (finding.fixMessage || '') + ' [FIX NOT VERIFIED - issue may persist]';
497
+ }
498
+ }
499
+ if (options.onProgress) {
500
+ const verified = fixedFindings.filter(f => f.fixVerified).length;
501
+ const total = fixedFindings.length;
502
+ options.onProgress(`Fix verification: ${verified}/${total} fixes confirmed`);
503
+ }
504
+ }
505
+ }
428
506
  // Filter findings to only show real, actionable issues:
429
507
  // 1. Only failed checks (passed: false)
430
508
  // 2. Only checks with a file path (concrete findings, not generic advice)
@@ -440,6 +518,9 @@ class HardeningScanner {
440
518
  // Filter out ignored checks
441
519
  if (ignoredChecks.has(f.checkId.toUpperCase()))
442
520
  return false;
521
+ // Filter out paths matching .hmaignore
522
+ if (f.file && this.isPathIgnored(f.file, allIgnoredPaths))
523
+ return false;
443
524
  return true;
444
525
  });
445
526
  // Calculate score (only on applicable, non-ignored findings)
@@ -7156,6 +7237,498 @@ dist/
7156
7237
  }
7157
7238
  return findings;
7158
7239
  }
7240
+ // ═══════════════════════════════════════════════════════════════════
7241
+ // AI Infrastructure Exposure Checks (Research Gap Coverage)
7242
+ // These checks detect the root causes that lead to internet-exposed
7243
+ // AI services found by Shodan sweeps in the OpenA2A research program.
7244
+ // ═══════════════════════════════════════════════════════════════════
7245
+ /**
7246
+ * LLM-001 to LLM-004: Exposed LLM inference endpoints
7247
+ * Detects Ollama, vLLM, LocalAI, text-generation-webui configs bound
7248
+ * to public interfaces or missing authentication.
7249
+ */
7250
+ async checkLLMExposure(targetDir, autoFix) {
7251
+ const findings = [];
7252
+ // Patterns for LLM server configs that indicate exposure risk
7253
+ const llmConfigFiles = [
7254
+ { name: 'docker-compose.yml', altNames: ['docker-compose.yaml', 'compose.yml', 'compose.yaml'] },
7255
+ { name: 'Dockerfile', altNames: [] },
7256
+ { name: '.env', altNames: ['.env.local', '.env.production'] },
7257
+ { name: 'config.json', altNames: ['config.yaml', 'config.yml'] },
7258
+ { name: 'package.json', altNames: [] },
7259
+ ];
7260
+ const LLM_EXPOSURE_PATTERNS = [
7261
+ { id: 'LLM-001', name: 'Ollama Bound to Public Interface', service: 'Ollama',
7262
+ pattern: /OLLAMA_HOST\s*[=:]\s*["']?0\.0\.0\.0/i,
7263
+ fixPattern: /(OLLAMA_HOST\s*[=:]\s*["']?)0\.0\.0\.0/i,
7264
+ fixReplacement: '$1127.0.0.1',
7265
+ severity: 'critical',
7266
+ description: 'Ollama server configured to listen on all interfaces. Our research found 294K+ exposed AI services on the internet — many are Ollama instances.',
7267
+ fix: 'Set OLLAMA_HOST=127.0.0.1 to restrict to localhost. If remote access is needed, use a reverse proxy with authentication.' },
7268
+ { id: 'LLM-001', name: 'Ollama Port Exposed', service: 'Ollama',
7269
+ pattern: /^(?!.*127\.0\.0\.1).*["']?11434["']?\s*:\s*["']?11434["']?/,
7270
+ fixPattern: /(["']?)11434(["']?\s*:\s*["']?11434["']?)/,
7271
+ fixReplacement: '$1127.0.0.1:11434$2',
7272
+ severity: 'high',
7273
+ description: 'Ollama default port (11434) mapped in container config. Without bind restrictions, this exposes the inference API to the network.',
7274
+ fix: 'Map to localhost only: "127.0.0.1:11434:11434" instead of "11434:11434".' },
7275
+ { id: 'LLM-002', name: 'vLLM/LocalAI Public Binding', service: 'vLLM/LocalAI',
7276
+ pattern: /--host\s+0\.0\.0\.0|host:\s*["']?0\.0\.0\.0/i,
7277
+ fixPattern: /(--host\s+|host:\s*["']?)0\.0\.0\.0/i,
7278
+ fixReplacement: '$1127.0.0.1',
7279
+ severity: 'critical',
7280
+ description: 'LLM inference server configured to bind to all interfaces.',
7281
+ fix: 'Use --host 127.0.0.1 or bind to localhost. Use a reverse proxy with auth for remote access.' },
7282
+ { id: 'LLM-003', name: 'Text Generation WebUI Exposed', service: 'text-generation-webui',
7283
+ pattern: /--listen\s|--share\s|GRADIO_SERVER_NAME\s*=\s*["']?0\.0\.0\.0/i,
7284
+ fixPattern: /\s*--listen\s?|\s*--share\s?|(GRADIO_SERVER_NAME\s*=\s*["']?)0\.0\.0\.0/gi,
7285
+ fixReplacement: '$1127.0.0.1',
7286
+ severity: 'high',
7287
+ description: 'Text generation UI configured for public access with --listen or --share flag.',
7288
+ fix: 'Remove --listen and --share flags. Access via localhost or SSH tunnel.' },
7289
+ { id: 'LLM-004', name: 'OpenAI-Compatible API No Auth', service: 'OpenAI-compatible',
7290
+ pattern: /\/v1\/chat\/completions|\/v1\/completions|\/v1\/models/,
7291
+ severity: 'medium',
7292
+ description: 'Project exposes OpenAI-compatible API endpoints. Verify authentication is enforced.',
7293
+ fix: 'Ensure API key or token authentication is required for all inference endpoints.' },
7294
+ ];
7295
+ for (const configDef of llmConfigFiles) {
7296
+ const filesToCheck = [configDef.name, ...configDef.altNames];
7297
+ for (const filename of filesToCheck) {
7298
+ const filePath = path.join(targetDir, filename);
7299
+ try {
7300
+ let content = await fs.readFile(filePath, 'utf-8');
7301
+ if (content.length > 10 * 1024 * 1024)
7302
+ continue; // Skip files > 10MB
7303
+ const lines = content.split('\n');
7304
+ for (const check of LLM_EXPOSURE_PATTERNS) {
7305
+ for (let i = 0; i < lines.length; i++) {
7306
+ if (check.pattern.test(lines[i])) {
7307
+ let fixed = false;
7308
+ if (autoFix && check.fixPattern && check.fixReplacement) {
7309
+ const original = lines[i];
7310
+ lines[i] = lines[i].replace(check.fixPattern, check.fixReplacement);
7311
+ if (lines[i] !== original) {
7312
+ fixed = true;
7313
+ content = lines.join('\n');
7314
+ await fs.writeFile(filePath, content);
7315
+ }
7316
+ }
7317
+ findings.push({
7318
+ checkId: check.id,
7319
+ name: check.name,
7320
+ description: check.description,
7321
+ category: 'llm-exposure',
7322
+ severity: check.severity,
7323
+ passed: fixed,
7324
+ message: `${check.service} exposure detected in ${filename}`,
7325
+ file: filename,
7326
+ line: i + 1,
7327
+ fixable: !!check.fixPattern,
7328
+ fixed,
7329
+ fix: check.fix,
7330
+ });
7331
+ break; // One finding per pattern per file
7332
+ }
7333
+ }
7334
+ }
7335
+ }
7336
+ catch {
7337
+ // File doesn't exist, skip
7338
+ }
7339
+ }
7340
+ }
7341
+ return findings;
7342
+ }
7343
+ /**
7344
+ * AITOOL-001 to AITOOL-004: Exposed AI development tooling
7345
+ * Detects Jupyter, Gradio, Streamlit, MLflow, LangServe configs
7346
+ * that are publicly accessible.
7347
+ */
7348
+ async checkAIToolExposure(targetDir, autoFix) {
7349
+ const findings = [];
7350
+ // Fix transforms for AI tool patterns
7351
+ const AI_TOOL_FIXES = {
7352
+ 'AITOOL-001': [
7353
+ { match: /(NotebookApp\.token\s*=\s*)['"]{2}/, replace: `$1'${crypto.randomBytes(32).toString('hex')}'` },
7354
+ { match: /(NotebookApp\.password\s*=\s*)['"]{2}/, replace: `$1'${crypto.randomBytes(32).toString('hex')}'` },
7355
+ { match: /(ServerApp\.token\s*=\s*)['"]{2}/, replace: `$1'${crypto.randomBytes(32).toString('hex')}'` },
7356
+ { match: /(--ip\s*=?\s*["']?)0\.0\.0\.0/, replace: '$1127.0.0.1' },
7357
+ { match: /--NotebookApp\.token=['"]?\s/, replace: `--NotebookApp.token=${crypto.randomBytes(32).toString('hex')} ` },
7358
+ ],
7359
+ 'AITOOL-002': [
7360
+ { match: /(share\s*=\s*)True/, replace: '$1False' },
7361
+ { match: /(GRADIO_SERVER_NAME\s*=\s*["']?)0\.0\.0\.0/, replace: '$1127.0.0.1' },
7362
+ { match: /(server\.address\s*=\s*["']?)0\.0\.0\.0/, replace: '$1127.0.0.1' },
7363
+ ],
7364
+ 'AITOOL-003': [
7365
+ { match: /(--host\s+)0\.0\.0\.0/i, replace: '$1127.0.0.1' },
7366
+ ],
7367
+ };
7368
+ const AI_TOOL_PATTERNS = [
7369
+ {
7370
+ id: 'AITOOL-001', name: 'Jupyter Notebook Publicly Accessible',
7371
+ severity: 'critical',
7372
+ description: 'Jupyter notebook server configured without authentication or bound to public interface. Our research found exposed Jupyter instances with full code execution on the internet.',
7373
+ fix: 'Set c.NotebookApp.token or c.NotebookApp.password. Bind to 127.0.0.1. Never use --NotebookApp.token=\'\' in production.',
7374
+ filePatterns: ['jupyter_notebook_config.py', 'jupyter_server_config.py', 'docker-compose.yml', 'docker-compose.yaml', 'Dockerfile'],
7375
+ contentPatterns: [
7376
+ /NotebookApp\.token\s*=\s*['"]{2}/, // Empty token
7377
+ /NotebookApp\.password\s*=\s*['"]{2}/, // Empty password
7378
+ /--NotebookApp\.token=['"]{0,2}\s/, // CLI empty token
7379
+ /--ip\s*=?\s*["']?0\.0\.0\.0/, // Bind all interfaces
7380
+ /ServerApp\.token\s*=\s*['"]{2}/, // Jupyter Server empty token
7381
+ ],
7382
+ },
7383
+ {
7384
+ id: 'AITOOL-002', name: 'Gradio/Streamlit Public Sharing',
7385
+ severity: 'high',
7386
+ description: 'ML demo framework configured for public access. Gradio share links and public Streamlit deployments can expose model inference and data pipelines.',
7387
+ fix: 'Remove share=True from Gradio launch(). For Streamlit, add authentication or use private deployment.',
7388
+ filePatterns: ['*.py', 'app.py', 'main.py', 'streamlit_app.py', 'demo.py'],
7389
+ contentPatterns: [
7390
+ /\.launch\s*\([^)]*share\s*=\s*True/, // Gradio share=True
7391
+ /GRADIO_SERVER_NAME\s*=\s*["']?0\.0\.0\.0/, // Gradio bind all
7392
+ /server\.address\s*=\s*["']?0\.0\.0\.0/, // Streamlit bind all
7393
+ ],
7394
+ },
7395
+ {
7396
+ id: 'AITOOL-003', name: 'MLflow Tracking Server No Auth',
7397
+ severity: 'high',
7398
+ description: 'MLflow tracking server configured without authentication. Exposed MLflow instances leak experiment data, model artifacts, and parameters.',
7399
+ fix: 'Configure MLflow with --backend-store-uri and authentication. Use a reverse proxy with auth for remote access.',
7400
+ filePatterns: ['docker-compose.yml', 'docker-compose.yaml', 'Dockerfile', 'Makefile', '*.sh'],
7401
+ contentPatterns: [
7402
+ /mlflow\s+server\s+.*--host\s+0\.0\.0\.0/i,
7403
+ /mlflow\s+ui\s+.*--host\s+0\.0\.0\.0/i,
7404
+ /MLFLOW_TRACKING_URI\s*=\s*["']?http:\/\//,
7405
+ ],
7406
+ },
7407
+ {
7408
+ id: 'AITOOL-004', name: 'LangServe Endpoint Exposed',
7409
+ severity: 'high',
7410
+ description: 'LangChain LangServe endpoint configured for public access. Exposed LangServe instances allow arbitrary chain invocation.',
7411
+ fix: 'Add authentication middleware to LangServe routes. Bind to 127.0.0.1 for local-only access.',
7412
+ filePatterns: ['*.py', 'app.py', 'main.py', 'server.py'],
7413
+ contentPatterns: [
7414
+ /add_routes\s*\(/, // LangServe route
7415
+ /from\s+langserve\s+import/, // LangServe import
7416
+ ],
7417
+ },
7418
+ ];
7419
+ for (const check of AI_TOOL_PATTERNS) {
7420
+ const fixTransforms = AI_TOOL_FIXES[check.id];
7421
+ const isFixable = !!fixTransforms;
7422
+ for (const filePattern of check.filePatterns) {
7423
+ const filesToCheck = [];
7424
+ if (filePattern.includes('*')) {
7425
+ try {
7426
+ const entries = await fs.readdir(targetDir, { withFileTypes: true });
7427
+ const ext = filePattern.replace('*', '');
7428
+ for (const entry of entries) {
7429
+ if (entry.isFile() && entry.name.endsWith(ext)) {
7430
+ filesToCheck.push(entry.name);
7431
+ }
7432
+ }
7433
+ }
7434
+ catch { /* skip */ }
7435
+ }
7436
+ else {
7437
+ filesToCheck.push(filePattern);
7438
+ }
7439
+ for (const filename of filesToCheck) {
7440
+ const filePath = path.join(targetDir, filename);
7441
+ try {
7442
+ let content = await fs.readFile(filePath, 'utf-8');
7443
+ if (content.length > 10 * 1024 * 1024)
7444
+ continue;
7445
+ const lines = content.split('\n');
7446
+ for (const pattern of check.contentPatterns) {
7447
+ for (let i = 0; i < lines.length; i++) {
7448
+ pattern.lastIndex = 0;
7449
+ if (pattern.test(lines[i])) {
7450
+ if (check.id === 'AITOOL-004' && /from\s+langserve/.test(lines[i])) {
7451
+ const hasRoutes = content.includes('add_routes');
7452
+ const hasBind = /0\.0\.0\.0/.test(content);
7453
+ if (!hasRoutes || !hasBind)
7454
+ continue;
7455
+ }
7456
+ let fixed = false;
7457
+ if (autoFix && fixTransforms) {
7458
+ for (const ft of fixTransforms) {
7459
+ if (ft.match.test(lines[i])) {
7460
+ lines[i] = lines[i].replace(ft.match, ft.replace);
7461
+ fixed = true;
7462
+ }
7463
+ }
7464
+ if (fixed) {
7465
+ content = lines.join('\n');
7466
+ await fs.writeFile(filePath, content);
7467
+ }
7468
+ }
7469
+ findings.push({
7470
+ checkId: check.id,
7471
+ name: check.name,
7472
+ description: check.description,
7473
+ category: 'ai-tool-exposure',
7474
+ severity: check.severity,
7475
+ passed: fixed,
7476
+ message: `${check.name} in ${filename}`,
7477
+ file: filename,
7478
+ line: i + 1,
7479
+ fixable: isFixable,
7480
+ fixed,
7481
+ fix: check.fix,
7482
+ });
7483
+ break;
7484
+ }
7485
+ }
7486
+ }
7487
+ }
7488
+ catch { /* file doesn't exist, skip */ }
7489
+ }
7490
+ }
7491
+ }
7492
+ return findings;
7493
+ }
7494
+ /**
7495
+ * A2A-001 to A2A-002: A2A protocol exposure
7496
+ * Detects .well-known/agent.json and task submission endpoints
7497
+ * that are publicly accessible without authentication.
7498
+ */
7499
+ async checkA2AExposure(targetDir, _autoFix) {
7500
+ const findings = [];
7501
+ // Check for .well-known/agent.json (A2A discovery file)
7502
+ const wellKnownPaths = [
7503
+ path.join(targetDir, '.well-known', 'agent.json'),
7504
+ path.join(targetDir, 'public', '.well-known', 'agent.json'),
7505
+ path.join(targetDir, 'static', '.well-known', 'agent.json'),
7506
+ ];
7507
+ for (const agentJsonPath of wellKnownPaths) {
7508
+ try {
7509
+ const content = await fs.readFile(agentJsonPath, 'utf-8');
7510
+ const relativePath = path.relative(targetDir, agentJsonPath);
7511
+ // Parse and check for sensitive capabilities
7512
+ let agentCard = {};
7513
+ try {
7514
+ agentCard = JSON.parse(content);
7515
+ }
7516
+ catch { /* invalid JSON, still flag it */ }
7517
+ const hasAuth = content.includes('"authentication"') || content.includes('"auth"');
7518
+ findings.push({
7519
+ checkId: 'A2A-001',
7520
+ name: 'A2A Agent Discovery File Exposed',
7521
+ description: 'A .well-known/agent.json file makes this agent discoverable via the A2A protocol. Our research found exposed agent.json files that allow unauthenticated task submission.',
7522
+ category: 'a2a-exposure',
7523
+ severity: hasAuth ? 'medium' : 'high',
7524
+ passed: false,
7525
+ message: hasAuth
7526
+ ? 'Agent card found with authentication configured'
7527
+ : 'Agent card found WITHOUT authentication — any client can submit tasks',
7528
+ file: relativePath,
7529
+ fixable: false,
7530
+ fix: 'Add authentication requirements to your agent card. Restrict task submission to authenticated clients.',
7531
+ details: { hasAuth, capabilities: agentCard.capabilities },
7532
+ });
7533
+ break; // Found one, no need to check other paths
7534
+ }
7535
+ catch { /* doesn't exist */ }
7536
+ }
7537
+ // Check source files for A2A task endpoints without auth middleware
7538
+ const sourceFiles = ['server.py', 'app.py', 'main.py', 'server.ts', 'app.ts', 'index.ts'];
7539
+ for (const filename of sourceFiles) {
7540
+ try {
7541
+ const content = await fs.readFile(path.join(targetDir, filename), 'utf-8');
7542
+ if (content.length > 10 * 1024 * 1024)
7543
+ continue;
7544
+ const hasTaskEndpoint = /\/tasks\/send|\/tasks\/get|\/tasks\/cancel/.test(content);
7545
+ const hasAuthMiddleware = /auth|authenticate|verify.*token|api.?key|bearer/i.test(content);
7546
+ if (hasTaskEndpoint && !hasAuthMiddleware) {
7547
+ const lines = content.split('\n');
7548
+ for (let i = 0; i < lines.length; i++) {
7549
+ if (/\/tasks\/send|\/tasks\/get/.test(lines[i])) {
7550
+ findings.push({
7551
+ checkId: 'A2A-002',
7552
+ name: 'A2A Task Endpoint Without Authentication',
7553
+ description: 'A2A task submission endpoint found without visible authentication middleware.',
7554
+ category: 'a2a-exposure',
7555
+ severity: 'high',
7556
+ passed: false,
7557
+ message: `Unauthenticated task endpoint in ${filename}`,
7558
+ file: filename,
7559
+ line: i + 1,
7560
+ fixable: false,
7561
+ fix: 'Add authentication middleware to /tasks/send and /tasks/get endpoints. Require API key or bearer token.',
7562
+ });
7563
+ break;
7564
+ }
7565
+ }
7566
+ }
7567
+ }
7568
+ catch { /* skip */ }
7569
+ }
7570
+ return findings;
7571
+ }
7572
+ /**
7573
+ * MCP-011: MCP discovery endpoint exposure
7574
+ * Detects .well-known/mcp files that make MCP servers discoverable.
7575
+ */
7576
+ async checkMCPDiscovery(targetDir, _autoFix) {
7577
+ const findings = [];
7578
+ const mcpDiscoveryPaths = [
7579
+ path.join(targetDir, '.well-known', 'mcp'),
7580
+ path.join(targetDir, '.well-known', 'mcp.json'),
7581
+ path.join(targetDir, 'public', '.well-known', 'mcp'),
7582
+ path.join(targetDir, 'public', '.well-known', 'mcp.json'),
7583
+ path.join(targetDir, 'static', '.well-known', 'mcp'),
7584
+ path.join(targetDir, 'static', '.well-known', 'mcp.json'),
7585
+ ];
7586
+ for (const mcpPath of mcpDiscoveryPaths) {
7587
+ try {
7588
+ const content = await fs.readFile(mcpPath, 'utf-8');
7589
+ const relativePath = path.relative(targetDir, mcpPath);
7590
+ const hasCredentials = CREDENTIAL_PATTERNS.some(({ pattern }) => {
7591
+ pattern.lastIndex = 0;
7592
+ return pattern.test(content);
7593
+ });
7594
+ findings.push({
7595
+ checkId: 'MCP-011',
7596
+ name: 'MCP Discovery Endpoint Exposed',
7597
+ description: 'A .well-known/mcp discovery file makes MCP servers publicly discoverable. Our research found exposed MCP endpoints via this mechanism.',
7598
+ category: 'mcp',
7599
+ severity: hasCredentials ? 'critical' : 'high',
7600
+ passed: false,
7601
+ message: hasCredentials
7602
+ ? 'MCP discovery file contains credentials — CRITICAL exposure'
7603
+ : 'MCP discovery file found — servers are publicly discoverable',
7604
+ file: relativePath,
7605
+ fixable: false,
7606
+ fix: 'Remove .well-known/mcp from public-facing directories, or restrict access via web server configuration. Never include credentials in discovery files.',
7607
+ });
7608
+ break;
7609
+ }
7610
+ catch { /* doesn't exist */ }
7611
+ }
7612
+ return findings;
7613
+ }
7614
+ /**
7615
+ * WEBCRED-001 to WEBCRED-002: Credentials in web-served files
7616
+ * Detects API keys in HTML, JS, and other files typically served
7617
+ * by web servers. Distinct from CRED-001 which checks config files.
7618
+ */
7619
+ async checkWebServedCredentials(targetDir, autoFix) {
7620
+ const findings = [];
7621
+ // Directories that are typically web-served
7622
+ const webDirs = ['public', 'static', 'dist', 'build', 'out', 'www', '_site'];
7623
+ const webFileExts = ['.html', '.htm', '.js', '.jsx', '.tsx', '.css', '.svg'];
7624
+ for (const webDir of webDirs) {
7625
+ const dirPath = path.join(targetDir, webDir);
7626
+ try {
7627
+ await fs.access(dirPath);
7628
+ }
7629
+ catch {
7630
+ continue; // Directory doesn't exist
7631
+ }
7632
+ // Recursively scan web-served directory (max depth 3)
7633
+ const webFiles = await this.findWebFiles(dirPath, webFileExts, 0, dirPath);
7634
+ for (const filePath of webFiles) {
7635
+ try {
7636
+ let content = await fs.readFile(filePath, 'utf-8');
7637
+ if (content.length > 10 * 1024 * 1024)
7638
+ continue;
7639
+ let lines = content.split('\n');
7640
+ const relativePath = path.relative(targetDir, filePath);
7641
+ let fileModified = false;
7642
+ for (const { name, pattern } of CREDENTIAL_PATTERNS) {
7643
+ for (let i = 0; i < lines.length; i++) {
7644
+ if (lines[i].length > 10000)
7645
+ continue;
7646
+ pattern.lastIndex = 0;
7647
+ if (pattern.test(lines[i])) {
7648
+ let fixed = false;
7649
+ if (autoFix) {
7650
+ // Replace credential with process.env reference
7651
+ // Also strip surrounding quotes so `"sk-proj-..."` becomes `process.env.VAR` not `"process.env.VAR"`
7652
+ const envVar = name.replace(/\s+/g, '_').toUpperCase();
7653
+ pattern.lastIndex = 0;
7654
+ const original = lines[i];
7655
+ const envRef = `process.env.${envVar}`;
7656
+ // Replace quoted credential: "sk-..." or 'sk-...' → process.env.VAR (no quotes)
7657
+ const quotedPattern = new RegExp(`(['"])${pattern.source}\\1`, pattern.flags);
7658
+ quotedPattern.lastIndex = 0;
7659
+ if (quotedPattern.test(lines[i])) {
7660
+ quotedPattern.lastIndex = 0;
7661
+ lines[i] = lines[i].replace(quotedPattern, envRef);
7662
+ }
7663
+ else {
7664
+ // No quotes, just replace the credential directly
7665
+ pattern.lastIndex = 0;
7666
+ lines[i] = lines[i].replace(pattern, envRef);
7667
+ }
7668
+ if (lines[i] !== original) {
7669
+ fixed = true;
7670
+ fileModified = true;
7671
+ }
7672
+ }
7673
+ findings.push({
7674
+ checkId: 'WEBCRED-001',
7675
+ name: 'Credential in Web-Served File',
7676
+ description: `${name} found in a file within a web-served directory. This credential is likely accessible to anyone who visits the site. Our research found API keys exposed in HTML source on the public internet.`,
7677
+ category: 'web-credentials',
7678
+ severity: 'critical',
7679
+ passed: fixed,
7680
+ message: fixed
7681
+ ? `${name} in ${relativePath} replaced with environment variable reference`
7682
+ : `${name} exposed in ${relativePath}`,
7683
+ file: relativePath,
7684
+ line: i + 1,
7685
+ fixable: true,
7686
+ fixed,
7687
+ fix: `Move credentials to server-side environment variables. Never include API keys in client-side code or static assets. Use a backend proxy for API calls.`,
7688
+ });
7689
+ break;
7690
+ }
7691
+ }
7692
+ }
7693
+ if (fileModified) {
7694
+ content = lines.join('\n');
7695
+ await fs.writeFile(filePath, content);
7696
+ }
7697
+ }
7698
+ catch { /* skip unreadable files */ }
7699
+ }
7700
+ }
7701
+ return findings;
7702
+ }
7703
+ /** Helper: recursively find files in web-served directories */
7704
+ async findWebFiles(dir, extensions, depth, rootDir) {
7705
+ if (depth > 3)
7706
+ return [];
7707
+ const results = [];
7708
+ try {
7709
+ const entries = await fs.readdir(dir, { withFileTypes: true });
7710
+ for (const entry of entries) {
7711
+ if (entry.isSymbolicLink())
7712
+ continue;
7713
+ const fullPath = path.join(dir, entry.name);
7714
+ if (!this.isPathWithinDirectory(fullPath, rootDir))
7715
+ continue;
7716
+ if (entry.isDirectory()) {
7717
+ if (entry.name === 'node_modules' || entry.name.startsWith('.'))
7718
+ continue;
7719
+ const subFiles = await this.findWebFiles(fullPath, extensions, depth + 1, rootDir);
7720
+ results.push(...subFiles);
7721
+ }
7722
+ else if (entry.isFile()) {
7723
+ if (extensions.some(ext => entry.name.endsWith(ext))) {
7724
+ results.push(fullPath);
7725
+ }
7726
+ }
7727
+ }
7728
+ }
7729
+ catch { /* skip inaccessible dirs */ }
7730
+ return results;
7731
+ }
7159
7732
  }
7160
7733
  exports.HardeningScanner = HardeningScanner;
7161
7734
  // Files that may be created or modified during auto-fix
@@ -7176,5 +7749,15 @@ HardeningScanner.BACKUP_FILES = [
7176
7749
  'package.json',
7177
7750
  'openclaw.json',
7178
7751
  'moltbot.json',
7752
+ // AI infrastructure files (research gap checks)
7753
+ 'docker-compose.yml',
7754
+ 'docker-compose.yaml',
7755
+ 'compose.yml',
7756
+ 'compose.yaml',
7757
+ 'Dockerfile',
7758
+ 'jupyter_notebook_config.py',
7759
+ 'jupyter_server_config.py',
7760
+ '.well-known/agent.json',
7761
+ '.well-known/mcp.json',
7179
7762
  ];
7180
7763
  //# sourceMappingURL=scanner.js.map