hackmyagent 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -41,6 +41,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
41
41
  const commander_1 = require("commander");
42
42
  const index_1 = require("./index");
43
43
  const resolve_mcp_1 = require("./resolve-mcp");
44
+ const wild_1 = require("./wild");
44
45
  const nemoclaw_scanner_1 = require("./hardening/nemoclaw-scanner");
45
46
  const program = new commander_1.Command();
46
47
  program.showHelpAfterError('(run with --help for usage)');
@@ -4877,14 +4878,54 @@ function trustLevelColor(level) {
4877
4878
  return colors.yellow;
4878
4879
  return colors.red;
4879
4880
  }
4880
- function trustVerdictColor(verdict) {
4881
/**
 * Map a verdict string onto the canonical verdict vocabulary.
 *
 * Aliases handled: 'passed' -> 'safe', 'warnings' -> 'warning',
 * 'failed' -> 'blocked'. The canonical values ('safe', 'warning',
 * 'blocked', 'listed') map to themselves. Matching is exact and
 * case-sensitive.
 *
 * @param {*} verdict - Verdict value to normalize.
 * @returns {*} The canonical verdict, or the input unchanged when it is not
 *   one of the recognized spellings (including non-string input).
 */
function normalizeTrustVerdict(verdict) {
    if (verdict === 'safe' || verdict === 'passed') {
        return 'safe';
    }
    if (verdict === 'warning' || verdict === 'warnings') {
        return 'warning';
    }
    if (verdict === 'blocked' || verdict === 'failed') {
        return 'blocked';
    }
    if (verdict === 'listed') {
        return 'listed';
    }
    // Unrecognized values pass through untouched.
    return verdict;
}
4893
/**
 * Choose the terminal color used to render a trust verdict.
 *
 * The verdict is first passed through normalizeTrustVerdict, so alias
 * spellings share the color of their canonical form.
 *
 * @param {*} verdict - Raw verdict value.
 * @returns {*} colors.green for 'safe', colors.yellow for 'warning',
 *   colors.red for 'blocked', colors.cyan for 'listed', otherwise colors.dim.
 */
function trustVerdictColor(verdict) {
    const colorNameByVerdict = new Map([
        ['safe', 'green'],
        ['warning', 'yellow'],
        ['blocked', 'red'],
        ['listed', 'cyan'],
    ]);
    const canonical = normalizeTrustVerdict(verdict);
    // Anything outside the known vocabulary is rendered dimmed.
    const colorName = colorNameByVerdict.get(canonical) ?? 'dim';
    return colors[colorName];
}
4903
/**
 * Render a trust score for display.
 *
 * The score is multiplied by 100 and rounded, so it is treated as a fraction
 * where 1 corresponds to "100/100".
 *
 * @param {number} trustScore - Trust score fraction.
 * @param {string} [scanStatus] - Scan status reported alongside the score.
 * @returns {string} One of:
 *   - 'Not scanned' when the score is 0 (or missing/unusable) and there is no
 *     scan status;
 *   - 'N/A' when a scan status exists but the score is missing or not a
 *     finite number;
 *   - `${n}/100` otherwise.
 */
function formatTrustScore(trustScore, scanStatus) {
    // A zero score with no scan status means the package was never scanned.
    if (trustScore === 0 && !scanStatus) {
        return 'Not scanned';
    }
    const percent = trustScore == null ? Number.NaN : Math.round(trustScore * 100);
    // Never print "NaN/100" or "Infinity/100" when the score is absent or malformed.
    if (!Number.isFinite(percent)) {
        return scanStatus ? 'N/A' : 'Not scanned';
    }
    return `${percent}/100`;
}
4908
/**
 * Translate a numeric confidence value into a human-readable band.
 *
 * @param {number} confidence - Confidence value; compared against the 0.7
 *   and 0.4 thresholds.
 * @returns {string|null} null when the value is falsy (0, NaN, null,
 *   undefined); 'high confidence' at 0.7 and above, 'moderate confidence' at
 *   0.4 and above, otherwise 'low confidence'.
 */
function formatTrustConfidence(confidence) {
    // Falsy covers 0 as well as a missing value: no confidence line is shown.
    if (!confidence) {
        return null;
    }
    // Ordered from the highest floor down; the first floor reached wins.
    const bands = [
        [0.7, 'high confidence'],
        [0.4, 'moderate confidence'],
    ];
    const reached = bands.find(([floor]) => confidence >= floor);
    return reached ? reached[1] : 'low confidence';
}
4917
/**
 * Describe how long ago a package was last scanned.
 *
 * @param {string|number|Date} lastScannedAt - Timestamp of the last scan, in
 *   any form accepted by the Date constructor.
 * @param {number} [now=Date.now()] - Reference time in epoch milliseconds.
 * @returns {string|null} null when the timestamp is missing or cannot be
 *   parsed; otherwise 'today', '1 day ago', `${n} days ago`, or
 *   `${n} days ago (stale)` once the age exceeds 90 days.
 */
function formatTrustScanAge(lastScannedAt, now = Date.now()) {
    if (!lastScannedAt) {
        return null;
    }
    const scannedAtMs = new Date(lastScannedAt).getTime();
    // An unparseable timestamp yields NaN; report "no age" rather than "NaN days ago".
    if (Number.isNaN(scannedAtMs)) {
        return null;
    }
    const msPerDay = 24 * 60 * 60 * 1000;
    // Clamp at zero so a timestamp slightly in the future (clock skew)
    // reads as 'today' instead of a negative day count.
    const days = Math.max(0, Math.floor((now - scannedAtMs) / msPerDay));
    if (days === 0) {
        return 'today';
    }
    if (days === 1) {
        return '1 day ago';
    }
    if (days > 90) {
        return `${days} days ago (stale)`;
    }
    return `${days} days ago`;
}
4888
4929
  function formatTrustCheck(answer) {
4889
4930
  if (!answer.found) {
4890
4931
  return [
@@ -4893,19 +4934,42 @@ function formatTrustCheck(answer) {
4893
4934
  ` ${colors.dim}Type: ${answer.packageType || 'unknown'}${colors.reset}`,
4894
4935
  ` ${colors.dim}Status: Not found in registry${colors.reset}`,
4895
4936
  '',
4937
+ ' To scan it locally:',
4938
+ ` ${colors.cyan}ai-trust check ${answer.name} --scan-if-missing${colors.reset}`,
4939
+ '',
4940
+ ' Or scan your full project:',
4941
+ ` ${colors.cyan}npx hackmyagent secure .${colors.reset}`,
4942
+ '',
4896
4943
  ].join('\n');
4897
4944
  }
4945
+ const normalized = normalizeTrustVerdict(answer.verdict);
4898
4946
  const vc = trustVerdictColor(answer.verdict);
4899
4947
  const tc = trustLevelColor(answer.trustLevel);
4948
+ const scoreDisplay = formatTrustScore(answer.trustScore, answer.scanStatus);
4949
+ const isUnscanned = scoreDisplay === 'Not scanned';
4900
4950
  const lines = [
4901
4951
  '',
4902
4952
  ` ${answer.name}`,
4903
4953
  ` Type: ${answer.packageType || 'unknown'}`,
4904
- ` Verdict: ${vc}${answer.verdict.toUpperCase()}${colors.reset}`,
4954
+ ` Verdict: ${vc}${normalized.toUpperCase()}${colors.reset}`,
4905
4955
  ` Trust Level: ${tc}${trustLevelLabel(answer.trustLevel)}${colors.reset} (${answer.trustLevel}/4)`,
4906
- ` Trust Score: ${Math.round(answer.trustScore * 100)}/100`,
4907
- ` Scan Status: ${answer.scanStatus || 'unknown'}`,
4956
+ ` Trust Score: ${isUnscanned ? colors.dim + scoreDisplay + colors.reset : scoreDisplay}`,
4908
4957
  ];
4958
+ const conf = formatTrustConfidence(answer.confidence);
4959
+ if (conf)
4960
+ lines.push(` Confidence: ${conf}`);
4961
+ const scanAge = formatTrustScanAge(answer.lastScannedAt);
4962
+ if (scanAge) {
4963
+ lines.push(` Last Scanned: ${scanAge.includes('stale') ? colors.yellow + scanAge + colors.reset : scanAge}`);
4964
+ }
4965
+ else if (!isUnscanned) {
4966
+ lines.push(` Scan Status: ${answer.scanStatus || 'unknown'}`);
4967
+ }
4968
+ if (isUnscanned) {
4969
+ lines.push('');
4970
+ lines.push(` ${colors.yellow}This package has not been security-scanned.${colors.reset}`);
4971
+ lines.push(` ${colors.yellow}Trust level reflects registry listing only.${colors.reset}`);
4972
+ }
4909
4973
  if (answer.dependencies && answer.dependencies.totalDeps > 0) {
4910
4974
  const deps = answer.dependencies;
4911
4975
  lines.push('');
@@ -4922,7 +4986,7 @@ function formatTrustBatch(response, minTrust) {
4922
4986
  lines.push('');
4923
4987
  lines.push(` Trust Audit: ${response.meta.total} packages queried, ${response.meta.found} found, ${response.meta.notFound} not found`);
4924
4988
  lines.push('');
4925
- const nameW = 40, typeW = 14, verdictW = 10, levelW = 12, scoreW = 8, scanW = 10;
4989
+ const nameW = 40, typeW = 14, verdictW = 10, levelW = 12, scoreW = 14, scanW = 10;
4926
4990
  lines.push(' ' +
4927
4991
  'PACKAGE'.padEnd(nameW) +
4928
4992
  'TYPE'.padEnd(typeW) +
@@ -4932,17 +4996,29 @@ function formatTrustBatch(response, minTrust) {
4932
4996
  'SCAN'.padEnd(scanW));
4933
4997
  lines.push(' ' + '-'.repeat(nameW + typeW + verdictW + levelW + scoreW + scanW));
4934
4998
  for (const result of response.results) {
4935
- const vc = trustVerdictColor(result.verdict);
4936
- const tc = trustLevelColor(result.trustLevel);
4937
4999
  const name = result.name.length > nameW - 2
4938
5000
  ? result.name.substring(0, nameW - 5) + '...'
4939
5001
  : result.name;
5002
+ if (!result.found) {
5003
+ lines.push(' ' +
5004
+ name.padEnd(nameW) +
5005
+ '-'.padEnd(typeW) +
5006
+ colors.dim + 'NO DATA'.padEnd(verdictW) + colors.reset +
5007
+ colors.dim + '-'.padEnd(levelW) + colors.reset +
5008
+ '-'.padEnd(scoreW) +
5009
+ '-'.padEnd(scanW));
5010
+ continue;
5011
+ }
5012
+ const normalized = normalizeTrustVerdict(result.verdict);
5013
+ const vc = trustVerdictColor(result.verdict);
5014
+ const tc = trustLevelColor(result.trustLevel);
5015
+ const scoreDisplay = formatTrustScore(result.trustScore, result.scanStatus);
4940
5016
  lines.push(' ' +
4941
5017
  name.padEnd(nameW) +
4942
5018
  (result.packageType || '-').padEnd(typeW) +
4943
- vc + result.verdict.toUpperCase().padEnd(verdictW) + colors.reset +
5019
+ vc + normalized.toUpperCase().padEnd(verdictW) + colors.reset +
4944
5020
  tc + trustLevelLabel(result.trustLevel).padEnd(levelW) + colors.reset +
4945
- (result.found ? `${Math.round(result.trustScore * 100)}/100` : '-').padEnd(scoreW) +
5021
+ scoreDisplay.padEnd(scoreW) +
4946
5022
  (result.scanStatus || '-').padEnd(scanW));
4947
5023
  }
4948
5024
  const belowThreshold = response.results.filter((r) => r.found && r.trustLevel < minTrust);
@@ -4955,14 +5031,24 @@ function formatTrustBatch(response, minTrust) {
4955
5031
  }
4956
5032
  }
4957
5033
  if (notFound.length > 0) {
4958
- lines.push(` ${colors.dim}[?] ${notFound.length} package(s) not found in registry:${colors.reset}`);
5034
+ lines.push(` ${colors.yellow}[?] ${notFound.length} package(s) not found in registry (no trust data):${colors.reset}`);
4959
5035
  for (const pkg of notFound) {
4960
- lines.push(` ${colors.dim} - ${pkg.name}${colors.reset}`);
5036
+ lines.push(` ${colors.yellow} - ${pkg.name}${colors.reset}`);
4961
5037
  }
4962
5038
  }
4963
5039
  if (belowThreshold.length === 0 && notFound.length === 0) {
4964
5040
  lines.push(` ${colors.green}All ${response.meta.found} packages meet minimum trust level ${minTrust}.${colors.reset}`);
4965
5041
  }
5042
+ // Next steps
5043
+ lines.push('');
5044
+ if (notFound.length > 0) {
5045
+ lines.push(` ${colors.dim}Scan unknown packages: ai-trust audit <file> --scan-missing${colors.reset}`);
5046
+ lines.push(` ${colors.dim}Or individually: ai-trust check <name> --scan-if-missing${colors.reset}`);
5047
+ }
5048
+ if (belowThreshold.length > 0) {
5049
+ lines.push(` ${colors.dim}Inspect flagged packages: ai-trust check <name>${colors.reset}`);
5050
+ }
5051
+ lines.push(` ${colors.dim}Full project security scan: npx hackmyagent secure .${colors.reset}`);
4966
5052
  lines.push('');
4967
5053
  return lines.join('\n');
4968
5054
  }
@@ -5040,7 +5126,7 @@ Examples:
5040
5126
  .option('-t, --type <type>', 'Package type (mcp_server, a2a_agent, ai_tool, etc.)')
5041
5127
  .option('--audit <file>', 'Audit a dependency file (package.json or requirements.txt)')
5042
5128
  .option('--batch <names...>', 'Batch trust lookup for multiple packages')
5043
- .option('--min-trust <level>', 'Minimum trust level threshold (0-4)', '3')
5129
+ .option('--min-trust <level>', 'Minimum trust level threshold (0-4)', '2')
5044
5130
  .option('--registry-url <url>', 'Registry base URL', validateRegistryUrl(REGISTRY_DEFAULT_URL))
5045
5131
  .option('--json', 'Output as JSON')
5046
5132
  .action(async (packageName, opts) => {
@@ -5074,7 +5160,8 @@ Examples:
5074
5160
  process.stdout.write(formatTrustBatch(response, minTrust));
5075
5161
  }
5076
5162
  const belowThreshold = response.results.some((r) => r.found && r.trustLevel < minTrust);
5077
- if (belowThreshold)
5163
+ const hasNotFound = response.results.some((r) => !r.found);
5164
+ if (belowThreshold || hasNotFound)
5078
5165
  process.exitCode = 1;
5079
5166
  return;
5080
5167
  }
@@ -5096,7 +5183,8 @@ Examples:
5096
5183
  process.stdout.write(formatTrustBatch(response, minTrust));
5097
5184
  }
5098
5185
  const belowThreshold = response.results.some((r) => r.found && r.trustLevel < minTrust);
5099
- if (belowThreshold)
5186
+ const hasNotFound = response.results.some((r) => !r.found);
5187
+ if (belowThreshold || hasNotFound)
5100
5188
  process.exitCode = 1;
5101
5189
  return;
5102
5190
  }
@@ -5253,6 +5341,136 @@ program
5253
5341
  console.log(`\n${trainingCount} training samples exported to NanoMind corpus.`);
5254
5342
  }
5255
5343
  });
5344
// wild: test AI agent resilience against real-world web-based attacks
//
// Registers the `wild` subcommand. The action builds a WildScanner from the
// CLI options, runs the scan, prints either JSON or the human-readable
// report, optionally writes the JSON report to a file, and signals a
// non-zero exit status when the resilience rating is 'critical' or 'poor'.
program
    .command('wild')
    .description(`Test AI agent resilience in the wild

Fetches pages from AgentPwn (agentpwn.com) and analyzes hidden injection
payloads that AI agents encounter when browsing the web. Reports which
attack surfaces exist and computes a wild resilience score.

Attack categories (11):
prompt-injection, jailbreak, data-exfiltration, capability-abuse,
context-manipulation, mcp-exploitation, a2a-attack,
memory-weaponization, context-window, supply-chain, tool-shadow

Injection surfaces detected:
html-comment, invisible-span, json-ld, meta-tag, http-header,
aria-label, image-alt, unicode-stego

Also tests: robots.txt, llms.txt, sitemap.xml for embedded payloads

Examples:
$ hackmyagent wild
$ hackmyagent wild https://agentpwn.com
$ hackmyagent wild --category prompt-injection
$ hackmyagent wild --tier 5
$ hackmyagent wild --json
$ hackmyagent wild --verbose -o report.json`)
    .argument('[url]', 'Target URL to scan', 'https://agentpwn.com')
    .option('-c, --category <category>', 'Filter by attack category')
    .option('-t, --tier <tier>', 'Filter by specific difficulty tier')
    .option('--timeout <ms>', 'Request timeout in milliseconds', '15000')
    .option('--delay <ms>', 'Delay between requests in milliseconds', '500')
    .option('--json', 'Output as JSON')
    .option('-o, --output <file>', 'Write output to file')
    .option('--verbose', 'Show detailed output for each page')
    .action(async (url, options) => {
    // Parse an integer CLI option and reject values that are not usable,
    // instead of silently handing NaN to the scanner.
    const parseIntOption = (flag, raw, min) => {
        const value = Number.parseInt(raw, 10);
        if (Number.isNaN(value) || (min !== undefined && value < min)) {
            throw new Error(`${flag} expects ${min === undefined ? 'an integer' : `an integer >= ${min}`} (received "${raw}")`);
        }
        return value;
    };
    // Write the serialized report to disk; fs is loaded lazily, as elsewhere in this file.
    const writeReportFile = async (file, contents) => {
        const fs = await Promise.resolve().then(() => __importStar(require('fs')));
        fs.writeFileSync(file, contents);
    };
    try {
        const target = url || 'https://agentpwn.com';
        const scanner = new wild_1.WildScanner({
            url: target,
            category: options.category,
            tier: options.tier ? parseIntOption('--tier', options.tier) : undefined,
            timeout: parseIntOption('--timeout', options.timeout || '15000', 0),
            delay: parseIntOption('--delay', options.delay || '500', 0),
            verbose: options.verbose || false,
            json: options.json || false,
        });
        if (!options.json) {
            console.log(`\n${colors.cyan}HackMyAgent Wild Scanner${colors.reset}`);
            console.log(`${'━'.repeat(50)}\n`);
            console.log(`Target: ${target}`);
            if (options.category)
                console.log(`Category: ${options.category}`);
            if (options.tier)
                console.log(`Tier: ${options.tier}`);
            console.log('');
        }
        const report = await scanner.scan();
        const serialized = JSON.stringify(report, null, 2);
        if (options.json) {
            if (options.output) {
                await writeReportFile(options.output, serialized);
                // Status goes to stderr so stdout stays free of non-JSON text in --json mode.
                process.stderr.write(`Report written to ${options.output}\n`);
            }
            else {
                console.log(serialized);
            }
        }
        else {
            printWildReport(report);
            if (options.output) {
                await writeReportFile(options.output, serialized);
                console.log(`\nJSON report written to ${options.output}`);
            }
        }
        // Signal failure when resilience is poor. Setting exitCode instead of
        // calling process.exit() lets pending stdout writes (e.g. a large
        // JSON report sent to a pipe) finish before the process ends.
        if (report.resilienceRating === 'critical' || report.resilienceRating === 'poor') {
            process.exitCode = 1;
        }
    }
    catch (error) {
        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        // Hard exit on failure so an aborted scan cannot keep the process alive.
        process.exit(1);
    }
});
5430
/**
 * Print the human-readable wild scan report to stdout.
 *
 * Sections, in order: file-level fetch results, per-category page/payload
 * counts (categories sorted by name), injection surfaces (sorted by count,
 * highest first), the colored resilience score with summary statistics, and
 * a closing usage note.
 *
 * @param {object} report - Scan report. Fields read here: fileFetches,
 *   pagesScanned, summary (byCategory, bySurface, totalPayloads,
 *   callbackPages, canaryPages, maxTier), wildResilienceScore,
 *   resilienceRating, and duration (divided by 1000 and shown as seconds).
 * @returns {void}
 */
function printWildReport(report) {
    const dimmed = (text) => `${colors.dim}${text}${colors.reset}`;
    const printStat = (label, value) => {
        console.log(`${dimmed(`${label}:`)} ${value}`);
    };
    // File fetches
    console.log(dimmed('File-Level Attack Surfaces'));
    for (const fetched of report.fileFetches) {
        let verdict;
        if (fetched.hasPayload) {
            verdict = `${colors.red}PAYLOAD FOUND${colors.reset}`;
        }
        else {
            verdict = `${colors.green}clean${colors.reset}`;
        }
        console.log(` ${fetched.file}: ${fetched.statusCode} [${verdict}]`);
        if (fetched.payloadExcerpt) {
            console.log(` ${dimmed(fetched.payloadExcerpt)}`);
        }
    }
    // Page results by category
    console.log(`\n${dimmed(`Attack Pages (${report.pagesScanned} scanned)`)}`);
    Object.keys(report.summary.byCategory).sort().forEach((category) => {
        const { pages, payloads } = report.summary.byCategory[category];
        console.log(` ${category}: ${pages} pages, ${payloads} payloads`);
    });
    // Injection surfaces, most frequent first
    console.log(`\n${dimmed('Injection Surfaces Detected')}`);
    const rankedSurfaces = Object.entries(report.summary.bySurface)
        .sort(([, left], [, right]) => right - left);
    rankedSurfaces.forEach(([surface, count]) => {
        console.log(` ${surface}: ${count}`);
    });
    // Score: green from 60, yellow from 40, red below that
    const score = report.wildResilienceScore;
    let scoreColor = colors.red;
    if (score >= 60) {
        scoreColor = colors.green;
    }
    else if (score >= 40) {
        scoreColor = colors.yellow;
    }
    console.log(`\n${'━'.repeat(50)}`);
    console.log(`\n${dimmed('Wild Resilience Score:')} ${scoreColor}${score}/100 (${report.resilienceRating})${colors.reset}`);
    printStat('Pages Scanned', report.pagesScanned);
    printStat('Total Payloads', report.summary.totalPayloads);
    printStat('Callback Pages', report.summary.callbackPages);
    printStat('Canary Pages', report.summary.canaryPages);
    printStat('Max Tier', report.summary.maxTier);
    printStat('Duration', `${(report.duration / 1000).toFixed(1)}s`);
    // Closing note: the dim color opens on the first line and is reset on the third.
    console.log(`\n${colors.dim}Note: This score reflects the attack surface coverage of the target`);
    console.log(`site. To test your actual agent's resilience, use --model to pipe`);
    console.log(`page content through an LLM. For static config scanning, use:${colors.reset}`);
    console.log(` ${colors.cyan}npx hackmyagent secure${colors.reset}`);
}
5256
5474
  // create-skill: generate best-practice, secured skills from plain English
5257
5475
  program
5258
5476
  .command('create-skill')
@@ -5264,11 +5482,12 @@ program
5264
5482
  const { writeSkill } = await Promise.resolve().then(() => __importStar(require('./skills/builder.js')));
5265
5483
  console.log(`\nGenerating secured skill...\n`);
5266
5484
  const result = writeSkill({ purpose: description, name: options.name, outputDir: options.output });
5267
- console.log(`Created ${result.dirName}/`);
5485
+ const outputDir = options.output ?? result.dirName;
5486
+ console.log(`Created ${outputDir}/`);
5268
5487
  for (const file of result.filesWritten) {
5269
5488
  console.log(` ${file.split('/').pop()}`);
5270
5489
  }
5271
- console.log(`\nYour skill is ready. Verify security with: hackmyagent secure ${result.dirName}/`);
5490
+ console.log(`\nYour skill is ready. Verify security with: hackmyagent secure ${outputDir}/`);
5272
5491
  });
5273
5492
  // Self-securing: verify own integrity before running any command
5274
5493
  // A security tool that doesn't verify itself is worse than no security tool