npm - hackmyagent - Versions diffs - 0.12.1 → 0.12.3 - Mend

hackmyagent 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.md +21 -2
package/dist/.integrity-manifest.json +1 -1
package/dist/attack/types.d.ts +2 -0
package/dist/attack/types.d.ts.map +1 -1
package/dist/attack/types.js.map +1 -1
package/dist/cli.js +235 -16
package/dist/cli.js.map +1 -1
package/dist/hardening/scanner.d.ts.map +1 -1
package/dist/hardening/scanner.js +24 -5
package/dist/hardening/scanner.js.map +1 -1
package/dist/index.d.ts +2 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +5 -2
package/dist/index.js.map +1 -1
package/dist/nanomind-core/compiler/semantic-compiler.js +78 -5
package/dist/nanomind-core/compiler/semantic-compiler.js.map +1 -1
package/dist/nanomind-core/inference/tme-classifier.js +1 -1
package/dist/nanomind-core/inference/tme-classifier.js.map +1 -1
package/dist/wild/browser.d.ts +44 -0
package/dist/wild/browser.d.ts.map +1 -0
package/dist/wild/browser.js +222 -0
package/dist/wild/browser.js.map +1 -0
package/dist/wild/index.d.ts +20 -0
package/dist/wild/index.d.ts.map +1 -0
package/dist/wild/index.js +173 -0
package/dist/wild/index.js.map +1 -0
package/dist/wild/scorer.d.ts +29 -0
package/dist/wild/scorer.d.ts.map +1 -0
package/dist/wild/scorer.js +101 -0
package/dist/wild/scorer.js.map +1 -0
package/dist/wild/types.d.ts +95 -0
package/dist/wild/types.d.ts.map +1 -0
package/dist/wild/types.js +8 -0
package/dist/wild/types.js.map +1 -0
package/package.json +2 -2

package/dist/cli.js CHANGED Viewed

@@ -41,6 +41,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const commander_1 = require("commander");
 const index_1 = require("./index");
 const resolve_mcp_1 = require("./resolve-mcp");
+const wild_1 = require("./wild");
 const nemoclaw_scanner_1 = require("./hardening/nemoclaw-scanner");
 const program = new commander_1.Command();
 program.showHelpAfterError('(run with --help for usage)');
@@ -4877,14 +4878,54 @@ function trustLevelColor(level) {
         return colors.yellow;
     return colors.red;
 }
-function trustVerdictColor(verdict) {
+function normalizeTrustVerdict(verdict) { // Collapse verdict synonyms onto one canonical label ('safe', 'warning', 'blocked', 'listed'); anything else is returned unchanged.
     switch (verdict) {
+        case 'safe':
+        case 'passed': return 'safe'; // 'passed' is handled as an alias of 'safe'
+        case 'warning':
+        case 'warnings': return 'warning'; // singular and plural spellings map to the same label
+        case 'blocked':
+        case 'failed': return 'blocked'; // 'failed' is handled as an alias of 'blocked'
+        case 'listed': return 'listed';
+        default: return verdict; // pass-through: the result is whatever was passed in, including undefined or a non-string
+    }
+}
+function trustVerdictColor(verdict) { // Choose the `colors` entry used when printing a verdict; aliases are normalized first.
+    const n = normalizeTrustVerdict(verdict); // so 'passed', 'warnings' and 'failed' get the same color as their canonical label
+    switch (n) {
         case 'safe': return colors.green;
         case 'warning': return colors.yellow;
         case 'blocked': return colors.red;
+        case 'listed': return colors.cyan;
         default: return colors.dim; // any value the normalizer passes through unchanged falls back to dim
     }
 }
+function formatTrustScore(trustScore, scanStatus) { // Returns 'Not scanned' or an 'N/100' string for display; assumes trustScore is a 0..1 fraction — TODO confirm.
+    if (trustScore === 0 && (!scanStatus || scanStatus === '')) // only an exact 0 score with a falsy scanStatus counts as unscanned; the === '' test is already covered by !scanStatus
+        return 'Not scanned'; // formatTrustCheck compares against this exact string to detect the unscanned case
+    return `${Math.round(trustScore * 100)}/100`; // NOTE(review): an undefined trustScore renders as 'NaN/100' because there is no numeric guard
+}
+function formatTrustConfidence(confidence) { // Map a numeric confidence to a label, or null when there is nothing to show.
+    if (!confidence || confidence === 0) // any falsy value (undefined, null, 0, NaN) yields null; the === 0 test is already covered by !confidence
+        return null;
+    if (confidence >= 0.7) // thresholds are checked from highest to lowest
+        return 'high confidence';
+    if (confidence >= 0.4)
+        return 'moderate confidence';
+    return 'low confidence'; // every remaining truthy value, including negatives, lands here
+}
+function formatTrustScanAge(lastScannedAt) { // Describe how long ago a scan happened, or return null when no timestamp is given.
+    if (!lastScannedAt)
+        return null;
+    const days = Math.floor((Date.now() - new Date(lastScannedAt).getTime()) / (1000 * 60 * 60 * 24)); // whole 24-hour periods elapsed; the divisor is milliseconds per day
+    if (days === 0)
+        return 'today';
+    if (days === 1)
+        return '1 day ago';
+    if (days > 90) // older than 90 days is flagged; formatTrustCheck looks for the substring 'stale' to color the line
+        return `${days} days ago (stale)`;
+    return `${days} days ago`; // NOTE(review): an unparseable timestamp reaches here as 'NaN days ago', and a future timestamp as a negative count
+}
 function formatTrustCheck(answer) {
     if (!answer.found) {
         return [
@@ -4893,19 +4934,42 @@ function formatTrustCheck(answer) {
             `  ${colors.dim}Type: ${answer.packageType || 'unknown'}${colors.reset}`,
             `  ${colors.dim}Status: Not found in registry${colors.reset}`,
             '',
+            '  To scan it locally:',
+            `    ${colors.cyan}ai-trust check ${answer.name} --scan-if-missing${colors.reset}`,
+            '',
+            '  Or scan your full project:',
+            `    ${colors.cyan}npx hackmyagent secure .${colors.reset}`,
+            '',
         ].join('\n');
     }
+    const normalized = normalizeTrustVerdict(answer.verdict);
     const vc = trustVerdictColor(answer.verdict);
     const tc = trustLevelColor(answer.trustLevel);
+    const scoreDisplay = formatTrustScore(answer.trustScore, answer.scanStatus);
+    const isUnscanned = scoreDisplay === 'Not scanned';
     const lines = [
         '',
         `  ${answer.name}`,
         `  Type:           ${answer.packageType || 'unknown'}`,
-        `  Verdict:        ${vc}${answer.verdict.toUpperCase()}${colors.reset}`,
+        `  Verdict:        ${vc}${normalized.toUpperCase()}${colors.reset}`,
         `  Trust Level:    ${tc}${trustLevelLabel(answer.trustLevel)}${colors.reset} (${answer.trustLevel}/4)`,
-        `  Trust Score:    ${Math.round(answer.trustScore * 100)}/100`,
-        `  Scan Status:    ${answer.scanStatus || 'unknown'}`,
+        `  Trust Score:    ${isUnscanned ? colors.dim + scoreDisplay + colors.reset : scoreDisplay}`,
     ];
+    const conf = formatTrustConfidence(answer.confidence);
+    if (conf)
+        lines.push(`  Confidence:     ${conf}`);
+    const scanAge = formatTrustScanAge(answer.lastScannedAt);
+    if (scanAge) {
+        lines.push(`  Last Scanned:   ${scanAge.includes('stale') ? colors.yellow + scanAge + colors.reset : scanAge}`);
+    }
+    else if (!isUnscanned) {
+        lines.push(`  Scan Status:    ${answer.scanStatus || 'unknown'}`);
+    }
+    if (isUnscanned) {
+        lines.push('');
+        lines.push(`  ${colors.yellow}This package has not been security-scanned.${colors.reset}`);
+        lines.push(`  ${colors.yellow}Trust level reflects registry listing only.${colors.reset}`);
+    }
     if (answer.dependencies && answer.dependencies.totalDeps > 0) {
         const deps = answer.dependencies;
         lines.push('');
@@ -4922,7 +4986,7 @@ function formatTrustBatch(response, minTrust) {
     lines.push('');
     lines.push(`  Trust Audit: ${response.meta.total} packages queried, ${response.meta.found} found, ${response.meta.notFound} not found`);
     lines.push('');
-    const nameW = 40, typeW = 14, verdictW = 10, levelW = 12, scoreW = 8, scanW = 10;
+    const nameW = 40, typeW = 14, verdictW = 10, levelW = 12, scoreW = 14, scanW = 10;
     lines.push('  ' +
         'PACKAGE'.padEnd(nameW) +
         'TYPE'.padEnd(typeW) +
@@ -4932,17 +4996,29 @@ function formatTrustBatch(response, minTrust) {
         'SCAN'.padEnd(scanW));
     lines.push('  ' + '-'.repeat(nameW + typeW + verdictW + levelW + scoreW + scanW));
     for (const result of response.results) {
-        const vc = trustVerdictColor(result.verdict);
-        const tc = trustLevelColor(result.trustLevel);
         const name = result.name.length > nameW - 2
             ? result.name.substring(0, nameW - 5) + '...'
             : result.name;
+        if (!result.found) {
+            lines.push('  ' +
+                name.padEnd(nameW) +
+                '-'.padEnd(typeW) +
+                colors.dim + 'NO DATA'.padEnd(verdictW) + colors.reset +
+                colors.dim + '-'.padEnd(levelW) + colors.reset +
+                '-'.padEnd(scoreW) +
+                '-'.padEnd(scanW));
+            continue;
+        }
+        const normalized = normalizeTrustVerdict(result.verdict);
+        const vc = trustVerdictColor(result.verdict);
+        const tc = trustLevelColor(result.trustLevel);
+        const scoreDisplay = formatTrustScore(result.trustScore, result.scanStatus);
         lines.push('  ' +
             name.padEnd(nameW) +
             (result.packageType || '-').padEnd(typeW) +
-            vc + result.verdict.toUpperCase().padEnd(verdictW) + colors.reset +
+            vc + normalized.toUpperCase().padEnd(verdictW) + colors.reset +
             tc + trustLevelLabel(result.trustLevel).padEnd(levelW) + colors.reset +
-            (result.found ? `${Math.round(result.trustScore * 100)}/100` : '-').padEnd(scoreW) +
+            scoreDisplay.padEnd(scoreW) +
             (result.scanStatus || '-').padEnd(scanW));
     }
     const belowThreshold = response.results.filter((r) => r.found && r.trustLevel < minTrust);
@@ -4955,14 +5031,24 @@ function formatTrustBatch(response, minTrust) {
         }
     }
     if (notFound.length > 0) {
-        lines.push(`  ${colors.dim}[?] ${notFound.length} package(s) not found in registry:${colors.reset}`);
+        lines.push(`  ${colors.yellow}[?] ${notFound.length} package(s) not found in registry (no trust data):${colors.reset}`);
         for (const pkg of notFound) {
-            lines.push(`  ${colors.dim}    - ${pkg.name}${colors.reset}`);
+            lines.push(`  ${colors.yellow}    - ${pkg.name}${colors.reset}`);
         }
     }
     if (belowThreshold.length === 0 && notFound.length === 0) {
         lines.push(`  ${colors.green}All ${response.meta.found} packages meet minimum trust level ${minTrust}.${colors.reset}`);
     }
+    // Next steps
+    lines.push('');
+    if (notFound.length > 0) {
+        lines.push(`  ${colors.dim}Scan unknown packages: ai-trust audit <file> --scan-missing${colors.reset}`);
+        lines.push(`  ${colors.dim}Or individually: ai-trust check <name> --scan-if-missing${colors.reset}`);
+    }
+    if (belowThreshold.length > 0) {
+        lines.push(`  ${colors.dim}Inspect flagged packages: ai-trust check <name>${colors.reset}`);
+    }
+    lines.push(`  ${colors.dim}Full project security scan: npx hackmyagent secure .${colors.reset}`);
     lines.push('');
     return lines.join('\n');
 }
@@ -5040,7 +5126,7 @@ Examples:
     .option('-t, --type <type>', 'Package type (mcp_server, a2a_agent, ai_tool, etc.)')
     .option('--audit <file>', 'Audit a dependency file (package.json or requirements.txt)')
     .option('--batch <names...>', 'Batch trust lookup for multiple packages')
-    .option('--min-trust <level>', 'Minimum trust level threshold (0-4)', '3')
+    .option('--min-trust <level>', 'Minimum trust level threshold (0-4)', '2')
     .option('--registry-url <url>', 'Registry base URL', validateRegistryUrl(REGISTRY_DEFAULT_URL))
     .option('--json', 'Output as JSON')
     .action(async (packageName, opts) => {
@@ -5074,7 +5160,8 @@ Examples:
                 process.stdout.write(formatTrustBatch(response, minTrust));
             }
             const belowThreshold = response.results.some((r) => r.found && r.trustLevel < minTrust);
-            if (belowThreshold)
+            const hasNotFound = response.results.some((r) => !r.found);
+            if (belowThreshold || hasNotFound)
                 process.exitCode = 1;
             return;
         }
@@ -5096,7 +5183,8 @@ Examples:
                 process.stdout.write(formatTrustBatch(response, minTrust));
             }
             const belowThreshold = response.results.some((r) => r.found && r.trustLevel < minTrust);
-            if (belowThreshold)
+            const hasNotFound = response.results.some((r) => !r.found);
+            if (belowThreshold || hasNotFound)
                 process.exitCode = 1;
             return;
         }
@@ -5253,6 +5341,136 @@ program
         console.log(`\n${trainingCount} training samples exported to NanoMind corpus.`);
     }
 });
+// wild: test AI agent resilience against real-world web-based attacks. Registers the `wild` subcommand: builds a WildScanner from the CLI options, awaits scan(), prints or writes the report, and exits with code 1 on a 'critical'/'poor' rating or on any thrown error.
+program
+    .command('wild')
+    .description(`Test AI agent resilience in the wild
+Fetches pages from AgentPwn (agentpwn.com) and analyzes hidden injection
+payloads that AI agents encounter when browsing the web. Reports which
+attack surfaces exist and computes a wild resilience score.
+Attack categories (11):
+  prompt-injection, jailbreak, data-exfiltration, capability-abuse,
+  context-manipulation, mcp-exploitation, a2a-attack,
+  memory-weaponization, context-window, supply-chain, tool-shadow
+Injection surfaces detected:
+  html-comment, invisible-span, json-ld, meta-tag, http-header,
+  aria-label, image-alt, unicode-stego
+Also tests: robots.txt, llms.txt, sitemap.xml for embedded payloads
+Examples:
+  $ hackmyagent wild
+  $ hackmyagent wild https://agentpwn.com
+  $ hackmyagent wild --category prompt-injection
+  $ hackmyagent wild --tier 5
+  $ hackmyagent wild --json
+  $ hackmyagent wild -v -o report.json`)
+    .argument('[url]', 'Target URL to scan', 'https://agentpwn.com') // the same default URL is repeated in the `url || ...` fallbacks below
+    .option('-c, --category <category>', 'Filter by attack category')
+    .option('-t, --tier <tier>', 'Filter by specific difficulty tier')
+    .option('--timeout <ms>', 'Request timeout in milliseconds', '15000') // defaults are strings; they are converted with parseInt in the action
+    .option('--delay <ms>', 'Delay between requests in milliseconds', '500')
+    .option('--json', 'Output as JSON')
+    .option('-o, --output <file>', 'Write output to file')
+    .option('--verbose', 'Show detailed output for each page') // NOTE(review): the help example above uses `-v`, but no `-v` short flag is registered on this command here — confirm
+    .action(async (url, options) => {
+    try {
+        const scanner = new wild_1.WildScanner({
+            url: url || 'https://agentpwn.com',
+            category: options.category,
+            tier: options.tier ? parseInt(options.tier, 10) : undefined, // NOTE(review): non-numeric input becomes NaN; none of the three parseInt results is validated here
+            timeout: parseInt(options.timeout || '15000', 10),
+            delay: parseInt(options.delay || '500', 10),
+            verbose: options.verbose || false,
+            json: options.json || false,
+        });
+        if (!options.json) { // the banner is suppressed in JSON mode so stdout carries only the JSON document
+            console.log(`\n${colors.cyan}HackMyAgent Wild Scanner${colors.reset}`);
+            console.log(`${'━'.repeat(50)}\n`);
+            console.log(`Target: ${url || 'https://agentpwn.com'}`);
+            if (options.category)
+                console.log(`Category: ${options.category}`);
+            if (options.tier)
+                console.log(`Tier: ${options.tier}`);
+            console.log('');
+        }
+        const report = await scanner.scan();
+        if (options.json) {
+            const output = JSON.stringify(report, null, 2);
+            if (options.output) {
+                const fs = await Promise.resolve().then(() => __importStar(require('fs'))); // fs is loaded lazily, only when a file is actually written
+                fs.writeFileSync(options.output, output);
+                process.stderr.write(`Report written to ${options.output}\n`); // the status message goes to stderr, not stdout
+            }
+            else {
+                console.log(output);
+            }
+        }
+        else {
+            printWildReport(report);
+            if (options.output) { // without --json, -o still saves the report as JSON in addition to the printed summary
+                const fs = await Promise.resolve().then(() => __importStar(require('fs')));
+                fs.writeFileSync(options.output, JSON.stringify(report, null, 2));
+                console.log(`\nJSON report written to ${options.output}`);
+            }
+        }
+        // Exit with non-zero if resilience is poor
+        if (report.resilienceRating === 'critical' || report.resilienceRating === 'poor') {
+            process.exit(1); // terminates the process immediately rather than setting process.exitCode
+        }
+    }
+    catch (error) {
+        console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`); // a thrown non-Error value is reported only as 'Unknown error'
+        process.exit(1);
+    }
+});
+function printWildReport(report) { // Print a human-readable summary of a wild-scan report to stdout via console.log; returns nothing.
+    // File fetches
+    console.log(`${colors.dim}File-Level Attack Surfaces${colors.reset}`);
+    for (const f of report.fileFetches) {
+        const status = f.hasPayload
+            ? `${colors.red}PAYLOAD FOUND${colors.reset}`
+            : `${colors.green}clean${colors.reset}`;
+        console.log(`  ${f.file}: ${f.statusCode} [${status}]`);
+        if (f.payloadExcerpt) { // the excerpt line is printed only when one is present
+            console.log(`    ${colors.dim}${f.payloadExcerpt}${colors.reset}`);
+        }
+    }
+    // Page results by category
+    console.log(`\n${colors.dim}Attack Pages (${report.pagesScanned} scanned)${colors.reset}`);
+    const categories = Object.keys(report.summary.byCategory).sort(); // category names in default string sort order
+    for (const cat of categories) {
+        const stats = report.summary.byCategory[cat];
+        console.log(`  ${cat}: ${stats.pages} pages, ${stats.payloads} payloads`);
+    }
+    // Injection surfaces
+    console.log(`\n${colors.dim}Injection Surfaces Detected${colors.reset}`);
+    const surfaces = Object.entries(report.summary.bySurface).sort((a, b) => b[1] - a[1]); // highest count first
+    for (const [surface, count] of surfaces) {
+        console.log(`  ${surface}: ${count}`);
+    }
+    // Score
+    const scoreColor = report.wildResilienceScore >= 60 // green at 60 and above, yellow from 40 to 59, red below 40
+        ? colors.green
+        : report.wildResilienceScore >= 40
+            ? colors.yellow
+            : colors.red;
+    console.log(`\n${'━'.repeat(50)}`);
+    console.log(`\n${colors.dim}Wild Resilience Score:${colors.reset} ${scoreColor}${report.wildResilienceScore}/100 (${report.resilienceRating})${colors.reset}`);
+    console.log(`${colors.dim}Pages Scanned:${colors.reset} ${report.pagesScanned}`);
+    console.log(`${colors.dim}Total Payloads:${colors.reset} ${report.summary.totalPayloads}`);
+    console.log(`${colors.dim}Callback Pages:${colors.reset} ${report.summary.callbackPages}`);
+    console.log(`${colors.dim}Canary Pages:${colors.reset} ${report.summary.canaryPages}`);
+    console.log(`${colors.dim}Max Tier:${colors.reset} ${report.summary.maxTier}`);
+    console.log(`${colors.dim}Duration:${colors.reset} ${(report.duration / 1000).toFixed(1)}s`); // presumably report.duration is in milliseconds — TODO confirm
+    console.log(`\n${colors.dim}Note: This score reflects the attack surface coverage of the target`); // the dim color opened here stays on until the reset two lines below
+    console.log(`site. To test your actual agent's resilience, use --model to pipe`); // NOTE(review): --model is not among the options registered for the wild command in this file — confirm it exists
+    console.log(`page content through an LLM. For static config scanning, use:${colors.reset}`);
+    console.log(`  ${colors.cyan}npx hackmyagent secure${colors.reset}`);
+}
 // create-skill: generate best-practice, secured skills from plain English
 program
     .command('create-skill')
@@ -5264,11 +5482,12 @@ program
     const { writeSkill } = await Promise.resolve().then(() => __importStar(require('./skills/builder.js')));
     console.log(`\nGenerating secured skill...\n`);
     const result = writeSkill({ purpose: description, name: options.name, outputDir: options.output });
-    console.log(`Created ${result.dirName}/`);
+    const outputDir = options.output ?? result.dirName;
+    console.log(`Created ${outputDir}/`);
     for (const file of result.filesWritten) {
         console.log(`  ${file.split('/').pop()}`);
     }
-    console.log(`\nYour skill is ready. Verify security with: hackmyagent secure ${result.dirName}/`);
+    console.log(`\nYour skill is ready. Verify security with: hackmyagent secure ${outputDir}/`);
 });
 // Self-securing: verify own integrity before running any command
 // A security tool that doesn't verify itself is worse than no security tool