npm - hackmyagent - Versions diffs - 0.9.3 → 0.9.5 - Mend

hackmyagent 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +65 -0
package/dist/cli.js +76 -27
package/dist/cli.js.map +1 -1
package/dist/index.d.ts +3 -3
package/dist/index.d.ts.map +1 -1
package/dist/index.js +3 -2
package/dist/index.js.map +1 -1
package/dist/soul/index.d.ts +2 -2
package/dist/soul/index.d.ts.map +1 -1
package/dist/soul/index.js +2 -1
package/dist/soul/index.js.map +1 -1
package/dist/soul/scanner.d.ts +39 -3
package/dist/soul/scanner.d.ts.map +1 -1
package/dist/soul/scanner.js +350 -106
package/dist/soul/scanner.js.map +1 -1
package/dist/soul/templates.d.ts +4 -0
package/dist/soul/templates.d.ts.map +1 -1
package/dist/soul/templates.js +217 -0
package/dist/soul/templates.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -233,6 +233,71 @@ Output formats: `text`, `json`, `sarif`, `html`, `asp` (Agent Security Profile).
 ---
+### `hackmyagent secure -b oasb-2`
+Run OASB-2 composite assessment: infrastructure security (OASB-1, 50%) combined with behavioral governance (scan-soul, 50%) for a unified score.
+```bash
+hackmyagent secure -b oasb-2              # full composite assessment
+hackmyagent secure -b oasb-2 --json       # JSON output
+hackmyagent secure -b oasb-2 --fail-below 60  # CI gate
+```
+Output shows infrastructure score, governance score, composite score, and conformance level. Requires a SOUL.md (or equivalent governance file) in the scanned directory.
+---
+### `hackmyagent scan-soul`
+Scan a SOUL.md (or equivalent governance file) against OASB v2 behavioral governance controls. Checks 8 domains and up to 68 controls depending on agent tier.
+```bash
+hackmyagent scan-soul                     # scan current directory
+hackmyagent scan-soul ./my-agent          # scan specific directory
+hackmyagent scan-soul --tier MULTI-AGENT  # override tier detection
+hackmyagent scan-soul --json              # JSON output for CI
+hackmyagent scan-soul --verbose           # show individual control results
+hackmyagent scan-soul --deep              # LLM semantic analysis (requires the claude CLI or ANTHROPIC_API_KEY)
+hackmyagent scan-soul --fail-below 60     # exit 1 if score below threshold
+```
+Auto-detects governance file in priority order: `SOUL.md` > `system-prompt.md` > `CLAUDE.md` > `.cursorrules` > `agent-config.yaml` and others.
+Tier-to-control counts:
+| Tier | Controls | Use case |
+|------|----------|----------|
+| `BASIC` | 27 | Chatbots with no tool access |
+| `TOOL-USING` | 54 | Agents with tool/function calling |
+| `AGENTIC` | 65 | Autonomous multi-step agents |
+| `MULTI-AGENT` | 68 | Orchestrators and sub-agent systems |
+Conformance levels:
+| Level | Criteria |
+|-------|----------|
+| `none` | A critical control (SOUL-IH-003 or SOUL-HB-001) is missing — grade capped at C |
+| `essential` | All critical controls pass |
+| `standard` | All critical + high controls pass, score ≥ 60 |
+| `hardened` | All controls pass, score ≥ 75 |
+---
+### `hackmyagent harden-soul`
+Generate a SOUL.md, or add missing governance sections to an existing one. Existing content is always preserved.
+```bash
+hackmyagent harden-soul                   # add missing sections
+hackmyagent harden-soul --dry-run         # preview without writing
+hackmyagent harden-soul ./my-agent        # target specific directory
+hackmyagent harden-soul --json            # JSON output
+```
+Generates template content for each missing OASB v2 governance domain. Run `scan-soul` afterwards to verify that coverage improved.
+---
 ### `hackmyagent fix-all`
 Run all security plugins in sequence: credential vault, file signing, skill guard. Applies fixes and generates a report.

package/dist/cli.js CHANGED Viewed

@@ -3591,6 +3591,35 @@ function gradeColor(grade) {
         case 'F': return colors.brightRed;
     }
 }
+function levelColor(level) {
+    switch (level) {
+        case 'hardened': return colors.green;
+        case 'standard': return colors.green;
+        case 'developing': return colors.yellow;
+        case 'initial': return colors.cyan;
+        case 'not-started': return colors.reset;
+    }
+}
+function levelLabel(level) {
+    switch (level) {
+        case 'hardened': return 'Hardened';
+        case 'standard': return 'Standard';
+        case 'developing': return 'Developing';
+        case 'initial': return 'Initial';
+        case 'not-started': return 'Not Started';
+    }
+}
+/**
+ * Detect how the CLI was invoked to suggest correct command prefix.
+ */
+function getCommandPrefix() {
+    const execPath = process.argv[1] || '';
+    if (execPath.includes('npx') || execPath.includes('.npm/_npx') ||
+        execPath.includes('node_modules/.bin')) {
+        return 'npx hackmyagent';
+    }
+    return 'hackmyagent';
+}
 // Domain percentage bar for text output
 function domainBar(pct) {
     if (pct >= 80)
@@ -3613,31 +3642,29 @@ Searches for governance files in priority order:
   > .github/copilot-instructions.md > CLAUDE.md > .clinerules
   > instructions.md > constitution.md > agent-config.yaml
-Domains checked (OASB v2):
-  7. Trust Hierarchy         8. Capability Boundaries
-  9. Injection Hardening    10. Data Handling
-  11. Hardcoded Behaviors   12. Agentic Safety
-  13. Honesty & Transparency 14. Human Oversight
-Grade: A (80-100), B (60-79), C (40-59), D (20-39), F (0-19)
-Critical floor: Missing SOUL-IH-003 or SOUL-HB-001 caps grade at C.
+Agent profiles filter domains by agent purpose:
+  conversational:  Injection, Hardcoded, Honesty
+  code-assistant:  + Trust, Data
+  tool-agent:      + Capability, Oversight
+  autonomous:      + Agentic Safety
+  orchestrator:    All 8 domains
-Conformance levels:
-  none:      one or more critical controls missing
-  essential: all critical controls pass, score < 60
-  standard:  all critical controls pass, score >= 60
-  hardened:  all critical controls pass, score >= 75
+Maturity levels:
+  Hardened (80+), Standard (60-79), Developing (40-59),
+  Initial (1-39), Not Started (0)
 Examples:
   $ hackmyagent scan-soul                    Scan current directory
   $ hackmyagent scan-soul ./my-agent         Scan specific directory
   $ hackmyagent scan-soul --json             Machine-readable output
   $ hackmyagent scan-soul --verbose          Show all controls
+  $ hackmyagent scan-soul --profile conversational  Override profile
   $ hackmyagent scan-soul --deep             Enable LLM semantic analysis`)
     .argument('[directory]', 'Directory to scan (defaults to current directory)', '.')
     .option('--json', 'Output as JSON')
     .option('-v, --verbose', 'Show individual control results')
     .option('--tier <tier>', 'Override agent tier detection (BASIC, TOOL-USING, AGENTIC, MULTI-AGENT)')
+    .option('--profile <profile>', 'Override agent profile (conversational, code-assistant, tool-agent, autonomous, orchestrator, custom)')
     .option('--fail-below <score>', 'Exit 1 if score below threshold (0-100)')
     .option('--deep', 'Enable LLM semantic analysis for ambiguous controls (requires claude CLI or ANTHROPIC_API_KEY)')
     .action(async (directory, options) => {
@@ -3647,10 +3674,12 @@ Examples:
             process.stderr.write(`Error: Directory '${targetDir}' does not exist.\n`);
             process.exit(1);
         }
+        const prefix = getCommandPrefix();
         const scanner = new index_1.SoulScanner();
         const result = await scanner.scanSoul(targetDir, {
             verbose: options.verbose,
             tier: options.tier,
+            profile: options.profile,
             deepAnalysis: options.deep,
         });
         // JSON output
@@ -3675,9 +3704,23 @@ Examples:
             process.stdout.write(`File: ${colors.red}No governance file found${colors.reset}\n`);
             process.stdout.write(`  Searched: ${['SOUL.md', 'system-prompt.md', 'CLAUDE.md', '...'].join(', ')}\n`);
         }
-        process.stdout.write(`Agent Tier: ${result.agentTier} (auto-detected)\n\n`);
+        const tierLabel = result.tierForced ? `${result.agentTier} (--tier flag)` : `${result.agentTier} (auto-detected)`;
+        const profileLabel = result.profileForced ? `${result.agentProfile} (--profile flag)` : `${result.agentProfile} (auto-detected)`;
+        process.stdout.write(`Agent Tier: ${tierLabel}\n`);
+        process.stdout.write(`Agent Profile: ${profileLabel}\n`);
+        if (result.skippedDomains.length > 0) {
+            process.stdout.write(`Skipped Domains: ${result.skippedDomains.join(', ')}\n`);
+        }
+        process.stdout.write('\n');
         process.stdout.write('Domain Scores:\n');
         for (const domain of result.domains) {
+            if (domain.skippedByProfile) {
+                if (options.verbose) {
+                    const label = (domain.domain + ':').padEnd(26);
+                    process.stdout.write(`  ${label}${colors.reset}--  (skipped by profile)${colors.reset}\n`);
+                }
+                continue;
+            }
             const pctColor = domainBar(domain.percentage);
             const label = (domain.domain + ':').padEnd(26);
             process.stdout.write(`  ${label}${pctColor}${domain.passed}/${domain.total}  (${domain.percentage}%)${colors.reset}\n`);
@@ -3692,9 +3735,9 @@ Examples:
             }
         }
         process.stdout.write('\n');
-        // Score and grade
-        const gc = gradeColor(result.grade);
-        process.stdout.write(`Governance Score: ${gc}${result.score}/100 (Grade: ${result.grade})${colors.reset}\n`);
+        // Score and level (progress-oriented)
+        const lc = levelColor(result.level);
+        process.stdout.write(`Governance Score: ${lc}${result.score}/100 [${levelLabel(result.level)}]${colors.reset}\n`);
         // Conformance level
         if (result.conformance === 'none') {
             process.stdout.write(`Conformance: ${colors.red}NONE${colors.reset} -- critical control missing (${result.criticalMissing.join(', ')})\n`);
@@ -3706,17 +3749,19 @@ Examples:
             process.stdout.write(`${colors.yellow}Critical Floor: APPLIED${colors.reset} (${result.criticalMissing.join(', ')} missing)\n`);
         }
         // Deep analysis summary
-        if (result.deepAnalysisResults && result.deepAnalysisResults.length > 0) {
+        if (result.deepAnalysisAvailable === false) {
+            process.stdout.write(`${colors.yellow}Deep Analysis: unavailable${colors.reset} -- set ANTHROPIC_API_KEY or install the claude CLI\n`);
+        }
+        else if (result.deepAnalysisResults && result.deepAnalysisResults.length > 0) {
             const llmUpgraded = result.deepAnalysisResults.filter((e) => e.llmPassed).length;
-            if (llmUpgraded > 0) {
-                process.stdout.write(`Deep Analysis: ${llmUpgraded} control${llmUpgraded === 1 ? '' : 's'} upgraded by LLM semantic analysis\n`);
-            }
+            process.stdout.write(`Deep Analysis: ${llmUpgraded} control${llmUpgraded === 1 ? '' : 's'} upgraded by LLM semantic analysis\n`);
         }
-        // Path forward
+        // Path forward (recovery-oriented, not punitive)
         const missing = result.totalControls - result.totalPassed;
         if (missing > 0) {
-            process.stdout.write(`\n${missing} control${missing === 1 ? '' : 's'} missing.`);
-            process.stdout.write(` Run '${colors.cyan}hackmyagent harden-soul${colors.reset}' to remediate.\n`);
+            const recoverable = Math.min(100 - result.score, 100);
+            process.stdout.write(`\n  Path forward: +${recoverable} recoverable by addressing ${missing} control${missing === 1 ? '' : 's'}`);
+            process.stdout.write(`\n  Run '${colors.cyan}${prefix} harden-soul${colors.reset}' to remediate.\n`);
         }
         else {
             process.stdout.write(`\n${colors.green}All ${result.totalControls} governance controls covered.${colors.reset}\n`);
@@ -3742,6 +3787,8 @@ program
 Runs scan-soul internally to identify missing controls, then generates
 template content for each missing domain. Existing content is preserved.
+Supports iterative hardening: if a domain heading exists but controls
+fail within it, appends targeted remediation for those controls.
 Modes:
   Default:    Append missing sections to SOUL.md (or create it)
@@ -3754,6 +3801,7 @@ Examples:
   $ hackmyagent harden-soul --json           Machine-readable output`)
     .argument('[directory]', 'Directory to harden (defaults to current directory)', '.')
     .option('--dry-run', 'Preview changes without modifying files')
+    .option('--profile <profile>', 'Override agent profile (conversational, code-assistant, tool-agent, autonomous, orchestrator, custom)')
     .option('--json', 'Output as JSON')
     .action(async (directory, options) => {
     try {
@@ -3762,8 +3810,9 @@ Examples:
             process.stderr.write(`Error: Directory '${targetDir}' does not exist.\n`);
             process.exit(1);
         }
+        const prefix = getCommandPrefix();
         const scanner = new index_1.SoulScanner();
-        const result = await scanner.hardenSoul(targetDir, { dryRun: options.dryRun });
+        const result = await scanner.hardenSoul(targetDir, { dryRun: options.dryRun, profile: options.profile });
         // JSON output
         if (options.json) {
             // Exclude full content from JSON to keep it concise
@@ -3780,7 +3829,7 @@ Examples:
         // Text output
         if (result.sectionsAdded.length === 0) {
             process.stdout.write(`\n${colors.green}All governance domains already have sections in ${result.file}.${colors.reset}\n`);
-            process.stdout.write(`Run 'hackmyagent scan-soul --verbose' to see individual control coverage.\n\n`);
+            process.stdout.write(`Run '${prefix} scan-soul --verbose' to see individual control coverage.\n\n`);
             return;
         }
         if (result.dryRun) {
@@ -3815,7 +3864,7 @@ Examples:
                 process.stdout.write(`  ${colors.green}+${colors.reset} ${section}\n`);
             }
             process.stdout.write(`Controls covered: +${result.controlsAdded}\n\n`);
-            process.stdout.write(`Run '${colors.cyan}hackmyagent scan-soul${colors.reset}' to verify coverage.\n\n`);
+            process.stdout.write(`Run '${colors.cyan}${prefix} scan-soul${colors.reset}' to verify coverage.\n\n`);
         }
     }
     catch (error) {