npm - pentesting - Versions diffs - 0.51.1 → 0.52.1 - Mend

pentesting 0.51.1 → 0.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/main.js +191 -21
package/package.json +3 -3

package/dist/main.js CHANGED Viewed

@@ -180,11 +180,15 @@ var MEMORY_LIMITS = {
   WORKING_MEMORY_MAX_ENTRIES: 20,
   /** Maximum events in episodic memory (session timeline) */
   EPISODIC_MEMORY_MAX_EVENTS: 100,
-  /** Consecutive failures before SWITCH VECTOR warning */
+  /** Consecutive failures before prompting to change approach or parameters */
   CONSECUTIVE_FAIL_THRESHOLD: 3,
   /** Maximum learned techniques in DynamicTechniqueLibrary */
   DYNAMIC_TECHNIQUES_MAX: 50,
-  /** Number of leading words to match for duplicate command detection */
+  /** Confidence decay per failure for DynamicTechniqueLibrary learned techniques */
+  TECHNIQUE_FAILURE_DECAY: 30,
+  /** Auto-prune threshold: techniques below this confidence are discarded */
+  TECHNIQUE_PRUNE_THRESHOLD: 10,
+  /** @deprecated Superseded by fingerprint-based matching in extractFingerprint(). Kept for reference. */
   COMMAND_MATCH_WORDS: 3,
   /** Maximum unverified techniques to show in prompt */
   PROMPT_UNVERIFIED_TECHNIQUES: 10,
@@ -345,7 +349,7 @@ var ORPHAN_PROCESS_NAMES = [
 // src/shared/constants/agent.ts
 var APP_NAME = "Pentest AI";
-var APP_VERSION = "0.51.1";
+var APP_VERSION = "0.52.1";
 var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
 var LLM_ROLES = {
   SYSTEM: "system",
@@ -3144,6 +3148,57 @@ var AttackGraph = class {
 // src/shared/utils/agent-memory.ts
 import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync as writeFileSync4 } from "fs";
 import { join as join3 } from "path";
+/**
+ * Reduce a tool invocation to the parameters that make it a distinct attempt.
+ *
+ * WHY: two commands only count as "the same attack" when tool, target,
+ * wordlist, port and the tracked flags all agree, so every value that varies
+ * between attempts has to end up in the fingerprint.
+ *
+ * @param {string} tool - Tool name; for "run_cmd" / "run_background" the first
+ *   word of the command is used instead (unless that word starts with "-").
+ * @param {string} [command] - Raw command line; a missing value is treated as "".
+ * @returns {{tool: string, target: string, wordlist: string, flags: string, port: string}}
+ *   Fields that could not be extracted are empty strings.
+ */
+function extractFingerprint(tool, command) {
+  const cmd = command || "";
+  // Tolerate a missing tool name instead of throwing on .toLowerCase().
+  let effectiveTool = String(tool ?? "").toLowerCase();
+  if (effectiveTool === "run_cmd" || effectiveTool === "run_background") {
+    const firstWord = cmd.trim().split(/\s+/)[0];
+    if (firstWord && !firstWord.startsWith("-")) {
+      effectiveTool = firstWord.toLowerCase();
+    }
+  }
+  // Host following "://" or "@", otherwise a bare dotted-quad address.
+  const targetMatch = cmd.match(
+    /(?::\/\/|@)([\w.\-]+(?::\d+)?)|\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?\b/
+  );
+  const target = targetMatch?.[1] || targetMatch?.[2] || "";
+  // Short options first (-w/-P/-U/-L), then the long spellings.
+  const wordlistMatch = cmd.match(/(?:-[wPUL]\s+)([^\s]+)/) ?? cmd.match(/(?:--(?:wordlist|passwords?|usernames?)(?:=|\s+))([^\s]+)/i);
+  const wordlist = wordlistMatch?.[1] || "";
+  // "-p 80" / "--port=80" first; fall back to the attached form ("-p80",
+  // "-p1-1024", "-p-") so scans of different ports are not fingerprinted alike.
+  const portMatch = cmd.match(/(?:-p\s+|--port(?:=|\s+))(\S+)/) ?? cmd.match(/(?:^|\s)-p(\d[\d,\-]*|-)(?=\s|$)/);
+  const port = portMatch?.[1] || "";
+  // Global patterns: every occurrence is recorded, not just the first, so
+  // "-sV" and "-sV -sC" produce different fingerprints.
+  const flagPatterns = [
+    /--level(?:=|\s+)(\S+)/gi, // sqlmap
+    /--risk(?:=|\s+)(\S+)/gi, // sqlmap
+    /-s([VSTCAUOPMX]+)/g, // nmap scan type
+    /--script(?:=|\s+)(\S+)/gi, // nmap scripts
+    /-[el]\s+(\S+)/gi, // hydra login/password single
+    /--method(?:=|\s+)(\S+)/gi // HTTP method
+  ];
+  const flags = flagPatterns
+    .flatMap((pattern) => Array.from(cmd.matchAll(pattern), (m) => m[0].trim()))
+    .filter(Boolean)
+    .join(" ");
+  return { tool: effectiveTool, target, wordlist, flags, port };
+}
+/**
+ * Two fingerprints describe the same attempt only when every field is
+ * strictly equal; the fields are compared in the order listed.
+ */
+function fingerprintsMatch(a, b) {
+  const comparedFields = ["tool", "target", "wordlist", "flags", "port"];
+  return comparedFields.every((field) => a[field] === b[field]);
+}
+/**
+ * Render a fingerprint as one short label: the tool name followed by each
+ * non-empty field. A wordlist is shown by its last path segment only.
+ */
+function formatFingerprint(fp) {
+  const { tool, target, wordlist, port, flags } = fp;
+  const optionalParts = [
+    target && `\u2192${target}`,
+    wordlist && `[wordlist:${wordlist.split("/").pop()}]`,
+    port && `[port:${port}]`,
+    flags && `[${flags}]`
+  ];
+  return [tool, ...optionalParts.filter(Boolean)].join(" ");
+}
 var WorkingMemory = class {
   entries = [];
   maxEntries = MEMORY_LIMITS.WORKING_MEMORY_MAX_ENTRIES;
@@ -3156,20 +3211,29 @@ var WorkingMemory = class {
       context,
       importance
     });
-    if (this.entries.length > this.maxEntries) {
-      this.entries.sort((a, b) => {
-        const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
-        const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
-        return bScore - aScore;
-      });
-      this.entries = this.entries.slice(0, this.maxEntries);
-    }
+    this.pruneIfNeeded();
   }
   /**
-   * Record a failed attempt to avoid repeating it.
+   * Record a failed attempt with structured fingerprint.
+   *
+   * WHY: Fingerprint-based recording ensures that "hydra with rockyou.txt"
+   * and "hydra with darkweb2017.txt" are stored as distinct attempts.
+   * The LLM can see what parameter combinations have been tried and pick new ones.
    */
   recordFailure(tool, command, error) {
-    this.add("failure", `FAILED: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`, 0.8, { tool, command });
+    const fp = extractFingerprint(tool, command);
+    const fpLabel = formatFingerprint(fp);
+    const entry = {
+      id: `wm_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
+      timestamp: Date.now(),
+      category: "failure",
+      content: `FAILED: ${fpLabel} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`,
+      context: { tool, command },
+      importance: 0.8,
+      fingerprint: fp
+    };
+    this.entries.push(entry);
+    this.pruneIfNeeded();
   }
   /**
    * Record a successful action for reference.
@@ -3178,16 +3242,53 @@ var WorkingMemory = class {
     this.add("success", `SUCCESS: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}`, 0.6, { tool, result: result2.slice(0, DISPLAY_LIMITS.OUTPUT_SUMMARY) });
   }
   /**
-   * Check if a similar command has already failed.
+   * Check if the EXACT same attack attempt (all parameters identical) has failed.
+   *
+   * WHY: Only blocks truly duplicate attempts. Different wordlists, flags, or
+   * ports are treated as new attacks and allowed through.
    */
-  hasFailedBefore(command) {
-    const lower = command.toLowerCase().split(/\s+/).slice(0, MEMORY_LIMITS.COMMAND_MATCH_WORDS).join(" ");
+  hasFailedBefore(command, tool) {
+    const effectiveTool = tool || command.split(/\s+/)[0] || "";
+    const fp = extractFingerprint(effectiveTool, command);
     return this.entries.find(
-      (e) => e.category === "failure" && e.content.toLowerCase().includes(lower)
+      (e) => e.category === "failure" && e.fingerprint != null && fingerprintsMatch(e.fingerprint, fp)
     );
   }
   /**
-   * Get count of consecutive failures (for vector switch detection).
+   * Get all previous attempts (success & failure) for a specific tool+target vector.
+   * Returns the full history so the LLM can see what parameter combinations were tried.
+   *
+   * TODO: Wire up in tools.ts to pass vector history to strategist for smarter retries.
+   */
+  getAttemptsForVector(tool, target) {
+    const wantedTool = tool.toLowerCase();
+    const selected = [];
+    for (const entry of this.entries) {
+      const fp = entry.fingerprint;
+      let keep;
+      if (fp) {
+        // Fingerprinted entries: tool must match exactly; target is a substring
+        // test that is skipped when no target was supplied.
+        const toolOk = fp.tool === wantedTool;
+        const targetOk = !target || fp.target.includes(target);
+        keep = toolOk && targetOk;
+      } else {
+        // Entries without a fingerprint are matched on context.tool only.
+        keep = String(entry.context.tool || "").toLowerCase() === wantedTool;
+      }
+      if (keep) selected.push(entry);
+    }
+    return selected;
+  }
+  /**
+   * Internal prune helper (used by both add() and recordFailure()).
+   *
+   * When entries exceed maxEntries, keeps the maxEntries highest-scoring ones,
+   * where score = importance * 0.7 + recency * 0.3 and recency falls linearly
+   * over 6e5 ms (10 minutes) from the entry timestamp.
+   *
+   * WHY the survivors stay in insertion order: getConsecutiveVectorFailures()
+   * walks this.entries from the end and treats the last element as the most
+   * recent attempt. Sorting the array in place by score would leave the
+   * lowest-scoring entry last instead.
+   */
+  pruneIfNeeded() {
+    if (this.entries.length <= this.maxEntries) return;
+    // Sample the clock once so every entry is scored against the same instant.
+    const now = Date.now();
+    const scoreOf = (e) => e.importance * 0.7 + (1 - (now - e.timestamp) / 6e5) * 0.3;
+    const keptIndexes = new Set(
+      this.entries
+        .map((e, index) => ({ index, score: scoreOf(e) }))
+        // Ties go to the earlier entry, as a stable sort of the array itself would.
+        .sort((a, b) => b.score - a.score || a.index - b.index)
+        .slice(0, this.maxEntries)
+        .map((ranked) => ranked.index)
+    );
+    this.entries = this.entries.filter((_, index) => keptIndexes.has(index));
+  }
+  /**
+   * Get count of consecutive failures on the SAME attack vector.
+   *
+   * WHY: Switching from "hydra with wordlist A" to "hydra with wordlist B"
+   * is a legitimate new approach. The consecutive-failure counter should
+   * only trigger when every recent attempt is against the same tool+target
+   * with no variation producing success. This prevents premature vector abandonment.
    */
   getConsecutiveFailures() {
     let count = 0;
@@ -3197,6 +3298,26 @@ var WorkingMemory = class {
     }
     return count;
   }
+  /**
+   * Count the unbroken run of failures at the end of the entry list that all
+   * belong to one vector (tool, plus target when one is given).
+   *
+   * The run ends at the first entry that is not a failure, has no fingerprint,
+   * names a different tool, or whose target does not contain `target`.
+   *
+   * TODO: Wire up in tools.ts for per-vector threshold checks alongside global consecutive count.
+   */
+  getConsecutiveVectorFailures(tool, target) {
+    const wantedTool = tool.toLowerCase();
+    const onSameVector = (entry) => {
+      const fp = entry.fingerprint;
+      if (!fp || fp.tool !== wantedTool) return false;
+      return !target || fp.target.includes(target);
+    };
+    let streak = 0;
+    let idx = this.entries.length;
+    while (idx-- > 0) {
+      const entry = this.entries[idx];
+      if (entry.category !== "failure" || !onSameVector(entry)) break;
+      streak += 1;
+    }
+    return streak;
+  }
   /**
    * Format for prompt injection.
    */
@@ -3207,14 +3328,20 @@ var WorkingMemory = class {
     const insights = this.entries.filter((e) => e.category === "insight" || e.category === "discovery");
     const lines = ["<working-memory>"];
     if (failures.length > 0) {
-      lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT):`);
+      lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT EXACT SAME PARAMS):`);
       for (const f of failures.slice(-DISPLAY_LIMITS.RECENT_FAILURES)) {
-        lines.push(`  \u2717 ${f.content}`);
+        const fp = f.fingerprint;
+        if (fp) {
+          lines.push(`  \u2717 ${formatFingerprint(fp)} \u2192 ${f.content.split("\u2192").pop()?.trim() || ""}`);
+        } else {
+          lines.push(`  \u2717 ${f.content}`);
+        }
       }
+      lines.push(...this.buildAttackCoverageLines(failures));
     }
     const consecutiveFails = this.getConsecutiveFailures();
     if (consecutiveFails >= MEMORY_LIMITS.CONSECUTIVE_FAIL_THRESHOLD) {
-      lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 SWITCH ATTACK VECTOR NOW`);
+      lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 consider changing approach or parameters`);
     }
     if (successes.length > 0) {
       lines.push(`\u2705 RECENT SUCCESSES (${successes.length}):`);
@@ -3231,6 +3358,29 @@ var WorkingMemory = class {
     lines.push("</working-memory>");
     return lines.join("\n");
   }
+  /**
+   * Build the ATTACK COVERAGE section: one line per tool+target pair listing
+   * the distinct variations already tried against it.
+   *
+   * A variation is labelled by the wordlist's file name when a wordlist was
+   * used, otherwise by the flags string, otherwise "default". Failures without
+   * a fingerprint are ignored; returns [] when none carry one.
+   */
+  buildAttackCoverageLines(failures) {
+    const triedByVector = new Map();
+    for (const { fingerprint: fp } of failures) {
+      if (!fp) continue;
+      const vectorKey = `${fp.tool}\u2192${fp.target}`;
+      let label;
+      if (fp.wordlist) {
+        label = fp.wordlist.split("/").pop() || "";
+      } else {
+        label = fp.flags || "default";
+      }
+      const seen = triedByVector.get(vectorKey) || [];
+      if (!seen.includes(label)) seen.push(label);
+      triedByVector.set(vectorKey, seen);
+    }
+    if (triedByVector.size === 0) return [];
+    const out = [`  \u{1F4CA} ATTACK COVERAGE (tried variations):`];
+    triedByVector.forEach((variations, vector) => {
+      out.push(`    ${vector}: tried [${variations.join(", ")}] \u2014 try DIFFERENT params`);
+    });
+    return out;
+  }
   getEntries() {
     return [...this.entries];
   }
@@ -3467,6 +3617,25 @@ var DynamicTechniqueLibrary = class {
       }
     }
   }
+  /**
+   * Lower the confidence of every learned technique whose text contains the
+   * given substring (case-insensitive), and drop those that fall to or below
+   * the prune threshold.
+   *
+   * WHY: learned techniques can be outdated or wrong; each practical failure
+   * costs TECHNIQUE_FAILURE_DECAY confidence so bad advice is eventually
+   * discarded. Inputs shorter than 3 characters are ignored.
+   */
+  recordFailure(techniqueSubstring) {
+    if (!techniqueSubstring || techniqueSubstring.length < 3) return;
+    const needle = techniqueSubstring.toLowerCase();
+    const survivors = [];
+    for (const entry of this.techniques) {
+      if (entry.technique.toLowerCase().includes(needle)) {
+        entry.confidence -= MEMORY_LIMITS.TECHNIQUE_FAILURE_DECAY;
+        // Keep only while confidence is strictly above the threshold.
+        if (!(entry.confidence > MEMORY_LIMITS.TECHNIQUE_PRUNE_THRESHOLD)) continue;
+      }
+      survivors.push(entry);
+    }
+    this.techniques = survivors;
+  }
   /**
    * Get techniques relevant to a specific service/technology.
    */
@@ -8924,6 +9093,7 @@ var ToolRegistry = class {
       this.state.workingMemory.recordSuccess(toolCall.name, command, result2.output || "");
     } else {
       this.state.workingMemory.recordFailure(toolCall.name, command, result2.error || "Unknown error");
+      this.state.dynamicTechniques.recordFailure(command);
     }
     if (result2.success && result2.output) {
       const structured = autoExtractStructured(toolCall.name, result2.output);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pentesting",
-  "version": "0.51.1",
+  "version": "0.52.1",
   "description": "Autonomous Penetration Testing AI Agent",
   "type": "module",
   "main": "dist/main.js",
@@ -19,7 +19,7 @@
     "dev:tsx": "tsx src/platform/tui/main.tsx",
     "build": "tsup",
     "start": "node dist/main.js",
-    "test": "mkdir -p /tmp/pentesting-vitest && TMPDIR=/tmp/pentesting-vitest vitest run",
+    "test": "mkdir -p .pentesting && TMPDIR=.pentesting vitest run",
     "test:watch": "vitest",
     "lint": "tsc --noEmit",
     "prepublishOnly": "npm run build",
@@ -29,7 +29,7 @@
     "release:minor": "npm version minor && npm run build && npm run publish:token",
     "release:major": "npm version major && npm run build && npm run publish:token",
     "release:docker": "docker buildx build --platform linux/amd64,linux/arm64 -t agnusdei1207/pentesting:latest --push . && docker system prune -af",
-    "check": "TMPDIR=/tmp npm run test && npm run build && npm run release:docker && bash test.sh"
+    "check": "npm run test && npm run build && npm run release:docker && bash test.sh"
   },
   "repository": {
     "type": "git",