pentesting 0.51.1 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/main.js +183 -21
  2. package/package.json +1 -1
package/dist/main.js CHANGED
@@ -180,11 +180,15 @@ var MEMORY_LIMITS = {
180
180
  WORKING_MEMORY_MAX_ENTRIES: 20,
181
181
  /** Maximum events in episodic memory (session timeline) */
182
182
  EPISODIC_MEMORY_MAX_EVENTS: 100,
183
- /** Consecutive failures before SWITCH VECTOR warning */
183
+ /** Consecutive failures before prompting to change approach or parameters */
184
184
  CONSECUTIVE_FAIL_THRESHOLD: 3,
185
185
  /** Maximum learned techniques in DynamicTechniqueLibrary */
186
186
  DYNAMIC_TECHNIQUES_MAX: 50,
187
- /** Number of leading words to match for duplicate command detection */
187
+ /** Confidence decay per failure for DynamicTechniqueLibrary learned techniques */
188
+ TECHNIQUE_FAILURE_DECAY: 30,
189
+ /** Auto-prune threshold: techniques below this confidence are discarded */
190
+ TECHNIQUE_PRUNE_THRESHOLD: 10,
191
+ /** @deprecated Superseded by fingerprint-based matching in extractFingerprint(). Kept for reference. */
188
192
  COMMAND_MATCH_WORDS: 3,
189
193
  /** Maximum unverified techniques to show in prompt */
190
194
  PROMPT_UNVERIFIED_TECHNIQUES: 10,
@@ -345,7 +349,7 @@ var ORPHAN_PROCESS_NAMES = [
345
349
 
346
350
  // src/shared/constants/agent.ts
347
351
  var APP_NAME = "Pentest AI";
348
- var APP_VERSION = "0.51.1";
352
+ var APP_VERSION = "0.52.0";
349
353
  var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
350
354
  var LLM_ROLES = {
351
355
  SYSTEM: "system",
@@ -3144,6 +3148,49 @@ var AttackGraph = class {
3144
3148
  // src/shared/utils/agent-memory.ts
3145
3149
  import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync as writeFileSync4 } from "fs";
3146
3150
  import { join as join3 } from "path";
3151
+ function extractFingerprint(tool, command) {
3152
+ const cmd = command || "";
3153
+ const targetMatch = cmd.match(
3154
+ /(?::\/\/|@)([\w.\-]+(?::\d+)?)|\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?\b/
3155
+ );
3156
+ const target = targetMatch?.[1] || targetMatch?.[2] || "";
3157
+ const wordlistMatch = cmd.match(
3158
+ /(?:-[wPUL]\s+|--(?:wordlist|passwords?|usernames?)(?:=|\s+))([^\s]+)/i
3159
+ );
3160
+ const wordlist = wordlistMatch?.[1] || "";
3161
+ const portMatch = cmd.match(/(?:-p\s+|--port(?:=|\s+))(\S+)/);
3162
+ const port = portMatch?.[1] || "";
3163
+ const flagPatterns = [
3164
+ /--level(?:=|\s+)(\S+)/i,
3165
+ // sqlmap
3166
+ /--risk(?:=|\s+)(\S+)/i,
3167
+ // sqlmap
3168
+ /-s([VSTCAUOPMX]+)/,
3169
+ // nmap scan type
3170
+ /--script(?:=|\s+)(\S+)/i,
3171
+ // nmap scripts
3172
+ /-[el]\s+(\S+)/i,
3173
+ // hydra login/password single
3174
+ /--method(?:=|\s+)(\S+)/i
3175
+ // HTTP method
3176
+ ];
3177
+ const flags = flagPatterns.map((p) => {
3178
+ const m = cmd.match(p);
3179
+ return m?.[0]?.trim() || "";
3180
+ }).filter(Boolean).join(" ");
3181
+ return { tool: tool.toLowerCase(), target, wordlist, flags, port };
3182
+ }
3183
+ function fingerprintsMatch(a, b) {
3184
+ return a.tool === b.tool && a.target === b.target && a.wordlist === b.wordlist && a.flags === b.flags && a.port === b.port;
3185
+ }
3186
+ function formatFingerprint(fp) {
3187
+ const parts = [fp.tool];
3188
+ if (fp.target) parts.push(`\u2192${fp.target}`);
3189
+ if (fp.wordlist) parts.push(`[wordlist:${fp.wordlist.split("/").pop()}]`);
3190
+ if (fp.port) parts.push(`[port:${fp.port}]`);
3191
+ if (fp.flags) parts.push(`[${fp.flags}]`);
3192
+ return parts.join(" ");
3193
+ }
3147
3194
  var WorkingMemory = class {
3148
3195
  entries = [];
3149
3196
  maxEntries = MEMORY_LIMITS.WORKING_MEMORY_MAX_ENTRIES;
@@ -3156,20 +3203,29 @@ var WorkingMemory = class {
3156
3203
  context,
3157
3204
  importance
3158
3205
  });
3159
- if (this.entries.length > this.maxEntries) {
3160
- this.entries.sort((a, b) => {
3161
- const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
3162
- const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
3163
- return bScore - aScore;
3164
- });
3165
- this.entries = this.entries.slice(0, this.maxEntries);
3166
- }
3206
+ this.pruneIfNeeded();
3167
3207
  }
3168
3208
  /**
3169
- * Record a failed attempt to avoid repeating it.
3209
+ * Record a failed attempt with structured fingerprint.
3210
+ *
3211
+ * WHY: Fingerprint-based recording ensures that "hydra with rockyou.txt"
3212
+ * and "hydra with darkweb2017.txt" are stored as distinct attempts.
3213
+ * The LLM can see what parameter combinations have been tried and pick new ones.
3170
3214
  */
3171
3215
  recordFailure(tool, command, error) {
3172
- this.add("failure", `FAILED: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`, 0.8, { tool, command });
3216
+ const fp = extractFingerprint(tool, command);
3217
+ const fpLabel = formatFingerprint(fp);
3218
+ const entry = {
3219
+ id: `wm_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
3220
+ timestamp: Date.now(),
3221
+ category: "failure",
3222
+ content: `FAILED: ${fpLabel} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`,
3223
+ context: { tool, command },
3224
+ importance: 0.8,
3225
+ fingerprint: fp
3226
+ };
3227
+ this.entries.push(entry);
3228
+ this.pruneIfNeeded();
3173
3229
  }
3174
3230
  /**
3175
3231
  * Record a successful action for reference.
@@ -3178,16 +3234,53 @@ var WorkingMemory = class {
3178
3234
  this.add("success", `SUCCESS: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}`, 0.6, { tool, result: result2.slice(0, DISPLAY_LIMITS.OUTPUT_SUMMARY) });
3179
3235
  }
3180
3236
  /**
3181
- * Check if a similar command has already failed.
3237
+ * Check if the EXACT same attack attempt (all parameters identical) has failed.
3238
+ *
3239
+ * WHY: Only blocks truly duplicate attempts. Different wordlists, flags, or
3240
+ * ports are treated as new attacks and allowed through.
3182
3241
  */
3183
- hasFailedBefore(command) {
3184
- const lower = command.toLowerCase().split(/\s+/).slice(0, MEMORY_LIMITS.COMMAND_MATCH_WORDS).join(" ");
3242
+ hasFailedBefore(command, tool) {
3243
+ const effectiveTool = tool || command.split(/\s+/)[0] || "";
3244
+ const fp = extractFingerprint(effectiveTool, command);
3185
3245
  return this.entries.find(
3186
- (e) => e.category === "failure" && e.content.toLowerCase().includes(lower)
3246
+ (e) => e.category === "failure" && e.fingerprint != null && fingerprintsMatch(e.fingerprint, fp)
3187
3247
  );
3188
3248
  }
3189
3249
  /**
3190
- * Get count of consecutive failures (for vector switch detection).
3250
+ * Get all previous attempts (success & failure) for a specific tool+target vector.
3251
+ * Returns the full history so the LLM can see what parameter combinations were tried.
3252
+ *
3253
+ * TODO: Wire up in tools.ts to pass vector history to strategist for smarter retries.
3254
+ */
3255
+ getAttemptsForVector(tool, target) {
3256
+ const lowerTool = tool.toLowerCase();
3257
+ return this.entries.filter((e) => {
3258
+ if (e.fingerprint) {
3259
+ const matchTool = e.fingerprint.tool === lowerTool;
3260
+ const matchTarget = !target || e.fingerprint.target.includes(target);
3261
+ return matchTool && matchTarget;
3262
+ }
3263
+ return String(e.context.tool || "").toLowerCase() === lowerTool;
3264
+ });
3265
+ }
3266
+ /** Internal prune helper (used by both add() and recordFailure()) */
3267
+ pruneIfNeeded() {
3268
+ if (this.entries.length > this.maxEntries) {
3269
+ this.entries.sort((a, b) => {
3270
+ const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
3271
+ const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
3272
+ return bScore - aScore;
3273
+ });
3274
+ this.entries = this.entries.slice(0, this.maxEntries);
3275
+ }
3276
+ }
3277
+ /**
3278
+ * Get count of consecutive failures on the SAME attack vector.
3279
+ *
3280
+ * WHY: Switching from "hydra with wordlist A" to "hydra with wordlist B"
3281
+ * is a legitimate new approach. The consecutive-failure counter should
3282
+ * only trigger when every recent attempt is against the same tool+target
3283
+ * with no variation producing success. This prevents premature vector abandonment.
3191
3284
  */
3192
3285
  getConsecutiveFailures() {
3193
3286
  let count = 0;
@@ -3197,6 +3290,26 @@ var WorkingMemory = class {
3197
3290
  }
3198
3291
  return count;
3199
3292
  }
3293
+ /**
3294
+ * Get consecutive failures for a specific vector (tool+target).
3295
+ * Returns count of sequential failures where all attempts used the same tool+target.
3296
+ *
3297
+ * TODO: Wire up in tools.ts for per-vector threshold checks alongside global consecutive count.
3298
+ */
3299
+ getConsecutiveVectorFailures(tool, target) {
3300
+ const lowerTool = tool.toLowerCase();
3301
+ let count = 0;
3302
+ for (let i = this.entries.length - 1; i >= 0; i--) {
3303
+ const e = this.entries[i];
3304
+ if (e.category !== "failure") break;
3305
+ if (e.fingerprint && e.fingerprint.tool === lowerTool && (!target || e.fingerprint.target.includes(target))) {
3306
+ count++;
3307
+ } else {
3308
+ break;
3309
+ }
3310
+ }
3311
+ return count;
3312
+ }
3200
3313
  /**
3201
3314
  * Format for prompt injection.
3202
3315
  */
@@ -3207,14 +3320,20 @@ var WorkingMemory = class {
3207
3320
  const insights = this.entries.filter((e) => e.category === "insight" || e.category === "discovery");
3208
3321
  const lines = ["<working-memory>"];
3209
3322
  if (failures.length > 0) {
3210
- lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT):`);
3323
+ lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT EXACT SAME PARAMS):`);
3211
3324
  for (const f of failures.slice(-DISPLAY_LIMITS.RECENT_FAILURES)) {
3212
- lines.push(` \u2717 ${f.content}`);
3325
+ const fp = f.fingerprint;
3326
+ if (fp) {
3327
+ lines.push(` \u2717 ${formatFingerprint(fp)} \u2192 ${f.content.split("\u2192").pop()?.trim() || ""}`);
3328
+ } else {
3329
+ lines.push(` \u2717 ${f.content}`);
3330
+ }
3213
3331
  }
3332
+ lines.push(...this.buildAttackCoverageLines(failures));
3214
3333
  }
3215
3334
  const consecutiveFails = this.getConsecutiveFailures();
3216
3335
  if (consecutiveFails >= MEMORY_LIMITS.CONSECUTIVE_FAIL_THRESHOLD) {
3217
- lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 SWITCH ATTACK VECTOR NOW`);
3336
+ lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 consider changing approach or parameters`);
3218
3337
  }
3219
3338
  if (successes.length > 0) {
3220
3339
  lines.push(`\u2705 RECENT SUCCESSES (${successes.length}):`);
@@ -3231,6 +3350,29 @@ var WorkingMemory = class {
3231
3350
  lines.push("</working-memory>");
3232
3351
  return lines.join("\n");
3233
3352
  }
3353
+ /**
3354
+ * Build ATTACK COVERAGE lines grouped by tool+target.
3355
+ * WHY: Shows the LLM which parameter combinations have been tried per vector,
3356
+ * so it can pick genuinely untried variations instead of repeating.
3357
+ */
3358
+ buildAttackCoverageLines(failures) {
3359
+ const vectorMap = /* @__PURE__ */ new Map();
3360
+ for (const f of failures) {
3361
+ if (f.fingerprint) {
3362
+ const key = `${f.fingerprint.tool}\u2192${f.fingerprint.target}`;
3363
+ const detail = f.fingerprint.wordlist ? f.fingerprint.wordlist.split("/").pop() || "" : f.fingerprint.flags || "default";
3364
+ const existing = vectorMap.get(key) || [];
3365
+ if (!existing.includes(detail)) existing.push(detail);
3366
+ vectorMap.set(key, existing);
3367
+ }
3368
+ }
3369
+ if (vectorMap.size === 0) return [];
3370
+ const lines = [` \u{1F4CA} ATTACK COVERAGE (tried variations):`];
3371
+ for (const [vector, variations] of vectorMap) {
3372
+ lines.push(` ${vector}: tried [${variations.join(", ")}] \u2014 try DIFFERENT params`);
3373
+ }
3374
+ return lines;
3375
+ }
3234
3376
  getEntries() {
3235
3377
  return [...this.entries];
3236
3378
  }
@@ -3467,6 +3609,25 @@ var DynamicTechniqueLibrary = class {
3467
3609
  }
3468
3610
  }
3469
3611
  }
3612
+ /**
3613
+ * Record that a technique failed in practice — decrease confidence.
3614
+ *
3615
+ * WHY: Web search results can contain outdated, wrong, or irrelevant techniques.
3616
+ * If the agent tries a learned technique and it fails, we must degrade its
3617
+ * confidence so the agent stops wasting time on bad advice.
3618
+ * After enough failures the technique is auto-pruned entirely.
3619
+ */
3620
+ recordFailure(techniqueSubstring) {
3621
+ if (!techniqueSubstring || techniqueSubstring.length < 3) return;
3622
+ const lower = techniqueSubstring.toLowerCase();
3623
+ this.techniques = this.techniques.filter((t) => {
3624
+ if (t.technique.toLowerCase().includes(lower)) {
3625
+ t.confidence -= MEMORY_LIMITS.TECHNIQUE_FAILURE_DECAY;
3626
+ return t.confidence > MEMORY_LIMITS.TECHNIQUE_PRUNE_THRESHOLD;
3627
+ }
3628
+ return true;
3629
+ });
3630
+ }
3470
3631
  /**
3471
3632
  * Get techniques relevant to a specific service/technology.
3472
3633
  */
@@ -8924,6 +9085,7 @@ var ToolRegistry = class {
8924
9085
  this.state.workingMemory.recordSuccess(toolCall.name, command, result2.output || "");
8925
9086
  } else {
8926
9087
  this.state.workingMemory.recordFailure(toolCall.name, command, result2.error || "Unknown error");
9088
+ this.state.dynamicTechniques.recordFailure(command);
8927
9089
  }
8928
9090
  if (result2.success && result2.output) {
8929
9091
  const structured = autoExtractStructured(toolCall.name, result2.output);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pentesting",
3
- "version": "0.51.1",
3
+ "version": "0.52.0",
4
4
  "description": "Autonomous Penetration Testing AI Agent",
5
5
  "type": "module",
6
6
  "main": "dist/main.js",