pentesting 0.51.0 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/main.js +183 -22
  2. package/package.json +1 -1
package/dist/main.js CHANGED
@@ -180,11 +180,15 @@ var MEMORY_LIMITS = {
180
180
  WORKING_MEMORY_MAX_ENTRIES: 20,
181
181
  /** Maximum events in episodic memory (session timeline) */
182
182
  EPISODIC_MEMORY_MAX_EVENTS: 100,
183
- /** Consecutive failures before SWITCH VECTOR warning */
183
+ /** Consecutive failures before prompting to change approach or parameters */
184
184
  CONSECUTIVE_FAIL_THRESHOLD: 3,
185
185
  /** Maximum learned techniques in DynamicTechniqueLibrary */
186
186
  DYNAMIC_TECHNIQUES_MAX: 50,
187
- /** Number of leading words to match for duplicate command detection */
187
+ /** Confidence decay per failure for DynamicTechniqueLibrary learned techniques */
188
+ TECHNIQUE_FAILURE_DECAY: 30,
189
+ /** Auto-prune threshold: techniques below this confidence are discarded */
190
+ TECHNIQUE_PRUNE_THRESHOLD: 10,
191
+ /** @deprecated Superseded by fingerprint-based matching in extractFingerprint(). Kept for reference. */
188
192
  COMMAND_MATCH_WORDS: 3,
189
193
  /** Maximum unverified techniques to show in prompt */
190
194
  PROMPT_UNVERIFIED_TECHNIQUES: 10,
@@ -345,7 +349,7 @@ var ORPHAN_PROCESS_NAMES = [
345
349
 
346
350
  // src/shared/constants/agent.ts
347
351
  var APP_NAME = "Pentest AI";
348
- var APP_VERSION = "0.51.0";
352
+ var APP_VERSION = "0.52.0";
349
353
  var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
350
354
  var LLM_ROLES = {
351
355
  SYSTEM: "system",
@@ -767,7 +771,6 @@ var EVENT_TYPES = {
767
771
  RETRY: "retry",
768
772
  USAGE_UPDATE: "usage_update",
769
773
  INPUT_REQUEST: "input_request",
770
- LOG: "log",
771
774
  FLAG_FOUND: "flag_found"
772
775
  };
773
776
  var UI_COMMANDS = {
@@ -3145,6 +3148,49 @@ var AttackGraph = class {
3145
3148
  // src/shared/utils/agent-memory.ts
3146
3149
  import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync as writeFileSync4 } from "fs";
3147
3150
  import { join as join3 } from "path";
3151
/**
 * Reduce a tool invocation to the parameters that make it a distinct attempt.
 *
 * Short options are matched case-sensitively because case carries meaning in
 * the tools this targets (hydra: `-l` single login vs `-L` login list, `-p`
 * single password vs `-P` password list). Matching them with the `i` flag made
 * `-l admin`, `-p 80` or `-u <url>` register as the wordlist, so two runs that
 * differed only in `-P <file>` produced identical fingerprints.
 *
 * @param {string} tool - Tool name; lower-cased in the result. Nullish becomes "".
 * @param {string} command - Full command line; a non-string is treated as empty.
 * @returns {{tool: string, target: string, wordlist: string, flags: string, port: string}}
 *   Every field is a string; a field that was not found is "".
 */
function extractFingerprint(tool, command) {
  const cmd = typeof command === "string" ? command : "";

  // Host after "://" or "@" (optionally with :port), otherwise a bare IPv4 address.
  const targetMatch = cmd.match(
    /(?::\/\/|@)([\w.\-]+(?::\d+)?)|\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?\b/
  );
  const target = targetMatch?.[1] || targetMatch?.[2] || "";

  // Short options: case-sensitive and must start a token. Long options: any case.
  // Whichever occurs first in the command wins, as with the single combined regex before.
  const wordlistCandidates = [
    cmd.match(/(?:^|\s)-[wPUL]\s+(\S+)/),
    cmd.match(/--(?:wordlist|passwords?|usernames?)(?:=|\s+)(\S+)/i)
  ].filter(Boolean).sort((x, y) => x.index - y.index);
  const wordlist = wordlistCandidates[0]?.[1] || "";

  const portMatch = cmd.match(/(?:-p\s+|--port(?:=|\s+))(\S+)/);
  const port = portMatch?.[1] || "";

  const flagPatterns = [
    /--level(?:=|\s+)(\S+)/i, // sqlmap
    /--risk(?:=|\s+)(\S+)/i, // sqlmap
    /-s([VSTCAUOPMX]+)/, // nmap scan type
    /--script(?:=|\s+)(\S+)/i, // nmap scripts
    /-[el]\s+(\S+)/, // hydra -l / -e; case-sensitive so -L (a list file) is not swallowed here
    /--method(?:=|\s+)(\S+)/i // HTTP method
  ];
  // Keep the whole matched text (option plus value), in pattern order.
  const flags = flagPatterns
    .map((pattern) => cmd.match(pattern)?.[0]?.trim() || "")
    .filter(Boolean)
    .join(" ");

  return { tool: String(tool ?? "").toLowerCase(), target, wordlist, flags, port };
}
3183
/**
 * Tell whether two fingerprints describe the same attempt.
 *
 * @param {object} a - Fingerprint with tool, target, wordlist, flags and port.
 * @param {object} b - Fingerprint to compare against.
 * @returns {boolean} true only when all five fields are strictly equal.
 */
function fingerprintsMatch(a, b) {
  const comparedFields = ["tool", "target", "wordlist", "flags", "port"];
  return comparedFields.every((field) => a[field] === b[field]);
}
3186
/**
 * Render a fingerprint as a one-line label.
 *
 * The tool always comes first; target, wordlist, port and flags follow in that
 * order, each only when non-empty. Only the last "/"-separated segment of the
 * wordlist path is shown.
 *
 * @param {object} fp - Fingerprint with tool, target, wordlist, flags and port.
 * @returns {string} Space-separated label.
 */
function formatFingerprint(fp) {
  const { tool, target, wordlist, port, flags } = fp;
  const optionalSegments = [
    target && `\u2192${target}`,
    wordlist && `[wordlist:${wordlist.split("/").pop()}]`,
    port && `[port:${port}]`,
    flags && `[${flags}]`
  ];
  // Falsy entries stand for fields that were empty and are dropped.
  return [tool, ...optionalSegments.filter(Boolean)].join(" ");
}
3148
3194
  var WorkingMemory = class {
3149
3195
  entries = [];
3150
3196
  maxEntries = MEMORY_LIMITS.WORKING_MEMORY_MAX_ENTRIES;
@@ -3157,20 +3203,29 @@ var WorkingMemory = class {
3157
3203
  context,
3158
3204
  importance
3159
3205
  });
3160
- if (this.entries.length > this.maxEntries) {
3161
- this.entries.sort((a, b) => {
3162
- const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
3163
- const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
3164
- return bScore - aScore;
3165
- });
3166
- this.entries = this.entries.slice(0, this.maxEntries);
3167
- }
3206
+ this.pruneIfNeeded();
3168
3207
  }
3169
3208
  /**
3170
- * Record a failed attempt to avoid repeating it.
3209
+ * Record a failed attempt with structured fingerprint.
3210
+ *
3211
+ * WHY: Fingerprint-based recording ensures that "hydra with rockyou.txt"
3212
+ * and "hydra with darkweb2017.txt" are stored as distinct attempts.
3213
+ * The LLM can see what parameter combinations have been tried and pick new ones.
3171
3214
  */
3172
3215
  recordFailure(tool, command, error) {
3173
- this.add("failure", `FAILED: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`, 0.8, { tool, command });
3216
+ const fp = extractFingerprint(tool, command);
3217
+ const fpLabel = formatFingerprint(fp);
3218
+ const entry = {
3219
+ id: `wm_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
3220
+ timestamp: Date.now(),
3221
+ category: "failure",
3222
+ content: `FAILED: ${fpLabel} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`,
3223
+ context: { tool, command },
3224
+ importance: 0.8,
3225
+ fingerprint: fp
3226
+ };
3227
+ this.entries.push(entry);
3228
+ this.pruneIfNeeded();
3174
3229
  }
3175
3230
  /**
3176
3231
  * Record a successful action for reference.
@@ -3179,16 +3234,53 @@ var WorkingMemory = class {
3179
3234
  this.add("success", `SUCCESS: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}`, 0.6, { tool, result: result2.slice(0, DISPLAY_LIMITS.OUTPUT_SUMMARY) });
3180
3235
  }
3181
3236
  /**
3182
- * Check if a similar command has already failed.
3237
+ * Check if the EXACT same attack attempt (all parameters identical) has failed.
3238
+ *
3239
+ * WHY: Only blocks truly duplicate attempts. Different wordlists, flags, or
3240
+ * ports are treated as new attacks and allowed through.
3183
3241
  */
3184
- hasFailedBefore(command) {
3185
- const lower = command.toLowerCase().split(/\s+/).slice(0, MEMORY_LIMITS.COMMAND_MATCH_WORDS).join(" ");
3242
+ hasFailedBefore(command, tool) {
3243
+ const effectiveTool = tool || command.split(/\s+/)[0] || "";
3244
+ const fp = extractFingerprint(effectiveTool, command);
3186
3245
  return this.entries.find(
3187
- (e) => e.category === "failure" && e.content.toLowerCase().includes(lower)
3246
+ (e) => e.category === "failure" && e.fingerprint != null && fingerprintsMatch(e.fingerprint, fp)
3188
3247
  );
3189
3248
  }
3190
3249
  /**
3191
- * Get count of consecutive failures (for vector switch detection).
3250
+ * Get all previous attempts (success & failure) for a specific tool+target vector.
3251
+ * Returns the full history so the LLM can see what parameter combinations were tried.
3252
+ *
3253
+ * TODO: Wire up in tools.ts to pass vector history to strategist for smarter retries.
3254
+ */
3255
+ getAttemptsForVector(tool, target) {
3256
+ const lowerTool = tool.toLowerCase();
3257
+ return this.entries.filter((e) => {
3258
+ if (e.fingerprint) {
3259
+ const matchTool = e.fingerprint.tool === lowerTool;
3260
+ const matchTarget = !target || e.fingerprint.target.includes(target);
3261
+ return matchTool && matchTarget;
3262
+ }
3263
+ return String(e.context.tool || "").toLowerCase() === lowerTool;
3264
+ });
3265
+ }
3266
+ /** Internal prune helper (used by both add() and recordFailure()) */
3267
+ pruneIfNeeded() {
3268
+ if (this.entries.length > this.maxEntries) {
3269
+ this.entries.sort((a, b) => {
3270
+ const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
3271
+ const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
3272
+ return bScore - aScore;
3273
+ });
3274
+ this.entries = this.entries.slice(0, this.maxEntries);
3275
+ }
3276
+ }
3277
+ /**
3278
+ * Get count of consecutive failures on the SAME attack vector.
3279
+ *
3280
+ * WHY: Switching from "hydra with wordlist A" to "hydra with wordlist B"
3281
+ * is a legitimate new approach. The consecutive-failure counter should
3282
+ * only trigger when every recent attempt is against the same tool+target
3283
+ * with no variation producing success. This prevents premature vector abandonment.
3192
3284
  */
3193
3285
  getConsecutiveFailures() {
3194
3286
  let count = 0;
@@ -3198,6 +3290,26 @@ var WorkingMemory = class {
3198
3290
  }
3199
3291
  return count;
3200
3292
  }
3293
+ /**
3294
+ * Get consecutive failures for a specific vector (tool+target).
3295
+ * Returns count of sequential failures where all attempts used the same tool+target.
3296
+ *
3297
+ * TODO: Wire up in tools.ts for per-vector threshold checks alongside global consecutive count.
3298
+ */
3299
+ getConsecutiveVectorFailures(tool, target) {
3300
+ const lowerTool = tool.toLowerCase();
3301
+ let count = 0;
3302
+ for (let i = this.entries.length - 1; i >= 0; i--) {
3303
+ const e = this.entries[i];
3304
+ if (e.category !== "failure") break;
3305
+ if (e.fingerprint && e.fingerprint.tool === lowerTool && (!target || e.fingerprint.target.includes(target))) {
3306
+ count++;
3307
+ } else {
3308
+ break;
3309
+ }
3310
+ }
3311
+ return count;
3312
+ }
3201
3313
  /**
3202
3314
  * Format for prompt injection.
3203
3315
  */
@@ -3208,14 +3320,20 @@ var WorkingMemory = class {
3208
3320
  const insights = this.entries.filter((e) => e.category === "insight" || e.category === "discovery");
3209
3321
  const lines = ["<working-memory>"];
3210
3322
  if (failures.length > 0) {
3211
- lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT):`);
3323
+ lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT EXACT SAME PARAMS):`);
3212
3324
  for (const f of failures.slice(-DISPLAY_LIMITS.RECENT_FAILURES)) {
3213
- lines.push(` \u2717 ${f.content}`);
3325
+ const fp = f.fingerprint;
3326
+ if (fp) {
3327
+ lines.push(` \u2717 ${formatFingerprint(fp)} \u2192 ${f.content.split("\u2192").pop()?.trim() || ""}`);
3328
+ } else {
3329
+ lines.push(` \u2717 ${f.content}`);
3330
+ }
3214
3331
  }
3332
+ lines.push(...this.buildAttackCoverageLines(failures));
3215
3333
  }
3216
3334
  const consecutiveFails = this.getConsecutiveFailures();
3217
3335
  if (consecutiveFails >= MEMORY_LIMITS.CONSECUTIVE_FAIL_THRESHOLD) {
3218
- lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 SWITCH ATTACK VECTOR NOW`);
3336
+ lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 consider changing approach or parameters`);
3219
3337
  }
3220
3338
  if (successes.length > 0) {
3221
3339
  lines.push(`\u2705 RECENT SUCCESSES (${successes.length}):`);
@@ -3232,6 +3350,29 @@ var WorkingMemory = class {
3232
3350
  lines.push("</working-memory>");
3233
3351
  return lines.join("\n");
3234
3352
  }
3353
+ /**
3354
+ * Build ATTACK COVERAGE lines grouped by tool+target.
3355
+ * WHY: Shows the LLM which parameter combinations have been tried per vector,
3356
+ * so it can pick genuinely untried variations instead of repeating.
3357
+ */
3358
+ buildAttackCoverageLines(failures) {
3359
+ const vectorMap = /* @__PURE__ */ new Map();
3360
+ for (const f of failures) {
3361
+ if (f.fingerprint) {
3362
+ const key = `${f.fingerprint.tool}\u2192${f.fingerprint.target}`;
3363
+ const detail = f.fingerprint.wordlist ? f.fingerprint.wordlist.split("/").pop() || "" : f.fingerprint.flags || "default";
3364
+ const existing = vectorMap.get(key) || [];
3365
+ if (!existing.includes(detail)) existing.push(detail);
3366
+ vectorMap.set(key, existing);
3367
+ }
3368
+ }
3369
+ if (vectorMap.size === 0) return [];
3370
+ const lines = [` \u{1F4CA} ATTACK COVERAGE (tried variations):`];
3371
+ for (const [vector, variations] of vectorMap) {
3372
+ lines.push(` ${vector}: tried [${variations.join(", ")}] \u2014 try DIFFERENT params`);
3373
+ }
3374
+ return lines;
3375
+ }
3235
3376
  getEntries() {
3236
3377
  return [...this.entries];
3237
3378
  }
@@ -3468,6 +3609,25 @@ var DynamicTechniqueLibrary = class {
3468
3609
  }
3469
3610
  }
3470
3611
  }
3612
+ /**
3613
+ * Record that a technique failed in practice — decrease confidence.
3614
+ *
3615
+ * WHY: Web search results can contain outdated, wrong, or irrelevant techniques.
3616
+ * If the agent tries a learned technique and it fails, we must degrade its
3617
+ * confidence so the agent stops wasting time on bad advice.
3618
+ * After enough failures the technique is auto-pruned entirely.
3619
+ */
3620
+ recordFailure(techniqueSubstring) {
3621
+ if (!techniqueSubstring || techniqueSubstring.length < 3) return;
3622
+ const lower = techniqueSubstring.toLowerCase();
3623
+ this.techniques = this.techniques.filter((t) => {
3624
+ if (t.technique.toLowerCase().includes(lower)) {
3625
+ t.confidence -= MEMORY_LIMITS.TECHNIQUE_FAILURE_DECAY;
3626
+ return t.confidence > MEMORY_LIMITS.TECHNIQUE_PRUNE_THRESHOLD;
3627
+ }
3628
+ return true;
3629
+ });
3630
+ }
3471
3631
  /**
3472
3632
  * Get techniques relevant to a specific service/technology.
3473
3633
  */
@@ -8925,6 +9085,7 @@ var ToolRegistry = class {
8925
9085
  this.state.workingMemory.recordSuccess(toolCall.name, command, result2.output || "");
8926
9086
  } else {
8927
9087
  this.state.workingMemory.recordFailure(toolCall.name, command, result2.error || "Unknown error");
9088
+ this.state.dynamicTechniques.recordFailure(command);
8928
9089
  }
8929
9090
  if (result2.success && result2.output) {
8930
9091
  const structured = autoExtractStructured(toolCall.name, result2.output);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pentesting",
3
- "version": "0.51.0",
3
+ "version": "0.52.0",
4
4
  "description": "Autonomous Penetration Testing AI Agent",
5
5
  "type": "module",
6
6
  "main": "dist/main.js",