pentesting 0.51.1 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +183 -21
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -180,11 +180,15 @@ var MEMORY_LIMITS = {
|
|
|
180
180
|
WORKING_MEMORY_MAX_ENTRIES: 20,
|
|
181
181
|
/** Maximum events in episodic memory (session timeline) */
|
|
182
182
|
EPISODIC_MEMORY_MAX_EVENTS: 100,
|
|
183
|
-
/** Consecutive failures before
|
|
183
|
+
/** Consecutive failures before prompting to change approach or parameters */
|
|
184
184
|
CONSECUTIVE_FAIL_THRESHOLD: 3,
|
|
185
185
|
/** Maximum learned techniques in DynamicTechniqueLibrary */
|
|
186
186
|
DYNAMIC_TECHNIQUES_MAX: 50,
|
|
187
|
-
/**
|
|
187
|
+
/** Confidence decay per failure for DynamicTechniqueLibrary learned techniques */
|
|
188
|
+
TECHNIQUE_FAILURE_DECAY: 30,
|
|
189
|
+
/** Auto-prune threshold: techniques below this confidence are discarded */
|
|
190
|
+
TECHNIQUE_PRUNE_THRESHOLD: 10,
|
|
191
|
+
/** @deprecated Superseded by fingerprint-based matching in extractFingerprint(). Kept for reference. */
|
|
188
192
|
COMMAND_MATCH_WORDS: 3,
|
|
189
193
|
/** Maximum unverified techniques to show in prompt */
|
|
190
194
|
PROMPT_UNVERIFIED_TECHNIQUES: 10,
|
|
@@ -345,7 +349,7 @@ var ORPHAN_PROCESS_NAMES = [
|
|
|
345
349
|
|
|
346
350
|
// src/shared/constants/agent.ts
|
|
347
351
|
var APP_NAME = "Pentest AI";
|
|
348
|
-
var APP_VERSION = "0.
|
|
352
|
+
var APP_VERSION = "0.52.0";
|
|
349
353
|
var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
|
|
350
354
|
var LLM_ROLES = {
|
|
351
355
|
SYSTEM: "system",
|
|
@@ -3144,6 +3148,49 @@ var AttackGraph = class {
|
|
|
3144
3148
|
// src/shared/utils/agent-memory.ts
|
|
3145
3149
|
import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync as writeFileSync4 } from "fs";
|
|
3146
3150
|
import { join as join3 } from "path";
|
|
3151
|
+
/**
 * Build a structured fingerprint of an attack attempt from its command line.
 *
 * The fingerprint lets callers tell "same tool, same target, same parameters"
 * apart from "same tool, different wordlist/port/flags".
 *
 * @param {string} tool - Tool name; lower-cased in the result. A missing value becomes "".
 * @param {string} command - Raw command line. A falsy value is treated as "".
 * @returns {{tool: string, target: string, wordlist: string, flags: string, port: string}}
 *   `target`   - host (optionally host:port) after `://` or `@`, or the first bare IPv4 address.
 *   `wordlist` - every wordlist/user-list/password-list argument, in command order, joined by ",".
 *   `flags`    - the first occurrence of each recognised behaviour flag, space-joined.
 *   `port`     - argument of `-p` / `--port`.
 */
function extractFingerprint(tool, command) {
  const cmd = command || "";

  const targetMatch = cmd.match(
    /(?::\/\/|@)([\w.\-]+(?::\d+)?)|\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?\b/
  );
  const target = targetMatch?.[1] || targetMatch?.[2] || "";

  // Short options are case-SENSITIVE on purpose: `-P file` / `-L file` / `-U file`
  // name list files, whereas `-p`, `-l`, `-u` are a port, a single login or a URL.
  // Matching them case-insensitively made `-l admin` or `-u http://...` shadow the
  // real wordlist, so two runs with different `-P` lists looked identical.
  // Long options stay case-insensitive.
  const wordlistHits = [
    ...cmd.matchAll(/-[wPUL]\s+(\S+)/g),
    ...cmd.matchAll(/--(?:wordlist|passwords?|usernames?)(?:=|\s+)(\S+)/gi)
  ];
  // Collect ALL list arguments (e.g. both -L and -P) so changing either one
  // produces a different fingerprint.
  const wordlist = wordlistHits
    .sort((a, b) => a.index - b.index)
    .map((m) => m[1])
    .join(",");

  const portMatch = cmd.match(/(?:-p\s+|--port(?:=|\s+))(\S+)/);
  const port = portMatch?.[1] || "";

  const flagPatterns = [
    /--level(?:=|\s+)(\S+)/i,
    // sqlmap
    /--risk(?:=|\s+)(\S+)/i,
    // sqlmap
    /-s([VSTCAUOPMX]+)/,
    // nmap scan type
    /--script(?:=|\s+)(\S+)/i,
    // nmap scripts
    /-[el]\s+(\S+)/,
    // hydra login/password single (case-sensitive: -L is a list file, handled above)
    /--method(?:=|\s+)(\S+)/i
    // HTTP method
  ];
  const flags = flagPatterns
    .map((pattern) => cmd.match(pattern)?.[0]?.trim() || "")
    .filter(Boolean)
    .join(" ");

  return { tool: String(tool ?? "").toLowerCase(), target, wordlist, flags, port };
}
|
|
3183
|
+
/**
 * Tell whether two fingerprints describe the exact same attempt.
 *
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} a
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} b
 * @returns {boolean} true only when tool, target, wordlist, flags and port are all strictly equal.
 */
function fingerprintsMatch(a, b) {
  const comparedFields = ["tool", "target", "wordlist", "flags", "port"];
  return comparedFields.every((field) => a[field] === b[field]);
}
|
|
3186
|
+
/**
 * Render a fingerprint as a compact one-line label.
 *
 * Empty fields are omitted. The wordlist is shortened to its last non-empty
 * path segment; if the path has no such segment (e.g. "/") the raw value is
 * shown instead of an empty `[wordlist:]` tag.
 *
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} fp
 * @returns {string} e.g. `hydra →10.0.0.5 [wordlist:rockyou.txt] [port:22] [-l admin]`
 */
function formatFingerprint(fp) {
  const parts = [fp.tool];
  if (fp.target) parts.push(`\u2192${fp.target}`);
  if (fp.wordlist) {
    // A trailing "/" used to yield an empty basename; skip empty segments instead.
    const basename = fp.wordlist.split("/").filter(Boolean).pop() || fp.wordlist;
    parts.push(`[wordlist:${basename}]`);
  }
  if (fp.port) parts.push(`[port:${fp.port}]`);
  if (fp.flags) parts.push(`[${fp.flags}]`);
  return parts.join(" ");
}
|
|
3147
3194
|
var WorkingMemory = class {
|
|
3148
3195
|
entries = [];
|
|
3149
3196
|
maxEntries = MEMORY_LIMITS.WORKING_MEMORY_MAX_ENTRIES;
|
|
@@ -3156,20 +3203,29 @@ var WorkingMemory = class {
|
|
|
3156
3203
|
context,
|
|
3157
3204
|
importance
|
|
3158
3205
|
});
|
|
3159
|
-
|
|
3160
|
-
this.entries.sort((a, b) => {
|
|
3161
|
-
const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
|
|
3162
|
-
const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
|
|
3163
|
-
return bScore - aScore;
|
|
3164
|
-
});
|
|
3165
|
-
this.entries = this.entries.slice(0, this.maxEntries);
|
|
3166
|
-
}
|
|
3206
|
+
this.pruneIfNeeded();
|
|
3167
3207
|
}
|
|
3168
3208
|
/**
|
|
3169
|
-
* Record a failed attempt
|
|
3209
|
+
* Record a failed attempt with structured fingerprint.
|
|
3210
|
+
*
|
|
3211
|
+
* WHY: Fingerprint-based recording ensures that "hydra with rockyou.txt"
|
|
3212
|
+
* and "hydra with darkweb2017.txt" are stored as distinct attempts.
|
|
3213
|
+
* The LLM can see what parameter combinations have been tried and pick new ones.
|
|
3170
3214
|
*/
|
|
3171
3215
|
recordFailure(tool, command, error) {
|
|
3172
|
-
|
|
3216
|
+
const fp = extractFingerprint(tool, command);
|
|
3217
|
+
const fpLabel = formatFingerprint(fp);
|
|
3218
|
+
const entry = {
|
|
3219
|
+
id: `wm_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
|
|
3220
|
+
timestamp: Date.now(),
|
|
3221
|
+
category: "failure",
|
|
3222
|
+
content: `FAILED: ${fpLabel} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`,
|
|
3223
|
+
context: { tool, command },
|
|
3224
|
+
importance: 0.8,
|
|
3225
|
+
fingerprint: fp
|
|
3226
|
+
};
|
|
3227
|
+
this.entries.push(entry);
|
|
3228
|
+
this.pruneIfNeeded();
|
|
3173
3229
|
}
|
|
3174
3230
|
/**
|
|
3175
3231
|
* Record a successful action for reference.
|
|
@@ -3178,16 +3234,53 @@ var WorkingMemory = class {
|
|
|
3178
3234
|
this.add("success", `SUCCESS: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}`, 0.6, { tool, result: result2.slice(0, DISPLAY_LIMITS.OUTPUT_SUMMARY) });
|
|
3179
3235
|
}
|
|
3180
3236
|
/**
|
|
3181
|
-
* Check if
|
|
3237
|
+
* Check if the EXACT same attack attempt (all parameters identical) has failed.
|
|
3238
|
+
*
|
|
3239
|
+
* WHY: Only blocks truly duplicate attempts. Different wordlists, flags, or
|
|
3240
|
+
* ports are treated as new attacks and allowed through.
|
|
3182
3241
|
*/
|
|
3183
|
-
hasFailedBefore(command) {
|
|
3184
|
-
const
|
|
3242
|
+
hasFailedBefore(command, tool) {
|
|
3243
|
+
const effectiveTool = tool || command.split(/\s+/)[0] || "";
|
|
3244
|
+
const fp = extractFingerprint(effectiveTool, command);
|
|
3185
3245
|
return this.entries.find(
|
|
3186
|
-
(e) => e.category === "failure" && e.
|
|
3246
|
+
(e) => e.category === "failure" && e.fingerprint != null && fingerprintsMatch(e.fingerprint, fp)
|
|
3187
3247
|
);
|
|
3188
3248
|
}
|
|
3189
3249
|
/**
|
|
3190
|
-
* Get
|
|
3250
|
+
* Get all previous attempts (success & failure) for a specific tool+target vector.
|
|
3251
|
+
* Returns the full history so the LLM can see what parameter combinations were tried.
|
|
3252
|
+
*
|
|
3253
|
+
* TODO: Wire up in tools.ts to pass vector history to strategist for smarter retries.
|
|
3254
|
+
*/
|
|
3255
|
+
getAttemptsForVector(tool, target) {
|
|
3256
|
+
const lowerTool = tool.toLowerCase();
|
|
3257
|
+
return this.entries.filter((e) => {
|
|
3258
|
+
if (e.fingerprint) {
|
|
3259
|
+
const matchTool = e.fingerprint.tool === lowerTool;
|
|
3260
|
+
const matchTarget = !target || e.fingerprint.target.includes(target);
|
|
3261
|
+
return matchTool && matchTarget;
|
|
3262
|
+
}
|
|
3263
|
+
return String(e.context.tool || "").toLowerCase() === lowerTool;
|
|
3264
|
+
});
|
|
3265
|
+
}
|
|
3266
|
+
/** Internal prune helper (used by both add() and recordFailure()) */
|
|
3267
|
+
pruneIfNeeded() {
|
|
3268
|
+
if (this.entries.length > this.maxEntries) {
|
|
3269
|
+
this.entries.sort((a, b) => {
|
|
3270
|
+
const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
|
|
3271
|
+
const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
|
|
3272
|
+
return bScore - aScore;
|
|
3273
|
+
});
|
|
3274
|
+
this.entries = this.entries.slice(0, this.maxEntries);
|
|
3275
|
+
}
|
|
3276
|
+
}
|
|
3277
|
+
/**
|
|
3278
|
+
* Get count of consecutive failures on the SAME attack vector.
|
|
3279
|
+
*
|
|
3280
|
+
* WHY: Switching from "hydra with wordlist A" to "hydra with wordlist B"
|
|
3281
|
+
* is a legitimate new approach. The consecutive-failure counter should
|
|
3282
|
+
* only trigger when every recent attempt is against the same tool+target
|
|
3283
|
+
* with no variation producing success. This prevents premature vector abandonment.
|
|
3191
3284
|
*/
|
|
3192
3285
|
getConsecutiveFailures() {
|
|
3193
3286
|
let count = 0;
|
|
@@ -3197,6 +3290,26 @@ var WorkingMemory = class {
|
|
|
3197
3290
|
}
|
|
3198
3291
|
return count;
|
|
3199
3292
|
}
|
|
3293
|
+
/**
|
|
3294
|
+
* Get consecutive failures for a specific vector (tool+target).
|
|
3295
|
+
* Returns count of sequential failures where all attempts used the same tool+target.
|
|
3296
|
+
*
|
|
3297
|
+
* TODO: Wire up in tools.ts for per-vector threshold checks alongside global consecutive count.
|
|
3298
|
+
*/
|
|
3299
|
+
getConsecutiveVectorFailures(tool, target) {
|
|
3300
|
+
const lowerTool = tool.toLowerCase();
|
|
3301
|
+
let count = 0;
|
|
3302
|
+
for (let i = this.entries.length - 1; i >= 0; i--) {
|
|
3303
|
+
const e = this.entries[i];
|
|
3304
|
+
if (e.category !== "failure") break;
|
|
3305
|
+
if (e.fingerprint && e.fingerprint.tool === lowerTool && (!target || e.fingerprint.target.includes(target))) {
|
|
3306
|
+
count++;
|
|
3307
|
+
} else {
|
|
3308
|
+
break;
|
|
3309
|
+
}
|
|
3310
|
+
}
|
|
3311
|
+
return count;
|
|
3312
|
+
}
|
|
3200
3313
|
/**
|
|
3201
3314
|
* Format for prompt injection.
|
|
3202
3315
|
*/
|
|
@@ -3207,14 +3320,20 @@ var WorkingMemory = class {
|
|
|
3207
3320
|
const insights = this.entries.filter((e) => e.category === "insight" || e.category === "discovery");
|
|
3208
3321
|
const lines = ["<working-memory>"];
|
|
3209
3322
|
if (failures.length > 0) {
|
|
3210
|
-
lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT):`);
|
|
3323
|
+
lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT EXACT SAME PARAMS):`);
|
|
3211
3324
|
for (const f of failures.slice(-DISPLAY_LIMITS.RECENT_FAILURES)) {
|
|
3212
|
-
|
|
3325
|
+
const fp = f.fingerprint;
|
|
3326
|
+
if (fp) {
|
|
3327
|
+
lines.push(` \u2717 ${formatFingerprint(fp)} \u2192 ${f.content.split("\u2192").pop()?.trim() || ""}`);
|
|
3328
|
+
} else {
|
|
3329
|
+
lines.push(` \u2717 ${f.content}`);
|
|
3330
|
+
}
|
|
3213
3331
|
}
|
|
3332
|
+
lines.push(...this.buildAttackCoverageLines(failures));
|
|
3214
3333
|
}
|
|
3215
3334
|
const consecutiveFails = this.getConsecutiveFailures();
|
|
3216
3335
|
if (consecutiveFails >= MEMORY_LIMITS.CONSECUTIVE_FAIL_THRESHOLD) {
|
|
3217
|
-
lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014
|
|
3336
|
+
lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 consider changing approach or parameters`);
|
|
3218
3337
|
}
|
|
3219
3338
|
if (successes.length > 0) {
|
|
3220
3339
|
lines.push(`\u2705 RECENT SUCCESSES (${successes.length}):`);
|
|
@@ -3231,6 +3350,29 @@ var WorkingMemory = class {
|
|
|
3231
3350
|
lines.push("</working-memory>");
|
|
3232
3351
|
return lines.join("\n");
|
|
3233
3352
|
}
|
|
3353
|
+
/**
|
|
3354
|
+
* Build ATTACK COVERAGE lines grouped by tool+target.
|
|
3355
|
+
* WHY: Shows the LLM which parameter combinations have been tried per vector,
|
|
3356
|
+
* so it can pick genuinely untried variations instead of repeating.
|
|
3357
|
+
*/
|
|
3358
|
+
buildAttackCoverageLines(failures) {
|
|
3359
|
+
const vectorMap = /* @__PURE__ */ new Map();
|
|
3360
|
+
for (const f of failures) {
|
|
3361
|
+
if (f.fingerprint) {
|
|
3362
|
+
const key = `${f.fingerprint.tool}\u2192${f.fingerprint.target}`;
|
|
3363
|
+
const detail = f.fingerprint.wordlist ? f.fingerprint.wordlist.split("/").pop() || "" : f.fingerprint.flags || "default";
|
|
3364
|
+
const existing = vectorMap.get(key) || [];
|
|
3365
|
+
if (!existing.includes(detail)) existing.push(detail);
|
|
3366
|
+
vectorMap.set(key, existing);
|
|
3367
|
+
}
|
|
3368
|
+
}
|
|
3369
|
+
if (vectorMap.size === 0) return [];
|
|
3370
|
+
const lines = [` \u{1F4CA} ATTACK COVERAGE (tried variations):`];
|
|
3371
|
+
for (const [vector, variations] of vectorMap) {
|
|
3372
|
+
lines.push(` ${vector}: tried [${variations.join(", ")}] \u2014 try DIFFERENT params`);
|
|
3373
|
+
}
|
|
3374
|
+
return lines;
|
|
3375
|
+
}
|
|
3234
3376
|
getEntries() {
|
|
3235
3377
|
return [...this.entries];
|
|
3236
3378
|
}
|
|
@@ -3467,6 +3609,25 @@ var DynamicTechniqueLibrary = class {
|
|
|
3467
3609
|
}
|
|
3468
3610
|
}
|
|
3469
3611
|
}
|
|
3612
|
+
/**
|
|
3613
|
+
* Record that a technique failed in practice — decrease confidence.
|
|
3614
|
+
*
|
|
3615
|
+
* WHY: Web search results can contain outdated, wrong, or irrelevant techniques.
|
|
3616
|
+
* If the agent tries a learned technique and it fails, we must degrade its
|
|
3617
|
+
* confidence so the agent stops wasting time on bad advice.
|
|
3618
|
+
* After enough failures the technique is auto-pruned entirely.
|
|
3619
|
+
*/
|
|
3620
|
+
recordFailure(techniqueSubstring) {
|
|
3621
|
+
if (!techniqueSubstring || techniqueSubstring.length < 3) return;
|
|
3622
|
+
const lower = techniqueSubstring.toLowerCase();
|
|
3623
|
+
this.techniques = this.techniques.filter((t) => {
|
|
3624
|
+
if (t.technique.toLowerCase().includes(lower)) {
|
|
3625
|
+
t.confidence -= MEMORY_LIMITS.TECHNIQUE_FAILURE_DECAY;
|
|
3626
|
+
return t.confidence > MEMORY_LIMITS.TECHNIQUE_PRUNE_THRESHOLD;
|
|
3627
|
+
}
|
|
3628
|
+
return true;
|
|
3629
|
+
});
|
|
3630
|
+
}
|
|
3470
3631
|
/**
|
|
3471
3632
|
* Get techniques relevant to a specific service/technology.
|
|
3472
3633
|
*/
|
|
@@ -8924,6 +9085,7 @@ var ToolRegistry = class {
|
|
|
8924
9085
|
this.state.workingMemory.recordSuccess(toolCall.name, command, result2.output || "");
|
|
8925
9086
|
} else {
|
|
8926
9087
|
this.state.workingMemory.recordFailure(toolCall.name, command, result2.error || "Unknown error");
|
|
9088
|
+
this.state.dynamicTechniques.recordFailure(command);
|
|
8927
9089
|
}
|
|
8928
9090
|
if (result2.success && result2.output) {
|
|
8929
9091
|
const structured = autoExtractStructured(toolCall.name, result2.output);
|