pentesting 0.51.0 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +183 -22
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -180,11 +180,15 @@ var MEMORY_LIMITS = {
|
|
|
180
180
|
WORKING_MEMORY_MAX_ENTRIES: 20,
|
|
181
181
|
/** Maximum events in episodic memory (session timeline) */
|
|
182
182
|
EPISODIC_MEMORY_MAX_EVENTS: 100,
|
|
183
|
-
/** Consecutive failures before
|
|
183
|
+
/** Consecutive failures before prompting to change approach or parameters */
|
|
184
184
|
CONSECUTIVE_FAIL_THRESHOLD: 3,
|
|
185
185
|
/** Maximum learned techniques in DynamicTechniqueLibrary */
|
|
186
186
|
DYNAMIC_TECHNIQUES_MAX: 50,
|
|
187
|
-
/**
|
|
187
|
+
/** Confidence decay per failure for DynamicTechniqueLibrary learned techniques */
|
|
188
|
+
TECHNIQUE_FAILURE_DECAY: 30,
|
|
189
|
+
/** Auto-prune threshold: techniques below this confidence are discarded */
|
|
190
|
+
TECHNIQUE_PRUNE_THRESHOLD: 10,
|
|
191
|
+
/** @deprecated Superseded by fingerprint-based matching in extractFingerprint(). Kept for reference. */
|
|
188
192
|
COMMAND_MATCH_WORDS: 3,
|
|
189
193
|
/** Maximum unverified techniques to show in prompt */
|
|
190
194
|
PROMPT_UNVERIFIED_TECHNIQUES: 10,
|
|
@@ -345,7 +349,7 @@ var ORPHAN_PROCESS_NAMES = [
|
|
|
345
349
|
|
|
346
350
|
// src/shared/constants/agent.ts
|
|
347
351
|
var APP_NAME = "Pentest AI";
|
|
348
|
-
var APP_VERSION = "0.
|
|
352
|
+
var APP_VERSION = "0.52.0";
|
|
349
353
|
var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
|
|
350
354
|
var LLM_ROLES = {
|
|
351
355
|
SYSTEM: "system",
|
|
@@ -767,7 +771,6 @@ var EVENT_TYPES = {
|
|
|
767
771
|
RETRY: "retry",
|
|
768
772
|
USAGE_UPDATE: "usage_update",
|
|
769
773
|
INPUT_REQUEST: "input_request",
|
|
770
|
-
LOG: "log",
|
|
771
774
|
FLAG_FOUND: "flag_found"
|
|
772
775
|
};
|
|
773
776
|
var UI_COMMANDS = {
|
|
@@ -3145,6 +3148,49 @@ var AttackGraph = class {
|
|
|
3145
3148
|
// src/shared/utils/agent-memory.ts
|
|
3146
3149
|
import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync as writeFileSync4 } from "fs";
|
|
3147
3150
|
import { join as join3 } from "path";
|
|
3151
|
+
/**
 * Build a structured fingerprint of an attack attempt from its command line.
 *
 * The fingerprint holds the pieces that make two attempts meaningfully
 * different: tool name, target host, wordlist path, port and a handful of
 * tool-specific flags. Two attempts are considered identical only when all
 * of these fields are equal (see fingerprintsMatch).
 *
 * @param {string} tool - Tool name; lower-cased in the result. Nullish is treated as "".
 * @param {string} command - Raw command line. Nullish/empty is treated as "".
 * @returns {{tool: string, target: string, wordlist: string, flags: string, port: string}}
 *   Every field is a string; fields that could not be extracted are "".
 */
function extractFingerprint(tool, command) {
  const cmd = command || "";

  // Host that follows "://" or "@" (optionally with ":port"), otherwise a bare IPv4.
  const targetMatch = cmd.match(
    /(?::\/\/|@)([\w.\-]+(?::\d+)?)|\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?\b/
  );
  const target = targetMatch?.[1] || targetMatch?.[2] || "";

  // Short wordlist flags must be matched case-SENSITIVELY: with a
  // case-insensitive match, "-p <port>" and "-l <login>" (both parsed
  // separately below) are picked up as the wordlist, and because the leftmost
  // match wins they hide the real "-P <file>" / "-w <file>" that follows.
  // Only the long options are case-insensitive.
  const shortWordlistMatch = cmd.match(/-[wPUL]\s+(\S+)/);
  const longWordlistMatch = cmd.match(
    /--(?:wordlist|passwords?|usernames?)(?:=|\s+)(\S+)/i
  );
  // Keep "first occurrence in the command wins" semantics across both forms.
  const [wordlistMatch] = [shortWordlistMatch, longWordlistMatch]
    .filter(Boolean)
    .sort((a, b) => a.index - b.index);
  const wordlist = wordlistMatch?.[1] || "";

  const portMatch = cmd.match(/(?:-p\s+|--port(?:=|\s+))(\S+)/);
  const port = portMatch?.[1] || "";

  const flagPatterns = [
    /--level(?:=|\s+)(\S+)/i,
    // sqlmap
    /--risk(?:=|\s+)(\S+)/i,
    // sqlmap
    /-s([VSTCAUOPMX]+)/,
    // nmap scan type
    /--script(?:=|\s+)(\S+)/i,
    // nmap scripts
    /-[el]\s+(\S+)/i,
    // hydra login/password single
    /--method(?:=|\s+)(\S+)/i
    // HTTP method
  ];
  // Each pattern contributes its whole matched text (flag + value), in the
  // fixed order of flagPatterns, so the joined string is stable for comparison.
  const flags = flagPatterns
    .map((pattern) => cmd.match(pattern)?.[0]?.trim() || "")
    .filter(Boolean)
    .join(" ");

  return { tool: String(tool ?? "").toLowerCase(), target, wordlist, flags, port };
}
|
|
3183
|
+
/**
 * Tell whether two fingerprints describe the exact same attempt.
 *
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} a
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} b
 * @returns {boolean} true only when all five fields are strictly equal.
 */
function fingerprintsMatch(a, b) {
  // Same fields, same order as the fingerprint equality contract.
  const comparedFields = ["tool", "target", "wordlist", "flags", "port"];
  return comparedFields.every((field) => a[field] === b[field]);
}
|
|
3186
|
+
/**
 * Render a fingerprint as a compact one-line label.
 *
 * The tool name always comes first; target, wordlist (file name only), port
 * and flags are appended, in that order, only when they are non-empty.
 *
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} fp
 * @returns {string} Space-separated label.
 */
function formatFingerprint(fp) {
  const { tool, target, wordlist, port, flags } = fp;
  const optionalSegments = [
    target ? `\u2192${target}` : "",
    // Only the last path component of the wordlist is shown.
    wordlist ? `[wordlist:${wordlist.split("/").pop()}]` : "",
    port ? `[port:${port}]` : "",
    flags ? `[${flags}]` : ""
  ];
  // The tool slot is kept even when empty; only optional segments are dropped.
  return [tool, ...optionalSegments.filter(Boolean)].join(" ");
}
|
|
3148
3194
|
var WorkingMemory = class {
|
|
3149
3195
|
entries = [];
|
|
3150
3196
|
maxEntries = MEMORY_LIMITS.WORKING_MEMORY_MAX_ENTRIES;
|
|
@@ -3157,20 +3203,29 @@ var WorkingMemory = class {
|
|
|
3157
3203
|
context,
|
|
3158
3204
|
importance
|
|
3159
3205
|
});
|
|
3160
|
-
|
|
3161
|
-
this.entries.sort((a, b) => {
|
|
3162
|
-
const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
|
|
3163
|
-
const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
|
|
3164
|
-
return bScore - aScore;
|
|
3165
|
-
});
|
|
3166
|
-
this.entries = this.entries.slice(0, this.maxEntries);
|
|
3167
|
-
}
|
|
3206
|
+
this.pruneIfNeeded();
|
|
3168
3207
|
}
|
|
3169
3208
|
/**
|
|
3170
|
-
* Record a failed attempt
|
|
3209
|
+
* Record a failed attempt with structured fingerprint.
|
|
3210
|
+
*
|
|
3211
|
+
* WHY: Fingerprint-based recording ensures that "hydra with rockyou.txt"
|
|
3212
|
+
* and "hydra with darkweb2017.txt" are stored as distinct attempts.
|
|
3213
|
+
* The LLM can see what parameter combinations have been tried and pick new ones.
|
|
3171
3214
|
*/
|
|
3172
3215
|
recordFailure(tool, command, error) {
  // The fingerprint is stored on the entry so later lookups can compare
  // attempts field by field; its formatted label is what goes into `content`.
  const fingerprint = extractFingerprint(tool, command);
  const label = formatFingerprint(fingerprint);
  this.entries.push({
    id: `wm_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
    timestamp: Date.now(),
    category: "failure",
    // Error text is cut to DISPLAY_LIMITS.ERROR_PREVIEW characters.
    content: `FAILED: ${label} \u2192 ${error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW)}`,
    context: { tool, command },
    importance: 0.8,
    fingerprint
  });
  this.pruneIfNeeded();
}
|
|
3175
3230
|
/**
|
|
3176
3231
|
* Record a successful action for reference.
|
|
@@ -3179,16 +3234,53 @@ var WorkingMemory = class {
|
|
|
3179
3234
|
this.add("success", `SUCCESS: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}`, 0.6, { tool, result: result2.slice(0, DISPLAY_LIMITS.OUTPUT_SUMMARY) });
|
|
3180
3235
|
}
|
|
3181
3236
|
/**
|
|
3182
|
-
* Check if
|
|
3237
|
+
* Check if the EXACT same attack attempt (all parameters identical) has failed.
|
|
3238
|
+
*
|
|
3239
|
+
* WHY: Only blocks truly duplicate attempts. Different wordlists, flags, or
|
|
3240
|
+
* ports are treated as new attacks and allowed through.
|
|
3183
3241
|
*/
|
|
3184
|
-
hasFailedBefore(command) {
|
|
3185
|
-
const
|
|
3242
|
+
hasFailedBefore(command, tool) {
|
|
3243
|
+
const effectiveTool = tool || command.split(/\s+/)[0] || "";
|
|
3244
|
+
const fp = extractFingerprint(effectiveTool, command);
|
|
3186
3245
|
return this.entries.find(
|
|
3187
|
-
(e) => e.category === "failure" && e.
|
|
3246
|
+
(e) => e.category === "failure" && e.fingerprint != null && fingerprintsMatch(e.fingerprint, fp)
|
|
3188
3247
|
);
|
|
3189
3248
|
}
|
|
3190
3249
|
/**
|
|
3191
|
-
* Get
|
|
3250
|
+
* Get all previous attempts (success & failure) for a specific tool+target vector.
|
|
3251
|
+
* Returns the full history so the LLM can see what parameter combinations were tried.
|
|
3252
|
+
*
|
|
3253
|
+
* TODO: Wire up in tools.ts to pass vector history to strategist for smarter retries.
|
|
3254
|
+
*/
|
|
3255
|
+
getAttemptsForVector(tool, target) {
|
|
3256
|
+
const lowerTool = tool.toLowerCase();
|
|
3257
|
+
return this.entries.filter((e) => {
|
|
3258
|
+
if (e.fingerprint) {
|
|
3259
|
+
const matchTool = e.fingerprint.tool === lowerTool;
|
|
3260
|
+
const matchTarget = !target || e.fingerprint.target.includes(target);
|
|
3261
|
+
return matchTool && matchTarget;
|
|
3262
|
+
}
|
|
3263
|
+
return String(e.context.tool || "").toLowerCase() === lowerTool;
|
|
3264
|
+
});
|
|
3265
|
+
}
|
|
3266
|
+
/** Internal prune helper (used by both add() and recordFailure()) */
|
|
3267
|
+
pruneIfNeeded() {
|
|
3268
|
+
if (this.entries.length > this.maxEntries) {
|
|
3269
|
+
this.entries.sort((a, b) => {
|
|
3270
|
+
const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
|
|
3271
|
+
const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
|
|
3272
|
+
return bScore - aScore;
|
|
3273
|
+
});
|
|
3274
|
+
this.entries = this.entries.slice(0, this.maxEntries);
|
|
3275
|
+
}
|
|
3276
|
+
}
|
|
3277
|
+
/**
|
|
3278
|
+
* Get count of consecutive failures on the SAME attack vector.
|
|
3279
|
+
*
|
|
3280
|
+
* WHY: Switching from "hydra with wordlist A" to "hydra with wordlist B"
|
|
3281
|
+
* is a legitimate new approach. The consecutive-failure counter should
|
|
3282
|
+
* only trigger when every recent attempt is against the same tool+target
|
|
3283
|
+
* with no variation producing success. This prevents premature vector abandonment.
|
|
3192
3284
|
*/
|
|
3193
3285
|
getConsecutiveFailures() {
|
|
3194
3286
|
let count = 0;
|
|
@@ -3198,6 +3290,26 @@ var WorkingMemory = class {
|
|
|
3198
3290
|
}
|
|
3199
3291
|
return count;
|
|
3200
3292
|
}
|
|
3293
|
+
/**
|
|
3294
|
+
* Get consecutive failures for a specific vector (tool+target).
|
|
3295
|
+
* Returns count of sequential failures where all attempts used the same tool+target.
|
|
3296
|
+
*
|
|
3297
|
+
* TODO: Wire up in tools.ts for per-vector threshold checks alongside global consecutive count.
|
|
3298
|
+
*/
|
|
3299
|
+
getConsecutiveVectorFailures(tool, target) {
|
|
3300
|
+
const lowerTool = tool.toLowerCase();
|
|
3301
|
+
let count = 0;
|
|
3302
|
+
for (let i = this.entries.length - 1; i >= 0; i--) {
|
|
3303
|
+
const e = this.entries[i];
|
|
3304
|
+
if (e.category !== "failure") break;
|
|
3305
|
+
if (e.fingerprint && e.fingerprint.tool === lowerTool && (!target || e.fingerprint.target.includes(target))) {
|
|
3306
|
+
count++;
|
|
3307
|
+
} else {
|
|
3308
|
+
break;
|
|
3309
|
+
}
|
|
3310
|
+
}
|
|
3311
|
+
return count;
|
|
3312
|
+
}
|
|
3201
3313
|
/**
|
|
3202
3314
|
* Format for prompt injection.
|
|
3203
3315
|
*/
|
|
@@ -3208,14 +3320,20 @@ var WorkingMemory = class {
|
|
|
3208
3320
|
const insights = this.entries.filter((e) => e.category === "insight" || e.category === "discovery");
|
|
3209
3321
|
const lines = ["<working-memory>"];
|
|
3210
3322
|
if (failures.length > 0) {
|
|
3211
|
-
lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT):`);
|
|
3323
|
+
lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT EXACT SAME PARAMS):`);
|
|
3212
3324
|
for (const f of failures.slice(-DISPLAY_LIMITS.RECENT_FAILURES)) {
|
|
3213
|
-
|
|
3325
|
+
const fp = f.fingerprint;
|
|
3326
|
+
if (fp) {
|
|
3327
|
+
lines.push(` \u2717 ${formatFingerprint(fp)} \u2192 ${f.content.split("\u2192").pop()?.trim() || ""}`);
|
|
3328
|
+
} else {
|
|
3329
|
+
lines.push(` \u2717 ${f.content}`);
|
|
3330
|
+
}
|
|
3214
3331
|
}
|
|
3332
|
+
lines.push(...this.buildAttackCoverageLines(failures));
|
|
3215
3333
|
}
|
|
3216
3334
|
const consecutiveFails = this.getConsecutiveFailures();
|
|
3217
3335
|
if (consecutiveFails >= MEMORY_LIMITS.CONSECUTIVE_FAIL_THRESHOLD) {
|
|
3218
|
-
lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014
|
|
3336
|
+
lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 consider changing approach or parameters`);
|
|
3219
3337
|
}
|
|
3220
3338
|
if (successes.length > 0) {
|
|
3221
3339
|
lines.push(`\u2705 RECENT SUCCESSES (${successes.length}):`);
|
|
@@ -3232,6 +3350,29 @@ var WorkingMemory = class {
|
|
|
3232
3350
|
lines.push("</working-memory>");
|
|
3233
3351
|
return lines.join("\n");
|
|
3234
3352
|
}
|
|
3353
|
+
/**
|
|
3354
|
+
* Build ATTACK COVERAGE lines grouped by tool+target.
|
|
3355
|
+
* WHY: Shows the LLM which parameter combinations have been tried per vector,
|
|
3356
|
+
* so it can pick genuinely untried variations instead of repeating.
|
|
3357
|
+
*/
|
|
3358
|
+
buildAttackCoverageLines(failures) {
|
|
3359
|
+
const vectorMap = /* @__PURE__ */ new Map();
|
|
3360
|
+
for (const f of failures) {
|
|
3361
|
+
if (f.fingerprint) {
|
|
3362
|
+
const key = `${f.fingerprint.tool}\u2192${f.fingerprint.target}`;
|
|
3363
|
+
const detail = f.fingerprint.wordlist ? f.fingerprint.wordlist.split("/").pop() || "" : f.fingerprint.flags || "default";
|
|
3364
|
+
const existing = vectorMap.get(key) || [];
|
|
3365
|
+
if (!existing.includes(detail)) existing.push(detail);
|
|
3366
|
+
vectorMap.set(key, existing);
|
|
3367
|
+
}
|
|
3368
|
+
}
|
|
3369
|
+
if (vectorMap.size === 0) return [];
|
|
3370
|
+
const lines = [` \u{1F4CA} ATTACK COVERAGE (tried variations):`];
|
|
3371
|
+
for (const [vector, variations] of vectorMap) {
|
|
3372
|
+
lines.push(` ${vector}: tried [${variations.join(", ")}] \u2014 try DIFFERENT params`);
|
|
3373
|
+
}
|
|
3374
|
+
return lines;
|
|
3375
|
+
}
|
|
3235
3376
|
getEntries() {
|
|
3236
3377
|
return [...this.entries];
|
|
3237
3378
|
}
|
|
@@ -3468,6 +3609,25 @@ var DynamicTechniqueLibrary = class {
|
|
|
3468
3609
|
}
|
|
3469
3610
|
}
|
|
3470
3611
|
}
|
|
3612
|
+
/**
|
|
3613
|
+
* Record that a technique failed in practice — decrease confidence.
|
|
3614
|
+
*
|
|
3615
|
+
* WHY: Web search results can contain outdated, wrong, or irrelevant techniques.
|
|
3616
|
+
* If the agent tries a learned technique and it fails, we must degrade its
|
|
3617
|
+
* confidence so the agent stops wasting time on bad advice.
|
|
3618
|
+
* After enough failures the technique is auto-pruned entirely.
|
|
3619
|
+
*/
|
|
3620
|
+
recordFailure(techniqueSubstring) {
|
|
3621
|
+
if (!techniqueSubstring || techniqueSubstring.length < 3) return;
|
|
3622
|
+
const lower = techniqueSubstring.toLowerCase();
|
|
3623
|
+
this.techniques = this.techniques.filter((t) => {
|
|
3624
|
+
if (t.technique.toLowerCase().includes(lower)) {
|
|
3625
|
+
t.confidence -= MEMORY_LIMITS.TECHNIQUE_FAILURE_DECAY;
|
|
3626
|
+
return t.confidence > MEMORY_LIMITS.TECHNIQUE_PRUNE_THRESHOLD;
|
|
3627
|
+
}
|
|
3628
|
+
return true;
|
|
3629
|
+
});
|
|
3630
|
+
}
|
|
3471
3631
|
/**
|
|
3472
3632
|
* Get techniques relevant to a specific service/technology.
|
|
3473
3633
|
*/
|
|
@@ -8925,6 +9085,7 @@ var ToolRegistry = class {
|
|
|
8925
9085
|
this.state.workingMemory.recordSuccess(toolCall.name, command, result2.output || "");
|
|
8926
9086
|
} else {
|
|
8927
9087
|
this.state.workingMemory.recordFailure(toolCall.name, command, result2.error || "Unknown error");
|
|
9088
|
+
this.state.dynamicTechniques.recordFailure(command);
|
|
8928
9089
|
}
|
|
8929
9090
|
if (result2.success && result2.output) {
|
|
8930
9091
|
const structured = autoExtractStructured(toolCall.name, result2.output);
|