pentesting 0.51.1 → 0.52.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +191 -21
- package/package.json +3 -3
package/dist/main.js
CHANGED
|
@@ -180,11 +180,15 @@ var MEMORY_LIMITS = {
|
|
|
180
180
|
WORKING_MEMORY_MAX_ENTRIES: 20,
|
|
181
181
|
/** Maximum events in episodic memory (session timeline) */
|
|
182
182
|
EPISODIC_MEMORY_MAX_EVENTS: 100,
|
|
183
|
-
/** Consecutive failures before
|
|
183
|
+
/** Consecutive failures before prompting to change approach or parameters */
|
|
184
184
|
CONSECUTIVE_FAIL_THRESHOLD: 3,
|
|
185
185
|
/** Maximum learned techniques in DynamicTechniqueLibrary */
|
|
186
186
|
DYNAMIC_TECHNIQUES_MAX: 50,
|
|
187
|
-
/**
|
|
187
|
+
/** Confidence decay per failure for DynamicTechniqueLibrary learned techniques */
|
|
188
|
+
TECHNIQUE_FAILURE_DECAY: 30,
|
|
189
|
+
/** Auto-prune threshold: techniques below this confidence are discarded */
|
|
190
|
+
TECHNIQUE_PRUNE_THRESHOLD: 10,
|
|
191
|
+
/** @deprecated Superseded by fingerprint-based matching in extractFingerprint(). Kept for reference. */
|
|
188
192
|
COMMAND_MATCH_WORDS: 3,
|
|
189
193
|
/** Maximum unverified techniques to show in prompt */
|
|
190
194
|
PROMPT_UNVERIFIED_TECHNIQUES: 10,
|
|
@@ -345,7 +349,7 @@ var ORPHAN_PROCESS_NAMES = [
|
|
|
345
349
|
|
|
346
350
|
// src/shared/constants/agent.ts
|
|
347
351
|
var APP_NAME = "Pentest AI";
|
|
348
|
-
var APP_VERSION = "0.
|
|
352
|
+
var APP_VERSION = "0.52.1";
|
|
349
353
|
var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
|
|
350
354
|
var LLM_ROLES = {
|
|
351
355
|
SYSTEM: "system",
|
|
@@ -3144,6 +3148,57 @@ var AttackGraph = class {
|
|
|
3144
3148
|
// src/shared/utils/agent-memory.ts
|
|
3145
3149
|
import { existsSync as existsSync4, readFileSync as readFileSync3, writeFileSync as writeFileSync4 } from "fs";
|
|
3146
3150
|
import { join as join3 } from "path";
|
|
3151
|
+
/**
 * Reduce a tool invocation to a structured fingerprint so that repeated
 * attempts can be told apart from genuinely new parameter combinations.
 *
 * @param {string} tool - Tool name as reported by the caller. For the generic
 *   runners "run_cmd" / "run_background" the first word of the command
 *   (lower-cased) is used instead, unless that word starts with "-".
 *   A missing tool is treated as "".
 * @param {string} command - Raw command line. Non-string values are treated as "".
 * @returns {{tool: string, target: string, wordlist: string, flags: string, port: string}}
 *   All fields are strings; a field with no match is "".
 *   `wordlist` holds every list-style argument joined with ",".
 *   `flags` holds every recognised flag occurrence joined with " ".
 */
function extractFingerprint(tool, command) {
  // Tolerate missing / non-string input instead of throwing on .toLowerCase() / .match().
  const cmd = typeof command === "string" ? command : "";
  let effectiveTool = String(tool ?? "").toLowerCase();
  if (effectiveTool === "run_cmd" || effectiveTool === "run_background") {
    const [firstWord] = cmd.trim().split(/\s+/);
    if (firstWord && !firstWord.startsWith("-")) {
      effectiveTool = firstWord.toLowerCase();
    }
  }

  // Leftmost match of either: a host following "://" or "@" (kept together with
  // its ":port" suffix), or a bare dotted quad (its ":port" suffix is dropped).
  const targetMatch = cmd.match(
    /(?::\/\/|@)([\w.\-]+(?::\d+)?)|\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d+)?\b/
  );
  const target = targetMatch?.[1] || targetMatch?.[2] || "";

  // Collect EVERY list-style argument, not only the first one. Otherwise
  // "-L users.txt -P rockyou.txt" and "-L users.txt -P darkweb2017.txt" would
  // share the wordlist "users.txt" and be reported as the same attempt.
  // Short flags are case-sensitive; long flags are matched case-insensitively.
  const wordlist = [
    ...cmd.matchAll(/(?:-[wPUL]\s+)([^\s]+)/g),
    ...cmd.matchAll(/(?:--(?:wordlist|passwords?|usernames?)(?:=|\s+))([^\s]+)/gi)
  ].map((m) => m[1]).join(",");

  const portMatch = cmd.match(/(?:-p\s+|--port(?:=|\s+))(\S+)/);
  const port = portMatch?.[1] || "";

  // Global patterns: a flag may legitimately appear more than once
  // (e.g. "nmap -sV -sC"), and each occurrence is part of the fingerprint.
  const flagPatterns = [
    /--level(?:=|\s+)(\S+)/gi,
    // sqlmap
    /--risk(?:=|\s+)(\S+)/gi,
    // sqlmap
    /-s([VSTCAUOPMX]+)/g,
    // nmap scan type
    /--script(?:=|\s+)(\S+)/gi,
    // nmap scripts
    /-[el]\s+(\S+)/gi,
    // hydra login/password single
    /--method(?:=|\s+)(\S+)/gi
    // HTTP method
  ];
  const flags = flagPatterns
    .flatMap((pattern) => [...cmd.matchAll(pattern)].map((m) => m[0].trim()))
    .filter(Boolean)
    .join(" ");

  return { tool: effectiveTool, target, wordlist, flags, port };
}
|
|
3191
|
+
/**
 * Decide whether two fingerprints describe the same attempt.
 *
 * Two fingerprints match only when tool, target, wordlist, flags and port are
 * all strictly equal; any single differing field makes them distinct.
 *
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} a
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} b
 * @returns {boolean} true when every compared field is identical.
 */
function fingerprintsMatch(a, b) {
  const comparedFields = ["tool", "target", "wordlist", "flags", "port"];
  return comparedFields.every((field) => a[field] === b[field]);
}
|
|
3194
|
+
/**
 * Render a fingerprint as a short single-line label.
 *
 * The tool name always comes first; target, wordlist (file name only, i.e. the
 * text after the last "/"), port and flags are appended only when non-empty.
 *
 * @param {{tool: string, target: string, wordlist: string, flags: string, port: string}} fp
 * @returns {string} Space-separated label.
 */
function formatFingerprint(fp) {
  const { tool, target, wordlist, port, flags } = fp;
  const optionalSegments = [
    target && `\u2192${target}`,
    wordlist && `[wordlist:${wordlist.split("/").pop()}]`,
    port && `[port:${port}]`,
    flags && `[${flags}]`
  ];
  // The tool is emitted unconditionally; only the optional segments are filtered.
  return [tool, ...optionalSegments.filter(Boolean)].join(" ");
}
|
|
3147
3202
|
var WorkingMemory = class {
|
|
3148
3203
|
entries = [];
|
|
3149
3204
|
maxEntries = MEMORY_LIMITS.WORKING_MEMORY_MAX_ENTRIES;
|
|
@@ -3156,20 +3211,29 @@ var WorkingMemory = class {
|
|
|
3156
3211
|
context,
|
|
3157
3212
|
importance
|
|
3158
3213
|
});
|
|
3159
|
-
|
|
3160
|
-
this.entries.sort((a, b) => {
|
|
3161
|
-
const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
|
|
3162
|
-
const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
|
|
3163
|
-
return bScore - aScore;
|
|
3164
|
-
});
|
|
3165
|
-
this.entries = this.entries.slice(0, this.maxEntries);
|
|
3166
|
-
}
|
|
3214
|
+
this.pruneIfNeeded();
|
|
3167
3215
|
}
|
|
3168
3216
|
/**
|
|
3169
|
-
* Record a failed attempt
|
|
3217
|
+
* Record a failed attempt with structured fingerprint.
|
|
3218
|
+
*
|
|
3219
|
+
* WHY: Fingerprint-based recording ensures that "hydra with rockyou.txt"
|
|
3220
|
+
* and "hydra with darkweb2017.txt" are stored as distinct attempts.
|
|
3221
|
+
* The LLM can see what parameter combinations have been tried and pick new ones.
|
|
3170
3222
|
*/
|
|
3171
3223
|
recordFailure(tool, command, error) {
  // Derive the structured fingerprint first; its label is what the prompt shows.
  const fingerprint = extractFingerprint(tool, command);
  const label = formatFingerprint(fingerprint);

  // Evaluation order (id, then timestamp, then error preview) is kept on purpose.
  const id = `wm_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
  const timestamp = Date.now();
  const errorPreview = error.slice(0, DISPLAY_LIMITS.ERROR_PREVIEW);

  this.entries.push({
    id,
    timestamp,
    category: "failure",
    content: `FAILED: ${label} \u2192 ${errorPreview}`,
    context: { tool, command },
    importance: 0.8,
    fingerprint
  });

  // Keep the memory within its size limit after every insertion.
  this.pruneIfNeeded();
}
|
|
3174
3238
|
/**
|
|
3175
3239
|
* Record a successful action for reference.
|
|
@@ -3178,16 +3242,53 @@ var WorkingMemory = class {
|
|
|
3178
3242
|
this.add("success", `SUCCESS: ${tool} \u2192 ${command.slice(0, DISPLAY_LIMITS.COMMAND_PREVIEW)}`, 0.6, { tool, result: result2.slice(0, DISPLAY_LIMITS.OUTPUT_SUMMARY) });
|
|
3179
3243
|
}
|
|
3180
3244
|
/**
|
|
3181
|
-
* Check if
|
|
3245
|
+
* Check if the EXACT same attack attempt (all parameters identical) has failed.
|
|
3246
|
+
*
|
|
3247
|
+
* WHY: Only blocks truly duplicate attempts. Different wordlists, flags, or
|
|
3248
|
+
* ports are treated as new attacks and allowed through.
|
|
3182
3249
|
*/
|
|
3183
|
-
hasFailedBefore(command) {
|
|
3184
|
-
const
|
|
3250
|
+
hasFailedBefore(command, tool) {
|
|
3251
|
+
const effectiveTool = tool || command.split(/\s+/)[0] || "";
|
|
3252
|
+
const fp = extractFingerprint(effectiveTool, command);
|
|
3185
3253
|
return this.entries.find(
|
|
3186
|
-
(e) => e.category === "failure" && e.
|
|
3254
|
+
(e) => e.category === "failure" && e.fingerprint != null && fingerprintsMatch(e.fingerprint, fp)
|
|
3187
3255
|
);
|
|
3188
3256
|
}
|
|
3189
3257
|
/**
|
|
3190
|
-
* Get
|
|
3258
|
+
* Get all previous attempts (success & failure) for a specific tool+target vector.
|
|
3259
|
+
* Returns the full history so the LLM can see what parameter combinations were tried.
|
|
3260
|
+
*
|
|
3261
|
+
* TODO: Wire up in tools.ts to pass vector history to strategist for smarter retries.
|
|
3262
|
+
*/
|
|
3263
|
+
getAttemptsForVector(tool, target) {
|
|
3264
|
+
const lowerTool = tool.toLowerCase();
|
|
3265
|
+
return this.entries.filter((e) => {
|
|
3266
|
+
if (e.fingerprint) {
|
|
3267
|
+
const matchTool = e.fingerprint.tool === lowerTool;
|
|
3268
|
+
const matchTarget = !target || e.fingerprint.target.includes(target);
|
|
3269
|
+
return matchTool && matchTarget;
|
|
3270
|
+
}
|
|
3271
|
+
return String(e.context.tool || "").toLowerCase() === lowerTool;
|
|
3272
|
+
});
|
|
3273
|
+
}
|
|
3274
|
+
/** Internal prune helper (used by both add() and recordFailure()) */
|
|
3275
|
+
pruneIfNeeded() {
|
|
3276
|
+
if (this.entries.length > this.maxEntries) {
|
|
3277
|
+
this.entries.sort((a, b) => {
|
|
3278
|
+
const aScore = a.importance * 0.7 + (1 - (Date.now() - a.timestamp) / 6e5) * 0.3;
|
|
3279
|
+
const bScore = b.importance * 0.7 + (1 - (Date.now() - b.timestamp) / 6e5) * 0.3;
|
|
3280
|
+
return bScore - aScore;
|
|
3281
|
+
});
|
|
3282
|
+
this.entries = this.entries.slice(0, this.maxEntries);
|
|
3283
|
+
}
|
|
3284
|
+
}
|
|
3285
|
+
/**
|
|
3286
|
+
* Get count of consecutive failures on the SAME attack vector.
|
|
3287
|
+
*
|
|
3288
|
+
* WHY: Switching from "hydra with wordlist A" to "hydra with wordlist B"
|
|
3289
|
+
* is a legitimate new approach. The consecutive-failure counter should
|
|
3290
|
+
* only trigger when every recent attempt is against the same tool+target
|
|
3291
|
+
* with no variation producing success. This prevents premature vector abandonment.
|
|
3191
3292
|
*/
|
|
3192
3293
|
getConsecutiveFailures() {
|
|
3193
3294
|
let count = 0;
|
|
@@ -3197,6 +3298,26 @@ var WorkingMemory = class {
|
|
|
3197
3298
|
}
|
|
3198
3299
|
return count;
|
|
3199
3300
|
}
|
|
3301
|
+
/**
|
|
3302
|
+
* Get consecutive failures for a specific vector (tool+target).
|
|
3303
|
+
* Returns count of sequential failures where all attempts used the same tool+target.
|
|
3304
|
+
*
|
|
3305
|
+
* TODO: Wire up in tools.ts for per-vector threshold checks alongside global consecutive count.
|
|
3306
|
+
*/
|
|
3307
|
+
getConsecutiveVectorFailures(tool, target) {
|
|
3308
|
+
const lowerTool = tool.toLowerCase();
|
|
3309
|
+
let count = 0;
|
|
3310
|
+
for (let i = this.entries.length - 1; i >= 0; i--) {
|
|
3311
|
+
const e = this.entries[i];
|
|
3312
|
+
if (e.category !== "failure") break;
|
|
3313
|
+
if (e.fingerprint && e.fingerprint.tool === lowerTool && (!target || e.fingerprint.target.includes(target))) {
|
|
3314
|
+
count++;
|
|
3315
|
+
} else {
|
|
3316
|
+
break;
|
|
3317
|
+
}
|
|
3318
|
+
}
|
|
3319
|
+
return count;
|
|
3320
|
+
}
|
|
3200
3321
|
/**
|
|
3201
3322
|
* Format for prompt injection.
|
|
3202
3323
|
*/
|
|
@@ -3207,14 +3328,20 @@ var WorkingMemory = class {
|
|
|
3207
3328
|
const insights = this.entries.filter((e) => e.category === "insight" || e.category === "discovery");
|
|
3208
3329
|
const lines = ["<working-memory>"];
|
|
3209
3330
|
if (failures.length > 0) {
|
|
3210
|
-
lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT):`);
|
|
3331
|
+
lines.push(`\u26A0\uFE0F FAILED ATTEMPTS (${failures.length} \u2014 DO NOT REPEAT EXACT SAME PARAMS):`);
|
|
3211
3332
|
for (const f of failures.slice(-DISPLAY_LIMITS.RECENT_FAILURES)) {
|
|
3212
|
-
|
|
3333
|
+
const fp = f.fingerprint;
|
|
3334
|
+
if (fp) {
|
|
3335
|
+
lines.push(` \u2717 ${formatFingerprint(fp)} \u2192 ${f.content.split("\u2192").pop()?.trim() || ""}`);
|
|
3336
|
+
} else {
|
|
3337
|
+
lines.push(` \u2717 ${f.content}`);
|
|
3338
|
+
}
|
|
3213
3339
|
}
|
|
3340
|
+
lines.push(...this.buildAttackCoverageLines(failures));
|
|
3214
3341
|
}
|
|
3215
3342
|
const consecutiveFails = this.getConsecutiveFailures();
|
|
3216
3343
|
if (consecutiveFails >= MEMORY_LIMITS.CONSECUTIVE_FAIL_THRESHOLD) {
|
|
3217
|
-
lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014
|
|
3344
|
+
lines.push(`\u{1F534} ${consecutiveFails} CONSECUTIVE FAILURES \u2014 consider changing approach or parameters`);
|
|
3218
3345
|
}
|
|
3219
3346
|
if (successes.length > 0) {
|
|
3220
3347
|
lines.push(`\u2705 RECENT SUCCESSES (${successes.length}):`);
|
|
@@ -3231,6 +3358,29 @@ var WorkingMemory = class {
|
|
|
3231
3358
|
lines.push("</working-memory>");
|
|
3232
3359
|
return lines.join("\n");
|
|
3233
3360
|
}
|
|
3361
|
+
/**
|
|
3362
|
+
* Build ATTACK COVERAGE lines grouped by tool+target.
|
|
3363
|
+
* WHY: Shows the LLM which parameter combinations have been tried per vector,
|
|
3364
|
+
* so it can pick genuinely untried variations instead of repeating.
|
|
3365
|
+
*/
|
|
3366
|
+
buildAttackCoverageLines(failures) {
|
|
3367
|
+
const vectorMap = /* @__PURE__ */ new Map();
|
|
3368
|
+
for (const f of failures) {
|
|
3369
|
+
if (f.fingerprint) {
|
|
3370
|
+
const key = `${f.fingerprint.tool}\u2192${f.fingerprint.target}`;
|
|
3371
|
+
const detail = f.fingerprint.wordlist ? f.fingerprint.wordlist.split("/").pop() || "" : f.fingerprint.flags || "default";
|
|
3372
|
+
const existing = vectorMap.get(key) || [];
|
|
3373
|
+
if (!existing.includes(detail)) existing.push(detail);
|
|
3374
|
+
vectorMap.set(key, existing);
|
|
3375
|
+
}
|
|
3376
|
+
}
|
|
3377
|
+
if (vectorMap.size === 0) return [];
|
|
3378
|
+
const lines = [` \u{1F4CA} ATTACK COVERAGE (tried variations):`];
|
|
3379
|
+
for (const [vector, variations] of vectorMap) {
|
|
3380
|
+
lines.push(` ${vector}: tried [${variations.join(", ")}] \u2014 try DIFFERENT params`);
|
|
3381
|
+
}
|
|
3382
|
+
return lines;
|
|
3383
|
+
}
|
|
3234
3384
|
getEntries() {
|
|
3235
3385
|
return [...this.entries];
|
|
3236
3386
|
}
|
|
@@ -3467,6 +3617,25 @@ var DynamicTechniqueLibrary = class {
|
|
|
3467
3617
|
}
|
|
3468
3618
|
}
|
|
3469
3619
|
}
|
|
3620
|
+
/**
|
|
3621
|
+
* Record that a technique failed in practice — decrease confidence.
|
|
3622
|
+
*
|
|
3623
|
+
* WHY: Web search results can contain outdated, wrong, or irrelevant techniques.
|
|
3624
|
+
* If the agent tries a learned technique and it fails, we must degrade its
|
|
3625
|
+
* confidence so the agent stops wasting time on bad advice.
|
|
3626
|
+
* After enough failures the technique is auto-pruned entirely.
|
|
3627
|
+
*/
|
|
3628
|
+
recordFailure(techniqueSubstring) {
|
|
3629
|
+
if (!techniqueSubstring || techniqueSubstring.length < 3) return;
|
|
3630
|
+
const lower = techniqueSubstring.toLowerCase();
|
|
3631
|
+
this.techniques = this.techniques.filter((t) => {
|
|
3632
|
+
if (t.technique.toLowerCase().includes(lower)) {
|
|
3633
|
+
t.confidence -= MEMORY_LIMITS.TECHNIQUE_FAILURE_DECAY;
|
|
3634
|
+
return t.confidence > MEMORY_LIMITS.TECHNIQUE_PRUNE_THRESHOLD;
|
|
3635
|
+
}
|
|
3636
|
+
return true;
|
|
3637
|
+
});
|
|
3638
|
+
}
|
|
3470
3639
|
/**
|
|
3471
3640
|
* Get techniques relevant to a specific service/technology.
|
|
3472
3641
|
*/
|
|
@@ -8924,6 +9093,7 @@ var ToolRegistry = class {
|
|
|
8924
9093
|
this.state.workingMemory.recordSuccess(toolCall.name, command, result2.output || "");
|
|
8925
9094
|
} else {
|
|
8926
9095
|
this.state.workingMemory.recordFailure(toolCall.name, command, result2.error || "Unknown error");
|
|
9096
|
+
this.state.dynamicTechniques.recordFailure(command);
|
|
8927
9097
|
}
|
|
8928
9098
|
if (result2.success && result2.output) {
|
|
8929
9099
|
const structured = autoExtractStructured(toolCall.name, result2.output);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pentesting",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.52.1",
|
|
4
4
|
"description": "Autonomous Penetration Testing AI Agent",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/main.js",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"dev:tsx": "tsx src/platform/tui/main.tsx",
|
|
20
20
|
"build": "tsup",
|
|
21
21
|
"start": "node dist/main.js",
|
|
22
|
-
"test": "mkdir -p
|
|
22
|
+
"test": "mkdir -p .pentesting && TMPDIR=.pentesting vitest run",
|
|
23
23
|
"test:watch": "vitest",
|
|
24
24
|
"lint": "tsc --noEmit",
|
|
25
25
|
"prepublishOnly": "npm run build",
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"release:minor": "npm version minor && npm run build && npm run publish:token",
|
|
30
30
|
"release:major": "npm version major && npm run build && npm run publish:token",
|
|
31
31
|
"release:docker": "docker buildx build --platform linux/amd64,linux/arm64 -t agnusdei1207/pentesting:latest --push . && docker system prune -af",
|
|
32
|
-
"check": "
|
|
32
|
+
"check": "npm run test && npm run build && npm run release:docker && bash test.sh"
|
|
33
33
|
},
|
|
34
34
|
"repository": {
|
|
35
35
|
"type": "git",
|