pentesting 0.54.0 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +114 -41
- package/dist/prompts/base.md +126 -15
- package/dist/prompts/exploit.md +3 -14
- package/dist/prompts/offensive-playbook.md +9 -32
- package/dist/prompts/orchestrator.md +6 -11
- package/dist/prompts/strategist-system.md +36 -0
- package/dist/prompts/strategy.md +6 -6
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -342,7 +342,7 @@ var ORPHAN_PROCESS_NAMES = [
|
|
|
342
342
|
|
|
343
343
|
// src/shared/constants/agent.ts
|
|
344
344
|
var APP_NAME = "Pentest AI";
|
|
345
|
-
var APP_VERSION = "0.54.0";
|
|
345
|
+
var APP_VERSION = "0.55.0";
|
|
346
346
|
var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
|
|
347
347
|
var LLM_ROLES = {
|
|
348
348
|
SYSTEM: "system",
|
|
@@ -10488,9 +10488,20 @@ FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
|
|
|
10488
10488
|
- (write "None identified" if none)
|
|
10489
10489
|
|
|
10490
10490
|
## Failures/Errors
|
|
10491
|
-
|
|
10492
|
-
|
|
10493
|
-
|
|
10491
|
+
Classify EVERY failure using one of these types. Format: [TYPE] exact_command \u2192 why_failed \u2192 recommended_pivot
|
|
10492
|
+
|
|
10493
|
+
Failure types:
|
|
10494
|
+
- [FILTERED]: WAF/IDS/firewall blocked \u2192 suggest: encoding bypass, payload_mutate, different protocol/port
|
|
10495
|
+
- [WRONG_VECTOR]: Vulnerability not present here \u2192 suggest: pivot to different vuln class entirely
|
|
10496
|
+
- [AUTH_REQUIRED]: Credential or session needed first \u2192 suggest: brute force login or find creds in config files
|
|
10497
|
+
- [TOOL_ERROR]: Command syntax error, missing dep, or tool bug \u2192 suggest: run --help, use alternative tool
|
|
10498
|
+
- [TIMEOUT]: Service too slow or connection timed out \u2192 suggest: increase timeout, reduce scope, or use background mode
|
|
10499
|
+
- [PATCHED]: CVE/technique exists but target is patched \u2192 suggest: search bypass or newer CVE on same service
|
|
10500
|
+
|
|
10501
|
+
Examples:
|
|
10502
|
+
- "[FILTERED] sqlmap -u /login --tamper=space2comment \u2192 ModSecurity WAF, blocking all payloads \u2192 try charencode,randomcase tampers or payload_mutate"
|
|
10503
|
+
- "[AUTH_REQUIRED] curl http://target/admin \u2192 HTTP 401 Basic Auth \u2192 hydra -l admin -P rockyou.txt http-get://target/admin"
|
|
10504
|
+
- "[TIMEOUT] nmap -sV -p- target --min-rate=5000 \u2192 timed out 5min \u2192 rustscan first, then targeted nmap on found ports"
|
|
10494
10505
|
- (write "No failures" if everything succeeded)
|
|
10495
10506
|
|
|
10496
10507
|
## Suspicious Signals
|
|
@@ -10510,7 +10521,7 @@ FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
|
|
|
10510
10521
|
|
|
10511
10522
|
RULES:
|
|
10512
10523
|
- Include EXACT values: port numbers, versions, usernames, file paths, IPs, full commands used
|
|
10513
|
-
- For failures:
|
|
10524
|
+
- For failures: ALWAYS classify with [TYPE] \u2014 "brute force failed" alone is USELESS. Include full command.
|
|
10514
10525
|
- Look for the UNEXPECTED \u2014 non-standard ports, unusual banners, timing anomalies, error leaks
|
|
10515
10526
|
- Credentials include: passwords, hashes, API keys, tokens, private keys, cookies, session IDs
|
|
10516
10527
|
- Flag any information disclosure: server versions, internal paths, stack traces, debug output
|
|
@@ -11313,7 +11324,7 @@ Suggested Action: ${errorInfo.suggestedAction || actionHints[errorInfo.type] ||
|
|
|
11313
11324
|
};
|
|
11314
11325
|
|
|
11315
11326
|
// src/agents/prompt-builder.ts
|
|
11316
|
-
import { readFileSync as readFileSync6, existsSync as existsSync10, readdirSync as readdirSync4 } from "fs";
|
|
11327
|
+
import { readFileSync as readFileSync6, existsSync as existsSync10 } from "fs";
|
|
11317
11328
|
import { join as join11, dirname as dirname4 } from "path";
|
|
11318
11329
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
11319
11330
|
|
|
@@ -11816,12 +11827,12 @@ var PHASE_TECHNIQUE_MAP = {
|
|
|
11816
11827
|
[PHASES.EXPLOIT]: ["injection", "shells", "file-attacks", "network-svc", "pwn", "container-escape", "reversing"],
|
|
11817
11828
|
[PHASES.POST_EXPLOIT]: ["privesc", "lateral", "auth-access", "shells", "container-escape", "forensics"],
|
|
11818
11829
|
[PHASES.PRIV_ESC]: ["privesc", "auth-access", "shells", "pwn", "container-escape"],
|
|
11819
|
-
[PHASES.LATERAL]: ["lateral", "ad-attack", "auth-access", "container-escape"],
|
|
11820
|
-
[PHASES.PERSISTENCE]: ["shells", "privesc"],
|
|
11830
|
+
[PHASES.LATERAL]: ["lateral", "ad-attack", "auth-access", "container-escape", "network-svc"],
|
|
11831
|
+
[PHASES.PERSISTENCE]: ["shells", "privesc", "lateral"],
|
|
11821
11832
|
[PHASES.EXFIL]: ["lateral", "network-svc", "forensics"],
|
|
11822
|
-
[PHASES.WEB]: ["injection", "file-attacks", "auth-access", "crypto"],
|
|
11833
|
+
[PHASES.WEB]: ["injection", "file-attacks", "auth-access", "crypto", "shells"],
|
|
11823
11834
|
[PHASES.REPORT]: []
|
|
11824
|
-
// Report phase
|
|
11835
|
+
// Report phase: no attack techniques needed
|
|
11825
11836
|
};
|
|
11826
11837
|
var PromptBuilder = class {
|
|
11827
11838
|
state;
|
|
@@ -11947,20 +11958,20 @@ ${content}
|
|
|
11947
11958
|
/**
|
|
11948
11959
|
* Load technique files relevant to the current phase.
|
|
11949
11960
|
*
|
|
11950
|
-
* Loading strategy (
|
|
11951
|
-
* 1. PHASE_TECHNIQUE_MAP defines
|
|
11952
|
-
* 2.
|
|
11953
|
-
*
|
|
11961
|
+
* Loading strategy (Improvement #7 — explicit phase mapping, no auto-discovery):
|
|
11962
|
+
* 1. PHASE_TECHNIQUE_MAP defines EXACTLY which techniques load per phase.
|
|
11963
|
+
* 2. Auto-discovery is DISABLED to prevent irrelevant technique loading
|
|
11964
|
+
* (e.g., pwn.md 18K in RECON phase, forensics.md 16K in REPORT phase).
|
|
11965
|
+
* 3. To add a new technique: add the file to techniques/ AND add it to
|
|
11966
|
+
* the relevant phase entries in PHASE_TECHNIQUE_MAP above.
|
|
11954
11967
|
*
|
|
11955
|
-
*
|
|
11956
|
-
* "Drop a markdown file in the folder, PromptBuilder auto-discovers and loads it."
|
|
11968
|
+
* Token savings: ~5-15K per turn vs unrestricted auto-discovery.
|
|
11957
11969
|
*/
|
|
11958
11970
|
loadPhaseRelevantTechniques(phase) {
|
|
11959
11971
|
if (!existsSync10(TECHNIQUES_DIR)) return "";
|
|
11960
|
-
const
|
|
11961
|
-
const loadedSet = /* @__PURE__ */ new Set();
|
|
11972
|
+
const techniquesForPhase = PHASE_TECHNIQUE_MAP[phase] ?? [];
|
|
11962
11973
|
const fragments = [];
|
|
11963
|
-
for (const technique of
|
|
11974
|
+
for (const technique of techniquesForPhase) {
|
|
11964
11975
|
const filePath = join11(TECHNIQUES_DIR, `${technique}.md`);
|
|
11965
11976
|
try {
|
|
11966
11977
|
if (!existsSync10(filePath)) continue;
|
|
@@ -11969,25 +11980,10 @@ ${content}
|
|
|
11969
11980
|
fragments.push(`<technique-reference category="${technique}">
|
|
11970
11981
|
${content}
|
|
11971
11982
|
</technique-reference>`);
|
|
11972
|
-
loadedSet.add(`${technique}.md`);
|
|
11973
11983
|
}
|
|
11974
11984
|
} catch {
|
|
11975
11985
|
}
|
|
11976
11986
|
}
|
|
11977
|
-
try {
|
|
11978
|
-
const allFiles = readdirSync4(TECHNIQUES_DIR).filter((f) => f.endsWith(".md") && f !== "README.md" && !loadedSet.has(f));
|
|
11979
|
-
for (const file of allFiles) {
|
|
11980
|
-
const filePath = join11(TECHNIQUES_DIR, file);
|
|
11981
|
-
const content = readFileSync6(filePath, PROMPT_CONFIG.ENCODING);
|
|
11982
|
-
if (content) {
|
|
11983
|
-
const category = file.replace(".md", "");
|
|
11984
|
-
fragments.push(`<technique-reference category="${category}">
|
|
11985
|
-
${content}
|
|
11986
|
-
</technique-reference>`);
|
|
11987
|
-
}
|
|
11988
|
-
}
|
|
11989
|
-
} catch {
|
|
11990
|
-
}
|
|
11991
11987
|
return fragments.join("\n\n");
|
|
11992
11988
|
}
|
|
11993
11989
|
getScopeFragment() {
|
|
@@ -12113,6 +12109,8 @@ import { join as join12, dirname as dirname5 } from "path";
|
|
|
12113
12109
|
import { fileURLToPath as fileURLToPath3 } from "url";
|
|
12114
12110
|
var __dirname3 = dirname5(fileURLToPath3(import.meta.url));
|
|
12115
12111
|
var STRATEGIST_PROMPT_PATH = join12(__dirname3, "prompts", "strategist-system.md");
|
|
12112
|
+
var CACHE_TTL_MS = 3 * 60 * 1e3;
|
|
12113
|
+
var STALL_TURNS_THRESHOLD = 2;
|
|
12116
12114
|
var Strategist = class {
|
|
12117
12115
|
llm;
|
|
12118
12116
|
state;
|
|
@@ -12121,23 +12119,42 @@ var Strategist = class {
|
|
|
12121
12119
|
totalTokenCost = 0;
|
|
12122
12120
|
totalCalls = 0;
|
|
12123
12121
|
lastDirective = null;
|
|
12122
|
+
// Cooldown state (Improvement #8)
|
|
12123
|
+
lastPhase = "";
|
|
12124
|
+
turnsWithoutProgress = 0;
|
|
12125
|
+
lastFindingCount = 0;
|
|
12124
12126
|
constructor(llm, state) {
|
|
12125
12127
|
this.llm = llm;
|
|
12126
12128
|
this.state = state;
|
|
12127
12129
|
this.systemPrompt = this.loadSystemPrompt();
|
|
12128
12130
|
}
|
|
12129
12131
|
/**
|
|
12130
|
-
* Generate a
|
|
12131
|
-
* Called
|
|
12132
|
+
* Generate a strategic directive for this turn.
|
|
12133
|
+
* Called each iteration by PromptBuilder.
|
|
12134
|
+
*
|
|
12135
|
+
* COOLDOWN POLICY (Improvement #8):
|
|
12136
|
+
* Only issues a new LLM call when needed. Otherwise reuses cached directive.
|
|
12137
|
+
* Conditions for a new call: first call, phase changed, stall detected, or TTL expired.
|
|
12132
12138
|
*
|
|
12133
12139
|
* @returns Formatted directive string for prompt injection, or '' on failure
|
|
12134
12140
|
*/
|
|
12135
12141
|
async generateDirective() {
|
|
12142
|
+
this.updateProgressTracking();
|
|
12143
|
+
const shouldCall = this.shouldCallLLM();
|
|
12144
|
+
if (!shouldCall && this.lastDirective) {
|
|
12145
|
+
debugLog("general", "Strategist: reusing cached directive (cooldown active)", {
|
|
12146
|
+
age: Math.floor((Date.now() - this.lastDirective.generatedAt) / 1e3),
|
|
12147
|
+
turnsWithoutProgress: this.turnsWithoutProgress
|
|
12148
|
+
});
|
|
12149
|
+
return this.formatForPrompt(this.lastDirective, true);
|
|
12150
|
+
}
|
|
12136
12151
|
try {
|
|
12137
12152
|
const input = this.buildInput();
|
|
12138
12153
|
const directive = await this.callLLM(input);
|
|
12139
12154
|
this.lastDirective = directive;
|
|
12140
12155
|
this.totalCalls++;
|
|
12156
|
+
this.turnsWithoutProgress = 0;
|
|
12157
|
+
this.lastPhase = this.state.getPhase();
|
|
12141
12158
|
debugLog("general", "Strategist directive generated", {
|
|
12142
12159
|
tokens: directive.tokenCost,
|
|
12143
12160
|
totalCalls: this.totalCalls,
|
|
@@ -12145,7 +12162,7 @@ var Strategist = class {
|
|
|
12145
12162
|
});
|
|
12146
12163
|
return this.formatForPrompt(directive);
|
|
12147
12164
|
} catch (err) {
|
|
12148
|
-
debugLog("general", "Strategist failed \u2014 agent will proceed
|
|
12165
|
+
debugLog("general", "Strategist failed \u2014 agent will proceed with cached/no directive", {
|
|
12149
12166
|
error: String(err)
|
|
12150
12167
|
});
|
|
12151
12168
|
if (this.lastDirective?.content) {
|
|
@@ -12154,6 +12171,54 @@ var Strategist = class {
|
|
|
12154
12171
|
return "";
|
|
12155
12172
|
}
|
|
12156
12173
|
}
|
|
12174
|
+
// ─── Cooldown Logic ─────────────────────────────────────────
|
|
12175
|
+
/**
|
|
12176
|
+
* Determine whether to call the Strategist LLM this turn.
|
|
12177
|
+
*
|
|
12178
|
+
* Calls are triggered when:
|
|
12179
|
+
* 1. No cached directive exists (first call ever)
|
|
12180
|
+
* 2. Phase changed since last call (new strategic situation)
|
|
12181
|
+
* 3. Stall detected: no new findings for 2+ turns
|
|
12182
|
+
* 4. Cache TTL expired (3 minutes — directive may be outdated)
|
|
12183
|
+
*/
|
|
12184
|
+
shouldCallLLM() {
|
|
12185
|
+
if (!this.lastDirective) return true;
|
|
12186
|
+
const currentPhase = this.state.getPhase();
|
|
12187
|
+
if (currentPhase !== this.lastPhase) {
|
|
12188
|
+
debugLog("general", "Strategist: phase changed \u2014 forcing LLM call", {
|
|
12189
|
+
from: this.lastPhase,
|
|
12190
|
+
to: currentPhase
|
|
12191
|
+
});
|
|
12192
|
+
return true;
|
|
12193
|
+
}
|
|
12194
|
+
if (this.turnsWithoutProgress >= STALL_TURNS_THRESHOLD) {
|
|
12195
|
+
debugLog("general", "Strategist: stall detected \u2014 forcing LLM call", {
|
|
12196
|
+
turnsWithoutProgress: this.turnsWithoutProgress
|
|
12197
|
+
});
|
|
12198
|
+
return true;
|
|
12199
|
+
}
|
|
12200
|
+
const age = Date.now() - this.lastDirective.generatedAt;
|
|
12201
|
+
if (age >= CACHE_TTL_MS) {
|
|
12202
|
+
debugLog("general", "Strategist: cache TTL expired \u2014 forcing LLM call", {
|
|
12203
|
+
ageMs: age
|
|
12204
|
+
});
|
|
12205
|
+
return true;
|
|
12206
|
+
}
|
|
12207
|
+
return false;
|
|
12208
|
+
}
|
|
12209
|
+
/**
|
|
12210
|
+
* Update progress tracking for stall detection.
|
|
12211
|
+
* Compares current finding count to last known count.
|
|
12212
|
+
*/
|
|
12213
|
+
updateProgressTracking() {
|
|
12214
|
+
const currentFindings = this.state.getFindings().length;
|
|
12215
|
+
if (currentFindings > this.lastFindingCount) {
|
|
12216
|
+
this.turnsWithoutProgress = 0;
|
|
12217
|
+
this.lastFindingCount = currentFindings;
|
|
12218
|
+
} else {
|
|
12219
|
+
this.turnsWithoutProgress++;
|
|
12220
|
+
}
|
|
12221
|
+
}
|
|
12157
12222
|
// ─── Input Construction ─────────────────────────────────────
|
|
12158
12223
|
/**
|
|
12159
12224
|
* Build the user message for the Strategist LLM.
|
|
@@ -12233,18 +12298,19 @@ ${input}`
|
|
|
12233
12298
|
// ─── Formatting ─────────────────────────────────────────────
|
|
12234
12299
|
/**
|
|
12235
12300
|
* Format directive for injection into the attack agent's system prompt.
|
|
12301
|
+
* @param isStale - true when reusing a cached directive (cooldown) or after error
|
|
12236
12302
|
*/
|
|
12237
12303
|
formatForPrompt(directive, isStale = false) {
|
|
12238
12304
|
if (!directive.content) return "";
|
|
12239
12305
|
const age = Math.floor((Date.now() - directive.generatedAt) / MS_PER_MINUTE);
|
|
12240
|
-
const
|
|
12241
|
-
|
|
12306
|
+
const staleMark = isStale ? `
|
|
12307
|
+
[CACHED \u2014 ${age}min old. Follow unless directly contradicted by new tool output.]` : "";
|
|
12242
12308
|
return [
|
|
12243
12309
|
"<strategic-directive>",
|
|
12244
12310
|
"TACTICAL DIRECTIVE (generated by Strategist LLM \u2014 follow these priorities):",
|
|
12245
12311
|
"",
|
|
12246
12312
|
directive.content,
|
|
12247
|
-
|
|
12313
|
+
staleMark,
|
|
12248
12314
|
"</strategic-directive>"
|
|
12249
12315
|
].filter(Boolean).join("\n");
|
|
12250
12316
|
}
|
|
@@ -12263,7 +12329,7 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
|
|
|
12263
12329
|
getTotalTokenCost() {
|
|
12264
12330
|
return this.totalTokenCost;
|
|
12265
12331
|
}
|
|
12266
|
-
/** Get number of Strategist calls this session. */
|
|
12332
|
+
/** Get number of Strategist LLM calls this session (excludes cache hits). */
|
|
12267
12333
|
getTotalCalls() {
|
|
12268
12334
|
return this.totalCalls;
|
|
12269
12335
|
}
|
|
@@ -12271,11 +12337,18 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
|
|
|
12271
12337
|
getLastDirective() {
|
|
12272
12338
|
return this.lastDirective;
|
|
12273
12339
|
}
|
|
12340
|
+
/** Current stall counter (turns without new findings). */
|
|
12341
|
+
getTurnsWithoutProgress() {
|
|
12342
|
+
return this.turnsWithoutProgress;
|
|
12343
|
+
}
|
|
12274
12344
|
/** Reset strategist state (for /clear command). */
|
|
12275
12345
|
reset() {
|
|
12276
12346
|
this.lastDirective = null;
|
|
12277
12347
|
this.totalTokenCost = 0;
|
|
12278
12348
|
this.totalCalls = 0;
|
|
12349
|
+
this.lastPhase = "";
|
|
12350
|
+
this.turnsWithoutProgress = 0;
|
|
12351
|
+
this.lastFindingCount = 0;
|
|
12279
12352
|
}
|
|
12280
12353
|
};
|
|
12281
12354
|
var FALLBACK_SYSTEM_PROMPT = `You are an elite autonomous penetration testing STRATEGIST \u2014 a red team tactical commander.
|
package/dist/prompts/base.md
CHANGED
|
@@ -20,14 +20,113 @@ Speed mindset: every second without a tool call is wasted time.
|
|
|
20
20
|
|
|
21
21
|
## OODA Loop Protocol (MANDATORY)
|
|
22
22
|
|
|
23
|
-
Before calling ANY tool
|
|
24
|
-
1. **[OBSERVE]**: What
|
|
25
|
-
2. **[ORIENT]**:
|
|
26
|
-
3. **[DECIDE]**:
|
|
27
|
-
4. **[ACT]**: Call the appropriate tool(s)
|
|
23
|
+
Before calling ANY tool, structure your reasoning using this exact format:
|
|
24
|
+
1. **[OBSERVE]**: What did the last tool/Analyst summary yield? Include attackValue, suspicions, failures.
|
|
25
|
+
2. **[ORIENT]**: Kill chain position? How does this update our attack hypothesis? What's exhausted?
|
|
26
|
+
3. **[DECIDE]**: Highest-probability unexplored vector? Check Strategic Directive PRIORITY list first.
|
|
27
|
+
4. **[ACT]**: Call the appropriate tool(s). Prefer parallel calls for independent operations.
|
|
28
28
|
|
|
29
29
|
*Never blindly call tools without explicit OBSERVATION and DECISION.*
|
|
30
30
|
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Reading the ANALYST MEMO (CRITICAL — process every turn)
|
|
34
|
+
|
|
35
|
+
Every tool result contains an **Analyst LLM summary** with structured sections.
|
|
36
|
+
You MUST process these fields in your OBSERVE step:
|
|
37
|
+
|
|
38
|
+
### Attack Value → Priority Signal
|
|
39
|
+
```
|
|
40
|
+
HIGH → Drop everything. Drill deeper into this NOW. Make it PRIORITY 1.
|
|
41
|
+
MED → Queue as next action after current PRIORITY 1 completes.
|
|
42
|
+
LOW → Pursue only if nothing else available.
|
|
43
|
+
NONE → Mark vector as EXHAUSTED. Do NOT retry without a fundamentally new approach.
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Suspicious Signals → Immediate Investigation Queue
|
|
47
|
+
When Analyst lists suspicious signals:
|
|
48
|
+
1. Add each one to `update_todo` with HIGH priority immediately
|
|
49
|
+
2. If time permits THIS turn, test it — suspicious signals are often the real attack surface
|
|
50
|
+
3. Examples: unusual response timing, debug headers, verbose errors, redirect leaks
|
|
51
|
+
|
|
52
|
+
### Next Steps → Analyst SEARCH ORDERS
|
|
53
|
+
The Analyst's "Next Steps" are **mandatory search/action orders**:
|
|
54
|
+
- Execute them THIS turn or NEXT turn without exception
|
|
55
|
+
- Skip only if working memory shows the exact same approach already failed 2+ times
|
|
56
|
+
|
|
57
|
+
### Failures → Escalation Protocol
|
|
58
|
+
When Analyst reports failures:
|
|
59
|
+
```
|
|
60
|
+
1st same failure: Retry with DIFFERENT parameters (wordlist, encoding, port)
|
|
61
|
+
2nd same failure: Switch approach — fundamentally different vector
|
|
62
|
+
3rd+ same failure: web_search("{tool} {error} bypass") → apply solution
|
|
63
|
+
```
|
|
64
|
+
*A failure with different parameters is a NEW attempt, not a repeat.*
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Strategic Directive (MANDATORY COMPLIANCE)
|
|
69
|
+
|
|
70
|
+
When `<strategic-directive>` appears in your context:
|
|
71
|
+
|
|
72
|
+
1. **PRIORITY items = ORDERS, not suggestions.** Execute them in sequence.
|
|
73
|
+
2. **EXHAUSTED list = absolute blocklist.** NEVER attempt these vectors again this session.
|
|
74
|
+
3. **SEARCH ORDERS = mandatory web_search calls.** Execute if not already done this session.
|
|
75
|
+
4. **FALLBACK = your next action when primary fails.** Use it — don't improvise blindly.
|
|
76
|
+
5. **Conflict resolution:**
|
|
77
|
+
- Direct tool evidence contradicts directive → trust the evidence, note the discrepancy
|
|
78
|
+
- Working memory shows 2+ failures on suggested approach → use FALLBACK instead
|
|
79
|
+
- Otherwise → the directive ALWAYS wins over your own assessment
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Examples — Correct OODA Execution
|
|
84
|
+
|
|
85
|
+
### Example 1: SQL Error → Correct Response
|
|
86
|
+
```
|
|
87
|
+
[OBSERVE]: run_cmd("curl /login -d 'user=admin'") returned "SQL syntax error near '''"
|
|
88
|
+
Analyst attackValue: HIGH | Next Steps: ["sqlmap -u /login --forms --batch"]
|
|
89
|
+
[ORIENT]: SQLi confirmed on /login POST. Kill chain: SQLi → dump → creds → shell.
|
|
90
|
+
Strategic Directive PRIORITY 1 says: "Exploit /login SQLi immediately."
|
|
91
|
+
[DECIDE]: Run sqlmap now. attackValue HIGH + Directive alignment → top priority.
|
|
92
|
+
[ACT]: run_cmd("sqlmap -u 'http://10.10.10.5/login' --forms --batch --risk=3 --level=3 --threads=5")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Example 2: Stall Detection → Correct Pivot
|
|
96
|
+
```
|
|
97
|
+
[OBSERVE]: 3rd gobuster attempt on /admin returned 403 again. Same as turns 4 and 6.
|
|
98
|
+
Analyst attackValue: NONE | Failures: "[FILTERED] gobuster /admin → WAF blocking"
|
|
99
|
+
[ORIENT]: Directory fuzzing on /admin is EXHAUSTED (3 identical failures).
|
|
100
|
+
Working memory shows 3 consecutive failures on same vector.
|
|
101
|
+
Analyst classified as FILTERED — try bypass headers.
|
|
102
|
+
[DECIDE]: Auth bypass headers: X-Forwarded-For: 127.0.0.1, X-Original-URL: /admin
|
|
103
|
+
This is a fundamentally different approach, not a repeat.
|
|
104
|
+
[ACT]: run_cmd("curl -H 'X-Original-URL: /admin' http://10.10.10.5/")
|
|
105
|
+
run_cmd("curl -H 'X-Forwarded-For: 127.0.0.1' http://10.10.10.5/admin")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Example 3: HIGH attackValue → Correct Drill-Down
|
|
109
|
+
```
|
|
110
|
+
[OBSERVE]: Analyst on ssh-audit output: attackValue: HIGH
|
|
111
|
+
"SSH accepts CBC mode ciphers (CVE-2008-5161) + user enumeration via timing"
|
|
112
|
+
Next Steps: ["Test SSH user enum: use timing attack to enumerate valid users"]
|
|
113
|
+
[ORIENT]: SSH is a HIGH value target. Kill chain: user enum → brute force → shell.
|
|
114
|
+
Strategic Directive PRIORITY 2 confirms SSH exploitation path.
|
|
115
|
+
[DECIDE]: Enumerate users first, then targeted brute force with found usernames.
|
|
116
|
+
[ACT]: web_search("ssh-audit CVE-2008-5161 exploit PoC")
|
|
117
|
+
run_cmd("ssh-audit --timeout=10 10.10.10.5", background: true)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Example 4: EXHAUSTED List Application
|
|
121
|
+
```
|
|
122
|
+
[OBSERVE]: Strategic Directive EXHAUSTED list: "FTP anonymous login — connection refused (port filtered)"
|
|
123
|
+
[ORIENT]: FTP is confirmed dead. No need to test. Skip entirely.
|
|
124
|
+
[DECIDE]: Focus on HTTP (port 80) — not in EXHAUSTED list, not yet tested.
|
|
125
|
+
[ACT]: run_cmd("whatweb http://10.10.10.5") — start web fingerprinting
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
31
130
|
## Absolute Rules
|
|
32
131
|
|
|
33
132
|
### 0. ⚠️ LOCAL FILE PATHS — ALWAYS USE `.pentesting/workspace/`
|
|
@@ -56,10 +155,20 @@ You are prone to imagining non-existent tool flags or incorrect syntax for compl
|
|
|
56
155
|
- `add_finding` — immediately when vulnerability confirmed (if reproducible, record it NOW)
|
|
57
156
|
- `add_target` — new host or service discovered
|
|
58
157
|
- `add_loot` — credentials, tokens, keys, hashes found
|
|
59
|
-
- `update_phase` — when activity changes (
|
|
158
|
+
- `update_phase` — when activity changes (see Phase Transition Signals below)
|
|
60
159
|
|
|
61
160
|
Self-check every turn: Did I find a vuln but not call `add_finding`? Call it now.
|
|
62
161
|
|
|
162
|
+
### 2.5. Phase Transition Signals — When to Call `update_phase`
|
|
163
|
+
```
|
|
164
|
+
RECON → vuln_analysis: 3+ services fingerprinted with versions confirmed
|
|
165
|
+
vuln_analysis → exploit: 1+ finding (confidence ≥ 50) with exploit path identified
|
|
166
|
+
exploit → post_exploitation: Shell obtained AND promoted (active_shell process active)
|
|
167
|
+
post_exploitation → lateral: root/SYSTEM achieved on current host
|
|
168
|
+
ANY_PHASE → report: All targets compromised OR time is up
|
|
169
|
+
```
|
|
170
|
+
**NEVER transition away from a phase while HIGH-priority vectors remain untested.**
|
|
171
|
+
|
|
63
172
|
### 3. ask_user Rules
|
|
64
173
|
|
|
65
174
|
Use received values immediately. Never ask for the same thing twice.
|
|
@@ -124,10 +233,12 @@ Writing code is not a fallback. It's your primary weapon.
|
|
|
124
233
|
- Automate multi-step attacks
|
|
125
234
|
- Iterate: `write_file` → `run_cmd` → observe error → fix → repeat
|
|
126
235
|
|
|
127
|
-
##
|
|
236
|
+
## Shell Lifecycle (SINGLE SOURCE — referenced by exploit.md and post.md)
|
|
237
|
+
|
|
238
|
+
### Processes = Operational Assets
|
|
128
239
|
|
|
129
240
|
| Role | Meaning |
|
|
130
|
-
|------|---------|
|
|
241
|
+
|------|---------|
|
|
131
242
|
| `listener` 👂 | Waiting for connection — start before attack |
|
|
132
243
|
| `active_shell` 🐚 | **Target shell — top priority, never terminate** |
|
|
133
244
|
| `server` 📡 | File serving — clean up after use |
|
|
@@ -136,9 +247,8 @@ Writing code is not a fallback. It's your primary weapon.
|
|
|
136
247
|
**Reverse shell flow**: start listener → exploit → check status → `promote` on connection
|
|
137
248
|
→ `interact` to execute commands → upgrade shell → post-exploit through it.
|
|
138
249
|
|
|
139
|
-
|
|
250
|
+
### On Getting a Shell — Immediate Actions
|
|
140
251
|
|
|
141
|
-
On getting a shell, immediately:
|
|
142
252
|
1. Detect type: `echo $TERM && tty && echo $SHELL`
|
|
143
253
|
- `dumb` or `tty: not a tty` → upgrade required
|
|
144
254
|
- `xterm` + `/dev/pts/X` → good
|
|
@@ -151,12 +261,11 @@ On getting a shell, immediately:
|
|
|
151
261
|
|
|
152
262
|
3. **Protect the shell** — never terminate needlessly. On drop: reuse backdoor/web shell/re-exploit.
|
|
153
263
|
|
|
154
|
-
### Process Management
|
|
155
|
-
|
|
156
|
-
- Never terminate `active_shell`
|
|
264
|
+
### Process Management Rules
|
|
265
|
+
- **Never terminate `active_shell`**
|
|
157
266
|
- Clean up servers/sniffers after task completion
|
|
158
267
|
- Port conflict → switch port, update_mission with new port
|
|
159
|
-
- `bg_process stop_all` on task completion
|
|
268
|
+
- `bg_process stop_all` on task completion only
|
|
160
269
|
|
|
161
270
|
## Mission Context
|
|
162
271
|
|
|
@@ -180,8 +289,10 @@ Record parallel processes in checklist (e.g., "🔍 [bg_xxx] Port scan in progre
|
|
|
180
289
|
1. Active shell available? → use it
|
|
181
290
|
2. Shell is dumb? → upgrade
|
|
182
291
|
3. Unnecessary processes? → stop
|
|
183
|
-
4. Stuck? → search + different vector
|
|
292
|
+
4. Stuck? → check Strategic Directive FALLBACK first, then search + different vector
|
|
184
293
|
5. Repeating same method 2+ times? → switch immediately
|
|
294
|
+
6. Analyst said attackValue HIGH? → is it PRIORITY 1?
|
|
295
|
+
7. Any suspicions from last Analyst memo not yet tested? → add to TODO now
|
|
185
296
|
|
|
186
297
|
## Output Format
|
|
187
298
|
|
package/dist/prompts/exploit.md
CHANGED
|
@@ -68,23 +68,12 @@ Connection received but drops immediately?
|
|
|
68
68
|
└── EOFError → stdin not properly redirected, try different reverse shell variant
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
-
## 🐚 Shell Stabilization —
|
|
72
|
-
|
|
73
|
-
After receiving any shell, **immediately** follow base.md "Shell Lifecycle Mastery" protocol:
|
|
74
|
-
|
|
75
|
-
### Upgrade Priority Order:
|
|
76
|
-
```
|
|
77
|
-
1. Python PTY → python3 -c 'import pty;pty.spawn("/bin/bash")' + Ctrl+Z + stty raw -echo; fg
|
|
78
|
-
2. Script → script -qc /bin/bash /dev/null + Ctrl+Z + stty raw -echo; fg
|
|
79
|
-
3. Socat → upload socat binary, connect with full PTY
|
|
80
|
-
4. rlwrap → restart listener with rlwrap nc -lvnp PORT (readline support)
|
|
81
|
-
5. SSH back-connect → plant SSH key on target, connect back via SSH
|
|
82
|
-
6. pwncat → use pwncat-cs for auto-upgrade + features
|
|
83
|
-
7. ConPTY → Windows full interactive shell
|
|
84
|
-
```
|
|
71
|
+
## 🐚 Shell Stabilization — See base.md "Shell Lifecycle"
|
|
85
72
|
|
|
73
|
+
After receiving any shell, **immediately** follow the PTY upgrade order in base.md.
|
|
86
74
|
**Without a proper TTY:** sudo, su, ssh, screen, vim won't work. Upgrade is MANDATORY.
|
|
87
75
|
|
|
76
|
+
|
|
88
77
|
## 🔗 Exploit Chaining — Combine Vulnerabilities
|
|
89
78
|
|
|
90
79
|
Think in chains, not individual exploits. **Every vulnerability is a stepping stone to the next.**
|
|
package/dist/prompts/offensive-playbook.md
CHANGED
|
@@ -11,45 +11,22 @@ This playbook drives **aggressive exploitation, time-aware strategy, and proof c
|
|
|
11
11
|
- Multiple proofs per target are common — **keep hunting after the first**
|
|
12
12
|
- **Environment variables** and **database entries** often contain flags/secrets
|
|
13
13
|
|
|
14
|
-
## ⏱️ Time Management
|
|
14
|
+
## ⏱️ Time Management — Follow Strategist's time-strategy
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
The `<time-strategy>` tag in your context contains exact time pressure and phase directives.
|
|
17
|
+
**Always read and follow it — it overrides any fixed-duration assumptions.**
|
|
17
18
|
|
|
19
|
+
Quick reference (use time-strategy for exact numbers):
|
|
18
20
|
```
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
├── Check for low-hanging fruit: default creds, exposed files, known CVEs
|
|
24
|
-
└── Record ALL findings → update_mission immediately
|
|
25
|
-
|
|
26
|
-
10-30 MINUTES (Targeted Attack):
|
|
27
|
-
├── Focus on highest-probability attack vector
|
|
28
|
-
├── Version+service → web_search("{service} {version} exploit CVE") IMMEDIATELY
|
|
29
|
-
├── Web: directory fuzzing + injection probes in parallel
|
|
30
|
-
├── Credential brute force on login services (hydra + rockyou.txt in background)
|
|
31
|
-
├── If stuck after 15 min on one vector → SWITCH to next
|
|
32
|
-
└── Background: hash cracking, brute force if applicable
|
|
33
|
-
|
|
34
|
-
30-60 MINUTES (Deep Exploitation):
|
|
35
|
-
├── Chain findings: LFI→RCE, SQLi→file write→shell, SSRF→internal
|
|
36
|
-
├── Custom exploit development: write_file → run_cmd
|
|
37
|
-
├── Source code analysis if .git, .bak, .swp found
|
|
38
|
-
└── Multiple attack paths simultaneously (background processes)
|
|
39
|
-
|
|
40
|
-
60+ MINUTES (Pivot & Escalate):
|
|
41
|
-
├── Privilege escalation: ALL categories systematically
|
|
42
|
-
├── Lateral movement if internal network exists
|
|
43
|
-
├── Creative hunting: unusual files, hidden services, config secrets
|
|
44
|
-
└── Re-examine ALL earlier findings with new context/access
|
|
21
|
+
SPRINT (0-25%): Broad recon, parallel scans, identify all attack surfaces
|
|
22
|
+
EXPLOIT (25-50%): Focus on top-3 highest-scoring surfaces. Quick wins only.
|
|
23
|
+
CREATIVE (50-75%): Chained exploits, custom tools. If stuck >5min → switch.
|
|
24
|
+
HARVEST (75-100%): Stop exploring. Exploit what you HAVE. Collect all proof.
|
|
45
25
|
```
|
|
46
26
|
|
|
47
27
|
### Time-Boxing Rule
|
|
48
28
|
**If stuck on ONE vector for more than 15 minutes → SWITCH.**
|
|
49
|
-
|
|
50
|
-
- Move to next highest-probability vector
|
|
51
|
-
- Come back later with new information/tools
|
|
52
|
-
- **Never tunnel-vision on a single approach**
|
|
29
|
+
Record what you tried in `update_mission`. Move to next priority. Come back with new context.
|
|
53
30
|
|
|
54
31
|
## 🧠 Challenge & Target Quick-Start Protocols
|
|
55
32
|
|
|
package/dist/prompts/orchestrator.md
CHANGED
|
@@ -21,8 +21,8 @@ You are a developer AND a hacker. Coding is your superpower.
|
|
|
21
21
|
**Question everything**: Why is this port open? What data flows through this connection?
|
|
22
22
|
What shortcuts did the admin take? What systems depend on this one? Follow every question.
|
|
23
23
|
|
|
24
|
-
## Tactical Reasoning (
|
|
25
|
-
Your thought process must be visible.
|
|
24
|
+
## Tactical Reasoning — OODA (See base.md for full protocol)
|
|
25
|
+
Your thought process must be visible. Before each tool call: OBSERVE what changed, ORIENT on the kill chain, DECIDE the next attack, ACT with the right tool.
|
|
26
26
|
|
|
27
27
|
## Kill Chain Position — Know Where You Are
|
|
28
28
|
|
|
@@ -33,16 +33,11 @@ External Recon → Service Discovery → Vuln ID → Initial Access → Shell St
|
|
|
33
33
|
|
|
34
34
|
Know your position before every turn. Act accordingly.
|
|
35
35
|
|
|
36
|
-
## After First Shell —
|
|
36
|
+
## After First Shell — See base.md "Shell Lifecycle" + post.md pipeline
|
|
37
37
|
|
|
38
|
-
1. Shell stabilization (PTY upgrade
|
|
39
|
-
2.
|
|
40
|
-
3.
|
|
41
|
-
4. Credential hunting: `.bash_history`, `.ssh/`, config files, DB connection strings
|
|
42
|
-
5. Network mapping: `ip route`, `/etc/hosts`, ARP, internal services
|
|
43
|
-
6. Privesc path exploration → on success, repeat from step 2 with new privileges
|
|
44
|
-
7. Lateral movement: SSH key reuse, credential spray, internal service access
|
|
45
|
-
8. New targets discovered → `add_target` → full recon restart
|
|
38
|
+
1. Shell stabilization (PTY upgrade per base.md)
|
|
39
|
+
2. Immediate awareness + privesc enumeration (post.md pipeline)
|
|
40
|
+
3. Credential harvest + lateral movement + persistence
|
|
46
41
|
|
|
47
42
|
## Decision Forks — Never Give Up
|
|
48
43
|
|
|
@@ -269,3 +269,39 @@ Cloud/Container:
|
|
|
269
269
|
├─ ❌ Generic reconnaissance → ✅ Targeted recon with specific goals
|
|
270
270
|
└─ ❌ "I recommend..." or "You should consider..." → ✅ Direct imperative: "Run: ..."
|
|
271
271
|
```
|
|
272
|
+
|
|
273
|
+
### Rule 11: PHASE TRANSITION SIGNALS
|
|
274
|
+
```
|
|
275
|
+
ORDER update_phase when these conditions are met:
|
|
276
|
+
|
|
277
|
+
recon → vuln_analysis:
|
|
278
|
+
├─ 3+ services fingerprinted with exact versions confirmed
|
|
279
|
+
├─ OSINT complete (Shodan/GitHub/crt.sh checked)
|
|
280
|
+
└─ Web surface mapped (get_web_attack_surface called if HTTP found)
|
|
281
|
+
|
|
282
|
+
vuln_analysis → exploit:
|
|
283
|
+
├─ 1+ finding with confidence ≥ 50 AND a concrete exploit path identified
|
|
284
|
+
├─ Specific CVE confirmed applicable (version matches, PoC available)
|
|
285
|
+
└─ Or: critical misconfiguration found (default creds, exposed .env, anon access)
|
|
286
|
+
|
|
287
|
+
exploit → post_exploitation:
|
|
288
|
+
├─ Shell obtained AND promoted (active_shell process is running)
|
|
289
|
+
├─ Interactive commands confirmed working via bg_process interact
|
|
290
|
+
└─ Shell stabilized (PTY upgrade attempted)
|
|
291
|
+
|
|
292
|
+
post_exploitation → lateral:
|
|
293
|
+
├─ root or SYSTEM access achieved on current host
|
|
294
|
+
├─ Additional network segments discovered (new /24 subnet, internal services)
|
|
295
|
+
└─ Or: domain credentials obtained (AD context)
|
|
296
|
+
|
|
297
|
+
ANY phase → report:
|
|
298
|
+
├─ All high-priority targets compromised
|
|
299
|
+
├─ Time remaining < 10% of total engagement time
|
|
300
|
+
└─ Or: scope exhausted (all vectors tried, no new surface)
|
|
301
|
+
|
|
302
|
+
CRITICAL RULES:
|
|
303
|
+
├─ NEVER order a phase transition while HIGH or CRITICAL priority vectors remain untested
|
|
304
|
+
├─ Phase transitions do NOT prevent using tools from previous phases
|
|
305
|
+
├─ If recon yields nothing after 10 min → still transition to vuln_analysis and probe
|
|
306
|
+
└─ If stuck in a phase for > 5 turns with no progress → evaluate whether a transition is needed
|
|
307
|
+
```
|
package/dist/prompts/strategy.md
CHANGED
|
@@ -36,14 +36,14 @@ TIER 4 — Last resort:
|
|
|
36
36
|
Patch diffing · Race conditions · Supply chain analysis
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
-
## Every-Turn Decision Flow
|
|
39
|
+
## Every-Turn Decision Flow — Use OODA from base.md
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
1.
|
|
43
|
-
2. Highest-probability unexplored surface from
|
|
41
|
+
During the ORIENT/DECIDE steps of base.md's OODA protocol, check:
|
|
42
|
+
1. Strategic Directive PRIORITY list — what did the Strategist order first?
|
|
43
|
+
2. Highest-probability unexplored surface from the matrix below?
|
|
44
44
|
3. Have I searched for attacks on EVERY discovered service? → if not, search NOW
|
|
45
|
-
4. Can I chain existing findings?
|
|
46
|
-
5. Stuck 15+ min? → switch approach immediately
|
|
45
|
+
4. Can I chain existing findings? (check attack-intelligence in context)
|
|
46
|
+
5. Stuck 15+ min? → switch approach immediately; refer to the FALLBACK in the Strategic Directive
|
|
47
47
|
|
|
48
48
|
## Service Intelligence Protocol
|
|
49
49
|
|