pentesting 0.54.0 → 0.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -342,7 +342,7 @@ var ORPHAN_PROCESS_NAMES = [
342
342
 
343
343
  // src/shared/constants/agent.ts
344
344
  var APP_NAME = "Pentest AI";
345
- var APP_VERSION = "0.54.0";
345
+ var APP_VERSION = "0.55.0";
346
346
  var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
347
347
  var LLM_ROLES = {
348
348
  SYSTEM: "system",
@@ -10488,9 +10488,20 @@ FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
10488
10488
  - (write "None identified" if none)
10489
10489
 
10490
10490
  ## Failures/Errors
10491
- - [what was attempted and FAILED \u2014 include the FULL command, wordlist, target, and the reason WHY it failed]
10492
- - [e.g.: "SSH brute force: hydra -l admin -P /usr/share/wordlists/rockyou.txt ssh://10.0.0.1 \u2014 connection refused (port filtered)"]
10493
- - [e.g.: "SQLi on /login with sqlmap --tamper=space2comment \u2014 input sanitized, WAF detected (ModSecurity)"]
10491
+ Classify EVERY failure using one of these types. Format: [TYPE] exact_command \u2192 why_failed \u2192 recommended_pivot
10492
+
10493
+ Failure types:
10494
+ - [FILTERED]: WAF/IDS/firewall blocked \u2192 suggest: encoding bypass, payload_mutate, different protocol/port
10495
+ - [WRONG_VECTOR]: Vulnerability not present here \u2192 suggest: pivot to different vuln class entirely
10496
+ - [AUTH_REQUIRED]: Credential or session needed first \u2192 suggest: brute force login or find creds in config files
10497
+ - [TOOL_ERROR]: Command syntax error, missing dep, or tool bug \u2192 suggest: run --help, use alternative tool
10498
+ - [TIMEOUT]: Service too slow or connection timed out \u2192 suggest: increase timeout, reduce scope, or use background mode
10499
+ - [PATCHED]: CVE/technique exists but target is patched \u2192 suggest: search bypass or newer CVE on same service
10500
+
10501
+ Examples:
10502
+ - "[FILTERED] sqlmap -u /login --tamper=space2comment \u2192 ModSecurity WAF, blocking all payloads \u2192 try charencode,randomcase tampers or payload_mutate"
10503
+ - "[AUTH_REQUIRED] curl http://target/admin \u2192 HTTP 401 Basic Auth \u2192 hydra -l admin -P rockyou.txt http-get://target/admin"
10504
+ - "[TIMEOUT] nmap -sV -p- target --min-rate=5000 \u2192 timed out 5min \u2192 rustscan first, then targeted nmap on found ports"
10494
10505
  - (write "No failures" if everything succeeded)
10495
10506
 
10496
10507
  ## Suspicious Signals
@@ -10510,7 +10521,7 @@ FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
10510
10521
 
10511
10522
  RULES:
10512
10523
  - Include EXACT values: port numbers, versions, usernames, file paths, IPs, full commands used
10513
- - For failures: include the COMPLETE command with all flags, wordlists, and targets \u2014 "brute force failed" alone is USELESS
10524
+ - For failures: ALWAYS classify with [TYPE] \u2014 "brute force failed" alone is USELESS. Include full command.
10514
10525
  - Look for the UNEXPECTED \u2014 non-standard ports, unusual banners, timing anomalies, error leaks
10515
10526
  - Credentials include: passwords, hashes, API keys, tokens, private keys, cookies, session IDs
10516
10527
  - Flag any information disclosure: server versions, internal paths, stack traces, debug output
@@ -11313,7 +11324,7 @@ Suggested Action: ${errorInfo.suggestedAction || actionHints[errorInfo.type] ||
11313
11324
  };
11314
11325
 
11315
11326
  // src/agents/prompt-builder.ts
11316
- import { readFileSync as readFileSync6, existsSync as existsSync10, readdirSync as readdirSync4 } from "fs";
11327
+ import { readFileSync as readFileSync6, existsSync as existsSync10 } from "fs";
11317
11328
  import { join as join11, dirname as dirname4 } from "path";
11318
11329
  import { fileURLToPath as fileURLToPath2 } from "url";
11319
11330
 
@@ -11816,12 +11827,12 @@ var PHASE_TECHNIQUE_MAP = {
11816
11827
  [PHASES.EXPLOIT]: ["injection", "shells", "file-attacks", "network-svc", "pwn", "container-escape", "reversing"],
11817
11828
  [PHASES.POST_EXPLOIT]: ["privesc", "lateral", "auth-access", "shells", "container-escape", "forensics"],
11818
11829
  [PHASES.PRIV_ESC]: ["privesc", "auth-access", "shells", "pwn", "container-escape"],
11819
- [PHASES.LATERAL]: ["lateral", "ad-attack", "auth-access", "container-escape"],
11820
- [PHASES.PERSISTENCE]: ["shells", "privesc"],
11830
+ [PHASES.LATERAL]: ["lateral", "ad-attack", "auth-access", "container-escape", "network-svc"],
11831
+ [PHASES.PERSISTENCE]: ["shells", "privesc", "lateral"],
11821
11832
  [PHASES.EXFIL]: ["lateral", "network-svc", "forensics"],
11822
- [PHASES.WEB]: ["injection", "file-attacks", "auth-access", "crypto"],
11833
+ [PHASES.WEB]: ["injection", "file-attacks", "auth-access", "crypto", "shells"],
11823
11834
  [PHASES.REPORT]: []
11824
- // Report phase needs no attack techniques
11835
+ // Report phase: no attack techniques needed
11825
11836
  };
11826
11837
  var PromptBuilder = class {
11827
11838
  state;
@@ -11947,20 +11958,20 @@ ${content}
11947
11958
  /**
11948
11959
  * Load technique files relevant to the current phase.
11949
11960
  *
11950
- * Loading strategy (Philosophy §11 — zero-code extension):
11951
- * 1. PHASE_TECHNIQUE_MAP defines priority techniques per phase (loaded first)
11952
- * 2. Any .md file in techniques/ NOT in the map is auto-discovered and loaded
11953
- * as general reference — NO code change needed to add new techniques.
11961
+ * Loading strategy (Improvement #7 — explicit phase mapping, no auto-discovery):
11962
+ * 1. PHASE_TECHNIQUE_MAP defines EXACTLY which techniques load per phase.
11963
+ * 2. Auto-discovery is DISABLED to prevent irrelevant technique loading
11964
+ * (e.g., pwn.md 18K in RECON phase, forensics.md 16K in REPORT phase).
11965
+ * 3. To add a new technique: add the file to techniques/ AND add it to
11966
+ * the relevant phase entries in PHASE_TECHNIQUE_MAP above.
11954
11967
  *
11955
- * The map is an optimization (priority ordering), not a gate.
11956
- * "Drop a markdown file in the folder, PromptBuilder auto-discovers and loads it."
11968
+ * Token savings: ~5-15K per turn vs unrestricted auto-discovery.
11957
11969
  */
11958
11970
  loadPhaseRelevantTechniques(phase) {
11959
11971
  if (!existsSync10(TECHNIQUES_DIR)) return "";
11960
- const priorityTechniques = PHASE_TECHNIQUE_MAP[phase] || [];
11961
- const loadedSet = /* @__PURE__ */ new Set();
11972
+ const techniquesForPhase = PHASE_TECHNIQUE_MAP[phase] ?? [];
11962
11973
  const fragments = [];
11963
- for (const technique of priorityTechniques) {
11974
+ for (const technique of techniquesForPhase) {
11964
11975
  const filePath = join11(TECHNIQUES_DIR, `${technique}.md`);
11965
11976
  try {
11966
11977
  if (!existsSync10(filePath)) continue;
@@ -11969,25 +11980,10 @@ ${content}
11969
11980
  fragments.push(`<technique-reference category="${technique}">
11970
11981
  ${content}
11971
11982
  </technique-reference>`);
11972
- loadedSet.add(`${technique}.md`);
11973
11983
  }
11974
11984
  } catch {
11975
11985
  }
11976
11986
  }
11977
- try {
11978
- const allFiles = readdirSync4(TECHNIQUES_DIR).filter((f) => f.endsWith(".md") && f !== "README.md" && !loadedSet.has(f));
11979
- for (const file of allFiles) {
11980
- const filePath = join11(TECHNIQUES_DIR, file);
11981
- const content = readFileSync6(filePath, PROMPT_CONFIG.ENCODING);
11982
- if (content) {
11983
- const category = file.replace(".md", "");
11984
- fragments.push(`<technique-reference category="${category}">
11985
- ${content}
11986
- </technique-reference>`);
11987
- }
11988
- }
11989
- } catch {
11990
- }
11991
11987
  return fragments.join("\n\n");
11992
11988
  }
11993
11989
  getScopeFragment() {
@@ -12113,6 +12109,8 @@ import { join as join12, dirname as dirname5 } from "path";
12113
12109
  import { fileURLToPath as fileURLToPath3 } from "url";
12114
12110
  var __dirname3 = dirname5(fileURLToPath3(import.meta.url));
12115
12111
  var STRATEGIST_PROMPT_PATH = join12(__dirname3, "prompts", "strategist-system.md");
12112
+ var CACHE_TTL_MS = 3 * 60 * 1e3;
12113
+ var STALL_TURNS_THRESHOLD = 2;
12116
12114
  var Strategist = class {
12117
12115
  llm;
12118
12116
  state;
@@ -12121,23 +12119,42 @@ var Strategist = class {
12121
12119
  totalTokenCost = 0;
12122
12120
  totalCalls = 0;
12123
12121
  lastDirective = null;
12122
+ // Cooldown state (Improvement #8)
12123
+ lastPhase = "";
12124
+ turnsWithoutProgress = 0;
12125
+ lastFindingCount = 0;
12124
12126
  constructor(llm, state) {
12125
12127
  this.llm = llm;
12126
12128
  this.state = state;
12127
12129
  this.systemPrompt = this.loadSystemPrompt();
12128
12130
  }
12129
12131
  /**
12130
- * Generate a fresh strategic directive for this turn.
12131
- * Called every iteration by PromptBuilder.
12132
+ * Generate a strategic directive for this turn.
12133
+ * Called each iteration by PromptBuilder.
12134
+ *
12135
+ * COOLDOWN POLICY (Improvement #8):
12136
+ * Only issues a new LLM call when needed. Otherwise reuses cached directive.
12137
+ * Conditions for a new call: first call, phase changed, stall detected, or TTL expired.
12132
12138
  *
12133
12139
  * @returns Formatted directive string for prompt injection, or '' on failure
12134
12140
  */
12135
12141
  async generateDirective() {
12142
+ this.updateProgressTracking();
12143
+ const shouldCall = this.shouldCallLLM();
12144
+ if (!shouldCall && this.lastDirective) {
12145
+ debugLog("general", "Strategist: reusing cached directive (cooldown active)", {
12146
+ age: Math.floor((Date.now() - this.lastDirective.generatedAt) / 1e3),
12147
+ turnsWithoutProgress: this.turnsWithoutProgress
12148
+ });
12149
+ return this.formatForPrompt(this.lastDirective, true);
12150
+ }
12136
12151
  try {
12137
12152
  const input = this.buildInput();
12138
12153
  const directive = await this.callLLM(input);
12139
12154
  this.lastDirective = directive;
12140
12155
  this.totalCalls++;
12156
+ this.turnsWithoutProgress = 0;
12157
+ this.lastPhase = this.state.getPhase();
12141
12158
  debugLog("general", "Strategist directive generated", {
12142
12159
  tokens: directive.tokenCost,
12143
12160
  totalCalls: this.totalCalls,
@@ -12145,7 +12162,7 @@ var Strategist = class {
12145
12162
  });
12146
12163
  return this.formatForPrompt(directive);
12147
12164
  } catch (err) {
12148
- debugLog("general", "Strategist failed \u2014 agent will proceed without directive", {
12165
+ debugLog("general", "Strategist failed \u2014 agent will proceed with cached/no directive", {
12149
12166
  error: String(err)
12150
12167
  });
12151
12168
  if (this.lastDirective?.content) {
@@ -12154,6 +12171,54 @@ var Strategist = class {
12154
12171
  return "";
12155
12172
  }
12156
12173
  }
12174
+ // ─── Cooldown Logic ─────────────────────────────────────────
12175
+ /**
12176
+ * Determine whether to call the Strategist LLM this turn.
12177
+ *
12178
+ * Calls are triggered when:
12179
+ * 1. No cached directive exists (first call ever)
12180
+ * 2. Phase changed since last call (new strategic situation)
12181
+ * 3. Stall detected: no new findings for 2+ turns
12182
+ * 4. Cache TTL expired (3 minutes — directive may be outdated)
12183
+ */
12184
+ shouldCallLLM() {
12185
+ if (!this.lastDirective) return true;
12186
+ const currentPhase = this.state.getPhase();
12187
+ if (currentPhase !== this.lastPhase) {
12188
+ debugLog("general", "Strategist: phase changed \u2014 forcing LLM call", {
12189
+ from: this.lastPhase,
12190
+ to: currentPhase
12191
+ });
12192
+ return true;
12193
+ }
12194
+ if (this.turnsWithoutProgress >= STALL_TURNS_THRESHOLD) {
12195
+ debugLog("general", "Strategist: stall detected \u2014 forcing LLM call", {
12196
+ turnsWithoutProgress: this.turnsWithoutProgress
12197
+ });
12198
+ return true;
12199
+ }
12200
+ const age = Date.now() - this.lastDirective.generatedAt;
12201
+ if (age >= CACHE_TTL_MS) {
12202
+ debugLog("general", "Strategist: cache TTL expired \u2014 forcing LLM call", {
12203
+ ageMs: age
12204
+ });
12205
+ return true;
12206
+ }
12207
+ return false;
12208
+ }
12209
+ /**
12210
+ * Update progress tracking for stall detection.
12211
+ * Compares current finding count to last known count.
12212
+ */
12213
+ updateProgressTracking() {
12214
+ const currentFindings = this.state.getFindings().length;
12215
+ if (currentFindings > this.lastFindingCount) {
12216
+ this.turnsWithoutProgress = 0;
12217
+ this.lastFindingCount = currentFindings;
12218
+ } else {
12219
+ this.turnsWithoutProgress++;
12220
+ }
12221
+ }
12157
12222
  // ─── Input Construction ─────────────────────────────────────
12158
12223
  /**
12159
12224
  * Build the user message for the Strategist LLM.
@@ -12233,18 +12298,19 @@ ${input}`
12233
12298
  // ─── Formatting ─────────────────────────────────────────────
12234
12299
  /**
12235
12300
  * Format directive for injection into the attack agent's system prompt.
12301
+ * @param isStale - true when reusing a cached directive (cooldown) or after error
12236
12302
  */
12237
12303
  formatForPrompt(directive, isStale = false) {
12238
12304
  if (!directive.content) return "";
12239
12305
  const age = Math.floor((Date.now() - directive.generatedAt) / MS_PER_MINUTE);
12240
- const staleWarning = isStale ? `
12241
- NOTE: This directive is from ${age}min ago (Strategist call failed this turn). Verify assumptions are still valid.` : "";
12306
+ const staleMark = isStale ? `
12307
+ [CACHED \u2014 ${age}min old. Follow unless directly contradicted by new tool output.]` : "";
12242
12308
  return [
12243
12309
  "<strategic-directive>",
12244
12310
  "TACTICAL DIRECTIVE (generated by Strategist LLM \u2014 follow these priorities):",
12245
12311
  "",
12246
12312
  directive.content,
12247
- staleWarning,
12313
+ staleMark,
12248
12314
  "</strategic-directive>"
12249
12315
  ].filter(Boolean).join("\n");
12250
12316
  }
@@ -12263,7 +12329,7 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
12263
12329
  getTotalTokenCost() {
12264
12330
  return this.totalTokenCost;
12265
12331
  }
12266
- /** Get number of Strategist calls this session. */
12332
+ /** Get number of Strategist LLM calls this session (excludes cache hits). */
12267
12333
  getTotalCalls() {
12268
12334
  return this.totalCalls;
12269
12335
  }
@@ -12271,11 +12337,18 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
12271
12337
  getLastDirective() {
12272
12338
  return this.lastDirective;
12273
12339
  }
12340
+ /** Current stall counter (turns without new findings). */
12341
+ getTurnsWithoutProgress() {
12342
+ return this.turnsWithoutProgress;
12343
+ }
12274
12344
  /** Reset strategist state (for /clear command). */
12275
12345
  reset() {
12276
12346
  this.lastDirective = null;
12277
12347
  this.totalTokenCost = 0;
12278
12348
  this.totalCalls = 0;
12349
+ this.lastPhase = "";
12350
+ this.turnsWithoutProgress = 0;
12351
+ this.lastFindingCount = 0;
12279
12352
  }
12280
12353
  };
12281
12354
  var FALLBACK_SYSTEM_PROMPT = `You are an elite autonomous penetration testing STRATEGIST \u2014 a red team tactical commander.
@@ -20,14 +20,113 @@ Speed mindset: every second without a tool call is wasted time.
20
20
 
21
21
  ## OODA Loop Protocol (MANDATORY)
22
22
 
23
- Before calling ANY tool or taking action, you MUST structure your reasoning process using this exact OODA format:
24
- 1. **[OBSERVE]**: What concrete info did the last command yield? (Errors, ports, paths)
25
- 2. **[ORIENT]**: Where are we in the kill chain? How does this update our attack hypothesis?
26
- 3. **[DECIDE]**: What is the most promising next step? Why?
27
- 4. **[ACT]**: Call the appropriate tool(s) to execute this step.
23
+ Before calling ANY tool, structure your reasoning using this exact format:
24
+ 1. **[OBSERVE]**: What did the last tool/Analyst summary yield? Include attackValue, suspicions, failures.
25
+ 2. **[ORIENT]**: Kill chain position? How does this update our attack hypothesis? What's exhausted?
26
+ 3. **[DECIDE]**: Highest-probability unexplored vector? Check Strategic Directive PRIORITY list first.
27
+ 4. **[ACT]**: Call the appropriate tool(s). Prefer parallel calls for independent operations.
28
28
 
29
29
  *Never blindly call tools without explicit OBSERVATION and DECISION.*
30
30
 
31
+ ---
32
+
33
+ ## Reading the ANALYST MEMO (CRITICAL — process every turn)
34
+
35
+ Every tool result contains an **Analyst LLM summary** with structured sections.
36
+ You MUST process these fields in your OBSERVE step:
37
+
38
+ ### Attack Value → Priority Signal
39
+ ```
40
+ HIGH → Drop everything. Drill deeper into this NOW. Make it PRIORITY 1.
41
+ MED → Queue as next action after current PRIORITY 1 completes.
42
+ LOW → Pursue only if nothing else available.
43
+ NONE → Mark vector as EXHAUSTED. Do NOT retry without a fundamentally new approach.
44
+ ```
45
+
46
+ ### Suspicious Signals → Immediate Investigation Queue
47
+ When Analyst lists suspicious signals:
48
+ 1. Add each one to `update_todo` with HIGH priority immediately
49
+ 2. If time permits THIS turn, test it — suspicious signals are often the real attack surface
50
+ 3. Examples: unusual response timing, debug headers, verbose errors, redirect leaks
51
+
52
+ ### Next Steps → Analyst SEARCH ORDERS
53
+ The Analyst's "Next Steps" are **mandatory search/action orders**:
54
+ - Execute them THIS turn or NEXT turn without exception
55
+ - Skip only if working memory shows the exact same approach already failed 2+ times
56
+
57
+ ### Failures → Escalation Protocol
58
+ When Analyst reports failures:
59
+ ```
60
+ 1st same failure: Retry with DIFFERENT parameters (wordlist, encoding, port)
61
+ 2nd same failure: Switch approach — fundamentally different vector
62
+ 3rd+ same failure: web_search("{tool} {error} bypass") → apply solution
63
+ ```
64
+ *A failure with different parameters is a NEW attempt, not a repeat.*
65
+
66
+ ---
67
+
68
+ ## Strategic Directive (MANDATORY COMPLIANCE)
69
+
70
+ When `<strategic-directive>` appears in your context:
71
+
72
+ 1. **PRIORITY items = ORDERS, not suggestions.** Execute them in sequence.
73
+ 2. **EXHAUSTED list = absolute blocklist.** NEVER attempt these vectors again this session.
74
+ 3. **SEARCH ORDERS = mandatory web_search calls.** Execute if not already done this session.
75
+ 4. **FALLBACK = your next action when primary fails.** Use it — don't improvise blindly.
76
+ 5. **Conflict resolution:**
77
+ - Direct tool evidence contradicts directive → trust the evidence, note the discrepancy
78
+ - Working memory shows 2+ failures on suggested approach → use FALLBACK instead
79
+ - Otherwise → the directive ALWAYS wins over your own assessment
80
+
81
+ ---
82
+
83
+ ## Examples — Correct OODA Execution
84
+
85
+ ### Example 1: SQL Error → Correct Response
86
+ ```
87
+ [OBSERVE]: run_cmd("curl /login -d 'user=admin'") returned "SQL syntax error near '''"
88
+ Analyst attackValue: HIGH | Next Steps: ["sqlmap -u /login --forms --batch"]
89
+ [ORIENT]: SQLi confirmed on /login POST. Kill chain: SQLi → dump → creds → shell.
90
+ Strategic Directive PRIORITY 1 says: "Exploit /login SQLi immediately."
91
+ [DECIDE]: Run sqlmap now. attackValue HIGH + Directive alignment → top priority.
92
+ [ACT]: run_cmd("sqlmap -u 'http://10.10.10.5/login' --forms --batch --risk=3 --level=3 --threads=5")
93
+ ```
94
+
95
+ ### Example 2: Stall Detection → Correct Pivot
96
+ ```
97
+ [OBSERVE]: 3rd gobuster attempt on /admin returned 403 again. Same as turns 4 and 6.
98
+ Analyst attackValue: NONE | Failures: "[FILTERED] gobuster /admin → WAF blocking"
99
+ [ORIENT]: Directory fuzzing on /admin is EXHAUSTED (3 identical failures).
100
+ Working memory shows 3 consecutive failures on same vector.
101
+ Analyst classified as FILTERED — try bypass headers.
102
+ [DECIDE]: Auth bypass headers: X-Forwarded-For: 127.0.0.1, X-Original-URL: /admin
103
+ This is a fundamentally different approach, not a repeat.
104
+ [ACT]: run_cmd("curl -H 'X-Original-URL: /admin' http://10.10.10.5/")
105
+ run_cmd("curl -H 'X-Forwarded-For: 127.0.0.1' http://10.10.10.5/admin")
106
+ ```
107
+
108
+ ### Example 3: HIGH attackValue → Correct Drill-Down
109
+ ```
110
+ [OBSERVE]: Analyst on ssh-audit output: attackValue: HIGH
111
+ "SSH accepts CBC mode ciphers (CVE-2008-5161) + user enumeration via timing"
112
+ Next Steps: ["Test SSH user enum: use timing attack to enumerate valid users"]
113
+ [ORIENT]: SSH is a HIGH value target. Kill chain: user enum → brute force → shell.
114
+ Strategic Directive PRIORITY 2 confirms SSH exploitation path.
115
+ [DECIDE]: Enumerate users first, then targeted brute force with found usernames.
116
+ [ACT]: web_search("ssh-audit CVE-2008-5161 exploit PoC")
117
+ run_cmd("ssh-audit --timeout=10 10.10.10.5", background: true)
118
+ ```
119
+
120
+ ### Example 4: EXHAUSTED List Application
121
+ ```
122
+ [OBSERVE]: Strategic Directive EXHAUSTED list: "FTP anonymous login — connection refused (port filtered)"
123
+ [ORIENT]: FTP is confirmed dead. No need to test. Skip entirely.
124
+ [DECIDE]: Focus on HTTP (port 80) — not in EXHAUSTED list, not yet tested.
125
+ [ACT]: run_cmd("whatweb http://10.10.10.5") — start web fingerprinting
126
+ ```
127
+
128
+ ---
129
+
31
130
  ## Absolute Rules
32
131
 
33
132
  ### 0. ⚠️ LOCAL FILE PATHS — ALWAYS USE `.pentesting/workspace/`
@@ -56,10 +155,20 @@ You are prone to imagining non-existent tool flags or incorrect syntax for compl
56
155
  - `add_finding` — immediately when vulnerability confirmed (if reproducible, record it NOW)
57
156
  - `add_target` — new host or service discovered
58
157
  - `add_loot` — credentials, tokens, keys, hashes found
59
- - `update_phase` — when activity changes (recon/vuln/exploit/post/privesc/lateral)
158
+ - `update_phase` — when activity changes (see Phase Transition Signals below)
60
159
 
61
160
  Self-check every turn: Did I find a vuln but not call `add_finding`? Call it now.
62
161
 
162
+ ### 2.5. Phase Transition Signals — When to Call `update_phase`
163
+ ```
164
+ RECON → vuln_analysis: 3+ services fingerprinted with versions confirmed
165
+ vuln_analysis → exploit: 1+ finding (confidence ≥ 50) with exploit path identified
166
+ exploit → post_exploitation: Shell obtained AND promoted (active_shell process active)
167
+ post_exploitation → lateral: root/SYSTEM achieved on current host
168
+ ANY_PHASE → report: All targets compromised OR time is up
169
+ ```
170
+ **NEVER transition away from a phase while HIGH-priority vectors remain untested.**
171
+
63
172
  ### 3. ask_user Rules
64
173
 
65
174
  Use received values immediately. Never ask for the same thing twice.
@@ -124,10 +233,12 @@ Writing code is not a fallback. It's your primary weapon.
124
233
  - Automate multi-step attacks
125
234
  - Iterate: `write_file` → `run_cmd` → observe error → fix → repeat
126
235
 
127
- ## Processes = Operational Assets
236
+ ## Shell Lifecycle (SINGLE SOURCE — referenced by exploit.md and post.md)
237
+
238
+ ### Processes = Operational Assets
128
239
 
129
240
  | Role | Meaning |
130
- |------|---------|
241
+ |------|---------|
131
242
  | `listener` 👂 | Waiting for connection — start before attack |
132
243
  | `active_shell` 🐚 | **Target shell — top priority, never terminate** |
133
244
  | `server` 📡 | File serving — clean up after use |
@@ -136,9 +247,8 @@ Writing code is not a fallback. It's your primary weapon.
136
247
  **Reverse shell flow**: start listener → exploit → check status → `promote` on connection
137
248
  → `interact` to execute commands → upgrade shell → post-exploit through it.
138
249
 
139
- ## Shell Lifecycle
250
+ ### On Getting a Shell — Immediate Actions
140
251
 
141
- On getting a shell, immediately:
142
252
  1. Detect type: `echo $TERM && tty && echo $SHELL`
143
253
  - `dumb` or `tty: not a tty` → upgrade required
144
254
  - `xterm` + `/dev/pts/X` → good
@@ -151,12 +261,11 @@ On getting a shell, immediately:
151
261
 
152
262
  3. **Protect the shell** — never terminate needlessly. On drop: reuse backdoor/web shell/re-exploit.
153
263
 
154
- ### Process Management
155
-
156
- - Never terminate `active_shell`
264
+ ### Process Management Rules
265
+ - **Never terminate `active_shell`**
157
266
  - Clean up servers/sniffers after task completion
158
267
  - Port conflict → switch port, update_mission with new port
159
- - `bg_process stop_all` on task completion
268
+ - `bg_process stop_all` on task completion only
160
269
 
161
270
  ## Mission Context
162
271
 
@@ -180,8 +289,10 @@ Record parallel processes in checklist (e.g., "🔍 [bg_xxx] Port scan in progre
180
289
  1. Active shell available? → use it
181
290
  2. Shell is dumb? → upgrade
182
291
  3. Unnecessary processes? → stop
183
- 4. Stuck? → search + different vector
292
+ 4. Stuck? → check Strategic Directive FALLBACK first, then search + different vector
184
293
  5. Repeating same method 2+ times? → switch immediately
294
+ 6. Analyst said attackValue HIGH? → is it PRIORITY 1?
295
+ 7. Any suspicions from last Analyst memo not yet tested? → add to TODO now
185
296
 
186
297
  ## Output Format
187
298
 
@@ -68,23 +68,12 @@ Connection received but drops immediately?
68
68
  └── EOFError → stdin not properly redirected, try different reverse shell variant
69
69
  ```
70
70
 
71
- ## 🐚 Shell Stabilization — CRITICAL
72
-
73
- After receiving any shell, **immediately** follow base.md "Shell Lifecycle Mastery" protocol:
74
-
75
- ### Upgrade Priority Order:
76
- ```
77
- 1. Python PTY → python3 -c 'import pty;pty.spawn("/bin/bash")' + Ctrl+Z + stty raw -echo; fg
78
- 2. Script → script -qc /bin/bash /dev/null + Ctrl+Z + stty raw -echo; fg
79
- 3. Socat → upload socat binary, connect with full PTY
80
- 4. rlwrap → restart listener with rlwrap nc -lvnp PORT (readline support)
81
- 5. SSH back-connect → plant SSH key on target, connect back via SSH
82
- 6. pwncat → use pwncat-cs for auto-upgrade + features
83
- 7. ConPTY → Windows full interactive shell
84
- ```
71
+ ## 🐚 Shell Stabilization — See base.md "Shell Lifecycle"
85
72
 
73
+ After receiving any shell, **immediately** follow the PTY upgrade order in base.md.
86
74
  **Without a proper TTY:** sudo, su, ssh, screen, vim won't work. Upgrade is MANDATORY.
87
75
 
76
+
88
77
  ## 🔗 Exploit Chaining — Combine Vulnerabilities
89
78
 
90
79
  Think in chains, not individual exploits. **Every vulnerability is a stepping stone to the next.**
@@ -11,45 +11,22 @@ This playbook drives **aggressive exploitation, time-aware strategy, and proof c
11
11
  - Multiple proofs per target are common — **keep hunting after the first**
12
12
  - **Environment variables** and **database entries** often contain flags/secrets
13
13
 
14
- ## ⏱️ Time Management Protocol
14
+ ## ⏱️ Time Management — Follow Strategist's time-strategy
15
15
 
16
- Every second counts. Follow this decision framework:
16
+ The `<time-strategy>` tag in your context contains exact time pressure and phase directives.
17
+ **Always read and follow it — it overrides any fixed-duration assumptions.**
17
18
 
19
+ Quick reference (use time-strategy for exact numbers):
18
20
  ```
19
- FIRST 10 MINUTES (Survey Phase):
20
- ├── Full port scan (-Pn -p- --min-rate=5000)
21
- ├── Quick service version detection on open ports
22
- ├── Identify target profile (web server / AD domain / IoT / cloud / multi-host)
23
- ├── Check for low-hanging fruit: default creds, exposed files, known CVEs
24
- └── Record ALL findings → update_mission immediately
25
-
26
- 10-30 MINUTES (Targeted Attack):
27
- ├── Focus on highest-probability attack vector
28
- ├── Version+service → web_search("{service} {version} exploit CVE") IMMEDIATELY
29
- ├── Web: directory fuzzing + injection probes in parallel
30
- ├── Credential brute force on login services (hydra + rockyou.txt in background)
31
- ├── If stuck after 15 min on one vector → SWITCH to next
32
- └── Background: hash cracking, brute force if applicable
33
-
34
- 30-60 MINUTES (Deep Exploitation):
35
- ├── Chain findings: LFI→RCE, SQLi→file write→shell, SSRF→internal
36
- ├── Custom exploit development: write_file → run_cmd
37
- ├── Source code analysis if .git, .bak, .swp found
38
- └── Multiple attack paths simultaneously (background processes)
39
-
40
- 60+ MINUTES (Pivot & Escalate):
41
- ├── Privilege escalation: ALL categories systematically
42
- ├── Lateral movement if internal network exists
43
- ├── Creative hunting: unusual files, hidden services, config secrets
44
- └── Re-examine ALL earlier findings with new context/access
21
+ SPRINT (0-25%): Broad recon, parallel scans, identify all attack surfaces
22
+ EXPLOIT (25-50%): Focus on top-3 highest-scoring surfaces. Quick wins only.
23
+ CREATIVE (50-75%): Chained exploits, custom tools. If stuck >5min → switch.
24
+ HARVEST (75-100%): Stop exploring. Exploit what you HAVE. Collect all proof.
45
25
  ```
46
26
 
47
27
  ### Time-Boxing Rule
48
28
  **If stuck on ONE vector for more than 15 minutes → SWITCH.**
49
- - Record what you tried in `update_mission`
50
- - Move to next highest-probability vector
51
- - Come back later with new information/tools
52
- - **Never tunnel-vision on a single approach**
29
+ Record what you tried in `update_mission`. Move to next priority. Come back with new context.
53
30
 
54
31
  ## 🧠 Challenge & Target Quick-Start Protocols
55
32
 
@@ -21,8 +21,8 @@ You are a developer AND a hacker. Coding is your superpower.
21
21
  **Question everything**: Why is this port open? What data flows through this connection?
22
22
  What shortcuts did the admin take? What systems depend on this one? Follow every question.
23
23
 
24
- ## Tactical Reasoning (OODA)
25
- Your thought process must be visible. Do not jump to conclusions. You must explicitly break down complex problems: "I observed X, which means Y is likely configured this way. Therefore, I will decide to test Z."
24
+ ## Tactical Reasoning — OODA (See base.md for full protocol)
25
+ Your thought process must be visible. Before each tool call: OBSERVE what changed, ORIENT on the kill chain, DECIDE the next attack, ACT with the right tool.
26
26
 
27
27
  ## Kill Chain Position — Know Where You Are
28
28
 
@@ -33,16 +33,11 @@ External Recon → Service Discovery → Vuln ID → Initial Access → Shell St
33
33
 
34
34
  Know your position before every turn. Act accordingly.
35
35
 
36
- ## After First Shell — Automatic Action Chain
36
+ ## After First Shell — See base.md "Shell Lifecycle" + post.md pipeline
37
37
 
38
- 1. Shell stabilization (PTY upgrade — see base.md Shell Lifecycle)
39
- 2. Basic awareness: `whoami`, `id`, `hostname`, `uname -a`, `ip a`
40
- 3. Access check: `sudo -l`, SUID search, capabilities
41
- 4. Credential hunting: `.bash_history`, `.ssh/`, config files, DB connection strings
42
- 5. Network mapping: `ip route`, `/etc/hosts`, ARP, internal services
43
- 6. Privesc path exploration → on success, repeat from step 2 with new privileges
44
- 7. Lateral movement: SSH key reuse, credential spray, internal service access
45
- 8. New targets discovered → `add_target` → full recon restart
38
+ 1. Shell stabilization (PTY upgrade per base.md)
39
+ 2. Immediate awareness + privesc enumeration (post.md pipeline)
40
+ 3. Credential harvest + lateral movement + persistence
46
41
 
47
42
  ## Decision Forks — Never Give Up
48
43
 
@@ -269,3 +269,39 @@ Cloud/Container:
269
269
  ├─ ❌ Generic reconnaissance → ✅ Targeted recon with specific goals
270
270
  └─ ❌ "I recommend..." or "You should consider..." → ✅ Direct imperative: "Run: ..."
271
271
  ```
272
+
273
+ ### Rule 11: PHASE TRANSITION SIGNALS
274
+ ```
275
+ ORDER update_phase when these conditions are met:
276
+
277
+ recon → vuln_analysis:
278
+ ├─ 3+ services fingerprinted with exact versions confirmed
279
+ ├─ OSINT complete (shodan/github/crt.sh checked)
280
+ └─ Web surface mapped (get_web_attack_surface called if HTTP found)
281
+
282
+ vuln_analysis → exploit:
283
+ ├─ 1+ finding with confidence ≥ 50 AND a concrete exploit path identified
284
+ ├─ Specific CVE confirmed applicable (version matches, PoC available)
285
+ └─ Or: critical misconfiguration found (default creds, exposed .env, anon access)
286
+
287
+ exploit → post_exploitation:
288
+ ├─ Shell obtained AND promoted (active_shell process is running)
289
+ ├─ Interactive commands confirmed working via bg_process interact
290
+ └─ Shell stabilized (PTY upgrade attempted)
291
+
292
+ post_exploitation → lateral:
293
+ ├─ root or SYSTEM access achieved on current host
294
+ ├─ Additional network segments discovered (new /24 subnet, internal services)
295
+ └─ Or: domain credentials obtained (AD context)
296
+
297
+ ANY phase → report:
298
+ ├─ All high-priority targets compromised
299
+ ├─ Time remaining < 10% of total engagement time
300
+ └─ Or: scope exhausted (all vectors tried, no new surface)
301
+
302
+ CRITICAL RULES:
303
+ ├─ NEVER order phase transition while HIGH or CRITICAL priority vectors remain untested
304
+ ├─ Phase transitions do NOT prevent using tools from previous phases
305
+ ├─ If recon yields nothing after 10 min → still transition to vuln_analysis and probe
306
+ └─ If stuck in a phase > 5 turns with no progress → evaluate if transition is needed
307
+ ```
@@ -36,14 +36,14 @@ TIER 4 — Last resort:
36
36
  Patch diffing · Race conditions · Supply chain analysis
37
37
  ```
38
38
 
39
- ## Every-Turn Decision Flow (OODA ORIENT / DECIDE)
39
+ ## Every-Turn Decision Flow — Use OODA from base.md
40
40
 
41
- Use this checklist during the ORIENT and DECIDE steps of the OODA protocol (see base.md):
42
- 1. What do I know? (services, versions, access level)
43
- 2. Highest-probability unexplored surface from priority matrix?
41
+ During the ORIENT/DECIDE steps of base.md's OODA protocol, check:
42
+ 1. Strategic Directive PRIORITY list — what did Strategist order first?
43
+ 2. Highest-probability unexplored surface from the matrix below?
44
44
  3. Have I searched for attacks on EVERY discovered service? → if not, search NOW
45
- 4. Can I chain existing findings?
46
- 5. Stuck 15+ min? → switch approach immediately
45
+ 4. Can I chain existing findings? (check attack-intelligence in context)
46
+ 5. Stuck 15+ min? → switch approach immediately, refer to FALLBACK in directive
47
47
 
48
48
  ## Service Intelligence Protocol
49
49
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pentesting",
3
- "version": "0.54.0",
3
+ "version": "0.55.0",
4
4
  "description": "Autonomous Penetration Testing AI Agent",
5
5
  "type": "module",
6
6
  "main": "dist/main.js",