pentesting 0.54.1 → 0.55.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -342,7 +342,7 @@ var ORPHAN_PROCESS_NAMES = [
342
342
 
343
343
  // src/shared/constants/agent.ts
344
344
  var APP_NAME = "Pentest AI";
345
- var APP_VERSION = "0.54.1";
345
+ var APP_VERSION = "0.55.1";
346
346
  var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
347
347
  var LLM_ROLES = {
348
348
  SYSTEM: "system",
@@ -4940,19 +4940,145 @@ var CHALLENGE_TYPE_SIGNALS = {
4940
4940
  ]
4941
4941
  };
4942
4942
 
4943
+ // src/shared/constants/prompts.ts
4944
+ var PROMPT_PATHS = {
4945
+ BASE: "base.md",
4946
+ OFFENSIVE_PLAYBOOK: "offensive-playbook.md",
4947
+ AGENT_FILES: {
4948
+ ORCHESTRATOR: "orchestrator.md",
4949
+ RECON: "recon.md",
4950
+ VULN: "vuln.md",
4951
+ WEB: "web.md",
4952
+ EXPLOIT: "exploit.md",
4953
+ POST: "post.md",
4954
+ REPORT: "report.md",
4955
+ INFRA: "infra.md",
4956
+ // Strategic methodology (always loaded as reference)
4957
+ STRATEGY: "strategy.md",
4958
+ EVASION: "evasion.md",
4959
+ ZERO_DAY: "zero-day.md",
4960
+ PAYLOAD_CRAFT: "payload-craft.md"
4961
+ },
4962
+ /** Technique reference library directory */
4963
+ TECHNIQUES_DIR: "techniques"
4964
+ };
4965
+ var TECHNIQUE_FILES = {
4966
+ INJECTION: "injection",
4967
+ PRIVESC: "privesc",
4968
+ LATERAL: "lateral",
4969
+ AD_ATTACK: "ad-attack",
4970
+ AUTH_ACCESS: "auth-access",
4971
+ CONTAINER_ESCAPE: "container-escape",
4972
+ SANDBOX_ESCAPE: "sandbox-escape",
4973
+ FILE_ATTACKS: "file-attacks",
4974
+ NETWORK_SVC: "network-svc",
4975
+ CRYPTO: "crypto",
4976
+ REVERSING: "reversing",
4977
+ FORENSICS: "forensics",
4978
+ PWN: "pwn",
4979
+ SHELLS: "shells"
4980
+ };
4981
+ var PROMPT_XML = {
4982
+ PHASE: (phase, content) => `<phase-instructions phase="${phase}">
4983
+ ${content}
4984
+ </phase-instructions>`,
4985
+ SCOPE: (allowed, domains, exclude, flags) => `<scope type="ABSOLUTE_CONSTRAINT">
4986
+ Authorized CIDR: ${allowed}
4987
+ Authorized Domains: ${domains}
4988
+ Exclusions: ${exclude}
4989
+ Constraints: ${flags}
4990
+ </scope>`,
4991
+ STATE: (content) => `<current-state>
4992
+ ${content}
4993
+ </current-state>`,
4994
+ TODO: (content) => `<todo>
4995
+ ${content}
4996
+ </todo>`
4997
+ };
4998
+ var PROMPT_DEFAULTS = {
4999
+ NO_SCOPE: "<scope>NO SCOPE DEFINED. STOP.</scope>",
5000
+ EMPTY_TODO: "Create initial plan",
5001
+ USER_CONTEXT: (context) => `
5002
+ <user-input>
5003
+ "${context}"
5004
+ </user-input>
5005
+
5006
+ <intent-rules>
5007
+ ANALYZE the user's intent before acting. Classify into ONE:
5008
+ ABORT \u2192 stop current work, confirm with \`ask_user\`
5009
+ CORRECTION \u2192 adjust approach, continue
5010
+ INFORMATION \u2192 store and USE immediately (credentials, paths, hints)
5011
+ COMMAND \u2192 execute EXACTLY what was asked, nothing more
5012
+ TARGET_CHANGE \u2192 \`add_target\`, then begin testing
5013
+ GUIDANCE \u2192 acknowledge via \`ask_user\`, adjust strategy, continue
5014
+ STATUS_QUERY \u2192 report via \`ask_user\`, then RESUME previous work
5015
+ CONVERSATION \u2192 respond via \`ask_user\`, do NOT scan or attack
5016
+
5017
+ RULES:
5018
+ - No target set and none provided \u2192 \`ask_user\` to request target.
5019
+ - Conversation or greeting \u2192 respond conversationally, do NOT attack.
5020
+ - Uncertain intent \u2192 ask for clarification with \`ask_user\`.
5021
+ - This is a collaborative tool. The user is your partner.
5022
+ </intent-rules>`
5023
+ };
5024
+ var PROMPT_CONFIG = {
5025
+ ENCODING: "utf-8"
5026
+ };
5027
+ var INITIAL_TASKS = {
5028
+ RECON: "Initial reconnaissance and target discovery"
5029
+ };
5030
+ var CONTEXT_EXTRACTOR_PROMPT = `You are extracting actionable intelligence from a penetration testing session.
5031
+ DO NOT simply summarize or shorten. EXTRACT critical facts:
5032
+
5033
+ 1. DISCOVERED: Services, versions, paths, parameters (exact IPs, ports, versions)
5034
+ 2. CONFIRMED: Vulnerabilities or access confirmed
5035
+ 3. CREDENTIALS: Usernames, passwords, tokens, keys
5036
+ 4. DEAD ENDS: What failed \u2014 include EXACT command, tool, arguments, wordlist/file used.
5037
+ Distinguish between:
5038
+ - "This approach itself is impossible" (e.g., SSH key-only \u2192 no password brute force works)
5039
+ - "This specific attempt failed" (e.g., sqlmap with default tamper \u2192 try different tamper)
5040
+ 5. OPEN LEADS: Unexplored paths worth pursuing
5041
+
5042
+ Every line must include exact commands/tools/files used.
5043
+ The reader must be able to judge whether a retry with different parameters is worthwhile.`;
5044
+ var REFLECTION_PROMPT = `You are a tactical reviewer for a penetration testing agent.
5045
+ Review ALL actions from this turn \u2014 successes AND failures.
5046
+
5047
+ 1. ASSESSMENT: What did this turn accomplish? Rate: HIGH / MED / LOW / NONE.
5048
+ 2. SUCCESSES: What worked? Can this pattern be replicated elsewhere?
5049
+ 3. FAILURES: What failed? Is this a repeated pattern? If so \u2192 STOP this approach.
5050
+ 4. BLIND SPOTS: What was missed or overlooked?
5051
+ 5. NEXT PRIORITY: Single most valuable next action.
5052
+
5053
+ 3-5 lines. Every word must be actionable.`;
5054
+ var SUMMARY_REGENERATOR_PROMPT = `Update this penetration testing session summary with the new turn data.
5055
+
5056
+ Must include:
5057
+ - All discovered hosts, services, versions (exact IPs, ports, software versions)
5058
+ - All confirmed vulnerabilities
5059
+ - All obtained credentials
5060
+ - Failed attempts with EXACT commands/tools/arguments/files used.
5061
+ For each failure, state:
5062
+ - The root cause (auth method? WAF? patched? wrong params?)
5063
+ - Whether retrying with different parameters could work
5064
+ - Top unexplored leads
5065
+
5066
+ Remove outdated/superseded info. Keep concise but COMPLETE.
5067
+ The reader must be able to decide what to retry and what to never attempt again.`;
5068
+
4943
5069
  // src/shared/utils/auto-prompter.ts
4944
5070
  var SECONDARY_TYPE_RATIO = 0.5;
4945
5071
  var MIN_CHALLENGE_CONFIDENCE = 0.2;
4946
5072
  var MIN_RECON_OUTPUT_LENGTH = 100;
4947
5073
  var TYPE_TECHNIQUE_MAP = {
4948
- web: ["injection", "auth-access", "file-attacks", "crypto"],
4949
- pwn: ["pwn", "shells", "reversing"],
4950
- crypto: ["crypto"],
4951
- forensics: ["forensics", "reversing", "crypto"],
4952
- reversing: ["reversing", "pwn"],
4953
- misc: ["sandbox-escape", "crypto", "forensics"],
4954
- network: ["network-svc", "shells", "lateral", "ad-attack"],
4955
- unknown: ["network-svc", "injection", "shells", "file-attacks"]
5074
+ web: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.CRYPTO],
5075
+ pwn: [TECHNIQUE_FILES.PWN, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.REVERSING],
5076
+ crypto: [TECHNIQUE_FILES.CRYPTO],
5077
+ forensics: [TECHNIQUE_FILES.FORENSICS, TECHNIQUE_FILES.REVERSING, TECHNIQUE_FILES.CRYPTO],
5078
+ reversing: [TECHNIQUE_FILES.REVERSING, TECHNIQUE_FILES.PWN],
5079
+ misc: [TECHNIQUE_FILES.SANDBOX_ESCAPE, TECHNIQUE_FILES.CRYPTO, TECHNIQUE_FILES.FORENSICS],
5080
+ network: [TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.AD_ATTACK],
5081
+ unknown: [TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.FILE_ATTACKS]
4956
5082
  };
4957
5083
  var TYPE_PHASE_PROMPT_MAP = {
4958
5084
  web: "web.md",
@@ -10475,25 +10601,36 @@ var ANALYST_SYSTEM_PROMPT = `You are an independent pentesting output analyst. Y
10475
10601
 
10476
10602
  FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
10477
10603
 
10478
- ## Key Findings
10604
+ ## ${MEMO_SECTIONS.KEY_FINDINGS}
10479
10605
  - [finding 1 with exact values: ports, versions, paths]
10480
10606
  - [finding 2]
10481
10607
 
10482
- ## Credentials/Secrets
10608
+ ## ${MEMO_SECTIONS.CREDENTIALS}
10483
10609
  - [any discovered credentials, hashes, tokens, keys, certificates]
10484
10610
  - (write "None found" if none)
10485
10611
 
10486
- ## Attack Vectors
10612
+ ## ${MEMO_SECTIONS.ATTACK_VECTORS}
10487
10613
  - [exploitable services, vulnerabilities, misconfigurations, CVEs]
10488
10614
  - (write "None identified" if none)
10489
10615
 
10490
- ## Failures/Errors
10491
- - [what was attempted and FAILED \u2014 include the FULL command, wordlist, target, and the reason WHY it failed]
10492
- - [e.g.: "SSH brute force: hydra -l admin -P /usr/share/wordlists/rockyou.txt ssh://10.0.0.1 \u2014 connection refused (port filtered)"]
10493
- - [e.g.: "SQLi on /login with sqlmap --tamper=space2comment \u2014 input sanitized, WAF detected (ModSecurity)"]
10616
+ ## ${MEMO_SECTIONS.FAILURES}
10617
+ Classify EVERY failure using one of these types. Format: [TYPE] exact_command \u2192 why_failed \u2192 recommended_pivot
10618
+
10619
+ Failure types:
10620
+ - [FILTERED]: WAF/IDS/firewall blocked \u2192 suggest: encoding bypass, payload_mutate, different protocol/port
10621
+ - [WRONG_VECTOR]: Vulnerability not present here \u2192 suggest: pivot to different vuln class entirely
10622
+ - [AUTH_REQUIRED]: Credential or session needed first \u2192 suggest: brute force login or find creds in config files
10623
+ - [TOOL_ERROR]: Command syntax error, missing dep, or tool bug \u2192 suggest: run --help, use alternative tool
10624
+ - [TIMEOUT]: Service too slow or connection timed out \u2192 suggest: increase timeout, reduce scope, or use background mode
10625
+ - [PATCHED]: CVE/technique exists but target is patched \u2192 suggest: search bypass or newer CVE on same service
10626
+
10627
+ Examples:
10628
+ - "[FILTERED] sqlmap -u /login --tamper=space2comment \u2192 ModSecurity WAF, blocking all payloads \u2192 try charencode,randomcase tampers or payload_mutate"
10629
+ - "[AUTH_REQUIRED] curl http://target/admin \u2192 HTTP 401 Basic Auth \u2192 hydra -l admin -P rockyou.txt http-get://target/admin"
10630
+ - "[TIMEOUT] nmap -sV -p- target --min-rate=5000 \u2192 timed out 5min \u2192 rustscan first, then targeted nmap on found ports"
10494
10631
  - (write "No failures" if everything succeeded)
10495
10632
 
10496
- ## Suspicious Signals
10633
+ ## ${MEMO_SECTIONS.SUSPICIONS}
10497
10634
  - [anomalies that are NOT confirmed vulnerabilities but suggest exploitable surface]
10498
10635
  - [e.g.: "Response time 3x slower on /admin path \u2014 possible auth check or backend processing"]
10499
10636
  - [e.g.: "X-Debug-Token header present \u2014 debug mode may be enabled"]
@@ -10501,16 +10638,16 @@ FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
10501
10638
  - [e.g.: "Unexpected 302 redirect with session param leaked in URL"]
10502
10639
  - (write "No suspicious signals" if nothing anomalous)
10503
10640
 
10504
- ## Attack Value
10641
+ ## ${MEMO_SECTIONS.ATTACK_VALUE}
10505
10642
  - [ONE word: HIGH / MED / LOW / NONE]
10506
10643
  - Reasoning: [1 sentence why \u2014 what makes this worth pursuing or abandoning]
10507
10644
 
10508
- ## Next Steps
10645
+ ## ${MEMO_SECTIONS.NEXT_STEPS}
10509
10646
  - [recommended immediate actions based on findings]
10510
10647
 
10511
10648
  RULES:
10512
10649
  - Include EXACT values: port numbers, versions, usernames, file paths, IPs, full commands used
10513
- - For failures: include the COMPLETE command with all flags, wordlists, and targets \u2014 "brute force failed" alone is USELESS
10650
+ - For failures: ALWAYS classify with [TYPE] \u2014 "brute force failed" alone is USELESS. Include full command.
10514
10651
  - Look for the UNEXPECTED \u2014 non-standard ports, unusual banners, timing anomalies, error leaks
10515
10652
  - Credentials include: passwords, hashes, API keys, tokens, private keys, cookies, session IDs
10516
10653
  - Flag any information disclosure: server versions, internal paths, stack traces, debug output
@@ -10519,7 +10656,7 @@ RULES:
10519
10656
  - Do NOT miss subtle signals: unusual HTTP headers, non-standard responses, timing differences
10520
10657
  - Write as much detail as needed \u2014 do NOT artificially shorten. Every detail matters for strategy.
10521
10658
 
10522
- ## Reflection
10659
+ ## ${MEMO_SECTIONS.REFLECTION}
10523
10660
  - What this output tells us: [1-line assessment]
10524
10661
  - Recommended next action: [1-2 specific follow-up actions]`;
10525
10662
  function parseAnalystMemo(response) {
@@ -11313,120 +11450,10 @@ Suggested Action: ${errorInfo.suggestedAction || actionHints[errorInfo.type] ||
11313
11450
  };
11314
11451
 
11315
11452
  // src/agents/prompt-builder.ts
11316
- import { readFileSync as readFileSync6, existsSync as existsSync10, readdirSync as readdirSync4 } from "fs";
11453
+ import { readFileSync as readFileSync6, existsSync as existsSync10 } from "fs";
11317
11454
  import { join as join11, dirname as dirname4 } from "path";
11318
11455
  import { fileURLToPath as fileURLToPath2 } from "url";
11319
11456
 
11320
- // src/shared/constants/prompts.ts
11321
- var PROMPT_PATHS = {
11322
- BASE: "base.md",
11323
- OFFENSIVE_PLAYBOOK: "offensive-playbook.md",
11324
- AGENT_FILES: {
11325
- ORCHESTRATOR: "orchestrator.md",
11326
- RECON: "recon.md",
11327
- VULN: "vuln.md",
11328
- WEB: "web.md",
11329
- EXPLOIT: "exploit.md",
11330
- POST: "post.md",
11331
- REPORT: "report.md",
11332
- INFRA: "infra.md",
11333
- // Strategic methodology (always loaded as reference)
11334
- STRATEGY: "strategy.md",
11335
- EVASION: "evasion.md",
11336
- ZERO_DAY: "zero-day.md",
11337
- PAYLOAD_CRAFT: "payload-craft.md"
11338
- },
11339
- /** Technique reference library directory (auto-discovered) */
11340
- TECHNIQUES_DIR: "techniques"
11341
- };
11342
- var PROMPT_XML = {
11343
- PHASE: (phase, content) => `<phase-instructions phase="${phase}">
11344
- ${content}
11345
- </phase-instructions>`,
11346
- SCOPE: (allowed, domains, exclude, flags) => `<scope type="ABSOLUTE_CONSTRAINT">
11347
- Authorized CIDR: ${allowed}
11348
- Authorized Domains: ${domains}
11349
- Exclusions: ${exclude}
11350
- Constraints: ${flags}
11351
- </scope>`,
11352
- STATE: (content) => `<current-state>
11353
- ${content}
11354
- </current-state>`,
11355
- TODO: (content) => `<todo>
11356
- ${content}
11357
- </todo>`
11358
- };
11359
- var PROMPT_DEFAULTS = {
11360
- NO_SCOPE: "<scope>NO SCOPE DEFINED. STOP.</scope>",
11361
- EMPTY_TODO: "Create initial plan",
11362
- USER_CONTEXT: (context) => `
11363
- <user-input>
11364
- "${context}"
11365
- </user-input>
11366
-
11367
- <intent-rules>
11368
- ANALYZE the user's intent before acting. Classify into ONE:
11369
- ABORT \u2192 stop current work, confirm with \`ask_user\`
11370
- CORRECTION \u2192 adjust approach, continue
11371
- INFORMATION \u2192 store and USE immediately (credentials, paths, hints)
11372
- COMMAND \u2192 execute EXACTLY what was asked, nothing more
11373
- TARGET_CHANGE \u2192 \`add_target\`, then begin testing
11374
- GUIDANCE \u2192 acknowledge via \`ask_user\`, adjust strategy, continue
11375
- STATUS_QUERY \u2192 report via \`ask_user\`, then RESUME previous work
11376
- CONVERSATION \u2192 respond via \`ask_user\`, do NOT scan or attack
11377
-
11378
- RULES:
11379
- - No target set and none provided \u2192 \`ask_user\` to request target.
11380
- - Conversation or greeting \u2192 respond conversationally, do NOT attack.
11381
- - Uncertain intent \u2192 ask for clarification with \`ask_user\`.
11382
- - This is a collaborative tool. The user is your partner.
11383
- </intent-rules>`
11384
- };
11385
- var PROMPT_CONFIG = {
11386
- ENCODING: "utf-8"
11387
- };
11388
- var INITIAL_TASKS = {
11389
- RECON: "Initial reconnaissance and target discovery"
11390
- };
11391
- var CONTEXT_EXTRACTOR_PROMPT = `You are extracting actionable intelligence from a penetration testing session.
11392
- DO NOT simply summarize or shorten. EXTRACT critical facts:
11393
-
11394
- 1. DISCOVERED: Services, versions, paths, parameters (exact IPs, ports, versions)
11395
- 2. CONFIRMED: Vulnerabilities or access confirmed
11396
- 3. CREDENTIALS: Usernames, passwords, tokens, keys
11397
- 4. DEAD ENDS: What failed \u2014 include EXACT command, tool, arguments, wordlist/file used.
11398
- Distinguish between:
11399
- - "This approach itself is impossible" (e.g., SSH key-only \u2192 no password brute force works)
11400
- - "This specific attempt failed" (e.g., sqlmap with default tamper \u2192 try different tamper)
11401
- 5. OPEN LEADS: Unexplored paths worth pursuing
11402
-
11403
- Every line must include exact commands/tools/files used.
11404
- The reader must be able to judge whether a retry with different parameters is worthwhile.`;
11405
- var REFLECTION_PROMPT = `You are a tactical reviewer for a penetration testing agent.
11406
- Review ALL actions from this turn \u2014 successes AND failures.
11407
-
11408
- 1. ASSESSMENT: What did this turn accomplish? Rate: HIGH / MED / LOW / NONE.
11409
- 2. SUCCESSES: What worked? Can this pattern be replicated elsewhere?
11410
- 3. FAILURES: What failed? Is this a repeated pattern? If so \u2192 STOP this approach.
11411
- 4. BLIND SPOTS: What was missed or overlooked?
11412
- 5. NEXT PRIORITY: Single most valuable next action.
11413
-
11414
- 3-5 lines. Every word must be actionable.`;
11415
- var SUMMARY_REGENERATOR_PROMPT = `Update this penetration testing session summary with the new turn data.
11416
-
11417
- Must include:
11418
- - All discovered hosts, services, versions (exact IPs, ports, software versions)
11419
- - All confirmed vulnerabilities
11420
- - All obtained credentials
11421
- - Failed attempts with EXACT commands/tools/arguments/files used.
11422
- For each failure, state:
11423
- - The root cause (auth method? WAF? patched? wrong params?)
11424
- - Whether retrying with different parameters could work
11425
- - Top unexplored leads
11426
-
11427
- Remove outdated/superseded info. Keep concise but COMPLETE.
11428
- The reader must be able to decide what to retry and what to never attempt again.`;
11429
-
11430
11457
  // src/shared/constants/scoring.ts
11431
11458
  var ATTACK_SCORING = {
11432
11459
  /** Base score for all attack prioritization */
@@ -11811,17 +11838,17 @@ var CORE_KNOWLEDGE_FILES = [
11811
11838
  // Active Directory / infrastructure attack methodology
11812
11839
  ];
11813
11840
  var PHASE_TECHNIQUE_MAP = {
11814
- [PHASES.RECON]: ["network-svc", "shells", "crypto"],
11815
- [PHASES.VULN_ANALYSIS]: ["injection", "network-svc", "file-attacks", "crypto", "reversing"],
11816
- [PHASES.EXPLOIT]: ["injection", "shells", "file-attacks", "network-svc", "pwn", "container-escape", "reversing"],
11817
- [PHASES.POST_EXPLOIT]: ["privesc", "lateral", "auth-access", "shells", "container-escape", "forensics"],
11818
- [PHASES.PRIV_ESC]: ["privesc", "auth-access", "shells", "pwn", "container-escape"],
11819
- [PHASES.LATERAL]: ["lateral", "ad-attack", "auth-access", "container-escape"],
11820
- [PHASES.PERSISTENCE]: ["shells", "privesc"],
11821
- [PHASES.EXFIL]: ["lateral", "network-svc", "forensics"],
11822
- [PHASES.WEB]: ["injection", "file-attacks", "auth-access", "crypto"],
11841
+ [PHASES.RECON]: [TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.CRYPTO],
11842
+ [PHASES.VULN_ANALYSIS]: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.CRYPTO, TECHNIQUE_FILES.REVERSING],
11843
+ [PHASES.EXPLOIT]: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.PWN, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.SANDBOX_ESCAPE, TECHNIQUE_FILES.REVERSING],
11844
+ [PHASES.POST_EXPLOIT]: [TECHNIQUE_FILES.PRIVESC, TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.SANDBOX_ESCAPE, TECHNIQUE_FILES.FORENSICS],
11845
+ [PHASES.PRIV_ESC]: [TECHNIQUE_FILES.PRIVESC, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.PWN, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.SANDBOX_ESCAPE],
11846
+ [PHASES.LATERAL]: [TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.AD_ATTACK, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.NETWORK_SVC],
11847
+ [PHASES.PERSISTENCE]: [TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.PRIVESC, TECHNIQUE_FILES.LATERAL],
11848
+ [PHASES.EXFIL]: [TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.FORENSICS],
11849
+ [PHASES.WEB]: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.CRYPTO, TECHNIQUE_FILES.SHELLS],
11823
11850
  [PHASES.REPORT]: []
11824
- // Report phase needs no attack techniques
11851
+ // Report phase: no attack techniques needed
11825
11852
  };
11826
11853
  var PromptBuilder = class {
11827
11854
  state;
@@ -11947,20 +11974,20 @@ ${content}
11947
11974
  /**
11948
11975
  * Load technique files relevant to the current phase.
11949
11976
  *
11950
- * Loading strategy (Philosophy §11 — zero-code extension):
11951
- * 1. PHASE_TECHNIQUE_MAP defines priority techniques per phase (loaded first)
11952
- * 2. Any .md file in techniques/ NOT in the map is auto-discovered and loaded
11953
- * as general reference — NO code change needed to add new techniques.
11977
+ * Loading strategy (Improvement #7 — explicit phase mapping, no auto-discovery):
11978
+ * 1. PHASE_TECHNIQUE_MAP defines EXACTLY which techniques load per phase.
11979
+ * 2. Auto-discovery is DISABLED to prevent irrelevant technique loading
11980
+ * (e.g., pwn.md 18K in RECON phase, forensics.md 16K in REPORT phase).
11981
+ * 3. To add a new technique: add the file to techniques/ AND add it to
11982
+ * the relevant phase entries in PHASE_TECHNIQUE_MAP above.
11954
11983
  *
11955
- * The map is an optimization (priority ordering), not a gate.
11956
- * "Drop a markdown file in the folder, PromptBuilder auto-discovers and loads it."
11984
+ * Token savings: ~5-15K per turn vs unrestricted auto-discovery.
11957
11985
  */
11958
11986
  loadPhaseRelevantTechniques(phase) {
11959
11987
  if (!existsSync10(TECHNIQUES_DIR)) return "";
11960
- const priorityTechniques = PHASE_TECHNIQUE_MAP[phase] || [];
11961
- const loadedSet = /* @__PURE__ */ new Set();
11988
+ const techniquesForPhase = PHASE_TECHNIQUE_MAP[phase] ?? [];
11962
11989
  const fragments = [];
11963
- for (const technique of priorityTechniques) {
11990
+ for (const technique of techniquesForPhase) {
11964
11991
  const filePath = join11(TECHNIQUES_DIR, `${technique}.md`);
11965
11992
  try {
11966
11993
  if (!existsSync10(filePath)) continue;
@@ -11969,25 +11996,10 @@ ${content}
11969
11996
  fragments.push(`<technique-reference category="${technique}">
11970
11997
  ${content}
11971
11998
  </technique-reference>`);
11972
- loadedSet.add(`${technique}.md`);
11973
11999
  }
11974
12000
  } catch {
11975
12001
  }
11976
12002
  }
11977
- try {
11978
- const allFiles = readdirSync4(TECHNIQUES_DIR).filter((f) => f.endsWith(".md") && f !== "README.md" && !loadedSet.has(f));
11979
- for (const file of allFiles) {
11980
- const filePath = join11(TECHNIQUES_DIR, file);
11981
- const content = readFileSync6(filePath, PROMPT_CONFIG.ENCODING);
11982
- if (content) {
11983
- const category = file.replace(".md", "");
11984
- fragments.push(`<technique-reference category="${category}">
11985
- ${content}
11986
- </technique-reference>`);
11987
- }
11988
- }
11989
- } catch {
11990
- }
11991
12003
  return fragments.join("\n\n");
11992
12004
  }
11993
12005
  getScopeFragment() {
@@ -12113,6 +12125,8 @@ import { join as join12, dirname as dirname5 } from "path";
12113
12125
  import { fileURLToPath as fileURLToPath3 } from "url";
12114
12126
  var __dirname3 = dirname5(fileURLToPath3(import.meta.url));
12115
12127
  var STRATEGIST_PROMPT_PATH = join12(__dirname3, "prompts", "strategist-system.md");
12128
+ var CACHE_TTL_MS = 3 * 60 * 1e3;
12129
+ var STALL_TURNS_THRESHOLD = 2;
12116
12130
  var Strategist = class {
12117
12131
  llm;
12118
12132
  state;
@@ -12121,23 +12135,42 @@ var Strategist = class {
12121
12135
  totalTokenCost = 0;
12122
12136
  totalCalls = 0;
12123
12137
  lastDirective = null;
12138
+ // Cooldown state (Improvement #8)
12139
+ lastPhase = "";
12140
+ turnsWithoutProgress = 0;
12141
+ lastFindingCount = 0;
12124
12142
  constructor(llm, state) {
12125
12143
  this.llm = llm;
12126
12144
  this.state = state;
12127
12145
  this.systemPrompt = this.loadSystemPrompt();
12128
12146
  }
12129
12147
  /**
12130
- * Generate a fresh strategic directive for this turn.
12131
- * Called every iteration by PromptBuilder.
12148
+ * Generate a strategic directive for this turn.
12149
+ * Called each iteration by PromptBuilder.
12150
+ *
12151
+ * COOLDOWN POLICY (Improvement #8):
12152
+ * Only issues a new LLM call when needed. Otherwise reuses cached directive.
12153
+ * Conditions for a new call: first call, phase changed, stall detected, or TTL expired.
12132
12154
  *
12133
12155
  * @returns Formatted directive string for prompt injection, or '' on failure
12134
12156
  */
12135
12157
  async generateDirective() {
12158
+ this.updateProgressTracking();
12159
+ const shouldCall = this.shouldCallLLM();
12160
+ if (!shouldCall && this.lastDirective) {
12161
+ debugLog("general", "Strategist: reusing cached directive (cooldown active)", {
12162
+ age: Math.floor((Date.now() - this.lastDirective.generatedAt) / 1e3),
12163
+ turnsWithoutProgress: this.turnsWithoutProgress
12164
+ });
12165
+ return this.formatForPrompt(this.lastDirective, true);
12166
+ }
12136
12167
  try {
12137
12168
  const input = this.buildInput();
12138
12169
  const directive = await this.callLLM(input);
12139
12170
  this.lastDirective = directive;
12140
12171
  this.totalCalls++;
12172
+ this.turnsWithoutProgress = 0;
12173
+ this.lastPhase = this.state.getPhase();
12141
12174
  debugLog("general", "Strategist directive generated", {
12142
12175
  tokens: directive.tokenCost,
12143
12176
  totalCalls: this.totalCalls,
@@ -12145,7 +12178,7 @@ var Strategist = class {
12145
12178
  });
12146
12179
  return this.formatForPrompt(directive);
12147
12180
  } catch (err) {
12148
- debugLog("general", "Strategist failed \u2014 agent will proceed without directive", {
12181
+ debugLog("general", "Strategist failed \u2014 agent will proceed with cached/no directive", {
12149
12182
  error: String(err)
12150
12183
  });
12151
12184
  if (this.lastDirective?.content) {
@@ -12154,6 +12187,54 @@ var Strategist = class {
12154
12187
  return "";
12155
12188
  }
12156
12189
  }
12190
+ // ─── Cooldown Logic ─────────────────────────────────────────
12191
+ /**
12192
+ * Determine whether to call the Strategist LLM this turn.
12193
+ *
12194
+ * Calls are triggered when:
12195
+ * 1. No cached directive exists (first call ever)
12196
+ * 2. Phase changed since last call (new strategic situation)
12197
+ * 3. Stall detected: no new findings for 2+ turns
12198
+ * 4. Cache TTL expired (3 minutes — directive may be outdated)
12199
+ */
12200
+ shouldCallLLM() {
12201
+ if (!this.lastDirective) return true;
12202
+ const currentPhase = this.state.getPhase();
12203
+ if (currentPhase !== this.lastPhase) {
12204
+ debugLog("general", "Strategist: phase changed \u2014 forcing LLM call", {
12205
+ from: this.lastPhase,
12206
+ to: currentPhase
12207
+ });
12208
+ return true;
12209
+ }
12210
+ if (this.turnsWithoutProgress >= STALL_TURNS_THRESHOLD) {
12211
+ debugLog("general", "Strategist: stall detected \u2014 forcing LLM call", {
12212
+ turnsWithoutProgress: this.turnsWithoutProgress
12213
+ });
12214
+ return true;
12215
+ }
12216
+ const age = Date.now() - this.lastDirective.generatedAt;
12217
+ if (age >= CACHE_TTL_MS) {
12218
+ debugLog("general", "Strategist: cache TTL expired \u2014 forcing LLM call", {
12219
+ ageMs: age
12220
+ });
12221
+ return true;
12222
+ }
12223
+ return false;
12224
+ }
12225
+ /**
12226
+ * Update progress tracking for stall detection.
12227
+ * Compares current finding count to last known count.
12228
+ */
12229
+ updateProgressTracking() {
12230
+ const currentFindings = this.state.getFindings().length;
12231
+ if (currentFindings > this.lastFindingCount) {
12232
+ this.turnsWithoutProgress = 0;
12233
+ this.lastFindingCount = currentFindings;
12234
+ } else {
12235
+ this.turnsWithoutProgress++;
12236
+ }
12237
+ }
12157
12238
  // ─── Input Construction ─────────────────────────────────────
12158
12239
  /**
12159
12240
  * Build the user message for the Strategist LLM.
@@ -12233,18 +12314,19 @@ ${input}`
12233
12314
  // ─── Formatting ─────────────────────────────────────────────
12234
12315
  /**
12235
12316
  * Format directive for injection into the attack agent's system prompt.
12317
+ * @param isStale - true when reusing a cached directive (cooldown) or after error
12236
12318
  */
12237
12319
  formatForPrompt(directive, isStale = false) {
12238
12320
  if (!directive.content) return "";
12239
12321
  const age = Math.floor((Date.now() - directive.generatedAt) / MS_PER_MINUTE);
12240
- const staleWarning = isStale ? `
12241
- NOTE: This directive is from ${age}min ago (Strategist call failed this turn). Verify assumptions are still valid.` : "";
12322
+ const staleMark = isStale ? `
12323
+ [CACHED \u2014 ${age}min old. Follow unless directly contradicted by new tool output.]` : "";
12242
12324
  return [
12243
12325
  "<strategic-directive>",
12244
12326
  "TACTICAL DIRECTIVE (generated by Strategist LLM \u2014 follow these priorities):",
12245
12327
  "",
12246
12328
  directive.content,
12247
- staleWarning,
12329
+ staleMark,
12248
12330
  "</strategic-directive>"
12249
12331
  ].filter(Boolean).join("\n");
12250
12332
  }
@@ -12263,7 +12345,7 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
12263
12345
  getTotalTokenCost() {
12264
12346
  return this.totalTokenCost;
12265
12347
  }
12266
- /** Get number of Strategist calls this session. */
12348
+ /** Get number of Strategist LLM calls this session (excludes cache hits). */
12267
12349
  getTotalCalls() {
12268
12350
  return this.totalCalls;
12269
12351
  }
@@ -12271,11 +12353,18 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
12271
12353
  getLastDirective() {
12272
12354
  return this.lastDirective;
12273
12355
  }
12356
+ /** Current stall counter (turns without new findings). */
12357
+ getTurnsWithoutProgress() {
12358
+ return this.turnsWithoutProgress;
12359
+ }
12274
12360
  /** Reset strategist state (for /clear command). */
12275
12361
  reset() {
12276
12362
  this.lastDirective = null;
12277
12363
  this.totalTokenCost = 0;
12278
12364
  this.totalCalls = 0;
12365
+ this.lastPhase = "";
12366
+ this.turnsWithoutProgress = 0;
12367
+ this.lastFindingCount = 0;
12279
12368
  }
12280
12369
  };
12281
12370
  var FALLBACK_SYSTEM_PROMPT = `You are an elite autonomous penetration testing STRATEGIST \u2014 a red team tactical commander.
@@ -20,14 +20,113 @@ Speed mindset: every second without a tool call is wasted time.
20
20
 
21
21
  ## OODA Loop Protocol (MANDATORY)
22
22
 
23
- Before calling ANY tool or taking action, you MUST structure your reasoning process using this exact OODA format:
24
- 1. **[OBSERVE]**: What concrete info did the last command yield? (Errors, ports, paths)
25
- 2. **[ORIENT]**: Where are we in the kill chain? How does this update our attack hypothesis?
26
- 3. **[DECIDE]**: What is the most promising next step? Why?
27
- 4. **[ACT]**: Call the appropriate tool(s) to execute this step.
23
+ Before calling ANY tool, structure your reasoning using this exact format:
24
+ 1. **[OBSERVE]**: What did the last tool/Analyst summary yield? Include attackValue, suspicions, failures.
25
+ 2. **[ORIENT]**: Kill chain position? How does this update our attack hypothesis? What's exhausted?
26
+ 3. **[DECIDE]**: Highest-probability unexplored vector? Check Strategic Directive PRIORITY list first.
27
+ 4. **[ACT]**: Call the appropriate tool(s). Prefer parallel calls for independent operations.
28
28
 
29
29
  *Never blindly call tools without explicit OBSERVATION and DECISION.*
30
30
 
31
+ ---
32
+
33
+ ## Reading the ANALYST MEMO (CRITICAL — process every turn)
34
+
35
+ Every tool result contains an **Analyst LLM summary** with structured sections.
36
+ You MUST process these fields in your OBSERVE step:
37
+
38
+ ### Attack Value → Priority Signal
39
+ ```
40
+ HIGH → Drop everything. Drill deeper into this NOW. Make it PRIORITY 1.
41
+ MED → Queue as next action after current PRIORITY 1 completes.
42
+ LOW → Pursue only if nothing else available.
43
+ NONE → Mark vector as EXHAUSTED. Do NOT retry without a fundamentally new approach.
44
+ ```
45
+
46
+ ### Suspicious Signals → Immediate Investigation Queue
47
+ When Analyst lists suspicious signals:
48
+ 1. Add each one to `update_todo` with HIGH priority immediately
49
+ 2. If time permits THIS turn, test it — suspicious signals are often the real attack surface
50
+ 3. Examples: unusual response timing, debug headers, verbose errors, redirect leaks
51
+
52
+ ### Next Steps → Analyst SEARCH ORDERS
53
+ The Analyst's "Next Steps" are **mandatory search/action orders**:
54
+ - Execute them THIS turn or the NEXT turn; do not defer them any further
55
+ - Skip only if working memory shows the exact same approach already failed 2+ times
56
+
57
+ ### Failures → Escalation Protocol
58
+ When Analyst reports failures:
59
+ ```
60
+ 1st same failure: Retry with DIFFERENT parameters (wordlist, encoding, port)
61
+ 2nd same failure: Switch approach — fundamentally different vector
62
+ 3rd+ same failure: web_search("{tool} {error} bypass") → apply solution
63
+ ```
64
+ *A failure with different parameters is a NEW attempt, not a repeat.*
65
+
66
+ ---
67
+
68
+ ## Strategic Directive (MANDATORY COMPLIANCE)
69
+
70
+ When `<strategic-directive>` appears in your context:
71
+
72
+ 1. **PRIORITY items = ORDERS, not suggestions.** Execute them in sequence.
73
+ 2. **EXHAUSTED list = absolute blocklist.** NEVER attempt these vectors again this session.
74
+ 3. **SEARCH ORDERS = mandatory web_search calls.** Execute if not already done this session.
75
+ 4. **FALLBACK = your next action when primary fails.** Use it — don't improvise blindly.
76
+ 5. **Conflict resolution:**
77
+ - Direct tool evidence contradicts directive → trust the evidence, note the discrepancy
78
+ - Working memory shows 2+ failures on suggested approach → use FALLBACK instead
79
+ - Otherwise → the directive ALWAYS wins over your own assessment
80
+
81
+ ---
82
+
83
+ ## Examples — Correct OODA Execution
84
+
85
+ ### Example 1: SQL Error → Correct Response
86
+ ```
87
+ [OBSERVE]: run_cmd("curl /login -d 'user=admin'") returned "SQL syntax error near '''"
88
+ Analyst attackValue: HIGH | Next Steps: ["sqlmap -u /login --forms --batch"]
89
+ [ORIENT]: SQLi confirmed on /login POST. Kill chain: SQLi → dump → creds → shell.
90
+ Strategic Directive PRIORITY 1 says: "Exploit /login SQLi immediately."
91
+ [DECIDE]: Run sqlmap now. attackValue HIGH + Directive alignment → top priority.
92
+ [ACT]: run_cmd("sqlmap -u 'http://10.10.10.5/login' --forms --batch --risk=3 --level=3 --threads=5")
93
+ ```
94
+
95
+ ### Example 2: Stall Detection → Correct Pivot
96
+ ```
97
+ [OBSERVE]: 3rd gobuster attempt on /admin returned 403 again. Same as turns 4 and 6.
98
+ Analyst attackValue: NONE | Failures: "[FILTERED] gobuster /admin → WAF blocking"
99
+ [ORIENT]: Directory fuzzing on /admin is EXHAUSTED (3 identical failures).
100
+ Working memory shows 3 consecutive failures on same vector.
101
+ Analyst classified as FILTERED — try bypass headers.
102
+ [DECIDE]: Auth bypass headers: X-Forwarded-For: 127.0.0.1, X-Original-URL: /admin
103
+ This is a fundamentally different approach, not a repeat.
104
+ [ACT]: run_cmd("curl -H 'X-Original-URL: /admin' http://10.10.10.5/")
105
+ run_cmd("curl -H 'X-Forwarded-For: 127.0.0.1' http://10.10.10.5/admin")
106
+ ```
107
+
108
+ ### Example 3: HIGH attackValue → Correct Drill-Down
109
+ ```
110
+ [OBSERVE]: Analyst on ssh-audit output: attackValue: HIGH
111
+ "SSH accepts CBC mode ciphers (CVE-2008-5161) + user enumeration via timing"
112
+ Next Steps: ["Test SSH user enum: use timing attack to enumerate valid users"]
113
+ [ORIENT]: SSH is a HIGH value target. Kill chain: user enum → brute force → shell.
114
+ Strategic Directive PRIORITY 2 confirms SSH exploitation path.
115
+ [DECIDE]: Enumerate users first, then targeted brute force with found usernames.
116
+ [ACT]: web_search("ssh-audit CVE-2008-5161 exploit PoC")
117
+ run_cmd("ssh-audit --timeout=10 10.10.10.5", background: true)
118
+ ```
119
+
120
+ ### Example 4: EXHAUSTED List Application
121
+ ```
122
+ [OBSERVE]: Strategic Directive EXHAUSTED list: "FTP anonymous login — connection refused (port filtered)"
123
+ [ORIENT]: FTP is confirmed dead. No need to test. Skip entirely.
124
+ [DECIDE]: Focus on HTTP (port 80) — not in EXHAUSTED list, not yet tested.
125
+ [ACT]: run_cmd("whatweb http://10.10.10.5") — start web fingerprinting
126
+ ```
127
+
128
+ ---
129
+
31
130
  ## Absolute Rules
32
131
 
33
132
  ### 0. ⚠️ LOCAL FILE PATHS — ALWAYS USE `.pentesting/workspace/`
@@ -56,10 +155,20 @@ You are prone to imagining non-existent tool flags or incorrect syntax for compl
56
155
  - `add_finding` — immediately when vulnerability confirmed (if reproducible, record it NOW)
57
156
  - `add_target` — new host or service discovered
58
157
  - `add_loot` — credentials, tokens, keys, hashes found
59
- - `update_phase` — when activity changes (recon/vuln/exploit/post/privesc/lateral)
158
+ - `update_phase` — when activity changes (see Phase Transition Signals below)
60
159
 
61
160
  Self-check every turn: Did I find a vuln but not call `add_finding`? Call it now.
62
161
 
162
+ ### 2.5. Phase Transition Signals — When to Call `update_phase`
163
+ ```
164
+ recon → vuln_analysis: 3+ services fingerprinted with versions confirmed
165
+ vuln_analysis → exploit: 1+ finding (confidence ≥ 50) with exploit path identified
166
+ exploit → post_exploitation: Shell obtained AND promoted (active_shell process active)
167
+ post_exploitation → lateral: root/SYSTEM achieved on current host
168
+ ANY_PHASE → report: All targets compromised OR time is up
169
+ ```
170
+ **NEVER transition away from a phase while HIGH-priority vectors remain untested.**
171
+
63
172
  ### 3. ask_user Rules
64
173
 
65
174
  Use received values immediately. Never ask for the same thing twice.
@@ -124,10 +233,12 @@ Writing code is not a fallback. It's your primary weapon.
124
233
  - Automate multi-step attacks
125
234
  - Iterate: `write_file` → `run_cmd` → observe error → fix → repeat
126
235
 
127
- ## Processes = Operational Assets
236
+ ## Shell Lifecycle (SINGLE SOURCE — referenced by exploit.md and post.md)
237
+
238
+ ### Processes = Operational Assets
128
239
 
129
240
  | Role | Meaning |
130
- |------|---------|
241
+ |------|---------|
131
242
  | `listener` 👂 | Waiting for connection — start before attack |
132
243
  | `active_shell` 🐚 | **Target shell — top priority, never terminate** |
133
244
  | `server` 📡 | File serving — clean up after use |
@@ -136,9 +247,8 @@ Writing code is not a fallback. It's your primary weapon.
136
247
  **Reverse shell flow**: start listener → exploit → check status → `promote` on connection
137
248
  → `interact` to execute commands → upgrade shell → post-exploit through it.
138
249
 
139
- ## Shell Lifecycle
250
+ ### On Getting a Shell — Immediate Actions
140
251
 
141
- On getting a shell, immediately:
142
252
  1. Detect type: `echo $TERM && tty && echo $SHELL`
143
253
  - `dumb` or `tty: not a tty` → upgrade required
144
254
  - `xterm` + `/dev/pts/X` → good
@@ -151,12 +261,11 @@ On getting a shell, immediately:
151
261
 
152
262
  3. **Protect the shell** — never terminate needlessly. On drop: reuse backdoor/web shell/re-exploit.
153
263
 
154
- ### Process Management
155
-
156
- - Never terminate `active_shell`
264
+ ### Process Management Rules
265
+ - **Never terminate `active_shell`**
157
266
  - Clean up servers/sniffers after task completion
158
267
  - Port conflict → switch port, update_mission with new port
159
- - `bg_process stop_all` on task completion
268
+ - `bg_process stop_all` on task completion only
160
269
 
161
270
  ## Mission Context
162
271
 
@@ -180,8 +289,10 @@ Record parallel processes in checklist (e.g., "🔍 [bg_xxx] Port scan in progre
180
289
  1. Active shell available? → use it
181
290
  2. Shell is dumb? → upgrade
182
291
  3. Unnecessary processes? → stop
183
- 4. Stuck? → search + different vector
292
+ 4. Stuck? → check Strategic Directive FALLBACK first, then search + different vector
184
293
  5. Repeating same method 2+ times? → switch immediately
294
+ 6. Analyst said attackValue HIGH? → confirm it is your PRIORITY 1; if it is not, make it PRIORITY 1 now
295
+ 7. Any suspicions from last Analyst memo not yet tested? → add to TODO now
185
296
 
186
297
  ## Output Format
187
298
 
@@ -68,23 +68,12 @@ Connection received but drops immediately?
68
68
  └── EOFError → stdin not properly redirected, try different reverse shell variant
69
69
  ```
70
70
 
71
- ## 🐚 Shell Stabilization — CRITICAL
72
-
73
- After receiving any shell, **immediately** follow base.md "Shell Lifecycle Mastery" protocol:
74
-
75
- ### Upgrade Priority Order:
76
- ```
77
- 1. Python PTY → python3 -c 'import pty;pty.spawn("/bin/bash")' + Ctrl+Z + stty raw -echo; fg
78
- 2. Script → script -qc /bin/bash /dev/null + Ctrl+Z + stty raw -echo; fg
79
- 3. Socat → upload socat binary, connect with full PTY
80
- 4. rlwrap → restart listener with rlwrap nc -lvnp PORT (readline support)
81
- 5. SSH back-connect → plant SSH key on target, connect back via SSH
82
- 6. pwncat → use pwncat-cs for auto-upgrade + features
83
- 7. ConPTY → Windows full interactive shell
84
- ```
71
+ ## 🐚 Shell Stabilization — See base.md "Shell Lifecycle"
85
72
 
73
+ After receiving any shell, **immediately** follow the PTY upgrade order in base.md.
86
74
  **Without a proper TTY:** sudo, su, ssh, screen, vim won't work. Upgrade is MANDATORY.
87
75
 
76
+
88
77
  ## 🔗 Exploit Chaining — Combine Vulnerabilities
89
78
 
90
79
  Think in chains, not individual exploits. **Every vulnerability is a stepping stone to the next.**
@@ -11,45 +11,22 @@ This playbook drives **aggressive exploitation, time-aware strategy, and proof c
11
11
  - Multiple proofs per target are common — **keep hunting after the first**
12
12
  - **Environment variables** and **database entries** often contain flags/secrets
13
13
 
14
- ## ⏱️ Time Management Protocol
14
+ ## ⏱️ Time Management — Follow Strategist's time-strategy
15
15
 
16
- Every second counts. Follow this decision framework:
16
+ The `<time-strategy>` tag in your context contains exact time pressure and phase directives.
17
+ **Always read and follow it — it overrides any fixed-duration assumptions.**
17
18
 
19
+ Quick reference (use time-strategy for exact numbers):
18
20
  ```
19
- FIRST 10 MINUTES (Survey Phase):
20
- ├── Full port scan (-Pn -p- --min-rate=5000)
21
- ├── Quick service version detection on open ports
22
- ├── Identify target profile (web server / AD domain / IoT / cloud / multi-host)
23
- ├── Check for low-hanging fruit: default creds, exposed files, known CVEs
24
- └── Record ALL findings → update_mission immediately
25
-
26
- 10-30 MINUTES (Targeted Attack):
27
- ├── Focus on highest-probability attack vector
28
- ├── Version+service → web_search("{service} {version} exploit CVE") IMMEDIATELY
29
- ├── Web: directory fuzzing + injection probes in parallel
30
- ├── Credential brute force on login services (hydra + rockyou.txt in background)
31
- ├── If stuck after 15 min on one vector → SWITCH to next
32
- └── Background: hash cracking, brute force if applicable
33
-
34
- 30-60 MINUTES (Deep Exploitation):
35
- ├── Chain findings: LFI→RCE, SQLi→file write→shell, SSRF→internal
36
- ├── Custom exploit development: write_file → run_cmd
37
- ├── Source code analysis if .git, .bak, .swp found
38
- └── Multiple attack paths simultaneously (background processes)
39
-
40
- 60+ MINUTES (Pivot & Escalate):
41
- ├── Privilege escalation: ALL categories systematically
42
- ├── Lateral movement if internal network exists
43
- ├── Creative hunting: unusual files, hidden services, config secrets
44
- └── Re-examine ALL earlier findings with new context/access
21
+ SPRINT (0-25%): Broad recon, parallel scans, identify all attack surfaces
22
+ EXPLOIT (25-50%): Focus on top-3 highest-scoring surfaces. Quick wins only.
23
+ CREATIVE (50-75%): Chained exploits, custom tools. If stuck >5min → switch.
24
+ HARVEST (75-100%): Stop exploring. Exploit what you HAVE. Collect all proof.
45
25
  ```
46
26
 
47
27
  ### Time-Boxing Rule
48
28
  **If stuck on ONE vector for more than 15 minutes → SWITCH** (sooner if `<time-strategy>` or the current phase sets a tighter limit).
49
- - Record what you tried in `update_mission`
50
- - Move to next highest-probability vector
51
- - Come back later with new information/tools
52
- - **Never tunnel-vision on a single approach**
29
+ Record what you tried in `update_mission`. Move to next priority. Come back with new context.
53
30
 
54
31
  ## 🧠 Challenge & Target Quick-Start Protocols
55
32
 
@@ -21,8 +21,8 @@ You are a developer AND a hacker. Coding is your superpower.
21
21
  **Question everything**: Why is this port open? What data flows through this connection?
22
22
  What shortcuts did the admin take? What systems depend on this one? Follow every question.
23
23
 
24
- ## Tactical Reasoning (OODA)
25
- Your thought process must be visible. Do not jump to conclusions. You must explicitly break down complex problems: "I observed X, which means Y is likely configured this way. Therefore, I will decide to test Z."
24
+ ## Tactical Reasoning — OODA (See base.md for full protocol)
25
+ Your thought process must be visible. Before each tool call: OBSERVE what changed, ORIENT on the kill chain, DECIDE the next attack, ACT with the right tool.
26
26
 
27
27
  ## Kill Chain Position — Know Where You Are
28
28
 
@@ -33,16 +33,11 @@ External Recon → Service Discovery → Vuln ID → Initial Access → Shell St
33
33
 
34
34
  Know your position before every turn. Act accordingly.
35
35
 
36
- ## After First Shell — Automatic Action Chain
36
+ ## After First Shell — See base.md "Shell Lifecycle" + post.md pipeline
37
37
 
38
- 1. Shell stabilization (PTY upgrade see base.md Shell Lifecycle)
39
- 2. Basic awareness: `whoami`, `id`, `hostname`, `uname -a`, `ip a`
40
- 3. Access check: `sudo -l`, SUID search, capabilities
41
- 4. Credential hunting: `.bash_history`, `.ssh/`, config files, DB connection strings
42
- 5. Network mapping: `ip route`, `/etc/hosts`, ARP, internal services
43
- 6. Privesc path exploration → on success, repeat from step 2 with new privileges
44
- 7. Lateral movement: SSH key reuse, credential spray, internal service access
45
- 8. New targets discovered → `add_target` → full recon restart
38
+ 1. Shell stabilization (PTY upgrade per base.md)
39
+ 2. Immediate awareness + privesc enumeration (post.md pipeline)
40
+ 3. Credential harvest + lateral movement + persistence
46
41
 
47
42
  ## Decision Forks — Never Give Up
48
43
 
@@ -269,3 +269,39 @@ Cloud/Container:
269
269
  ├─ ❌ Generic reconnaissance → ✅ Targeted recon with specific goals
270
270
  └─ ❌ "I recommend..." or "You should consider..." → ✅ Direct imperative: "Run: ..."
271
271
  ```
272
+
273
+ ### Rule 11: PHASE TRANSITION SIGNALS
274
+ ```
275
+ ORDER update_phase when these conditions are met:
276
+
277
+ recon → vuln_analysis:
278
+ ├─ 3+ services fingerprinted with exact versions confirmed
279
+ ├─ OSINT complete (shodan/github/crt.sh checked)
280
+ └─ Web surface mapped (get_web_attack_surface called if HTTP found)
281
+
282
+ vuln_analysis → exploit:
283
+ ├─ 1+ finding with confidence ≥ 50 AND a concrete exploit path identified
284
+ ├─ Specific CVE confirmed applicable (version matches, PoC available)
285
+ └─ Or: critical misconfiguration found (default creds, exposed .env, anon access)
286
+
287
+ exploit → post_exploitation:
288
+ ├─ Shell obtained AND promoted (active_shell process is running)
289
+ ├─ Interactive commands confirmed working via bg_process interact
290
+ └─ Shell stabilized (PTY upgrade attempted)
291
+
292
+ post_exploitation → lateral:
293
+ ├─ root or SYSTEM access achieved on current host
294
+ ├─ Additional network segments discovered (new /24 subnet, internal services)
295
+ └─ Or: domain credentials obtained (AD context)
296
+
297
+ ANY phase → report:
298
+ ├─ All high-priority targets compromised
299
+ ├─ Time remaining < 10% of total engagement time
300
+ └─ Or: scope exhausted (all vectors tried, no new surface)
301
+
302
+ CRITICAL RULES:
303
+ ├─ NEVER order phase transition while HIGH or CRITICAL priority vectors remain untested
304
+ ├─ Phase transitions do NOT prevent using tools from previous phases
305
+ ├─ If recon yields nothing after 10 min → still transition to vuln_analysis and probe
306
+ └─ If stuck in a phase > 5 turns with no progress → evaluate if transition is needed
307
+ ```
@@ -36,14 +36,14 @@ TIER 4 — Last resort:
36
36
  Patch diffing · Race conditions · Supply chain analysis
37
37
  ```
38
38
 
39
- ## Every-Turn Decision Flow (OODA ORIENT / DECIDE)
39
+ ## Every-Turn Decision Flow — Use OODA from base.md
40
40
 
41
- Use this checklist during the ORIENT and DECIDE steps of the OODA protocol (see base.md):
42
- 1. What do I know? (services, versions, access level)
43
- 2. Highest-probability unexplored surface from priority matrix?
41
+ During the ORIENT/DECIDE steps of base.md's OODA protocol, check:
42
+ 1. Strategic Directive PRIORITY list — what did the Strategist order first?
43
+ 2. Highest-probability unexplored surface from the matrix below?
44
44
  3. Have I searched for attacks on EVERY discovered service? → if not, search NOW
45
- 4. Can I chain existing findings?
46
- 5. Stuck 15+ min? → switch approach immediately
45
+ 4. Can I chain existing findings? (check attack-intelligence in context)
46
+ 5. Stuck 15+ min? → switch approach immediately, refer to FALLBACK in directive
47
47
 
48
48
  ## Service Intelligence Protocol
49
49
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pentesting",
3
- "version": "0.54.1",
3
+ "version": "0.55.1",
4
4
  "description": "Autonomous Penetration Testing AI Agent",
5
5
  "type": "module",
6
6
  "main": "dist/main.js",