pentesting 0.54.1 → 0.55.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +262 -173
- package/dist/prompts/base.md +126 -15
- package/dist/prompts/exploit.md +3 -14
- package/dist/prompts/offensive-playbook.md +9 -32
- package/dist/prompts/orchestrator.md +6 -11
- package/dist/prompts/strategist-system.md +36 -0
- package/dist/prompts/strategy.md +6 -6
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -342,7 +342,7 @@ var ORPHAN_PROCESS_NAMES = [
|
|
|
342
342
|
|
|
343
343
|
// src/shared/constants/agent.ts
|
|
344
344
|
var APP_NAME = "Pentest AI";
|
|
345
|
-
var APP_VERSION = "0.54.1";
|
|
345
|
+
var APP_VERSION = "0.55.1";
|
|
346
346
|
var APP_DESCRIPTION = "Autonomous Penetration Testing AI Agent";
|
|
347
347
|
var LLM_ROLES = {
|
|
348
348
|
SYSTEM: "system",
|
|
@@ -4940,19 +4940,145 @@ var CHALLENGE_TYPE_SIGNALS = {
|
|
|
4940
4940
|
]
|
|
4941
4941
|
};
|
|
4942
4942
|
|
|
4943
|
+
// src/shared/constants/prompts.ts
|
|
4944
|
+
var PROMPT_PATHS = {
|
|
4945
|
+
BASE: "base.md",
|
|
4946
|
+
OFFENSIVE_PLAYBOOK: "offensive-playbook.md",
|
|
4947
|
+
AGENT_FILES: {
|
|
4948
|
+
ORCHESTRATOR: "orchestrator.md",
|
|
4949
|
+
RECON: "recon.md",
|
|
4950
|
+
VULN: "vuln.md",
|
|
4951
|
+
WEB: "web.md",
|
|
4952
|
+
EXPLOIT: "exploit.md",
|
|
4953
|
+
POST: "post.md",
|
|
4954
|
+
REPORT: "report.md",
|
|
4955
|
+
INFRA: "infra.md",
|
|
4956
|
+
// Strategic methodology (always loaded as reference)
|
|
4957
|
+
STRATEGY: "strategy.md",
|
|
4958
|
+
EVASION: "evasion.md",
|
|
4959
|
+
ZERO_DAY: "zero-day.md",
|
|
4960
|
+
PAYLOAD_CRAFT: "payload-craft.md"
|
|
4961
|
+
},
|
|
4962
|
+
/** Technique reference library directory */
|
|
4963
|
+
TECHNIQUES_DIR: "techniques"
|
|
4964
|
+
};
|
|
4965
|
+
var TECHNIQUE_FILES = {
|
|
4966
|
+
INJECTION: "injection",
|
|
4967
|
+
PRIVESC: "privesc",
|
|
4968
|
+
LATERAL: "lateral",
|
|
4969
|
+
AD_ATTACK: "ad-attack",
|
|
4970
|
+
AUTH_ACCESS: "auth-access",
|
|
4971
|
+
CONTAINER_ESCAPE: "container-escape",
|
|
4972
|
+
SANDBOX_ESCAPE: "sandbox-escape",
|
|
4973
|
+
FILE_ATTACKS: "file-attacks",
|
|
4974
|
+
NETWORK_SVC: "network-svc",
|
|
4975
|
+
CRYPTO: "crypto",
|
|
4976
|
+
REVERSING: "reversing",
|
|
4977
|
+
FORENSICS: "forensics",
|
|
4978
|
+
PWN: "pwn",
|
|
4979
|
+
SHELLS: "shells"
|
|
4980
|
+
};
|
|
4981
|
+
var PROMPT_XML = {
|
|
4982
|
+
PHASE: (phase, content) => `<phase-instructions phase="${phase}">
|
|
4983
|
+
${content}
|
|
4984
|
+
</phase-instructions>`,
|
|
4985
|
+
SCOPE: (allowed, domains, exclude, flags) => `<scope type="ABSOLUTE_CONSTRAINT">
|
|
4986
|
+
Authorized CIDR: ${allowed}
|
|
4987
|
+
Authorized Domains: ${domains}
|
|
4988
|
+
Exclusions: ${exclude}
|
|
4989
|
+
Constraints: ${flags}
|
|
4990
|
+
</scope>`,
|
|
4991
|
+
STATE: (content) => `<current-state>
|
|
4992
|
+
${content}
|
|
4993
|
+
</current-state>`,
|
|
4994
|
+
TODO: (content) => `<todo>
|
|
4995
|
+
${content}
|
|
4996
|
+
</todo>`
|
|
4997
|
+
};
|
|
4998
|
+
var PROMPT_DEFAULTS = {
|
|
4999
|
+
NO_SCOPE: "<scope>NO SCOPE DEFINED. STOP.</scope>",
|
|
5000
|
+
EMPTY_TODO: "Create initial plan",
|
|
5001
|
+
USER_CONTEXT: (context) => `
|
|
5002
|
+
<user-input>
|
|
5003
|
+
"${context}"
|
|
5004
|
+
</user-input>
|
|
5005
|
+
|
|
5006
|
+
<intent-rules>
|
|
5007
|
+
ANALYZE the user's intent before acting. Classify into ONE:
|
|
5008
|
+
ABORT \u2192 stop current work, confirm with \`ask_user\`
|
|
5009
|
+
CORRECTION \u2192 adjust approach, continue
|
|
5010
|
+
INFORMATION \u2192 store and USE immediately (credentials, paths, hints)
|
|
5011
|
+
COMMAND \u2192 execute EXACTLY what was asked, nothing more
|
|
5012
|
+
TARGET_CHANGE \u2192 \`add_target\`, then begin testing
|
|
5013
|
+
GUIDANCE \u2192 acknowledge via \`ask_user\`, adjust strategy, continue
|
|
5014
|
+
STATUS_QUERY \u2192 report via \`ask_user\`, then RESUME previous work
|
|
5015
|
+
CONVERSATION \u2192 respond via \`ask_user\`, do NOT scan or attack
|
|
5016
|
+
|
|
5017
|
+
RULES:
|
|
5018
|
+
- No target set and none provided \u2192 \`ask_user\` to request target.
|
|
5019
|
+
- Conversation or greeting \u2192 respond conversationally, do NOT attack.
|
|
5020
|
+
- Uncertain intent \u2192 ask for clarification with \`ask_user\`.
|
|
5021
|
+
- This is a collaborative tool. The user is your partner.
|
|
5022
|
+
</intent-rules>`
|
|
5023
|
+
};
|
|
5024
|
+
var PROMPT_CONFIG = {
|
|
5025
|
+
ENCODING: "utf-8"
|
|
5026
|
+
};
|
|
5027
|
+
var INITIAL_TASKS = {
|
|
5028
|
+
RECON: "Initial reconnaissance and target discovery"
|
|
5029
|
+
};
|
|
5030
|
+
var CONTEXT_EXTRACTOR_PROMPT = `You are extracting actionable intelligence from a penetration testing session.
|
|
5031
|
+
DO NOT simply summarize or shorten. EXTRACT critical facts:
|
|
5032
|
+
|
|
5033
|
+
1. DISCOVERED: Services, versions, paths, parameters (exact IPs, ports, versions)
|
|
5034
|
+
2. CONFIRMED: Vulnerabilities or access confirmed
|
|
5035
|
+
3. CREDENTIALS: Usernames, passwords, tokens, keys
|
|
5036
|
+
4. DEAD ENDS: What failed \u2014 include EXACT command, tool, arguments, wordlist/file used.
|
|
5037
|
+
Distinguish between:
|
|
5038
|
+
- "This approach itself is impossible" (e.g., SSH key-only \u2192 no password brute force works)
|
|
5039
|
+
- "This specific attempt failed" (e.g., sqlmap with default tamper \u2192 try different tamper)
|
|
5040
|
+
5. OPEN LEADS: Unexplored paths worth pursuing
|
|
5041
|
+
|
|
5042
|
+
Every line must include exact commands/tools/files used.
|
|
5043
|
+
The reader must be able to judge whether a retry with different parameters is worthwhile.`;
|
|
5044
|
+
var REFLECTION_PROMPT = `You are a tactical reviewer for a penetration testing agent.
|
|
5045
|
+
Review ALL actions from this turn \u2014 successes AND failures.
|
|
5046
|
+
|
|
5047
|
+
1. ASSESSMENT: What did this turn accomplish? Rate: HIGH / MED / LOW / NONE.
|
|
5048
|
+
2. SUCCESSES: What worked? Can this pattern be replicated elsewhere?
|
|
5049
|
+
3. FAILURES: What failed? Is this a repeated pattern? If so \u2192 STOP this approach.
|
|
5050
|
+
4. BLIND SPOTS: What was missed or overlooked?
|
|
5051
|
+
5. NEXT PRIORITY: Single most valuable next action.
|
|
5052
|
+
|
|
5053
|
+
3-5 lines. Every word must be actionable.`;
|
|
5054
|
+
var SUMMARY_REGENERATOR_PROMPT = `Update this penetration testing session summary with the new turn data.
|
|
5055
|
+
|
|
5056
|
+
Must include:
|
|
5057
|
+
- All discovered hosts, services, versions (exact IPs, ports, software versions)
|
|
5058
|
+
- All confirmed vulnerabilities
|
|
5059
|
+
- All obtained credentials
|
|
5060
|
+
- Failed attempts with EXACT commands/tools/arguments/files used.
|
|
5061
|
+
For each failure, state:
|
|
5062
|
+
- The root cause (auth method? WAF? patched? wrong params?)
|
|
5063
|
+
- Whether retrying with different parameters could work
|
|
5064
|
+
- Top unexplored leads
|
|
5065
|
+
|
|
5066
|
+
Remove outdated/superseded info. Keep concise but COMPLETE.
|
|
5067
|
+
The reader must be able to decide what to retry and what to never attempt again.`;
|
|
5068
|
+
|
|
4943
5069
|
// src/shared/utils/auto-prompter.ts
|
|
4944
5070
|
var SECONDARY_TYPE_RATIO = 0.5;
|
|
4945
5071
|
var MIN_CHALLENGE_CONFIDENCE = 0.2;
|
|
4946
5072
|
var MIN_RECON_OUTPUT_LENGTH = 100;
|
|
4947
5073
|
var TYPE_TECHNIQUE_MAP = {
|
|
4948
|
-
web: [
|
|
4949
|
-
pwn: [
|
|
4950
|
-
crypto: [
|
|
4951
|
-
forensics: [
|
|
4952
|
-
reversing: [
|
|
4953
|
-
misc: [
|
|
4954
|
-
network: [
|
|
4955
|
-
unknown: [
|
|
5074
|
+
web: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.CRYPTO],
|
|
5075
|
+
pwn: [TECHNIQUE_FILES.PWN, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.REVERSING],
|
|
5076
|
+
crypto: [TECHNIQUE_FILES.CRYPTO],
|
|
5077
|
+
forensics: [TECHNIQUE_FILES.FORENSICS, TECHNIQUE_FILES.REVERSING, TECHNIQUE_FILES.CRYPTO],
|
|
5078
|
+
reversing: [TECHNIQUE_FILES.REVERSING, TECHNIQUE_FILES.PWN],
|
|
5079
|
+
misc: [TECHNIQUE_FILES.SANDBOX_ESCAPE, TECHNIQUE_FILES.CRYPTO, TECHNIQUE_FILES.FORENSICS],
|
|
5080
|
+
network: [TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.AD_ATTACK],
|
|
5081
|
+
unknown: [TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.FILE_ATTACKS]
|
|
4956
5082
|
};
|
|
4957
5083
|
var TYPE_PHASE_PROMPT_MAP = {
|
|
4958
5084
|
web: "web.md",
|
|
@@ -10475,25 +10601,36 @@ var ANALYST_SYSTEM_PROMPT = `You are an independent pentesting output analyst. Y
|
|
|
10475
10601
|
|
|
10476
10602
|
FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
|
|
10477
10603
|
|
|
10478
|
-
##
|
|
10604
|
+
## ${MEMO_SECTIONS.KEY_FINDINGS}
|
|
10479
10605
|
- [finding 1 with exact values: ports, versions, paths]
|
|
10480
10606
|
- [finding 2]
|
|
10481
10607
|
|
|
10482
|
-
##
|
|
10608
|
+
## ${MEMO_SECTIONS.CREDENTIALS}
|
|
10483
10609
|
- [any discovered credentials, hashes, tokens, keys, certificates]
|
|
10484
10610
|
- (write "None found" if none)
|
|
10485
10611
|
|
|
10486
|
-
##
|
|
10612
|
+
## ${MEMO_SECTIONS.ATTACK_VECTORS}
|
|
10487
10613
|
- [exploitable services, vulnerabilities, misconfigurations, CVEs]
|
|
10488
10614
|
- (write "None identified" if none)
|
|
10489
10615
|
|
|
10490
|
-
##
|
|
10491
|
-
|
|
10492
|
-
|
|
10493
|
-
|
|
10616
|
+
## ${MEMO_SECTIONS.FAILURES}
|
|
10617
|
+
Classify EVERY failure using one of these types. Format: [TYPE] exact_command \u2192 why_failed \u2192 recommended_pivot
|
|
10618
|
+
|
|
10619
|
+
Failure types:
|
|
10620
|
+
- [FILTERED]: WAF/IDS/firewall blocked \u2192 suggest: encoding bypass, payload_mutate, different protocol/port
|
|
10621
|
+
- [WRONG_VECTOR]: Vulnerability not present here \u2192 suggest: pivot to different vuln class entirely
|
|
10622
|
+
- [AUTH_REQUIRED]: Credential or session needed first \u2192 suggest: brute force login or find creds in config files
|
|
10623
|
+
- [TOOL_ERROR]: Command syntax error, missing dep, or tool bug \u2192 suggest: run --help, use alternative tool
|
|
10624
|
+
- [TIMEOUT]: Service too slow or connection timed out \u2192 suggest: increase timeout, reduce scope, or use background mode
|
|
10625
|
+
- [PATCHED]: CVE/technique exists but target is patched \u2192 suggest: search bypass or newer CVE on same service
|
|
10626
|
+
|
|
10627
|
+
Examples:
|
|
10628
|
+
- "[FILTERED] sqlmap -u /login --tamper=space2comment \u2192 ModSecurity WAF, blocking all payloads \u2192 try charencode,randomcase tampers or payload_mutate"
|
|
10629
|
+
- "[AUTH_REQUIRED] curl http://target/admin \u2192 HTTP 401 Basic Auth \u2192 hydra -l admin -P rockyou.txt http-get://target/admin"
|
|
10630
|
+
- "[TIMEOUT] nmap -sV -p- target --min-rate=5000 \u2192 timed out 5min \u2192 rustscan first, then targeted nmap on found ports"
|
|
10494
10631
|
- (write "No failures" if everything succeeded)
|
|
10495
10632
|
|
|
10496
|
-
##
|
|
10633
|
+
## ${MEMO_SECTIONS.SUSPICIONS}
|
|
10497
10634
|
- [anomalies that are NOT confirmed vulnerabilities but suggest exploitable surface]
|
|
10498
10635
|
- [e.g.: "Response time 3x slower on /admin path \u2014 possible auth check or backend processing"]
|
|
10499
10636
|
- [e.g.: "X-Debug-Token header present \u2014 debug mode may be enabled"]
|
|
@@ -10501,16 +10638,16 @@ FORMAT YOUR RESPONSE EXACTLY LIKE THIS:
|
|
|
10501
10638
|
- [e.g.: "Unexpected 302 redirect with session param leaked in URL"]
|
|
10502
10639
|
- (write "No suspicious signals" if nothing anomalous)
|
|
10503
10640
|
|
|
10504
|
-
##
|
|
10641
|
+
## ${MEMO_SECTIONS.ATTACK_VALUE}
|
|
10505
10642
|
- [ONE word: HIGH / MED / LOW / NONE]
|
|
10506
10643
|
- Reasoning: [1 sentence why \u2014 what makes this worth pursuing or abandoning]
|
|
10507
10644
|
|
|
10508
|
-
##
|
|
10645
|
+
## ${MEMO_SECTIONS.NEXT_STEPS}
|
|
10509
10646
|
- [recommended immediate actions based on findings]
|
|
10510
10647
|
|
|
10511
10648
|
RULES:
|
|
10512
10649
|
- Include EXACT values: port numbers, versions, usernames, file paths, IPs, full commands used
|
|
10513
|
-
- For failures:
|
|
10650
|
+
- For failures: ALWAYS classify with [TYPE] \u2014 "brute force failed" alone is USELESS. Include full command.
|
|
10514
10651
|
- Look for the UNEXPECTED \u2014 non-standard ports, unusual banners, timing anomalies, error leaks
|
|
10515
10652
|
- Credentials include: passwords, hashes, API keys, tokens, private keys, cookies, session IDs
|
|
10516
10653
|
- Flag any information disclosure: server versions, internal paths, stack traces, debug output
|
|
@@ -10519,7 +10656,7 @@ RULES:
|
|
|
10519
10656
|
- Do NOT miss subtle signals: unusual HTTP headers, non-standard responses, timing differences
|
|
10520
10657
|
- Write as much detail as needed \u2014 do NOT artificially shorten. Every detail matters for strategy.
|
|
10521
10658
|
|
|
10522
|
-
##
|
|
10659
|
+
## ${MEMO_SECTIONS.REFLECTION}
|
|
10523
10660
|
- What this output tells us: [1-line assessment]
|
|
10524
10661
|
- Recommended next action: [1-2 specific follow-up actions]`;
|
|
10525
10662
|
function parseAnalystMemo(response) {
|
|
@@ -11313,120 +11450,10 @@ Suggested Action: ${errorInfo.suggestedAction || actionHints[errorInfo.type] ||
|
|
|
11313
11450
|
};
|
|
11314
11451
|
|
|
11315
11452
|
// src/agents/prompt-builder.ts
|
|
11316
|
-
import { readFileSync as readFileSync6, existsSync as existsSync10, readdirSync as readdirSync4 } from "fs";
|
|
11453
|
+
import { readFileSync as readFileSync6, existsSync as existsSync10 } from "fs";
|
|
11317
11454
|
import { join as join11, dirname as dirname4 } from "path";
|
|
11318
11455
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
11319
11456
|
|
|
11320
|
-
// src/shared/constants/prompts.ts
|
|
11321
|
-
var PROMPT_PATHS = {
|
|
11322
|
-
BASE: "base.md",
|
|
11323
|
-
OFFENSIVE_PLAYBOOK: "offensive-playbook.md",
|
|
11324
|
-
AGENT_FILES: {
|
|
11325
|
-
ORCHESTRATOR: "orchestrator.md",
|
|
11326
|
-
RECON: "recon.md",
|
|
11327
|
-
VULN: "vuln.md",
|
|
11328
|
-
WEB: "web.md",
|
|
11329
|
-
EXPLOIT: "exploit.md",
|
|
11330
|
-
POST: "post.md",
|
|
11331
|
-
REPORT: "report.md",
|
|
11332
|
-
INFRA: "infra.md",
|
|
11333
|
-
// Strategic methodology (always loaded as reference)
|
|
11334
|
-
STRATEGY: "strategy.md",
|
|
11335
|
-
EVASION: "evasion.md",
|
|
11336
|
-
ZERO_DAY: "zero-day.md",
|
|
11337
|
-
PAYLOAD_CRAFT: "payload-craft.md"
|
|
11338
|
-
},
|
|
11339
|
-
/** Technique reference library directory (auto-discovered) */
|
|
11340
|
-
TECHNIQUES_DIR: "techniques"
|
|
11341
|
-
};
|
|
11342
|
-
var PROMPT_XML = {
|
|
11343
|
-
PHASE: (phase, content) => `<phase-instructions phase="${phase}">
|
|
11344
|
-
${content}
|
|
11345
|
-
</phase-instructions>`,
|
|
11346
|
-
SCOPE: (allowed, domains, exclude, flags) => `<scope type="ABSOLUTE_CONSTRAINT">
|
|
11347
|
-
Authorized CIDR: ${allowed}
|
|
11348
|
-
Authorized Domains: ${domains}
|
|
11349
|
-
Exclusions: ${exclude}
|
|
11350
|
-
Constraints: ${flags}
|
|
11351
|
-
</scope>`,
|
|
11352
|
-
STATE: (content) => `<current-state>
|
|
11353
|
-
${content}
|
|
11354
|
-
</current-state>`,
|
|
11355
|
-
TODO: (content) => `<todo>
|
|
11356
|
-
${content}
|
|
11357
|
-
</todo>`
|
|
11358
|
-
};
|
|
11359
|
-
var PROMPT_DEFAULTS = {
|
|
11360
|
-
NO_SCOPE: "<scope>NO SCOPE DEFINED. STOP.</scope>",
|
|
11361
|
-
EMPTY_TODO: "Create initial plan",
|
|
11362
|
-
USER_CONTEXT: (context) => `
|
|
11363
|
-
<user-input>
|
|
11364
|
-
"${context}"
|
|
11365
|
-
</user-input>
|
|
11366
|
-
|
|
11367
|
-
<intent-rules>
|
|
11368
|
-
ANALYZE the user's intent before acting. Classify into ONE:
|
|
11369
|
-
ABORT \u2192 stop current work, confirm with \`ask_user\`
|
|
11370
|
-
CORRECTION \u2192 adjust approach, continue
|
|
11371
|
-
INFORMATION \u2192 store and USE immediately (credentials, paths, hints)
|
|
11372
|
-
COMMAND \u2192 execute EXACTLY what was asked, nothing more
|
|
11373
|
-
TARGET_CHANGE \u2192 \`add_target\`, then begin testing
|
|
11374
|
-
GUIDANCE \u2192 acknowledge via \`ask_user\`, adjust strategy, continue
|
|
11375
|
-
STATUS_QUERY \u2192 report via \`ask_user\`, then RESUME previous work
|
|
11376
|
-
CONVERSATION \u2192 respond via \`ask_user\`, do NOT scan or attack
|
|
11377
|
-
|
|
11378
|
-
RULES:
|
|
11379
|
-
- No target set and none provided \u2192 \`ask_user\` to request target.
|
|
11380
|
-
- Conversation or greeting \u2192 respond conversationally, do NOT attack.
|
|
11381
|
-
- Uncertain intent \u2192 ask for clarification with \`ask_user\`.
|
|
11382
|
-
- This is a collaborative tool. The user is your partner.
|
|
11383
|
-
</intent-rules>`
|
|
11384
|
-
};
|
|
11385
|
-
var PROMPT_CONFIG = {
|
|
11386
|
-
ENCODING: "utf-8"
|
|
11387
|
-
};
|
|
11388
|
-
var INITIAL_TASKS = {
|
|
11389
|
-
RECON: "Initial reconnaissance and target discovery"
|
|
11390
|
-
};
|
|
11391
|
-
var CONTEXT_EXTRACTOR_PROMPT = `You are extracting actionable intelligence from a penetration testing session.
|
|
11392
|
-
DO NOT simply summarize or shorten. EXTRACT critical facts:
|
|
11393
|
-
|
|
11394
|
-
1. DISCOVERED: Services, versions, paths, parameters (exact IPs, ports, versions)
|
|
11395
|
-
2. CONFIRMED: Vulnerabilities or access confirmed
|
|
11396
|
-
3. CREDENTIALS: Usernames, passwords, tokens, keys
|
|
11397
|
-
4. DEAD ENDS: What failed \u2014 include EXACT command, tool, arguments, wordlist/file used.
|
|
11398
|
-
Distinguish between:
|
|
11399
|
-
- "This approach itself is impossible" (e.g., SSH key-only \u2192 no password brute force works)
|
|
11400
|
-
- "This specific attempt failed" (e.g., sqlmap with default tamper \u2192 try different tamper)
|
|
11401
|
-
5. OPEN LEADS: Unexplored paths worth pursuing
|
|
11402
|
-
|
|
11403
|
-
Every line must include exact commands/tools/files used.
|
|
11404
|
-
The reader must be able to judge whether a retry with different parameters is worthwhile.`;
|
|
11405
|
-
var REFLECTION_PROMPT = `You are a tactical reviewer for a penetration testing agent.
|
|
11406
|
-
Review ALL actions from this turn \u2014 successes AND failures.
|
|
11407
|
-
|
|
11408
|
-
1. ASSESSMENT: What did this turn accomplish? Rate: HIGH / MED / LOW / NONE.
|
|
11409
|
-
2. SUCCESSES: What worked? Can this pattern be replicated elsewhere?
|
|
11410
|
-
3. FAILURES: What failed? Is this a repeated pattern? If so \u2192 STOP this approach.
|
|
11411
|
-
4. BLIND SPOTS: What was missed or overlooked?
|
|
11412
|
-
5. NEXT PRIORITY: Single most valuable next action.
|
|
11413
|
-
|
|
11414
|
-
3-5 lines. Every word must be actionable.`;
|
|
11415
|
-
var SUMMARY_REGENERATOR_PROMPT = `Update this penetration testing session summary with the new turn data.
|
|
11416
|
-
|
|
11417
|
-
Must include:
|
|
11418
|
-
- All discovered hosts, services, versions (exact IPs, ports, software versions)
|
|
11419
|
-
- All confirmed vulnerabilities
|
|
11420
|
-
- All obtained credentials
|
|
11421
|
-
- Failed attempts with EXACT commands/tools/arguments/files used.
|
|
11422
|
-
For each failure, state:
|
|
11423
|
-
- The root cause (auth method? WAF? patched? wrong params?)
|
|
11424
|
-
- Whether retrying with different parameters could work
|
|
11425
|
-
- Top unexplored leads
|
|
11426
|
-
|
|
11427
|
-
Remove outdated/superseded info. Keep concise but COMPLETE.
|
|
11428
|
-
The reader must be able to decide what to retry and what to never attempt again.`;
|
|
11429
|
-
|
|
11430
11457
|
// src/shared/constants/scoring.ts
|
|
11431
11458
|
var ATTACK_SCORING = {
|
|
11432
11459
|
/** Base score for all attack prioritization */
|
|
@@ -11811,17 +11838,17 @@ var CORE_KNOWLEDGE_FILES = [
|
|
|
11811
11838
|
// Active Directory / infrastructure attack methodology
|
|
11812
11839
|
];
|
|
11813
11840
|
var PHASE_TECHNIQUE_MAP = {
|
|
11814
|
-
[PHASES.RECON]: [
|
|
11815
|
-
[PHASES.VULN_ANALYSIS]: [
|
|
11816
|
-
[PHASES.EXPLOIT]: [
|
|
11817
|
-
[PHASES.POST_EXPLOIT]: [
|
|
11818
|
-
[PHASES.PRIV_ESC]: [
|
|
11819
|
-
[PHASES.LATERAL]: [
|
|
11820
|
-
[PHASES.PERSISTENCE]: [
|
|
11821
|
-
[PHASES.EXFIL]: [
|
|
11822
|
-
[PHASES.WEB]: [
|
|
11841
|
+
[PHASES.RECON]: [TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.CRYPTO],
|
|
11842
|
+
[PHASES.VULN_ANALYSIS]: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.CRYPTO, TECHNIQUE_FILES.REVERSING],
|
|
11843
|
+
[PHASES.EXPLOIT]: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.PWN, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.SANDBOX_ESCAPE, TECHNIQUE_FILES.REVERSING],
|
|
11844
|
+
[PHASES.POST_EXPLOIT]: [TECHNIQUE_FILES.PRIVESC, TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.SANDBOX_ESCAPE, TECHNIQUE_FILES.FORENSICS],
|
|
11845
|
+
[PHASES.PRIV_ESC]: [TECHNIQUE_FILES.PRIVESC, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.PWN, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.SANDBOX_ESCAPE],
|
|
11846
|
+
[PHASES.LATERAL]: [TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.AD_ATTACK, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.CONTAINER_ESCAPE, TECHNIQUE_FILES.NETWORK_SVC],
|
|
11847
|
+
[PHASES.PERSISTENCE]: [TECHNIQUE_FILES.SHELLS, TECHNIQUE_FILES.PRIVESC, TECHNIQUE_FILES.LATERAL],
|
|
11848
|
+
[PHASES.EXFIL]: [TECHNIQUE_FILES.LATERAL, TECHNIQUE_FILES.NETWORK_SVC, TECHNIQUE_FILES.FORENSICS],
|
|
11849
|
+
[PHASES.WEB]: [TECHNIQUE_FILES.INJECTION, TECHNIQUE_FILES.FILE_ATTACKS, TECHNIQUE_FILES.AUTH_ACCESS, TECHNIQUE_FILES.CRYPTO, TECHNIQUE_FILES.SHELLS],
|
|
11823
11850
|
[PHASES.REPORT]: []
|
|
11824
|
-
// Report phase
|
|
11851
|
+
// Report phase: no attack techniques needed
|
|
11825
11852
|
};
|
|
11826
11853
|
var PromptBuilder = class {
|
|
11827
11854
|
state;
|
|
@@ -11947,20 +11974,20 @@ ${content}
|
|
|
11947
11974
|
/**
|
|
11948
11975
|
* Load technique files relevant to the current phase.
|
|
11949
11976
|
*
|
|
11950
|
-
* Loading strategy (
|
|
11951
|
-
* 1. PHASE_TECHNIQUE_MAP defines
|
|
11952
|
-
* 2.
|
|
11953
|
-
*
|
|
11977
|
+
* Loading strategy (Improvement #7 — explicit phase mapping, no auto-discovery):
|
|
11978
|
+
* 1. PHASE_TECHNIQUE_MAP defines EXACTLY which techniques load per phase.
|
|
11979
|
+
* 2. Auto-discovery is DISABLED to prevent irrelevant technique loading
|
|
11980
|
+
* (e.g., pwn.md 18K in RECON phase, forensics.md 16K in REPORT phase).
|
|
11981
|
+
* 3. To add a new technique: add the file to techniques/ AND add it to
|
|
11982
|
+
* the relevant phase entries in PHASE_TECHNIQUE_MAP above.
|
|
11954
11983
|
*
|
|
11955
|
-
*
|
|
11956
|
-
* "Drop a markdown file in the folder, PromptBuilder auto-discovers and loads it."
|
|
11984
|
+
* Token savings: ~5-15K per turn vs unrestricted auto-discovery.
|
|
11957
11985
|
*/
|
|
11958
11986
|
loadPhaseRelevantTechniques(phase) {
|
|
11959
11987
|
if (!existsSync10(TECHNIQUES_DIR)) return "";
|
|
11960
|
-
const
|
|
11961
|
-
const loadedSet = /* @__PURE__ */ new Set();
|
|
11988
|
+
const techniquesForPhase = PHASE_TECHNIQUE_MAP[phase] ?? [];
|
|
11962
11989
|
const fragments = [];
|
|
11963
|
-
for (const technique of
|
|
11990
|
+
for (const technique of techniquesForPhase) {
|
|
11964
11991
|
const filePath = join11(TECHNIQUES_DIR, `${technique}.md`);
|
|
11965
11992
|
try {
|
|
11966
11993
|
if (!existsSync10(filePath)) continue;
|
|
@@ -11969,25 +11996,10 @@ ${content}
|
|
|
11969
11996
|
fragments.push(`<technique-reference category="${technique}">
|
|
11970
11997
|
${content}
|
|
11971
11998
|
</technique-reference>`);
|
|
11972
|
-
loadedSet.add(`${technique}.md`);
|
|
11973
11999
|
}
|
|
11974
12000
|
} catch {
|
|
11975
12001
|
}
|
|
11976
12002
|
}
|
|
11977
|
-
try {
|
|
11978
|
-
const allFiles = readdirSync4(TECHNIQUES_DIR).filter((f) => f.endsWith(".md") && f !== "README.md" && !loadedSet.has(f));
|
|
11979
|
-
for (const file of allFiles) {
|
|
11980
|
-
const filePath = join11(TECHNIQUES_DIR, file);
|
|
11981
|
-
const content = readFileSync6(filePath, PROMPT_CONFIG.ENCODING);
|
|
11982
|
-
if (content) {
|
|
11983
|
-
const category = file.replace(".md", "");
|
|
11984
|
-
fragments.push(`<technique-reference category="${category}">
|
|
11985
|
-
${content}
|
|
11986
|
-
</technique-reference>`);
|
|
11987
|
-
}
|
|
11988
|
-
}
|
|
11989
|
-
} catch {
|
|
11990
|
-
}
|
|
11991
12003
|
return fragments.join("\n\n");
|
|
11992
12004
|
}
|
|
11993
12005
|
getScopeFragment() {
|
|
@@ -12113,6 +12125,8 @@ import { join as join12, dirname as dirname5 } from "path";
|
|
|
12113
12125
|
import { fileURLToPath as fileURLToPath3 } from "url";
|
|
12114
12126
|
var __dirname3 = dirname5(fileURLToPath3(import.meta.url));
|
|
12115
12127
|
var STRATEGIST_PROMPT_PATH = join12(__dirname3, "prompts", "strategist-system.md");
|
|
12128
|
+
var CACHE_TTL_MS = 3 * 60 * 1e3;
|
|
12129
|
+
var STALL_TURNS_THRESHOLD = 2;
|
|
12116
12130
|
var Strategist = class {
|
|
12117
12131
|
llm;
|
|
12118
12132
|
state;
|
|
@@ -12121,23 +12135,42 @@ var Strategist = class {
|
|
|
12121
12135
|
totalTokenCost = 0;
|
|
12122
12136
|
totalCalls = 0;
|
|
12123
12137
|
lastDirective = null;
|
|
12138
|
+
// Cooldown state (Improvement #8)
|
|
12139
|
+
lastPhase = "";
|
|
12140
|
+
turnsWithoutProgress = 0;
|
|
12141
|
+
lastFindingCount = 0;
|
|
12124
12142
|
constructor(llm, state) {
|
|
12125
12143
|
this.llm = llm;
|
|
12126
12144
|
this.state = state;
|
|
12127
12145
|
this.systemPrompt = this.loadSystemPrompt();
|
|
12128
12146
|
}
|
|
12129
12147
|
/**
|
|
12130
|
-
* Generate a
|
|
12131
|
-
* Called
|
|
12148
|
+
* Generate a strategic directive for this turn.
|
|
12149
|
+
* Called each iteration by PromptBuilder.
|
|
12150
|
+
*
|
|
12151
|
+
* COOLDOWN POLICY (Improvement #8):
|
|
12152
|
+
* Only issues a new LLM call when needed. Otherwise reuses cached directive.
|
|
12153
|
+
* Conditions for a new call: first call, phase changed, stall detected, or TTL expired.
|
|
12132
12154
|
*
|
|
12133
12155
|
* @returns Formatted directive string for prompt injection, or '' on failure
|
|
12134
12156
|
*/
|
|
12135
12157
|
async generateDirective() {
|
|
12158
|
+
this.updateProgressTracking();
|
|
12159
|
+
const shouldCall = this.shouldCallLLM();
|
|
12160
|
+
if (!shouldCall && this.lastDirective) {
|
|
12161
|
+
debugLog("general", "Strategist: reusing cached directive (cooldown active)", {
|
|
12162
|
+
age: Math.floor((Date.now() - this.lastDirective.generatedAt) / 1e3),
|
|
12163
|
+
turnsWithoutProgress: this.turnsWithoutProgress
|
|
12164
|
+
});
|
|
12165
|
+
return this.formatForPrompt(this.lastDirective, true);
|
|
12166
|
+
}
|
|
12136
12167
|
try {
|
|
12137
12168
|
const input = this.buildInput();
|
|
12138
12169
|
const directive = await this.callLLM(input);
|
|
12139
12170
|
this.lastDirective = directive;
|
|
12140
12171
|
this.totalCalls++;
|
|
12172
|
+
this.turnsWithoutProgress = 0;
|
|
12173
|
+
this.lastPhase = this.state.getPhase();
|
|
12141
12174
|
debugLog("general", "Strategist directive generated", {
|
|
12142
12175
|
tokens: directive.tokenCost,
|
|
12143
12176
|
totalCalls: this.totalCalls,
|
|
@@ -12145,7 +12178,7 @@ var Strategist = class {
|
|
|
12145
12178
|
});
|
|
12146
12179
|
return this.formatForPrompt(directive);
|
|
12147
12180
|
} catch (err) {
|
|
12148
|
-
debugLog("general", "Strategist failed \u2014 agent will proceed
|
|
12181
|
+
debugLog("general", "Strategist failed \u2014 agent will proceed with cached/no directive", {
|
|
12149
12182
|
error: String(err)
|
|
12150
12183
|
});
|
|
12151
12184
|
if (this.lastDirective?.content) {
|
|
@@ -12154,6 +12187,54 @@ var Strategist = class {
|
|
|
12154
12187
|
return "";
|
|
12155
12188
|
}
|
|
12156
12189
|
}
|
|
12190
|
+
// ─── Cooldown Logic ─────────────────────────────────────────
|
|
12191
|
+
/**
|
|
12192
|
+
* Determine whether to call the Strategist LLM this turn.
|
|
12193
|
+
*
|
|
12194
|
+
* Calls are triggered when:
|
|
12195
|
+
* 1. No cached directive exists (first call ever)
|
|
12196
|
+
* 2. Phase changed since last call (new strategic situation)
|
|
12197
|
+
* 3. Stall detected: no new findings for 2+ turns
|
|
12198
|
+
* 4. Cache TTL expired (3 minutes — directive may be outdated)
|
|
12199
|
+
*/
|
|
12200
|
+
shouldCallLLM() {
|
|
12201
|
+
if (!this.lastDirective) return true;
|
|
12202
|
+
const currentPhase = this.state.getPhase();
|
|
12203
|
+
if (currentPhase !== this.lastPhase) {
|
|
12204
|
+
debugLog("general", "Strategist: phase changed \u2014 forcing LLM call", {
|
|
12205
|
+
from: this.lastPhase,
|
|
12206
|
+
to: currentPhase
|
|
12207
|
+
});
|
|
12208
|
+
return true;
|
|
12209
|
+
}
|
|
12210
|
+
if (this.turnsWithoutProgress >= STALL_TURNS_THRESHOLD) {
|
|
12211
|
+
debugLog("general", "Strategist: stall detected \u2014 forcing LLM call", {
|
|
12212
|
+
turnsWithoutProgress: this.turnsWithoutProgress
|
|
12213
|
+
});
|
|
12214
|
+
return true;
|
|
12215
|
+
}
|
|
12216
|
+
const age = Date.now() - this.lastDirective.generatedAt;
|
|
12217
|
+
if (age >= CACHE_TTL_MS) {
|
|
12218
|
+
debugLog("general", "Strategist: cache TTL expired \u2014 forcing LLM call", {
|
|
12219
|
+
ageMs: age
|
|
12220
|
+
});
|
|
12221
|
+
return true;
|
|
12222
|
+
}
|
|
12223
|
+
return false;
|
|
12224
|
+
}
|
|
12225
|
+
/**
|
|
12226
|
+
* Update progress tracking for stall detection.
|
|
12227
|
+
* Compares current finding count to last known count.
|
|
12228
|
+
*/
|
|
12229
|
+
updateProgressTracking() {
|
|
12230
|
+
const currentFindings = this.state.getFindings().length;
|
|
12231
|
+
if (currentFindings > this.lastFindingCount) {
|
|
12232
|
+
this.turnsWithoutProgress = 0;
|
|
12233
|
+
this.lastFindingCount = currentFindings;
|
|
12234
|
+
} else {
|
|
12235
|
+
this.turnsWithoutProgress++;
|
|
12236
|
+
}
|
|
12237
|
+
}
|
|
12157
12238
|
// ─── Input Construction ─────────────────────────────────────
|
|
12158
12239
|
/**
|
|
12159
12240
|
* Build the user message for the Strategist LLM.
|
|
@@ -12233,18 +12314,19 @@ ${input}`
|
|
|
12233
12314
|
// ─── Formatting ─────────────────────────────────────────────
|
|
12234
12315
|
/**
|
|
12235
12316
|
* Format directive for injection into the attack agent's system prompt.
|
|
12317
|
+
* @param isStale - true when reusing a cached directive (cooldown) or after error
|
|
12236
12318
|
*/
|
|
12237
12319
|
formatForPrompt(directive, isStale = false) {
|
|
12238
12320
|
if (!directive.content) return "";
|
|
12239
12321
|
const age = Math.floor((Date.now() - directive.generatedAt) / MS_PER_MINUTE);
|
|
12240
|
-
const
|
|
12241
|
-
|
|
12322
|
+
const staleMark = isStale ? `
|
|
12323
|
+
[CACHED \u2014 ${age}min old. Follow unless directly contradicted by new tool output.]` : "";
|
|
12242
12324
|
return [
|
|
12243
12325
|
"<strategic-directive>",
|
|
12244
12326
|
"TACTICAL DIRECTIVE (generated by Strategist LLM \u2014 follow these priorities):",
|
|
12245
12327
|
"",
|
|
12246
12328
|
directive.content,
|
|
12247
|
-
|
|
12329
|
+
staleMark,
|
|
12248
12330
|
"</strategic-directive>"
|
|
12249
12331
|
].filter(Boolean).join("\n");
|
|
12250
12332
|
}
|
|
@@ -12263,7 +12345,7 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
|
|
|
12263
12345
|
getTotalTokenCost() {
|
|
12264
12346
|
return this.totalTokenCost;
|
|
12265
12347
|
}
|
|
12266
|
-
/** Get number of Strategist calls this session. */
|
|
12348
|
+
/** Get number of Strategist LLM calls this session (excludes cache hits). */
|
|
12267
12349
|
getTotalCalls() {
|
|
12268
12350
|
return this.totalCalls;
|
|
12269
12351
|
}
|
|
@@ -12271,11 +12353,18 @@ NOTE: This directive is from ${age}min ago (Strategist call failed this turn). V
|
|
|
12271
12353
|
getLastDirective() {
|
|
12272
12354
|
return this.lastDirective;
|
|
12273
12355
|
}
|
|
12356
|
+
/** Current stall counter (turns without new findings). */
|
|
12357
|
+
getTurnsWithoutProgress() {
|
|
12358
|
+
return this.turnsWithoutProgress;
|
|
12359
|
+
}
|
|
12274
12360
|
/** Reset strategist state (for /clear command). */
|
|
12275
12361
|
reset() {
|
|
12276
12362
|
this.lastDirective = null;
|
|
12277
12363
|
this.totalTokenCost = 0;
|
|
12278
12364
|
this.totalCalls = 0;
|
|
12365
|
+
this.lastPhase = "";
|
|
12366
|
+
this.turnsWithoutProgress = 0;
|
|
12367
|
+
this.lastFindingCount = 0;
|
|
12279
12368
|
}
|
|
12280
12369
|
};
|
|
12281
12370
|
var FALLBACK_SYSTEM_PROMPT = `You are an elite autonomous penetration testing STRATEGIST \u2014 a red team tactical commander.
|
package/dist/prompts/base.md
CHANGED
|
@@ -20,14 +20,113 @@ Speed mindset: every second without a tool call is wasted time.
|
|
|
20
20
|
|
|
21
21
|
## OODA Loop Protocol (MANDATORY)
|
|
22
22
|
|
|
23
|
-
Before calling ANY tool
|
|
24
|
-
1. **[OBSERVE]**: What
|
|
25
|
-
2. **[ORIENT]**:
|
|
26
|
-
3. **[DECIDE]**:
|
|
27
|
-
4. **[ACT]**: Call the appropriate tool(s)
|
|
23
|
+
Before calling ANY tool, structure your reasoning using this exact format:
|
|
24
|
+
1. **[OBSERVE]**: What did the last tool/Analyst summary yield? Include attackValue, suspicions, failures.
|
|
25
|
+
2. **[ORIENT]**: Kill chain position? How does this update our attack hypothesis? What's exhausted?
|
|
26
|
+
3. **[DECIDE]**: Highest-probability unexplored vector? Check Strategic Directive PRIORITY list first.
|
|
27
|
+
4. **[ACT]**: Call the appropriate tool(s). Prefer parallel calls for independent operations.
|
|
28
28
|
|
|
29
29
|
*Never blindly call tools without explicit OBSERVATION and DECISION.*
|
|
30
30
|
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Reading the ANALYST MEMO (CRITICAL — process every turn)
|
|
34
|
+
|
|
35
|
+
Every tool result contains an **Analyst LLM summary** with structured sections.
|
|
36
|
+
You MUST process these fields in your OBSERVE step:
|
|
37
|
+
|
|
38
|
+
### Attack Value → Priority Signal
|
|
39
|
+
```
|
|
40
|
+
HIGH → Drop everything. Drill deeper into this NOW. Make it PRIORITY 1.
|
|
41
|
+
MED → Queue as next action after current PRIORITY 1 completes.
|
|
42
|
+
LOW → Pursue only if nothing else available.
|
|
43
|
+
NONE → Mark vector as EXHAUSTED. Do NOT retry without a fundamentally new approach.
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Suspicious Signals → Immediate Investigation Queue
|
|
47
|
+
When Analyst lists suspicious signals:
|
|
48
|
+
1. Add each one to `update_todo` with HIGH priority immediately
|
|
49
|
+
2. If time permits THIS turn, test it — suspicious signals are often the real attack surface
|
|
50
|
+
3. Examples: unusual response timing, debug headers, verbose errors, redirect leaks
|
|
51
|
+
|
|
52
|
+
### Next Steps → Analyst SEARCH ORDERS
|
|
53
|
+
The Analyst's "Next Steps" are **mandatory search/action orders**:
|
|
54
|
+
- Execute them THIS turn or NEXT turn — the only exception is the one stated below
|
|
55
|
+
- Skip only if working memory shows the exact same approach already failed 2+ times
|
|
56
|
+
|
|
57
|
+
### Failures → Escalation Protocol
|
|
58
|
+
When Analyst reports failures:
|
|
59
|
+
```
|
|
60
|
+
1st same failure: Retry with DIFFERENT parameters (wordlist, encoding, port)
|
|
61
|
+
2nd same failure: Switch approach — fundamentally different vector
|
|
62
|
+
3rd+ same failure: web_search("{tool} {error} bypass") → apply solution
|
|
63
|
+
```
|
|
64
|
+
*A failure with different parameters is a NEW attempt, not a repeat.*
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Strategic Directive (MANDATORY COMPLIANCE)
|
|
69
|
+
|
|
70
|
+
When `<strategic-directive>` appears in your context:
|
|
71
|
+
|
|
72
|
+
1. **PRIORITY items = ORDERS, not suggestions.** Execute them in sequence.
|
|
73
|
+
2. **EXHAUSTED list = absolute blocklist.** NEVER attempt these vectors again this session.
|
|
74
|
+
3. **SEARCH ORDERS = mandatory web_search calls.** Execute if not already done this session.
|
|
75
|
+
4. **FALLBACK = your next action when primary fails.** Use it — don't improvise blindly.
|
|
76
|
+
5. **Conflict resolution:**
|
|
77
|
+
- Direct tool evidence contradicts directive → trust the evidence, note the discrepancy
|
|
78
|
+
- Working memory shows 2+ failures on suggested approach → use FALLBACK instead
|
|
79
|
+
- Otherwise → the directive ALWAYS wins over your own assessment
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Examples — Correct OODA Execution
|
|
84
|
+
|
|
85
|
+
### Example 1: SQL Error → Correct Response
|
|
86
|
+
```
|
|
87
|
+
[OBSERVE]: run_cmd("curl /login -d 'user=admin'") returned "SQL syntax error near '''"
|
|
88
|
+
Analyst attackValue: HIGH | Next Steps: ["sqlmap -u /login --forms --batch"]
|
|
89
|
+
[ORIENT]: SQLi confirmed on /login POST. Kill chain: SQLi → dump → creds → shell.
|
|
90
|
+
Strategic Directive PRIORITY 1 says: "Exploit /login SQLi immediately."
|
|
91
|
+
[DECIDE]: Run sqlmap now. attackValue HIGH + Directive alignment → top priority.
|
|
92
|
+
[ACT]: run_cmd("sqlmap -u 'http://10.10.10.5/login' --forms --batch --risk=3 --level=3 --threads=5")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Example 2: Stall Detection → Correct Pivot
|
|
96
|
+
```
|
|
97
|
+
[OBSERVE]: 3rd gobuster attempt on /admin returned 403 again. Same as turns 4 and 6.
|
|
98
|
+
Analyst attackValue: NONE | Failures: "[FILTERED] gobuster /admin → WAF blocking"
|
|
99
|
+
[ORIENT]: Directory fuzzing on /admin is EXHAUSTED (3 identical failures).
|
|
100
|
+
Working memory shows 3 consecutive failures on same vector.
|
|
101
|
+
Analyst classified as FILTERED — try bypass headers.
|
|
102
|
+
[DECIDE]: Auth bypass headers: X-Forwarded-For: 127.0.0.1, X-Original-URL: /admin
|
|
103
|
+
This is a fundamentally different approach, not a repeat.
|
|
104
|
+
[ACT]: run_cmd("curl -H 'X-Original-URL: /admin' http://10.10.10.5/")
|
|
105
|
+
run_cmd("curl -H 'X-Forwarded-For: 127.0.0.1' http://10.10.10.5/admin")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Example 3: HIGH attackValue → Correct Drill-Down
|
|
109
|
+
```
|
|
110
|
+
[OBSERVE]: Analyst on ssh-audit output: attackValue: HIGH
|
|
111
|
+
"SSH accepts CBC mode ciphers (CVE-2008-5161) + user enumeration via timing"
|
|
112
|
+
Next Steps: ["Test SSH user enum: use timing attack to enumerate valid users"]
|
|
113
|
+
[ORIENT]: SSH is a HIGH value target. Kill chain: user enum → brute force → shell.
|
|
114
|
+
Strategic Directive PRIORITY 2 confirms SSH exploitation path.
|
|
115
|
+
[DECIDE]: Search for a public PoC first, then enumerate users, then targeted brute force with found usernames.
|
|
116
|
+
[ACT]: web_search("ssh-audit CVE-2008-5161 exploit PoC")
|
|
117
|
+
run_cmd("ssh-audit --timeout=10 10.10.10.5", background: true)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Example 4: EXHAUSTED List Application
|
|
121
|
+
```
|
|
122
|
+
[OBSERVE]: Strategic Directive EXHAUSTED list: "FTP anonymous login — connection refused (port filtered)"
|
|
123
|
+
[ORIENT]: FTP is confirmed dead. No need to test. Skip entirely.
|
|
124
|
+
[DECIDE]: Focus on HTTP (port 80) — not in EXHAUSTED list, not yet tested.
|
|
125
|
+
[ACT]: run_cmd("whatweb http://10.10.10.5") — start web fingerprinting
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
31
130
|
## Absolute Rules
|
|
32
131
|
|
|
33
132
|
### 0. ⚠️ LOCAL FILE PATHS — ALWAYS USE `.pentesting/workspace/`
|
|
@@ -56,10 +155,20 @@ You are prone to imagining non-existent tool flags or incorrect syntax for compl
|
|
|
56
155
|
- `add_finding` — immediately when vulnerability confirmed (if reproducible, record it NOW)
|
|
57
156
|
- `add_target` — new host or service discovered
|
|
58
157
|
- `add_loot` — credentials, tokens, keys, hashes found
|
|
59
|
-
- `update_phase` — when activity changes (
|
|
158
|
+
- `update_phase` — when activity changes (see Phase Transition Signals below)
|
|
60
159
|
|
|
61
160
|
Self-check every turn: Did I find a vuln but not call `add_finding`? Call it now.
|
|
62
161
|
|
|
162
|
+
### 2.5. Phase Transition Signals — When to Call `update_phase`
|
|
163
|
+
```
|
|
164
|
+
recon → vuln_analysis: 3+ services fingerprinted with versions confirmed
|
|
165
|
+
vuln_analysis → exploit: 1+ finding (confidence ≥ 50) with exploit path identified
|
|
166
|
+
exploit → post_exploitation: Shell obtained AND promoted (active_shell process active)
|
|
167
|
+
post_exploitation → lateral: root/SYSTEM achieved on current host
|
|
168
|
+
ANY_PHASE → report: All targets compromised OR time is up
|
|
169
|
+
```
|
|
170
|
+
**NEVER transition away from a phase while HIGH-priority vectors remain untested.**
|
|
171
|
+
|
|
63
172
|
### 3. ask_user Rules
|
|
64
173
|
|
|
65
174
|
Use received values immediately. Never ask for the same thing twice.
|
|
@@ -124,10 +233,12 @@ Writing code is not a fallback. It's your primary weapon.
|
|
|
124
233
|
- Automate multi-step attacks
|
|
125
234
|
- Iterate: `write_file` → `run_cmd` → observe error → fix → repeat
|
|
126
235
|
|
|
127
|
-
##
|
|
236
|
+
## Shell Lifecycle (SINGLE SOURCE OF TRUTH — referenced by exploit.md and post.md)
|
|
237
|
+
|
|
238
|
+
### Processes = Operational Assets
|
|
128
239
|
|
|
129
240
|
| Role | Meaning |
|
|
130
|
-
|------|---------|
|
|
241
|
+
|------|---------|
|
|
131
242
|
| `listener` 👂 | Waiting for connection — start before attack |
|
|
132
243
|
| `active_shell` 🐚 | **Target shell — top priority, never terminate** |
|
|
133
244
|
| `server` 📡 | File serving — clean up after use |
|
|
@@ -136,9 +247,8 @@ Writing code is not a fallback. It's your primary weapon.
|
|
|
136
247
|
**Reverse shell flow**: start listener → exploit → check status → `promote` on connection
|
|
137
248
|
→ `interact` to execute commands → upgrade shell → post-exploit through it.
|
|
138
249
|
|
|
139
|
-
|
|
250
|
+
### On Getting a Shell — Immediate Actions
|
|
140
251
|
|
|
141
|
-
On getting a shell, immediately:
|
|
142
252
|
1. Detect type: `echo $TERM && tty && echo $SHELL`
|
|
143
253
|
- `dumb` or `tty: not a tty` → upgrade required
|
|
144
254
|
- `xterm` + `/dev/pts/X` → good
|
|
@@ -151,12 +261,11 @@ On getting a shell, immediately:
|
|
|
151
261
|
|
|
152
262
|
3. **Protect the shell** — never terminate needlessly. On drop: reuse backdoor/web shell/re-exploit.
|
|
153
263
|
|
|
154
|
-
### Process Management
|
|
155
|
-
|
|
156
|
-
- Never terminate `active_shell`
|
|
264
|
+
### Process Management Rules
|
|
265
|
+
- **Never terminate `active_shell`**
|
|
157
266
|
- Clean up servers/sniffers after task completion
|
|
158
267
|
- Port conflict → switch port, update_mission with new port
|
|
159
|
-
- `bg_process stop_all` on task completion
|
|
268
|
+
- `bg_process stop_all` on task completion only
|
|
160
269
|
|
|
161
270
|
## Mission Context
|
|
162
271
|
|
|
@@ -180,8 +289,10 @@ Record parallel processes in checklist (e.g., "🔍 [bg_xxx] Port scan in progre
|
|
|
180
289
|
1. Active shell available? → use it
|
|
181
290
|
2. Shell is dumb? → upgrade
|
|
182
291
|
3. Unnecessary processes? → stop
|
|
183
|
-
4. Stuck? → search + different vector
|
|
292
|
+
4. Stuck? → check Strategic Directive FALLBACK first, then search + different vector
|
|
184
293
|
5. Repeating same method 2+ times? → switch immediately
|
|
294
|
+
6. Analyst said attackValue HIGH? → confirm it is PRIORITY 1; if not, make it PRIORITY 1 now
|
|
295
|
+
7. Any suspicions from last Analyst memo not yet tested? → add to TODO now
|
|
185
296
|
|
|
186
297
|
## Output Format
|
|
187
298
|
|
package/dist/prompts/exploit.md
CHANGED
|
@@ -68,23 +68,12 @@ Connection received but drops immediately?
|
|
|
68
68
|
└── EOFError → stdin not properly redirected, try different reverse shell variant
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
-
## 🐚 Shell Stabilization —
|
|
72
|
-
|
|
73
|
-
After receiving any shell, **immediately** follow base.md "Shell Lifecycle Mastery" protocol:
|
|
74
|
-
|
|
75
|
-
### Upgrade Priority Order:
|
|
76
|
-
```
|
|
77
|
-
1. Python PTY → python3 -c 'import pty;pty.spawn("/bin/bash")' + Ctrl+Z + stty raw -echo; fg
|
|
78
|
-
2. Script → script -qc /bin/bash /dev/null + Ctrl+Z + stty raw -echo; fg
|
|
79
|
-
3. Socat → upload socat binary, connect with full PTY
|
|
80
|
-
4. rlwrap → restart listener with rlwrap nc -lvnp PORT (readline support)
|
|
81
|
-
5. SSH back-connect → plant SSH key on target, connect back via SSH
|
|
82
|
-
6. pwncat → use pwncat-cs for auto-upgrade + features
|
|
83
|
-
7. ConPTY → Windows full interactive shell
|
|
84
|
-
```
|
|
71
|
+
## 🐚 Shell Stabilization — See base.md "Shell Lifecycle"
|
|
85
72
|
|
|
73
|
+
After receiving any shell, **immediately** follow the PTY upgrade order in base.md.
|
|
86
74
|
**Without a proper TTY:** sudo, su, ssh, screen, vim won't work. Upgrade is MANDATORY.
|
|
87
75
|
|
|
76
|
+
|
|
88
77
|
## 🔗 Exploit Chaining — Combine Vulnerabilities
|
|
89
78
|
|
|
90
79
|
Think in chains, not individual exploits. **Every vulnerability is a stepping stone to the next.**
|
|
@@ -11,45 +11,22 @@ This playbook drives **aggressive exploitation, time-aware strategy, and proof c
|
|
|
11
11
|
- Multiple proofs per target are common — **keep hunting after the first**
|
|
12
12
|
- **Environment variables** and **database entries** often contain flags/secrets
|
|
13
13
|
|
|
14
|
-
## ⏱️ Time Management
|
|
14
|
+
## ⏱️ Time Management — Follow Strategist's time-strategy
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
The `<time-strategy>` tag in your context contains exact time pressure and phase directives.
|
|
17
|
+
**Always read and follow it — it overrides any fixed-duration assumptions.**
|
|
17
18
|
|
|
19
|
+
Quick reference (use time-strategy for exact numbers):
|
|
18
20
|
```
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
├── Check for low-hanging fruit: default creds, exposed files, known CVEs
|
|
24
|
-
└── Record ALL findings → update_mission immediately
|
|
25
|
-
|
|
26
|
-
10-30 MINUTES (Targeted Attack):
|
|
27
|
-
├── Focus on highest-probability attack vector
|
|
28
|
-
├── Version+service → web_search("{service} {version} exploit CVE") IMMEDIATELY
|
|
29
|
-
├── Web: directory fuzzing + injection probes in parallel
|
|
30
|
-
├── Credential brute force on login services (hydra + rockyou.txt in background)
|
|
31
|
-
├── If stuck after 15 min on one vector → SWITCH to next
|
|
32
|
-
└── Background: hash cracking, brute force if applicable
|
|
33
|
-
|
|
34
|
-
30-60 MINUTES (Deep Exploitation):
|
|
35
|
-
├── Chain findings: LFI→RCE, SQLi→file write→shell, SSRF→internal
|
|
36
|
-
├── Custom exploit development: write_file → run_cmd
|
|
37
|
-
├── Source code analysis if .git, .bak, .swp found
|
|
38
|
-
└── Multiple attack paths simultaneously (background processes)
|
|
39
|
-
|
|
40
|
-
60+ MINUTES (Pivot & Escalate):
|
|
41
|
-
├── Privilege escalation: ALL categories systematically
|
|
42
|
-
├── Lateral movement if internal network exists
|
|
43
|
-
├── Creative hunting: unusual files, hidden services, config secrets
|
|
44
|
-
└── Re-examine ALL earlier findings with new context/access
|
|
21
|
+
SPRINT (0-25%): Broad recon, parallel scans, identify all attack surfaces
|
|
22
|
+
EXPLOIT (25-50%): Focus on top-3 highest-scoring surfaces. Quick wins only.
|
|
23
|
+
CREATIVE (50-75%): Chained exploits, custom tools. If stuck >5min → switch.
|
|
24
|
+
HARVEST (75-100%): Stop exploring. Exploit what you HAVE. Collect all proof.
|
|
45
25
|
```
|
|
46
26
|
|
|
47
27
|
### Time-Boxing Rule
|
|
48
28
|
**If stuck on ONE vector for more than 15 minutes (or sooner, if the current time-strategy phase sets a tighter limit) → SWITCH.**
|
|
49
|
-
|
|
50
|
-
- Move to next highest-probability vector
|
|
51
|
-
- Come back later with new information/tools
|
|
52
|
-
- **Never tunnel-vision on a single approach**
|
|
29
|
+
Record what you tried in `update_mission`. Move to next priority. Come back with new context.
|
|
53
30
|
|
|
54
31
|
## 🧠 Challenge & Target Quick-Start Protocols
|
|
55
32
|
|
|
@@ -21,8 +21,8 @@ You are a developer AND a hacker. Coding is your superpower.
|
|
|
21
21
|
**Question everything**: Why is this port open? What data flows through this connection?
|
|
22
22
|
What shortcuts did the admin take? What systems depend on this one? Follow every question.
|
|
23
23
|
|
|
24
|
-
## Tactical Reasoning (
|
|
25
|
-
Your thought process must be visible.
|
|
24
|
+
## Tactical Reasoning — OODA (See base.md for full protocol)
|
|
25
|
+
Your thought process must be visible. Before each tool call: OBSERVE what changed, ORIENT on the kill chain, DECIDE the next attack, ACT with the right tool.
|
|
26
26
|
|
|
27
27
|
## Kill Chain Position — Know Where You Are
|
|
28
28
|
|
|
@@ -33,16 +33,11 @@ External Recon → Service Discovery → Vuln ID → Initial Access → Shell St
|
|
|
33
33
|
|
|
34
34
|
Know your position before every turn. Act accordingly.
|
|
35
35
|
|
|
36
|
-
## After First Shell —
|
|
36
|
+
## After First Shell — See base.md "Shell Lifecycle" + post.md pipeline
|
|
37
37
|
|
|
38
|
-
1. Shell stabilization (PTY upgrade
|
|
39
|
-
2.
|
|
40
|
-
3.
|
|
41
|
-
4. Credential hunting: `.bash_history`, `.ssh/`, config files, DB connection strings
|
|
42
|
-
5. Network mapping: `ip route`, `/etc/hosts`, ARP, internal services
|
|
43
|
-
6. Privesc path exploration → on success, repeat from step 2 with new privileges
|
|
44
|
-
7. Lateral movement: SSH key reuse, credential spray, internal service access
|
|
45
|
-
8. New targets discovered → `add_target` → full recon restart
|
|
38
|
+
1. Shell stabilization (PTY upgrade per base.md)
|
|
39
|
+
2. Immediate awareness + privesc enumeration (post.md pipeline)
|
|
40
|
+
3. Credential harvest + lateral movement + persistence
|
|
46
41
|
|
|
47
42
|
## Decision Forks — Never Give Up
|
|
48
43
|
|
|
@@ -269,3 +269,39 @@ Cloud/Container:
|
|
|
269
269
|
├─ ❌ Generic reconnaissance → ✅ Targeted recon with specific goals
|
|
270
270
|
└─ ❌ "I recommend..." or "You should consider..." → ✅ Direct imperative: "Run: ..."
|
|
271
271
|
```
|
|
272
|
+
|
|
273
|
+
### Rule 11: PHASE TRANSITION SIGNALS
|
|
274
|
+
```
|
|
275
|
+
ORDER update_phase when these conditions are met:
|
|
276
|
+
|
|
277
|
+
recon → vuln_analysis:
|
|
278
|
+
├─ 3+ services fingerprinted with exact versions confirmed
|
|
279
|
+
├─ OSINT complete (shodan/github/crt.sh checked)
|
|
280
|
+
└─ Web surface mapped (get_web_attack_surface called if HTTP found)
|
|
281
|
+
|
|
282
|
+
vuln_analysis → exploit:
|
|
283
|
+
├─ 1+ finding with confidence ≥ 50 AND a concrete exploit path identified
|
|
284
|
+
├─ Specific CVE confirmed applicable (version matches, PoC available)
|
|
285
|
+
└─ Or: critical misconfiguration found (default creds, exposed .env, anon access)
|
|
286
|
+
|
|
287
|
+
exploit → post_exploitation:
|
|
288
|
+
├─ Shell obtained AND promoted (active_shell process is running)
|
|
289
|
+
├─ Interactive commands confirmed working via bg_process interact
|
|
290
|
+
└─ Shell stabilized (PTY upgrade attempted)
|
|
291
|
+
|
|
292
|
+
post_exploitation → lateral:
|
|
293
|
+
├─ root or SYSTEM access achieved on current host
|
|
294
|
+
├─ Additional network segments discovered (new /24 subnet, internal services)
|
|
295
|
+
└─ Or: domain credentials obtained (AD context)
|
|
296
|
+
|
|
297
|
+
ANY phase → report:
|
|
298
|
+
├─ All high-priority targets compromised
|
|
299
|
+
├─ Time remaining < 10% of total engagement time
|
|
300
|
+
└─ Or: scope exhausted (all vectors tried, no new surface)
|
|
301
|
+
|
|
302
|
+
CRITICAL RULES:
|
|
303
|
+
├─ NEVER order phase transition while HIGH or CRITICAL priority vectors remain untested
|
|
304
|
+
├─ Phase transitions do NOT prevent using tools from previous phases
|
|
305
|
+
├─ If recon yields nothing after 10 min → still transition to vuln_analysis and probe
|
|
306
|
+
└─ If stuck in a phase > 5 turns with no progress → evaluate if transition is needed
|
|
307
|
+
```
|
package/dist/prompts/strategy.md
CHANGED
|
@@ -36,14 +36,14 @@ TIER 4 — Last resort:
|
|
|
36
36
|
Patch diffing · Race conditions · Supply chain analysis
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
-
## Every-Turn Decision Flow
|
|
39
|
+
## Every-Turn Decision Flow — Use OODA from base.md
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
1.
|
|
43
|
-
2. Highest-probability unexplored surface from
|
|
41
|
+
During the ORIENT/DECIDE steps of base.md's OODA protocol, check:
|
|
42
|
+
1. Strategic Directive PRIORITY list — what did Strategist order first?
|
|
43
|
+
2. Highest-probability unexplored surface from the matrix below?
|
|
44
44
|
3. Have I searched for attacks on EVERY discovered service? → if not, search NOW
|
|
45
|
-
4. Can I chain existing findings?
|
|
46
|
-
5. Stuck 15+ min? → switch approach immediately
|
|
45
|
+
4. Can I chain existing findings? (check attack-intelligence in context)
|
|
46
|
+
5. Stuck 15+ min? → switch approach immediately, refer to FALLBACK in directive
|
|
47
47
|
|
|
48
48
|
## Service Intelligence Protocol
|
|
49
49
|
|