@pensar/apex 1.8.0 → 1.8.2-canary.fb75c486
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/build/agent-6dj1qm50.js +221 -0
- package/build/agent-6xr8vpgm.js +28 -0
- package/build/agent-x1htbpe3.js +22 -0
- package/build/apps-t0gmwc7z.js +446 -0
- package/build/{auth-dxjgy41e.js → auth-p4r1m7xq.js} +50 -13
- package/build/authentication-je2b0c3w.js +22 -0
- package/build/blackboxAgent-a4jnt0y5.js +22 -0
- package/build/{blackboxPentest-8ps4yvbk.js → blackboxPentest-b5741n3h.js} +19 -17
- package/build/{cli-y61d9433.js → cli-0tnv1vkp.js} +138 -38
- package/build/{cli-jg7r7y5n.js → cli-4xb21y6g.js} +30 -2
- package/build/{cli-k0tckznm.js → cli-6p7d2k55.js} +39701 -31695
- package/build/cli-87zakjb2.js +17 -0
- package/build/{authentication-e30mfzbe.js → cli-8frjr68r.js} +11 -18
- package/build/cli-8xknm7d9.js +204 -0
- package/build/cli-9egg9azd.js +22 -0
- package/build/cli-9fsre5pt.js +0 -0
- package/build/cli-abbka8n3.js +501 -0
- package/build/{cli-3y0dgy56.js → cli-c8131c4q.js} +2 -2
- package/build/cli-e08r86zk.js +24 -0
- package/build/{cli-0ghkg3w6.js → cli-e6rgwtpb.js} +19950 -18556
- package/build/cli-g5h24ny8.js +197 -0
- package/build/{cli-nr1cjfr9.js → cli-gtcd5c3f.js} +26 -7
- package/build/cli-k0730f59.js +52 -0
- package/build/{cli-tp1tqn3k.js → cli-mswm4k81.js} +1 -1
- package/build/{cli-m788e4f3.js → cli-q8dfq25x.js} +584 -33
- package/build/cli-rhry8mat.js +7213 -0
- package/build/{cli-g8t710ew.js → cli-ryy39d77.js} +253 -250
- package/build/cli-s1nckt4k.js +20 -0
- package/build/{cli-k4hrygff.js → cli-v9ds4jb8.js} +9 -5
- package/build/{cli-dqt80sw3.js → cli-w5990vr6.js} +199 -68
- package/build/{cli-3w2syxpv.js → cli-wfmdch3r.js} +102695 -104816
- package/build/cli.js +351 -280
- package/build/config-3bvtf3j8.js +188 -0
- package/build/{doctor-8tva8j99.js → doctor-2bkpddws.js} +1 -1
- package/build/{fixes-q5bhgxhc.js → fixes-60k3ts71.js} +23 -4
- package/build/{index-pfee23kv.js → index-0gp3x2r8.js} +19306 -18954
- package/build/index-861hkebg.js +12 -0
- package/build/{index-y5xpp21a.js → index-acc00eq4.js} +77 -108
- package/build/index-acdgrqa0.js +36 -0
- package/build/{index-e898mdyh.js → index-cfberehw.js} +4 -2
- package/build/{index-wfeb2gcc.js → index-hxn4rk8f.js} +9 -11
- package/build/{index-dw1xbhfn.js → index-vc29b21w.js} +161 -26
- package/build/index-vwt27stc.js +184 -0
- package/build/{issues-qbmdneej.js → issues-1bynat5q.js} +33 -9
- package/build/{logs-xm5vbymy.js → logs-e78vx2dy.js} +23 -4
- package/build/{main-3d7dfdvs.js → main-3zneyg7p.js} +93 -17
- package/build/{offesecAgent-re6kt2ff.js → offesecAgent-w9m0svwk.js} +14 -11
- package/build/parse-15kqmy2v.js +207 -0
- package/build/pentest-gpvqpvmd.js +31 -0
- package/build/{pentests-e3rj5845.js → pentests-nq7wa8yb.js} +36 -17
- package/build/{targetedPentest-fs0v570s.js → targetedPentest-fjxqn089.js} +15 -12
- package/build/threatModel-9yqx7d7x.js +29 -0
- package/build/{uninstall-qb2xbh2t.js → uninstall-9zbf4cwc.js} +6 -4
- package/build/{utils-jf52rmrb.js → utils-dh1t2r1e.js} +13 -10
- package/package.json +86 -88
- package/build/agent-4d8j2jsw.js +0 -278
- package/build/agent-z2s6h7n2.js +0 -19
- package/build/blackboxAgent-j9pczwym.js +0 -19
- package/build/cli-03z6pswp.js +0 -1423
- package/build/cli-0fy9j5dw.js +0 -61
- package/build/cli-asyas1xb.js +0 -110
- package/build/cli-dj1dgw2n.js +0 -190
- package/build/cli-q7r2sth7.js +0 -103
- package/build/cli-vkwch0bc.js +0 -1207
- package/build/cli-wr7g9qcr.js +0 -645
- package/build/index-bz6f8jry.js +0 -32
- package/build/pentest-mfm4hake.js +0 -29
- package/build/projects-qk22qcbt.js +0 -35
- package/build/threatModel-xfvc6cch.js +0 -67
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
import {
|
|
2
2
|
OffensiveSecurityAgent,
|
|
3
|
+
isMemoryEnabled,
|
|
3
4
|
readPlan
|
|
4
|
-
} from "./cli-3w2syxpv.js";
|
|
5
|
+
} from "./cli-wfmdch3r.js";
|
|
5
6
|
import {
|
|
6
7
|
exports_external1 as exports_external,
|
|
7
8
|
init_zod
|
|
8
|
-
} from "./cli-0ghkg3w6.js";
|
|
9
|
+
} from "./cli-e6rgwtpb.js";
|
|
9
10
|
|
|
10
11
|
// src/core/agents/specialized/pentest/agent.ts
|
|
12
|
+
init_zod();
|
|
11
13
|
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
12
14
|
import { join } from "path";
|
|
13
|
-
init_zod();
|
|
14
15
|
var ObjectiveResultSchema = exports_external.object({
|
|
15
16
|
objective: exports_external.string().describe("The objective text, exactly as it was provided or a refined version"),
|
|
16
17
|
completed: exports_external.boolean().describe("true if this objective was thoroughly tested and can be considered done for this endpoint; false if it still needs further testing in future runs"),
|
|
@@ -42,11 +43,14 @@ class TargetedPentestAgent extends OffensiveSecurityAgent {
|
|
|
42
43
|
messages,
|
|
43
44
|
context,
|
|
44
45
|
environmentVariables,
|
|
45
|
-
enableThinking
|
|
46
|
+
enableThinking,
|
|
47
|
+
openAIReasoningEffort,
|
|
48
|
+
role = "orchestrator",
|
|
49
|
+
browserSession
|
|
46
50
|
} = opts;
|
|
47
51
|
super({
|
|
48
|
-
system: buildPentestSystemPrompt(session),
|
|
49
|
-
prompt: buildPentestPrompt(target, objectives, session, findingsRegistry, context, environmentVariables ? Object.keys(environmentVariables) : undefined, subagentId),
|
|
52
|
+
system: buildPentestSystemPrompt(session, role),
|
|
53
|
+
prompt: buildPentestPrompt(target, objectives, session, findingsRegistry, context, environmentVariables ? Object.keys(environmentVariables) : undefined, subagentId, role),
|
|
50
54
|
model,
|
|
51
55
|
session,
|
|
52
56
|
target,
|
|
@@ -61,29 +65,9 @@ class TargetedPentestAgent extends OffensiveSecurityAgent {
|
|
|
61
65
|
messages,
|
|
62
66
|
environmentVariables,
|
|
63
67
|
enableThinking,
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
"document_vulnerability",
|
|
68
|
-
"response",
|
|
69
|
-
"browser_navigate",
|
|
70
|
-
"browser_snapshot",
|
|
71
|
-
"browser_screenshot",
|
|
72
|
-
"browser_click",
|
|
73
|
-
"browser_fill",
|
|
74
|
-
"email_list_inboxes",
|
|
75
|
-
"email_list_messages",
|
|
76
|
-
"email_search_messages",
|
|
77
|
-
"email_get_message",
|
|
78
|
-
"send_email",
|
|
79
|
-
"list_memories",
|
|
80
|
-
"get_memory",
|
|
81
|
-
"add_memory",
|
|
82
|
-
"web_search",
|
|
83
|
-
"get_page",
|
|
84
|
-
"checkpoint_state",
|
|
85
|
-
...session.config?.taskDriven ? ["create_task", "update_task", "list_tasks"] : []
|
|
86
|
-
],
|
|
68
|
+
openAIReasoningEffort,
|
|
69
|
+
browserSession,
|
|
70
|
+
activeTools: buildPentestActiveTools(role, session),
|
|
87
71
|
responseSchema: PentestResponseSchema,
|
|
88
72
|
resolveResult: async (streamResult) => {
|
|
89
73
|
let objectiveResults;
|
|
@@ -125,8 +109,14 @@ var SECTION_DOCUMENT_VULNERABILITY_RULES = `CRITICAL — document_vulnerability
|
|
|
125
109
|
- POC must exit 0 on success (vulnerability confirmed), non-zero on failure
|
|
126
110
|
- If the POC fails or the validation judge rejects the finding, revise your pocContent and call again
|
|
127
111
|
- NEVER use document_vulnerability for: positive observations (e.g. "authentication is working correctly"), testing limitations (e.g. "rate limiting prevented testing"), informational notes, infrastructure observations, or anything that is not a real exploitable vulnerability
|
|
112
|
+
- Before calling document_vulnerability, be able to state: the exploit path, the material security impact, the non-public asset or abuse path affected, and why common false-positive traps do not apply
|
|
128
113
|
- If you were unable to confirm or exploit a vulnerability, do NOT document it — instead describe it in your final response summary
|
|
129
114
|
- It is completely acceptable to finish a test with zero documented vulnerabilities if none were found`;
|
|
115
|
+
var SECTION_MATERIALITY_GUIDANCE = `Materiality & False-Positive Discipline:
|
|
116
|
+
- Preserve the user's stated objective as the organizing constraint. Do not turn a narrow false-positive check into a broad vulnerability hunt unless the user explicitly asked for a full sweep.
|
|
117
|
+
- Public or intentionally unauthenticated endpoints are not vulnerable merely because browsers can request them, CORS is absent/irrelevant, or rate limiting is not visible. Document only if you prove access to non-public data, credentialed cross-origin abuse, state change, denial of service, account takeover, or another material exploit chain.
|
|
118
|
+
- Missing HTTPS, missing security headers, verbose generic errors, public identifiers, demo/training-app behavior, and best-practice gaps are not findings by themselves. Summarize them as non-findings unless you prove material exploitability.
|
|
119
|
+
- A completed objective may have zero findings. If testing shows materiality is not met, finish with a clear no-finding explanation instead of probing unrelated endpoints.`;
|
|
130
120
|
var SECTION_RATE_LIMITING = `Rate Limiting:
|
|
131
121
|
- If you encounter rate limiting (HTTP 429), use exponential backoff before retrying
|
|
132
122
|
- Use execute_command with "sleep N" where N increases: 5 seconds, then 30 seconds, then 120 seconds
|
|
@@ -228,6 +218,8 @@ ${SECTION_RATE_LIMITING_TESTING}
|
|
|
228
218
|
|
|
229
219
|
${SECTION_DOCUMENT_VULNERABILITY_RULES}
|
|
230
220
|
|
|
221
|
+
${SECTION_MATERIALITY_GUIDANCE}
|
|
222
|
+
|
|
231
223
|
${SECTION_POC_PORTABILITY}
|
|
232
224
|
|
|
233
225
|
${SECTION_BROWSER_INTERACTION}
|
|
@@ -259,6 +251,7 @@ Guidelines:
|
|
|
259
251
|
- Always call list_memories first to check for relevant knowledge before planning your approach
|
|
260
252
|
- State your objectives and plan before executing any attack tools
|
|
261
253
|
- When you confirm a vulnerability that provides internal access, think through what lies behind it and actively explore through the vulnerability to maximize impact
|
|
254
|
+
- Preserve the supplied objectives. Do not broaden a narrow objective into unrelated endpoint testing unless a confirmed finding requires a specific sibling endpoint to demonstrate impact.
|
|
262
255
|
- Be methodical and thorough — test one payload at a time and observe the response
|
|
263
256
|
- Use execute_command for crafting/running exploit scripts and http_request for targeted web tests
|
|
264
257
|
- Call document_vulnerability with your POC script inline (in pocContent) to exploit, validate, and document in one step — you can document multiple vulnerabilities in a single run
|
|
@@ -269,6 +262,18 @@ Guidelines:
|
|
|
269
262
|
|
|
270
263
|
${SECTION_AUTHENTICATION}
|
|
271
264
|
|
|
265
|
+
${SECTION_DOCUMENT_VULNERABILITY_RULES}
|
|
266
|
+
|
|
267
|
+
${SECTION_MATERIALITY_GUIDANCE}
|
|
268
|
+
|
|
269
|
+
${SECTION_RATE_LIMITING}
|
|
270
|
+
|
|
271
|
+
${SECTION_RATE_LIMITING_TESTING}
|
|
272
|
+
|
|
273
|
+
${SECTION_CREDENTIAL_DISCOVERY}
|
|
274
|
+
|
|
275
|
+
${SECTION_SECURITY_HEADERS_CORS}
|
|
276
|
+
|
|
272
277
|
${SECTION_STATE_CHECKPOINTING}`;
|
|
273
278
|
var PENTEST_SYSTEM_PROMPT_TASK_DRIVEN = `You are an expert penetration tester performing a targeted security assessment.
|
|
274
279
|
|
|
@@ -291,6 +296,8 @@ ${SECTION_TASK_COVERAGE_RULES}
|
|
|
291
296
|
|
|
292
297
|
${SECTION_DOCUMENT_VULNERABILITY_RULES}
|
|
293
298
|
|
|
299
|
+
${SECTION_MATERIALITY_GUIDANCE}
|
|
300
|
+
|
|
294
301
|
${SECTION_POC_PORTABILITY}
|
|
295
302
|
|
|
296
303
|
${SECTION_RATE_LIMITING}
|
|
@@ -327,6 +334,8 @@ ${SECTION_TASK_COVERAGE_RULES}
|
|
|
327
334
|
|
|
328
335
|
${SECTION_DOCUMENT_VULNERABILITY_RULES}
|
|
329
336
|
|
|
337
|
+
${SECTION_MATERIALITY_GUIDANCE}
|
|
338
|
+
|
|
330
339
|
${SECTION_POC_PORTABILITY}
|
|
331
340
|
|
|
332
341
|
${SECTION_RATE_LIMITING}
|
|
@@ -342,7 +351,10 @@ ${SECTION_CREDENTIAL_DISCOVERY}
|
|
|
342
351
|
${SECTION_SECURITY_HEADERS_CORS}
|
|
343
352
|
|
|
344
353
|
${SECTION_STATE_CHECKPOINTING}`;
|
|
345
|
-
function buildPentestSystemPrompt(session) {
|
|
354
|
+
function buildPentestSystemPrompt(session, role = "orchestrator") {
|
|
355
|
+
if (role === "orchestrator") {
|
|
356
|
+
return PENTEST_SYSTEM_PROMPT_ORCHESTRATOR;
|
|
357
|
+
}
|
|
346
358
|
const taskDriven = session.config?.taskDriven ?? false;
|
|
347
359
|
const exfilMode = session.config?.exfilMode ?? false;
|
|
348
360
|
if (taskDriven) {
|
|
@@ -350,10 +362,49 @@ function buildPentestSystemPrompt(session) {
|
|
|
350
362
|
}
|
|
351
363
|
return exfilMode ? PENTEST_SYSTEM_PROMPT_EXFIL : PENTEST_SYSTEM_PROMPT_BASE;
|
|
352
364
|
}
|
|
353
|
-
|
|
365
|
+
var SECTION_ORCHESTRATOR_DELEGATION = `Sub-Agent Delegation Rules:
|
|
366
|
+
- You DO NOT call document_vulnerability directly. Findings are documented by the workers you spawn.
|
|
367
|
+
- You DO NOT execute deep exploitation attempts yourself. Your tools (execute_command, http_request, browser_*) are for INITIAL RECON only — fingerprinting, sanity-checking the target, observing baseline behavior.
|
|
368
|
+
- Each spawn_pentest_agent call MUST cover exactly ONE objective from the assignment, plus optional supporting context. Do not batch multiple objectives into one spawn — the UI surfaces each spawn as its own timeline, and per-objective spawns give each worker a clean, focused context window.
|
|
369
|
+
- Target URL propagation: the \`target\` you received already encodes the specific domain + endpoint path the caller wants tested (e.g. https://example.com/api/users/{id}). Forward that EXACT URL into every spawn_pentest_agent call's \`target\` field. Do NOT strip the path back to a bare domain, do NOT swap the path for some other endpoint, and do NOT invent new endpoints — workers do not perform endpoint discovery, they deeply test the path they are given. The only time a worker's \`target\` should differ from yours is when recon surfaced a closely-related sibling endpoint on the same host that belongs to a follow-up objective; even then, send the full URL with the new path, not a bare host.
|
|
370
|
+
- After all per-objective workers complete, spawn ONE final "chain & explore" worker. Pass it: a brief summary of what earlier workers found (or didn't find), plus any anomalous behaviors observed during recon. Its job is to chain confirmed findings into higher-impact attacks AND probe for additional vulnerabilities that fall outside the original objective list. Send it the same endpoint URL unless an earlier worker confirmed a vulnerability on a sibling endpoint that the chain depends on — in which case pass that sibling's full URL.
|
|
371
|
+
- Do not call spawn_pentest_agent before stating your plan in plain text. The plan must be visible to the user as an assistant message, not just inferred from tool calls.
|
|
372
|
+
- Cloned browser session — every worker you spawn gets its OWN isolated Chromium, seeded at spawn time with a snapshot of your current cookies and per-origin localStorage. Practical implications:
|
|
373
|
+
- If authentication is required, log in ONCE in YOUR browser during recon. Every worker you spawn after that will start already authenticated — do NOT instruct workers to re-authenticate. Workers that authenticate themselves only authenticate their own cloned browser, so re-auth wastes turns.
|
|
374
|
+
- Worker browser actions are LOCAL to the worker's clone. A worker's navigations, form fills, \`browser_evaluate\` mutations, and \`localStorage\`/\`sessionStorage\` writes are NOT visible to you or to sibling workers. So workers can fire payloads, trigger alerts, or clobber DOM state without breaking each other or you.
|
|
375
|
+
- Conversely, if you want state to be visible to the next worker, set it up in YOUR browser before spawning. Each worker sees the snapshot of your browser AT THE MOMENT YOU CALL spawn_pentest_agent — later mutations in your browser propagate to subsequent spawns but not to in-flight workers.
|
|
376
|
+
- Worker sessions are torn down when the worker finishes, so any cookies the worker acquired during testing (post-auth flows, OAuth callbacks, etc.) are discarded. If a worker discovers a useful login flow, summarize the credentials in your final response or repeat the flow in YOUR browser before the next spawn.`;
|
|
377
|
+
var PENTEST_SYSTEM_PROMPT_ORCHESTRATOR = `You are the LEAD penetration tester coordinating a focused security assessment.
|
|
378
|
+
|
|
379
|
+
You are given a specific target and a set of objectives. You do NOT directly attempt exploitation. Instead, you plan, recon, then dispatch focused worker sub-agents — one per objective — each of which deeply tests its assigned objective. After fan-out, you dispatch a final worker to chain findings and explore beyond the original objectives.
|
|
380
|
+
|
|
381
|
+
${SECTION_SOURCE_CODE_PROHIBITION}
|
|
382
|
+
|
|
383
|
+
Your methodology:
|
|
384
|
+
1. ORIENT — Call list_memories to review any existing knowledge from previous engagements (target-specific notes, successful techniques, false positive patterns, technology context). Use what you find to shape your plan.
|
|
385
|
+
2. PLAN — State the objectives you have been given and outline your high-level orchestration plan in plain text BEFORE any tool calls. For each objective, briefly state what attack class the worker should focus on (e.g. "Objective 1 → SQL injection, focus on /api/users id parameter"). Output this plan as a text message — not as a tool call.
|
|
386
|
+
3. RECON — Perform LIGHT initial reconnaissance to confirm the target is reachable and understand baseline behavior. Use http_request for a handful of probes, browser_navigate + browser_snapshot to see the surface, and execute_command sparingly. Do NOT begin exploitation here — that is the workers' job. Note any anomalies (unusual error responses, exposed headers, framework fingerprints, surprising endpoint behavior) for the final exploratory worker.
|
|
387
|
+
4. FAN OUT — For EACH objective, call spawn_pentest_agent EXACTLY ONCE. Each spawn dispatches a focused worker that will perform the full PLAN → VERIFY → PREPARE → TEST → EXPLOIT → DOCUMENT loop on its objective. Workers write findings to the shared findings registry — you do NOT need to forward findings between them.
|
|
388
|
+
5. CHAIN & EXPLORE — After all per-objective workers complete, call spawn_pentest_agent ONE FINAL TIME with a synthesized objective that:
|
|
389
|
+
a. Summarizes what earlier workers confirmed or ruled out (so the exploratory worker doesn't re-do their work).
|
|
390
|
+
b. Calls out any anomalies you noticed during recon that nobody investigated.
|
|
391
|
+
c. Directs the worker to chain confirmed findings into higher-impact attack chains AND probe for additional vulnerabilities outside the original objective list (e.g. business logic flaws, race conditions, secondary injection points).
|
|
392
|
+
6. LEARN — Use add_memory to persist reusable learnings from this engagement (target behaviors, effective techniques, false positive patterns, technology fingerprints).
|
|
393
|
+
7. FINISH — Call the response tool with your final summary. Compile objectiveResults from what each worker reported: mark each objective as completed (vulnerability confirmed and documented by a worker, OR conclusively ruled out by a worker), or incomplete (worker failed or could not finish). Include any new objectives discovered by the exploratory worker.
|
|
394
|
+
|
|
395
|
+
${SECTION_ORCHESTRATOR_DELEGATION}
|
|
396
|
+
|
|
397
|
+
${SECTION_AUTHENTICATION}
|
|
398
|
+
|
|
399
|
+
${SECTION_MATERIALITY_GUIDANCE}
|
|
400
|
+
|
|
401
|
+
${SECTION_BROWSER_INTERACTION}
|
|
402
|
+
|
|
403
|
+
${SECTION_STATE_CHECKPOINTING}`;
|
|
404
|
+
function buildPentestPrompt(target, objectives, session, findingsRegistry, context, envVarNames, subagentId, role = "orchestrator") {
|
|
354
405
|
const sessionRootPath = session.rootPath;
|
|
355
406
|
const exfilMode = session.config?.exfilMode ?? false;
|
|
356
|
-
const taskDriven = session.config?.taskDriven ?? false;
|
|
407
|
+
const taskDriven = role === "orchestrator" ? false : session.config?.taskDriven ?? false;
|
|
357
408
|
const outcomeGuidance = session.config?.outcomeGuidance;
|
|
358
409
|
const objectiveList = objectives.map((o, i) => `${i + 1}. ${o}`).join(`
|
|
359
410
|
`);
|
|
@@ -450,7 +501,7 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
|
|
|
450
501
|
1. Call list_memories to review prior knowledge
|
|
451
502
|
2. Call create_task for each objective × technique combination
|
|
452
503
|
3. Work through tasks: update_task(in_progress) → test → update_task(completed|failed)
|
|
453
|
-
4. Call document_vulnerability for confirmed vulnerabilities
|
|
504
|
+
4. Call document_vulnerability for confirmed vulnerabilities only when material exploitability is proven
|
|
454
505
|
5. When a vulnerability provides internal access, create_task for pivoting and extraction
|
|
455
506
|
6. Call list_tasks to verify all tasks are completed or failed
|
|
456
507
|
7. Use add_memory to save reusable learnings
|
|
@@ -465,7 +516,20 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
|
|
|
465
516
|
8. Call the response tool only when all tasks are terminal
|
|
466
517
|
|
|
467
518
|
Do NOT discover or enumerate other endpoints or services. Focus exclusively on the target and objectives above.`;
|
|
468
|
-
const instructions = taskDriven ? taskDrivenInstructions : exfilMode ? `## Goal
|
|
519
|
+
const orchestratorInstructions = `## Instructions
|
|
520
|
+
1. Call list_memories to review any prior knowledge relevant to this target or engagement.
|
|
521
|
+
2. State the objectives and outline your orchestration plan in plain text BEFORE any tool calls — one bullet per objective, briefly naming the attack class each worker should focus on.
|
|
522
|
+
3. Perform LIGHT initial recon (a handful of http_request probes, browser_navigate + browser_snapshot to see the surface). Do NOT begin exploitation here — that is the workers' job. Note any anomalies you observe for the final exploratory worker.
|
|
523
|
+
4. Call spawn_pentest_agent EXACTLY ONCE PER OBJECTIVE. For every spawn:
|
|
524
|
+
- Set \`target\` to the FULL URL from the assignment above (domain + endpoint path) — pass it through verbatim. Do not strip the path or rewrite the host. Workers do not perform endpoint discovery; they deeply test the path you hand them.
|
|
525
|
+
- Pass the matching objective in the \`objectives\` array (a single-element array).
|
|
526
|
+
- Use the \`context\` field to forward any recon insights specific to that objective. If your earlier browser actions left state the worker should know about (already logged in as X, certain modal already dismissed), call that out in \`context\` — each worker is seeded with a snapshot of YOUR browser's cookies and localStorage at the moment of the spawn call.
|
|
527
|
+
5. After all per-objective workers complete, call spawn_pentest_agent ONE FINAL TIME with a synthesized "chain & explore" objective: summarize what earlier workers confirmed/ruled out, call out unaddressed anomalies, and direct the worker to chain confirmed findings AND probe for additional vulnerabilities outside the original objective list. Send it the same endpoint URL as your assignment unless an earlier worker's confirmed finding on a sibling endpoint is what makes the chain possible.
|
|
528
|
+
6. Use add_memory to save reusable learnings from this engagement.
|
|
529
|
+
7. Call the response tool with your final summary. Compile objectiveResults from worker reports.
|
|
530
|
+
|
|
531
|
+
Do NOT call document_vulnerability — that is the workers' job. Do NOT discover or enumerate other endpoints or services beyond the assigned target.`;
|
|
532
|
+
const instructions = role === "orchestrator" ? orchestratorInstructions : taskDriven ? taskDrivenInstructions : exfilMode ? `## Goal
|
|
469
533
|
Your target is a flag with the format FLAG{...}. Locate and extract it.
|
|
470
534
|
|
|
471
535
|
## Instructions
|
|
@@ -474,10 +538,11 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
|
|
|
474
538
|
3. Verify the target endpoint is reachable and understand its baseline behavior
|
|
475
539
|
4. For each objective, research and craft targeted payloads appropriate to the technology
|
|
476
540
|
5. Test systematically — vary payloads, encoding, and bypass techniques
|
|
477
|
-
6. When a vulnerability is confirmed, call document_vulnerability with your POC script inline (pocContent) to exploit, validate, and document the finding
|
|
541
|
+
6. When a vulnerability is confirmed and material exploitability is proven, call document_vulnerability with your POC script inline (pocContent) to exploit, validate, and document the finding
|
|
478
542
|
7. When a vulnerability provides internal access, pivot through it to discover reachable services and extract sensitive data
|
|
479
543
|
8. Use add_memory to save reusable learnings from this engagement (effective techniques, target behaviors, technology details)
|
|
480
|
-
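9. After testing ALL objectives and extracting the flag, call the response tool with your final summary. Do NOT call response until you have completed all testing and extraction.` : `## Instructions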
|
|
544
|
+
9. Preserve the supplied objectives. Do not broaden a scoped materiality check into unrelated endpoint testing unless a confirmed finding requires it to demonstrate impact.
|
|
545
|
+
10. After testing ALL objectives and extracting the flag, call the response tool with your final summary. Do NOT call response until you have completed all testing and extraction.` : `## Instructions
|
|
481
546
|
1. Call list_memories to review any prior knowledge relevant to this target or engagement
|
|
482
547
|
2. State the objectives and outline your testing plan — describe which techniques and payloads you will use for each objective before executing any tools
|
|
483
548
|
3. Verify the target endpoint is reachable and understand its baseline behavior
|
|
@@ -513,6 +578,42 @@ ${objectiveList}
|
|
|
513
578
|
${outcomeSection}
|
|
514
579
|
${instructions}`;
|
|
515
580
|
}
|
|
581
|
+
var WORKER_RECON_TOOLS = [
|
|
582
|
+
"execute_command",
|
|
583
|
+
"http_request",
|
|
584
|
+
"browser_navigate",
|
|
585
|
+
"browser_snapshot",
|
|
586
|
+
"browser_screenshot",
|
|
587
|
+
"browser_click",
|
|
588
|
+
"browser_fill"
|
|
589
|
+
];
|
|
590
|
+
var SHARED_PENTEST_TOOLS = [
|
|
591
|
+
"response",
|
|
592
|
+
"email_list_inboxes",
|
|
593
|
+
"email_list_messages",
|
|
594
|
+
"email_search_messages",
|
|
595
|
+
"email_get_message",
|
|
596
|
+
"send_email",
|
|
597
|
+
"list_memories",
|
|
598
|
+
"get_memory",
|
|
599
|
+
"add_memory",
|
|
600
|
+
"web_search",
|
|
601
|
+
"get_page",
|
|
602
|
+
"checkpoint_state"
|
|
603
|
+
];
|
|
604
|
+
var MEMORY_TOOL_NAMES = ["add_memory", "list_memories", "get_memory"];
|
|
605
|
+
function buildPentestActiveTools(role, session) {
|
|
606
|
+
const tools = role === "orchestrator" ? [...WORKER_RECON_TOOLS, ...SHARED_PENTEST_TOOLS, "spawn_pentest_agent"] : [
|
|
607
|
+
...WORKER_RECON_TOOLS,
|
|
608
|
+
"document_vulnerability",
|
|
609
|
+
...SHARED_PENTEST_TOOLS,
|
|
610
|
+
...session.config?.taskDriven ? ["create_task", "update_task", "list_tasks"] : []
|
|
611
|
+
];
|
|
612
|
+
if (!isMemoryEnabled()) {
|
|
613
|
+
return tools.filter((t) => !MEMORY_TOOL_NAMES.includes(t));
|
|
614
|
+
}
|
|
615
|
+
return tools;
|
|
616
|
+
}
|
|
516
617
|
function loadFindings(findingsPath) {
|
|
517
618
|
if (!existsSync(findingsPath)) {
|
|
518
619
|
return [];
|
|
@@ -526,5 +627,4 @@ function loadFindings(findingsPath) {
|
|
|
526
627
|
}
|
|
527
628
|
}).filter((f) => f !== null);
|
|
528
629
|
}
|
|
529
|
-
|
|
530
|
-
export { TargetedPentestAgent, buildPentestSystemPrompt };
|
|
630
|
+
export { TargetedPentestAgent, buildPentestSystemPrompt, buildPentestPrompt, buildPentestActiveTools };
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
import {
|
|
2
2
|
OffensiveSecurityAgent
|
|
3
|
-
} from "./cli-3w2syxpv.js";
|
|
3
|
+
} from "./cli-wfmdch3r.js";
|
|
4
4
|
import {
|
|
5
|
+
init_dist,
|
|
5
6
|
stepCountIs
|
|
6
|
-
} from "./cli-k0tckznm.js";
|
|
7
|
+
} from "./cli-6p7d2k55.js";
|
|
8
|
+
|
|
9
|
+
// src/core/agents/specialized/codeAgent/agent.ts
|
|
10
|
+
init_dist();
|
|
7
11
|
|
|
8
12
|
// src/core/agents/specialized/codeAgent/prompts.ts
|
|
9
13
|
var CODE_AGENT_SYSTEM_PROMPT = `You are an expert coding agent with direct filesystem access. You will be given a specific objective — focus exclusively on completing it.
|
|
@@ -36,6 +40,17 @@ Run shell commands when needed.
|
|
|
36
40
|
- Use for any task that benefits from shell access: build tools, git operations, package managers, linters, etc.
|
|
37
41
|
- Useful for running scripts, checking dependencies, inspecting git history, or any CLI tool.
|
|
38
42
|
|
|
43
|
+
## Whitebox security tools (when investigating vulnerabilities)
|
|
44
|
+
- **profile_codebase** — summarize languages, manifests, scanners, and repo shape; full JSON is written as a session artifact.
|
|
45
|
+
- **query_whitebox_catalog** — pull focused methodology slices (sinks, scanners, review passes) instead of loading a whole playbook into context.
|
|
46
|
+
- **run_code_query** — batched rg / ast-grep / comby searches with bounded output and artifact logs.
|
|
47
|
+
- **run_whitebox_scan** — run installed scanners when available; triage results before treating them as confirmed issues.
|
|
48
|
+
- **create_whitebox_candidate / update_whitebox_candidate / list_whitebox_candidates** — track hypotheses with explicit state and evidence.
|
|
49
|
+
- **start_whitebox_job / poll_whitebox_job / stop_whitebox_job** — bounded long-running jobs (builds, fuzzers) with logs under the session.
|
|
50
|
+
- **read_whitebox_artifact** — read \`logs/whitebox/\` or \`scratchpad/whitebox/\` artifact paths returned by other tools (or legacy job logs by id).
|
|
51
|
+
|
|
52
|
+
Prefer catalog + code_query for sink-first work; use candidates to separate unverified ideas from \`document_vulnerability\`. Do not modify the target repo unless the operator asked you to — keep harnesses and scratch output in the session scratchpad.
|
|
53
|
+
|
|
39
54
|
# Working Approach
|
|
40
55
|
1. **Orient first** — list files and read key entry points to understand the structure before diving in.
|
|
41
56
|
2. **Search, then read** — use grep to locate what you need, then read the relevant files.
|
|
@@ -64,6 +79,7 @@ class CodeAgent extends OffensiveSecurityAgent {
|
|
|
64
79
|
attackSurfaceRegistry,
|
|
65
80
|
excludeTools,
|
|
66
81
|
enableThinking,
|
|
82
|
+
openAIReasoningEffort,
|
|
67
83
|
projectThreatModel
|
|
68
84
|
} = opts;
|
|
69
85
|
let activeTools = [
|
|
@@ -71,6 +87,17 @@ class CodeAgent extends OffensiveSecurityAgent {
|
|
|
71
87
|
"list_files",
|
|
72
88
|
"grep",
|
|
73
89
|
"execute_command",
|
|
90
|
+
"profile_codebase",
|
|
91
|
+
"query_whitebox_catalog",
|
|
92
|
+
"run_code_query",
|
|
93
|
+
"run_whitebox_scan",
|
|
94
|
+
"create_whitebox_candidate",
|
|
95
|
+
"update_whitebox_candidate",
|
|
96
|
+
"list_whitebox_candidates",
|
|
97
|
+
"start_whitebox_job",
|
|
98
|
+
"poll_whitebox_job",
|
|
99
|
+
"stop_whitebox_job",
|
|
100
|
+
"read_whitebox_artifact",
|
|
74
101
|
"http_request",
|
|
75
102
|
"document_app",
|
|
76
103
|
"document_endpoint",
|
|
@@ -97,6 +124,7 @@ class CodeAgent extends OffensiveSecurityAgent {
|
|
|
97
124
|
subagentId,
|
|
98
125
|
attackSurfaceRegistry,
|
|
99
126
|
enableThinking,
|
|
127
|
+
openAIReasoningEffort,
|
|
100
128
|
projectThreatModel,
|
|
101
129
|
stopWhen: stopWhen ?? stepCountIs(1e4),
|
|
102
130
|
activeTools,
|