@pensar/apex 1.8.0 → 1.8.2-canary.fb75c486

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (70)
  1. package/README.md +11 -0
  2. package/build/agent-6dj1qm50.js +221 -0
  3. package/build/agent-6xr8vpgm.js +28 -0
  4. package/build/agent-x1htbpe3.js +22 -0
  5. package/build/apps-t0gmwc7z.js +446 -0
  6. package/build/{auth-dxjgy41e.js → auth-p4r1m7xq.js} +50 -13
  7. package/build/authentication-je2b0c3w.js +22 -0
  8. package/build/blackboxAgent-a4jnt0y5.js +22 -0
  9. package/build/{blackboxPentest-8ps4yvbk.js → blackboxPentest-b5741n3h.js} +19 -17
  10. package/build/{cli-y61d9433.js → cli-0tnv1vkp.js} +138 -38
  11. package/build/{cli-jg7r7y5n.js → cli-4xb21y6g.js} +30 -2
  12. package/build/{cli-k0tckznm.js → cli-6p7d2k55.js} +39701 -31695
  13. package/build/cli-87zakjb2.js +17 -0
  14. package/build/{authentication-e30mfzbe.js → cli-8frjr68r.js} +11 -18
  15. package/build/cli-8xknm7d9.js +204 -0
  16. package/build/cli-9egg9azd.js +22 -0
  17. package/build/cli-9fsre5pt.js +0 -0
  18. package/build/cli-abbka8n3.js +501 -0
  19. package/build/{cli-3y0dgy56.js → cli-c8131c4q.js} +2 -2
  20. package/build/cli-e08r86zk.js +24 -0
  21. package/build/{cli-0ghkg3w6.js → cli-e6rgwtpb.js} +19950 -18556
  22. package/build/cli-g5h24ny8.js +197 -0
  23. package/build/{cli-nr1cjfr9.js → cli-gtcd5c3f.js} +26 -7
  24. package/build/cli-k0730f59.js +52 -0
  25. package/build/{cli-tp1tqn3k.js → cli-mswm4k81.js} +1 -1
  26. package/build/{cli-m788e4f3.js → cli-q8dfq25x.js} +584 -33
  27. package/build/cli-rhry8mat.js +7213 -0
  28. package/build/{cli-g8t710ew.js → cli-ryy39d77.js} +253 -250
  29. package/build/cli-s1nckt4k.js +20 -0
  30. package/build/{cli-k4hrygff.js → cli-v9ds4jb8.js} +9 -5
  31. package/build/{cli-dqt80sw3.js → cli-w5990vr6.js} +199 -68
  32. package/build/{cli-3w2syxpv.js → cli-wfmdch3r.js} +102695 -104816
  33. package/build/cli.js +351 -280
  34. package/build/config-3bvtf3j8.js +188 -0
  35. package/build/{doctor-8tva8j99.js → doctor-2bkpddws.js} +1 -1
  36. package/build/{fixes-q5bhgxhc.js → fixes-60k3ts71.js} +23 -4
  37. package/build/{index-pfee23kv.js → index-0gp3x2r8.js} +19306 -18954
  38. package/build/index-861hkebg.js +12 -0
  39. package/build/{index-y5xpp21a.js → index-acc00eq4.js} +77 -108
  40. package/build/index-acdgrqa0.js +36 -0
  41. package/build/{index-e898mdyh.js → index-cfberehw.js} +4 -2
  42. package/build/{index-wfeb2gcc.js → index-hxn4rk8f.js} +9 -11
  43. package/build/{index-dw1xbhfn.js → index-vc29b21w.js} +161 -26
  44. package/build/index-vwt27stc.js +184 -0
  45. package/build/{issues-qbmdneej.js → issues-1bynat5q.js} +33 -9
  46. package/build/{logs-xm5vbymy.js → logs-e78vx2dy.js} +23 -4
  47. package/build/{main-3d7dfdvs.js → main-3zneyg7p.js} +93 -17
  48. package/build/{offesecAgent-re6kt2ff.js → offesecAgent-w9m0svwk.js} +14 -11
  49. package/build/parse-15kqmy2v.js +207 -0
  50. package/build/pentest-gpvqpvmd.js +31 -0
  51. package/build/{pentests-e3rj5845.js → pentests-nq7wa8yb.js} +36 -17
  52. package/build/{targetedPentest-fs0v570s.js → targetedPentest-fjxqn089.js} +15 -12
  53. package/build/threatModel-9yqx7d7x.js +29 -0
  54. package/build/{uninstall-qb2xbh2t.js → uninstall-9zbf4cwc.js} +6 -4
  55. package/build/{utils-jf52rmrb.js → utils-dh1t2r1e.js} +13 -10
  56. package/package.json +86 -88
  57. package/build/agent-4d8j2jsw.js +0 -278
  58. package/build/agent-z2s6h7n2.js +0 -19
  59. package/build/blackboxAgent-j9pczwym.js +0 -19
  60. package/build/cli-03z6pswp.js +0 -1423
  61. package/build/cli-0fy9j5dw.js +0 -61
  62. package/build/cli-asyas1xb.js +0 -110
  63. package/build/cli-dj1dgw2n.js +0 -190
  64. package/build/cli-q7r2sth7.js +0 -103
  65. package/build/cli-vkwch0bc.js +0 -1207
  66. package/build/cli-wr7g9qcr.js +0 -645
  67. package/build/index-bz6f8jry.js +0 -32
  68. package/build/pentest-mfm4hake.js +0 -29
  69. package/build/projects-qk22qcbt.js +0 -35
  70. package/build/threatModel-xfvc6cch.js +0 -67
@@ -1,16 +1,17 @@
1
1
  import {
2
2
  OffensiveSecurityAgent,
3
+ isMemoryEnabled,
3
4
  readPlan
4
- } from "./cli-3w2syxpv.js";
5
+ } from "./cli-wfmdch3r.js";
5
6
  import {
6
7
  exports_external1 as exports_external,
7
8
  init_zod
8
- } from "./cli-0ghkg3w6.js";
9
+ } from "./cli-e6rgwtpb.js";
9
10
 
10
11
  // src/core/agents/specialized/pentest/agent.ts
12
+ init_zod();
11
13
  import { existsSync, readdirSync, readFileSync } from "fs";
12
14
  import { join } from "path";
13
- init_zod();
14
15
  var ObjectiveResultSchema = exports_external.object({
15
16
  objective: exports_external.string().describe("The objective text, exactly as it was provided or a refined version"),
16
17
  completed: exports_external.boolean().describe("true if this objective was thoroughly tested and can be considered done for this endpoint; false if it still needs further testing in future runs"),
@@ -42,11 +43,14 @@ class TargetedPentestAgent extends OffensiveSecurityAgent {
42
43
  messages,
43
44
  context,
44
45
  environmentVariables,
45
- enableThinking
46
+ enableThinking,
47
+ openAIReasoningEffort,
48
+ role = "orchestrator",
49
+ browserSession
46
50
  } = opts;
47
51
  super({
48
- system: buildPentestSystemPrompt(session),
49
- prompt: buildPentestPrompt(target, objectives, session, findingsRegistry, context, environmentVariables ? Object.keys(environmentVariables) : undefined, subagentId),
52
+ system: buildPentestSystemPrompt(session, role),
53
+ prompt: buildPentestPrompt(target, objectives, session, findingsRegistry, context, environmentVariables ? Object.keys(environmentVariables) : undefined, subagentId, role),
50
54
  model,
51
55
  session,
52
56
  target,
@@ -61,29 +65,9 @@ class TargetedPentestAgent extends OffensiveSecurityAgent {
61
65
  messages,
62
66
  environmentVariables,
63
67
  enableThinking,
64
- activeTools: [
65
- "execute_command",
66
- "http_request",
67
- "document_vulnerability",
68
- "response",
69
- "browser_navigate",
70
- "browser_snapshot",
71
- "browser_screenshot",
72
- "browser_click",
73
- "browser_fill",
74
- "email_list_inboxes",
75
- "email_list_messages",
76
- "email_search_messages",
77
- "email_get_message",
78
- "send_email",
79
- "list_memories",
80
- "get_memory",
81
- "add_memory",
82
- "web_search",
83
- "get_page",
84
- "checkpoint_state",
85
- ...session.config?.taskDriven ? ["create_task", "update_task", "list_tasks"] : []
86
- ],
68
+ openAIReasoningEffort,
69
+ browserSession,
70
+ activeTools: buildPentestActiveTools(role, session),
87
71
  responseSchema: PentestResponseSchema,
88
72
  resolveResult: async (streamResult) => {
89
73
  let objectiveResults;
@@ -125,8 +109,14 @@ var SECTION_DOCUMENT_VULNERABILITY_RULES = `CRITICAL — document_vulnerability
125
109
  - POC must exit 0 on success (vulnerability confirmed), non-zero on failure
126
110
  - If the POC fails or the validation judge rejects the finding, revise your pocContent and call again
127
111
  - NEVER use document_vulnerability for: positive observations (e.g. "authentication is working correctly"), testing limitations (e.g. "rate limiting prevented testing"), informational notes, infrastructure observations, or anything that is not a real exploitable vulnerability
112
+ - Before calling document_vulnerability, be able to state: the exploit path, the material security impact, the non-public asset or abuse path affected, and why common false-positive traps do not apply
128
113
  - If you were unable to confirm or exploit a vulnerability, do NOT document it — instead describe it in your final response summary
129
114
  - It is completely acceptable to finish a test with zero documented vulnerabilities if none were found`;
115
+ var SECTION_MATERIALITY_GUIDANCE = `Materiality & False-Positive Discipline:
116
+ - Preserve the user's stated objective as the organizing constraint. Do not turn a narrow false-positive check into a broad vulnerability hunt unless the user explicitly asked for a full sweep.
117
+ - Public or intentionally unauthenticated endpoints are not vulnerable merely because browsers can request them, CORS is absent/irrelevant, or rate limiting is not visible. Document only if you prove access to non-public data, credentialed cross-origin abuse, state change, denial of service, account takeover, or another material exploit chain.
118
+ - Missing HTTPS, missing security headers, verbose generic errors, public identifiers, demo/training-app behavior, and best-practice gaps are not findings by themselves. Summarize them as non-findings unless you prove material exploitability.
119
+ - A completed objective may have zero findings. If testing shows materiality is not met, finish with a clear no-finding explanation instead of probing unrelated endpoints.`;
130
120
  var SECTION_RATE_LIMITING = `Rate Limiting:
131
121
  - If you encounter rate limiting (HTTP 429), use exponential backoff before retrying
132
122
  - Use execute_command with "sleep N" where N increases: 5 seconds, then 30 seconds, then 120 seconds
@@ -228,6 +218,8 @@ ${SECTION_RATE_LIMITING_TESTING}
228
218
 
229
219
  ${SECTION_DOCUMENT_VULNERABILITY_RULES}
230
220
 
221
+ ${SECTION_MATERIALITY_GUIDANCE}
222
+
231
223
  ${SECTION_POC_PORTABILITY}
232
224
 
233
225
  ${SECTION_BROWSER_INTERACTION}
@@ -259,6 +251,7 @@ Guidelines:
259
251
  - Always call list_memories first to check for relevant knowledge before planning your approach
260
252
  - State your objectives and plan before executing any attack tools
261
253
  - When you confirm a vulnerability that provides internal access, think through what lies behind it and actively explore through the vulnerability to maximize impact
254
+ - Preserve the supplied objectives. Do not broaden a narrow objective into unrelated endpoint testing unless a confirmed finding requires a specific sibling endpoint to demonstrate impact.
262
255
  - Be methodical and thorough — test one payload at a time and observe the response
263
256
  - Use execute_command for crafting/running exploit scripts and http_request for targeted web tests
264
257
  - Call document_vulnerability with your POC script inline (in pocContent) to exploit, validate, and document in one step — you can document multiple vulnerabilities in a single run
@@ -269,6 +262,18 @@ Guidelines:
269
262
 
270
263
  ${SECTION_AUTHENTICATION}
271
264
 
265
+ ${SECTION_DOCUMENT_VULNERABILITY_RULES}
266
+
267
+ ${SECTION_MATERIALITY_GUIDANCE}
268
+
269
+ ${SECTION_RATE_LIMITING}
270
+
271
+ ${SECTION_RATE_LIMITING_TESTING}
272
+
273
+ ${SECTION_CREDENTIAL_DISCOVERY}
274
+
275
+ ${SECTION_SECURITY_HEADERS_CORS}
276
+
272
277
  ${SECTION_STATE_CHECKPOINTING}`;
273
278
  var PENTEST_SYSTEM_PROMPT_TASK_DRIVEN = `You are an expert penetration tester performing a targeted security assessment.
274
279
 
@@ -291,6 +296,8 @@ ${SECTION_TASK_COVERAGE_RULES}
291
296
 
292
297
  ${SECTION_DOCUMENT_VULNERABILITY_RULES}
293
298
 
299
+ ${SECTION_MATERIALITY_GUIDANCE}
300
+
294
301
  ${SECTION_POC_PORTABILITY}
295
302
 
296
303
  ${SECTION_RATE_LIMITING}
@@ -327,6 +334,8 @@ ${SECTION_TASK_COVERAGE_RULES}
327
334
 
328
335
  ${SECTION_DOCUMENT_VULNERABILITY_RULES}
329
336
 
337
+ ${SECTION_MATERIALITY_GUIDANCE}
338
+
330
339
  ${SECTION_POC_PORTABILITY}
331
340
 
332
341
  ${SECTION_RATE_LIMITING}
@@ -342,7 +351,10 @@ ${SECTION_CREDENTIAL_DISCOVERY}
342
351
  ${SECTION_SECURITY_HEADERS_CORS}
343
352
 
344
353
  ${SECTION_STATE_CHECKPOINTING}`;
345
- function buildPentestSystemPrompt(session) {
354
+ function buildPentestSystemPrompt(session, role = "orchestrator") {
355
+ if (role === "orchestrator") {
356
+ return PENTEST_SYSTEM_PROMPT_ORCHESTRATOR;
357
+ }
346
358
  const taskDriven = session.config?.taskDriven ?? false;
347
359
  const exfilMode = session.config?.exfilMode ?? false;
348
360
  if (taskDriven) {
@@ -350,10 +362,49 @@ function buildPentestSystemPrompt(session) {
350
362
  }
351
363
  return exfilMode ? PENTEST_SYSTEM_PROMPT_EXFIL : PENTEST_SYSTEM_PROMPT_BASE;
352
364
  }
353
- function buildPentestPrompt(target, objectives, session, findingsRegistry, context, envVarNames, subagentId) {
365
+ var SECTION_ORCHESTRATOR_DELEGATION = `Sub-Agent Delegation Rules:
366
+ - You DO NOT call document_vulnerability directly. Findings are documented by the workers you spawn.
367
+ - You DO NOT execute deep exploitation attempts yourself. Your tools (execute_command, http_request, browser_*) are for INITIAL RECON only — fingerprinting, sanity-checking the target, observing baseline behavior.
368
+ - Each spawn_pentest_agent call MUST cover exactly ONE objective from the assignment, plus optional supporting context. Do not batch multiple objectives into one spawn — the UI surfaces each spawn as its own timeline, and per-objective spawns give each worker a clean, focused context window.
369
+ - Target URL propagation: the \`target\` you received already encodes the specific domain + endpoint path the caller wants tested (e.g. https://example.com/api/users/{id}). Forward that EXACT URL into every spawn_pentest_agent call's \`target\` field. Do NOT strip the path back to a bare domain, do NOT swap the path for some other endpoint, and do NOT invent new endpoints — workers do not perform endpoint discovery, they deeply test the path they are given. The only time a worker's \`target\` should differ from yours is when recon surfaced a closely-related sibling endpoint on the same host that belongs to a follow-up objective; even then, send the full URL with the new path, not a bare host.
370
+ - After all per-objective workers complete, spawn ONE final "chain & explore" worker. Pass it: a brief summary of what earlier workers found (or didn't find), plus any anomalous behaviors observed during recon. Its job is to chain confirmed findings into higher-impact attacks AND probe for additional vulnerabilities that fall outside the original objective list. Send it the same endpoint URL unless an earlier worker confirmed a vulnerability on a sibling endpoint that the chain depends on — in which case pass that sibling's full URL.
371
+ - Do not call spawn_pentest_agent before stating your plan in plain text. The plan must be visible to the user as an assistant message, not just inferred from tool calls.
372
+ - Cloned browser session — every worker you spawn gets its OWN isolated Chromium, seeded at spawn time with a snapshot of your current cookies and per-origin localStorage. Practical implications:
373
+ - If authentication is required, log in ONCE in YOUR browser during recon. Every worker you spawn after that will start already authenticated — do NOT instruct workers to re-authenticate. Workers that authenticate themselves only authenticate their own cloned browser, so re-auth wastes turns.
374
+ - Worker browser actions are LOCAL to the worker's clone. A worker's navigations, form fills, \`browser_evaluate\` mutations, and \`localStorage\`/\`sessionStorage\` writes are NOT visible to you or to sibling workers. So workers can fire payloads, trigger alerts, or clobber DOM state without breaking each other or you.
375
+ - Conversely, if you want state to be visible to the next worker, set it up in YOUR browser before spawning. Each worker sees the snapshot of your browser AT THE MOMENT YOU CALL spawn_pentest_agent — later mutations in your browser propagate to subsequent spawns but not to in-flight workers.
376
+ - Worker sessions are torn down when the worker finishes, so any cookies the worker acquired during testing (post-auth flows, OAuth callbacks, etc.) are discarded. If a worker discovers a useful login flow, summarize the credentials in your final response or repeat the flow in YOUR browser before the next spawn.`;
377
+ var PENTEST_SYSTEM_PROMPT_ORCHESTRATOR = `You are the LEAD penetration tester coordinating a focused security assessment.
378
+
379
+ You are given a specific target and a set of objectives. You do NOT directly attempt exploitation. Instead, you plan, recon, then dispatch focused worker sub-agents — one per objective — each of which deeply tests its assigned objective. After fan-out, you dispatch a final worker to chain findings and explore beyond the original objectives.
380
+
381
+ ${SECTION_SOURCE_CODE_PROHIBITION}
382
+
383
+ Your methodology:
384
+ 1. ORIENT — Call list_memories to review any existing knowledge from previous engagements (target-specific notes, successful techniques, false positive patterns, technology context). Use what you find to shape your plan.
385
+ 2. PLAN — State the objectives you have been given and outline your high-level orchestration plan in plain text BEFORE any tool calls. For each objective, briefly state what attack class the worker should focus on (e.g. "Objective 1 → SQL injection, focus on /api/users id parameter"). Output this plan as a text message — not as a tool call.
386
+ 3. RECON — Perform LIGHT initial reconnaissance to confirm the target is reachable and understand baseline behavior. Use http_request for a handful of probes, browser_navigate + browser_snapshot to see the surface, and execute_command sparingly. Do NOT begin exploitation here — that is the workers' job. Note any anomalies (unusual error responses, exposed headers, framework fingerprints, surprising endpoint behavior) for the final exploratory worker.
387
+ 4. FAN OUT — For EACH objective, call spawn_pentest_agent EXACTLY ONCE. Each spawn dispatches a focused worker that will perform the full PLAN → VERIFY → PREPARE → TEST → EXPLOIT → DOCUMENT loop on its objective. Workers write findings to the shared findings registry — you do NOT need to forward findings between them.
388
+ 5. CHAIN & EXPLORE — After all per-objective workers complete, call spawn_pentest_agent ONE FINAL TIME with a synthesized objective that:
389
+ a. Summarizes what earlier workers confirmed or ruled out (so the exploratory worker doesn't re-do their work).
390
+ b. Calls out any anomalies you noticed during recon that nobody investigated.
391
+ c. Directs the worker to chain confirmed findings into higher-impact attack chains AND probe for additional vulnerabilities outside the original objective list (e.g. business logic flaws, race conditions, secondary injection points).
392
+ 6. LEARN — Use add_memory to persist reusable learnings from this engagement (target behaviors, effective techniques, false positive patterns, technology fingerprints).
393
+ 7. FINISH — Call the response tool with your final summary. Compile objectiveResults from what each worker reported: mark each objective as completed (vulnerability confirmed and documented by a worker, OR conclusively ruled out by a worker), or incomplete (worker failed or could not finish). Include any new objectives discovered by the exploratory worker.
394
+
395
+ ${SECTION_ORCHESTRATOR_DELEGATION}
396
+
397
+ ${SECTION_AUTHENTICATION}
398
+
399
+ ${SECTION_MATERIALITY_GUIDANCE}
400
+
401
+ ${SECTION_BROWSER_INTERACTION}
402
+
403
+ ${SECTION_STATE_CHECKPOINTING}`;
404
+ function buildPentestPrompt(target, objectives, session, findingsRegistry, context, envVarNames, subagentId, role = "orchestrator") {
354
405
  const sessionRootPath = session.rootPath;
355
406
  const exfilMode = session.config?.exfilMode ?? false;
356
- const taskDriven = session.config?.taskDriven ?? false;
407
+ const taskDriven = role === "orchestrator" ? false : session.config?.taskDriven ?? false;
357
408
  const outcomeGuidance = session.config?.outcomeGuidance;
358
409
  const objectiveList = objectives.map((o, i) => `${i + 1}. ${o}`).join(`
359
410
  `);
@@ -450,7 +501,7 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
450
501
  1. Call list_memories to review prior knowledge
451
502
  2. Call create_task for each objective × technique combination
452
503
  3. Work through tasks: update_task(in_progress) → test → update_task(completed|failed)
453
- 4. Call document_vulnerability for confirmed vulnerabilities
504
+ 4. Call document_vulnerability for confirmed vulnerabilities only when material exploitability is proven
454
505
  5. When a vulnerability provides internal access, create_task for pivoting and extraction
455
506
  6. Call list_tasks to verify all tasks are completed or failed
456
507
  7. Use add_memory to save reusable learnings
@@ -465,7 +516,20 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
465
516
  8. Call the response tool only when all tasks are terminal
466
517
 
467
518
  Do NOT discover or enumerate other endpoints or services. Focus exclusively on the target and objectives above.`;
468
- const instructions = taskDriven ? taskDrivenInstructions : exfilMode ? `## Goal
519
+ const orchestratorInstructions = `## Instructions
520
+ 1. Call list_memories to review any prior knowledge relevant to this target or engagement.
521
+ 2. State the objectives and outline your orchestration plan in plain text BEFORE any tool calls — one bullet per objective, briefly naming the attack class each worker should focus on.
522
+ 3. Perform LIGHT initial recon (a handful of http_request probes, browser_navigate + browser_snapshot to see the surface). Do NOT begin exploitation here — that is the workers' job. Note any anomalies you observe for the final exploratory worker.
523
+ 4. Call spawn_pentest_agent EXACTLY ONCE PER OBJECTIVE. For every spawn:
524
+ - Set \`target\` to the FULL URL from the assignment above (domain + endpoint path) — pass it through verbatim. Do not strip the path or rewrite the host. Workers do not perform endpoint discovery; they deeply test the path you hand them.
525
+ - Pass the matching objective in the \`objectives\` array (a single-element array).
526
+ - Use the \`context\` field to forward any recon insights specific to that objective. If your earlier browser actions left state the worker should know about (already logged in as X, certain modal already dismissed), call that out in \`context\` — each worker is seeded with a snapshot of YOUR browser's cookies and localStorage at the moment of the spawn call.
527
+ 5. After all per-objective workers complete, call spawn_pentest_agent ONE FINAL TIME with a synthesized "chain & explore" objective: summarize what earlier workers confirmed/ruled out, call out unaddressed anomalies, and direct the worker to chain confirmed findings AND probe for additional vulnerabilities outside the original objective list. Send it the same endpoint URL as your assignment unless an earlier worker's confirmed finding on a sibling endpoint is what makes the chain possible.
528
+ 6. Use add_memory to save reusable learnings from this engagement.
529
+ 7. Call the response tool with your final summary. Compile objectiveResults from worker reports.
530
+
531
+ Do NOT call document_vulnerability — that is the workers' job. Do NOT discover or enumerate other endpoints or services beyond the assigned target.`;
532
+ const instructions = role === "orchestrator" ? orchestratorInstructions : taskDriven ? taskDrivenInstructions : exfilMode ? `## Goal
469
533
  Your target is a flag with the format FLAG{...}. Locate and extract it.
470
534
 
471
535
  ## Instructions
@@ -474,10 +538,11 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
474
538
  3. Verify the target endpoint is reachable and understand its baseline behavior
475
539
  4. For each objective, research and craft targeted payloads appropriate to the technology
476
540
  5. Test systematically — vary payloads, encoding, and bypass techniques
477
- 6. When a vulnerability is confirmed, call document_vulnerability with your POC script inline (pocContent) to exploit, validate, and document the finding
541
+ 6. When a vulnerability is confirmed and material exploitability is proven, call document_vulnerability with your POC script inline (pocContent) to exploit, validate, and document the finding
478
542
  7. When a vulnerability provides internal access, pivot through it to discover reachable services and extract sensitive data
479
543
  8. Use add_memory to save reusable learnings from this engagement (effective techniques, target behaviors, technology details)
480
- 9. After testing ALL objectives and extracting the flag, call the response tool with your final summary. Do NOT call response until you have completed all testing and extraction.` : `## Instructions
544
+ 9. Preserve the supplied objectives. Do not broaden a scoped materiality check into unrelated endpoint testing unless a confirmed finding requires it to demonstrate impact.
545
+ 10. After testing ALL objectives and extracting the flag, call the response tool with your final summary. Do NOT call response until you have completed all testing and extraction.` : `## Instructions
481
546
  1. Call list_memories to review any prior knowledge relevant to this target or engagement
482
547
  2. State the objectives and outline your testing plan — describe which techniques and payloads you will use for each objective before executing any tools
483
548
  3. Verify the target endpoint is reachable and understand its baseline behavior
@@ -513,6 +578,42 @@ ${objectiveList}
513
578
  ${outcomeSection}
514
579
  ${instructions}`;
515
580
  }
581
+ var WORKER_RECON_TOOLS = [
582
+ "execute_command",
583
+ "http_request",
584
+ "browser_navigate",
585
+ "browser_snapshot",
586
+ "browser_screenshot",
587
+ "browser_click",
588
+ "browser_fill"
589
+ ];
590
+ var SHARED_PENTEST_TOOLS = [
591
+ "response",
592
+ "email_list_inboxes",
593
+ "email_list_messages",
594
+ "email_search_messages",
595
+ "email_get_message",
596
+ "send_email",
597
+ "list_memories",
598
+ "get_memory",
599
+ "add_memory",
600
+ "web_search",
601
+ "get_page",
602
+ "checkpoint_state"
603
+ ];
604
+ var MEMORY_TOOL_NAMES = ["add_memory", "list_memories", "get_memory"];
605
+ function buildPentestActiveTools(role, session) {
606
+ const tools = role === "orchestrator" ? [...WORKER_RECON_TOOLS, ...SHARED_PENTEST_TOOLS, "spawn_pentest_agent"] : [
607
+ ...WORKER_RECON_TOOLS,
608
+ "document_vulnerability",
609
+ ...SHARED_PENTEST_TOOLS,
610
+ ...session.config?.taskDriven ? ["create_task", "update_task", "list_tasks"] : []
611
+ ];
612
+ if (!isMemoryEnabled()) {
613
+ return tools.filter((t) => !MEMORY_TOOL_NAMES.includes(t));
614
+ }
615
+ return tools;
616
+ }
516
617
  function loadFindings(findingsPath) {
517
618
  if (!existsSync(findingsPath)) {
518
619
  return [];
@@ -526,5 +627,4 @@ function loadFindings(findingsPath) {
526
627
  }
527
628
  }).filter((f) => f !== null);
528
629
  }
529
-
530
- export { TargetedPentestAgent, buildPentestSystemPrompt };
630
+ export { TargetedPentestAgent, buildPentestSystemPrompt, buildPentestPrompt, buildPentestActiveTools };
@@ -1,9 +1,13 @@
1
1
  import {
2
2
  OffensiveSecurityAgent
3
- } from "./cli-3w2syxpv.js";
3
+ } from "./cli-wfmdch3r.js";
4
4
  import {
5
+ init_dist,
5
6
  stepCountIs
6
- } from "./cli-k0tckznm.js";
7
+ } from "./cli-6p7d2k55.js";
8
+
9
+ // src/core/agents/specialized/codeAgent/agent.ts
10
+ init_dist();
7
11
 
8
12
  // src/core/agents/specialized/codeAgent/prompts.ts
9
13
  var CODE_AGENT_SYSTEM_PROMPT = `You are an expert coding agent with direct filesystem access. You will be given a specific objective — focus exclusively on completing it.
@@ -36,6 +40,17 @@ Run shell commands when needed.
36
40
  - Use for any task that benefits from shell access: build tools, git operations, package managers, linters, etc.
37
41
  - Useful for running scripts, checking dependencies, inspecting git history, or any CLI tool.
38
42
 
43
+ ## Whitebox security tools (when investigating vulnerabilities)
44
+ - **profile_codebase** — summarize languages, manifests, scanners, and repo shape; full JSON is written as a session artifact.
45
+ - **query_whitebox_catalog** — pull focused methodology slices (sinks, scanners, review passes) instead of loading a whole playbook into context.
46
+ - **run_code_query** — batched rg / ast-grep / comby searches with bounded output and artifact logs.
47
+ - **run_whitebox_scan** — run installed scanners when available; triage results before treating them as confirmed issues.
48
+ - **create_whitebox_candidate / update_whitebox_candidate / list_whitebox_candidates** — track hypotheses with explicit state and evidence.
49
+ - **start_whitebox_job / poll_whitebox_job / stop_whitebox_job** — bounded long-running jobs (builds, fuzzers) with logs under the session.
50
+ - **read_whitebox_artifact** — read \`logs/whitebox/\` or \`scratchpad/whitebox/\` artifact paths returned by other tools (or legacy job logs by id).
51
+
52
+ Prefer catalog + code_query for sink-first work; use candidates to separate unverified ideas from \`document_vulnerability\`. Do not modify the target repo unless the operator asked you to — keep harnesses and scratch output in the session scratchpad.
53
+
39
54
  # Working Approach
40
55
  1. **Orient first** — list files and read key entry points to understand the structure before diving in.
41
56
  2. **Search, then read** — use grep to locate what you need, then read the relevant files.
@@ -64,6 +79,7 @@ class CodeAgent extends OffensiveSecurityAgent {
64
79
  attackSurfaceRegistry,
65
80
  excludeTools,
66
81
  enableThinking,
82
+ openAIReasoningEffort,
67
83
  projectThreatModel
68
84
  } = opts;
69
85
  let activeTools = [
@@ -71,6 +87,17 @@ class CodeAgent extends OffensiveSecurityAgent {
71
87
  "list_files",
72
88
  "grep",
73
89
  "execute_command",
90
+ "profile_codebase",
91
+ "query_whitebox_catalog",
92
+ "run_code_query",
93
+ "run_whitebox_scan",
94
+ "create_whitebox_candidate",
95
+ "update_whitebox_candidate",
96
+ "list_whitebox_candidates",
97
+ "start_whitebox_job",
98
+ "poll_whitebox_job",
99
+ "stop_whitebox_job",
100
+ "read_whitebox_artifact",
74
101
  "http_request",
75
102
  "document_app",
76
103
  "document_endpoint",
@@ -97,6 +124,7 @@ class CodeAgent extends OffensiveSecurityAgent {
97
124
  subagentId,
98
125
  attackSurfaceRegistry,
99
126
  enableThinking,
127
+ openAIReasoningEffort,
100
128
  projectThreatModel,
101
129
  stopWhen: stopWhen ?? stepCountIs(1e4),
102
130
  activeTools,