npm - @pensar/apex - Versions diffs - 1.8.0 → 1.8.2-canary.fb75c486 - Mend

@pensar/apex 1.8.0 → 1.8.2-canary.fb75c486

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (70)

package/README.md +11 -0
package/build/agent-6dj1qm50.js +221 -0
package/build/agent-6xr8vpgm.js +28 -0
package/build/agent-x1htbpe3.js +22 -0
package/build/apps-t0gmwc7z.js +446 -0
package/build/{auth-dxjgy41e.js → auth-p4r1m7xq.js} +50 -13
package/build/authentication-je2b0c3w.js +22 -0
package/build/blackboxAgent-a4jnt0y5.js +22 -0
package/build/{blackboxPentest-8ps4yvbk.js → blackboxPentest-b5741n3h.js} +19 -17
package/build/{cli-y61d9433.js → cli-0tnv1vkp.js} +138 -38
package/build/{cli-jg7r7y5n.js → cli-4xb21y6g.js} +30 -2
package/build/{cli-k0tckznm.js → cli-6p7d2k55.js} +39701 -31695
package/build/cli-87zakjb2.js +17 -0
package/build/{authentication-e30mfzbe.js → cli-8frjr68r.js} +11 -18
package/build/cli-8xknm7d9.js +204 -0
package/build/cli-9egg9azd.js +22 -0
package/build/cli-9fsre5pt.js +0 -0
package/build/cli-abbka8n3.js +501 -0
package/build/{cli-3y0dgy56.js → cli-c8131c4q.js} +2 -2
package/build/cli-e08r86zk.js +24 -0
package/build/{cli-0ghkg3w6.js → cli-e6rgwtpb.js} +19950 -18556
package/build/cli-g5h24ny8.js +197 -0
package/build/{cli-nr1cjfr9.js → cli-gtcd5c3f.js} +26 -7
package/build/cli-k0730f59.js +52 -0
package/build/{cli-tp1tqn3k.js → cli-mswm4k81.js} +1 -1
package/build/{cli-m788e4f3.js → cli-q8dfq25x.js} +584 -33
package/build/cli-rhry8mat.js +7213 -0
package/build/{cli-g8t710ew.js → cli-ryy39d77.js} +253 -250
package/build/cli-s1nckt4k.js +20 -0
package/build/{cli-k4hrygff.js → cli-v9ds4jb8.js} +9 -5
package/build/{cli-dqt80sw3.js → cli-w5990vr6.js} +199 -68
package/build/{cli-3w2syxpv.js → cli-wfmdch3r.js} +102695 -104816
package/build/cli.js +351 -280
package/build/config-3bvtf3j8.js +188 -0
package/build/{doctor-8tva8j99.js → doctor-2bkpddws.js} +1 -1
package/build/{fixes-q5bhgxhc.js → fixes-60k3ts71.js} +23 -4
package/build/{index-pfee23kv.js → index-0gp3x2r8.js} +19306 -18954
package/build/index-861hkebg.js +12 -0
package/build/{index-y5xpp21a.js → index-acc00eq4.js} +77 -108
package/build/index-acdgrqa0.js +36 -0
package/build/{index-e898mdyh.js → index-cfberehw.js} +4 -2
package/build/{index-wfeb2gcc.js → index-hxn4rk8f.js} +9 -11
package/build/{index-dw1xbhfn.js → index-vc29b21w.js} +161 -26
package/build/index-vwt27stc.js +184 -0
package/build/{issues-qbmdneej.js → issues-1bynat5q.js} +33 -9
package/build/{logs-xm5vbymy.js → logs-e78vx2dy.js} +23 -4
package/build/{main-3d7dfdvs.js → main-3zneyg7p.js} +93 -17
package/build/{offesecAgent-re6kt2ff.js → offesecAgent-w9m0svwk.js} +14 -11
package/build/parse-15kqmy2v.js +207 -0
package/build/pentest-gpvqpvmd.js +31 -0
package/build/{pentests-e3rj5845.js → pentests-nq7wa8yb.js} +36 -17
package/build/{targetedPentest-fs0v570s.js → targetedPentest-fjxqn089.js} +15 -12
package/build/threatModel-9yqx7d7x.js +29 -0
package/build/{uninstall-qb2xbh2t.js → uninstall-9zbf4cwc.js} +6 -4
package/build/{utils-jf52rmrb.js → utils-dh1t2r1e.js} +13 -10
package/package.json +86 -88
package/build/agent-4d8j2jsw.js +0 -278
package/build/agent-z2s6h7n2.js +0 -19
package/build/blackboxAgent-j9pczwym.js +0 -19
package/build/cli-03z6pswp.js +0 -1423
package/build/cli-0fy9j5dw.js +0 -61
package/build/cli-asyas1xb.js +0 -110
package/build/cli-dj1dgw2n.js +0 -190
package/build/cli-q7r2sth7.js +0 -103
package/build/cli-vkwch0bc.js +0 -1207
package/build/cli-wr7g9qcr.js +0 -645
package/build/index-bz6f8jry.js +0 -32
package/build/pentest-mfm4hake.js +0 -29
package/build/projects-qk22qcbt.js +0 -35
package/build/threatModel-xfvc6cch.js +0 -67

package/build/{cli-y61d9433.js → cli-0tnv1vkp.js} RENAMED

@@ -1,16 +1,17 @@
 import {
   OffensiveSecurityAgent,
+  isMemoryEnabled,
   readPlan
-} from "./cli-3w2syxpv.js";
+} from "./cli-wfmdch3r.js";
 import {
   exports_external1 as exports_external,
   init_zod
-} from "./cli-0ghkg3w6.js";
+} from "./cli-e6rgwtpb.js";
 // src/core/agents/specialized/pentest/agent.ts
+init_zod();
 import { existsSync, readdirSync, readFileSync } from "fs";
 import { join } from "path";
-init_zod();
 var ObjectiveResultSchema = exports_external.object({
   objective: exports_external.string().describe("The objective text, exactly as it was provided or a refined version"),
   completed: exports_external.boolean().describe("true if this objective was thoroughly tested and can be considered done for this endpoint; false if it still needs further testing in future runs"),
@@ -42,11 +43,14 @@ class TargetedPentestAgent extends OffensiveSecurityAgent {
       messages,
       context,
       environmentVariables,
-      enableThinking
+      enableThinking,
+      openAIReasoningEffort,
+      role = "orchestrator",
+      browserSession
     } = opts;
     super({
-      system: buildPentestSystemPrompt(session),
-      prompt: buildPentestPrompt(target, objectives, session, findingsRegistry, context, environmentVariables ? Object.keys(environmentVariables) : undefined, subagentId),
+      system: buildPentestSystemPrompt(session, role),
+      prompt: buildPentestPrompt(target, objectives, session, findingsRegistry, context, environmentVariables ? Object.keys(environmentVariables) : undefined, subagentId, role),
       model,
       session,
       target,
@@ -61,29 +65,9 @@ class TargetedPentestAgent extends OffensiveSecurityAgent {
       messages,
       environmentVariables,
       enableThinking,
-      activeTools: [
-        "execute_command",
-        "http_request",
-        "document_vulnerability",
-        "response",
-        "browser_navigate",
-        "browser_snapshot",
-        "browser_screenshot",
-        "browser_click",
-        "browser_fill",
-        "email_list_inboxes",
-        "email_list_messages",
-        "email_search_messages",
-        "email_get_message",
-        "send_email",
-        "list_memories",
-        "get_memory",
-        "add_memory",
-        "web_search",
-        "get_page",
-        "checkpoint_state",
-        ...session.config?.taskDriven ? ["create_task", "update_task", "list_tasks"] : []
-      ],
+      openAIReasoningEffort,
+      browserSession,
+      activeTools: buildPentestActiveTools(role, session),
       responseSchema: PentestResponseSchema,
       resolveResult: async (streamResult) => {
         let objectiveResults;
@@ -125,8 +109,14 @@ var SECTION_DOCUMENT_VULNERABILITY_RULES = `CRITICAL — document_vulnerability
 - POC must exit 0 on success (vulnerability confirmed), non-zero on failure
 - If the POC fails or the validation judge rejects the finding, revise your pocContent and call again
 - NEVER use document_vulnerability for: positive observations (e.g. "authentication is working correctly"), testing limitations (e.g. "rate limiting prevented testing"), informational notes, infrastructure observations, or anything that is not a real exploitable vulnerability
+- Before calling document_vulnerability, be able to state: the exploit path, the material security impact, the non-public asset or abuse path affected, and why common false-positive traps do not apply
 - If you were unable to confirm or exploit a vulnerability, do NOT document it — instead describe it in your final response summary
 - It is completely acceptable to finish a test with zero documented vulnerabilities if none were found`;
+var SECTION_MATERIALITY_GUIDANCE = `Materiality & False-Positive Discipline:
+- Preserve the user's stated objective as the organizing constraint. Do not turn a narrow false-positive check into a broad vulnerability hunt unless the user explicitly asked for a full sweep.
+- Public or intentionally unauthenticated endpoints are not vulnerable merely because browsers can request them, CORS is absent/irrelevant, or rate limiting is not visible. Document only if you prove access to non-public data, credentialed cross-origin abuse, state change, denial of service, account takeover, or another material exploit chain.
+- Missing HTTPS, missing security headers, verbose generic errors, public identifiers, demo/training-app behavior, and best-practice gaps are not findings by themselves. Summarize them as non-findings unless you prove material exploitability.
+- A completed objective may have zero findings. If testing shows materiality is not met, finish with a clear no-finding explanation instead of probing unrelated endpoints.`;
 var SECTION_RATE_LIMITING = `Rate Limiting:
 - If you encounter rate limiting (HTTP 429), use exponential backoff before retrying
 - Use execute_command with "sleep N" where N increases: 5 seconds, then 30 seconds, then 120 seconds
@@ -228,6 +218,8 @@ ${SECTION_RATE_LIMITING_TESTING}
 ${SECTION_DOCUMENT_VULNERABILITY_RULES}
+${SECTION_MATERIALITY_GUIDANCE}
 ${SECTION_POC_PORTABILITY}
 ${SECTION_BROWSER_INTERACTION}
@@ -259,6 +251,7 @@ Guidelines:
 - Always call list_memories first to check for relevant knowledge before planning your approach
 - State your objectives and plan before executing any attack tools
 - When you confirm a vulnerability that provides internal access, think through what lies behind it and actively explore through the vulnerability to maximize impact
+- Preserve the supplied objectives. Do not broaden a narrow objective into unrelated endpoint testing unless a confirmed finding requires a specific sibling endpoint to demonstrate impact.
 - Be methodical and thorough — test one payload at a time and observe the response
 - Use execute_command for crafting/running exploit scripts and http_request for targeted web tests
 - Call document_vulnerability with your POC script inline (in pocContent) to exploit, validate, and document in one step — you can document multiple vulnerabilities in a single run
@@ -269,6 +262,18 @@ Guidelines:
 ${SECTION_AUTHENTICATION}
+${SECTION_DOCUMENT_VULNERABILITY_RULES}
+${SECTION_MATERIALITY_GUIDANCE}
+${SECTION_RATE_LIMITING}
+${SECTION_RATE_LIMITING_TESTING}
+${SECTION_CREDENTIAL_DISCOVERY}
+${SECTION_SECURITY_HEADERS_CORS}
 ${SECTION_STATE_CHECKPOINTING}`;
 var PENTEST_SYSTEM_PROMPT_TASK_DRIVEN = `You are an expert penetration tester performing a targeted security assessment.
@@ -291,6 +296,8 @@ ${SECTION_TASK_COVERAGE_RULES}
 ${SECTION_DOCUMENT_VULNERABILITY_RULES}
+${SECTION_MATERIALITY_GUIDANCE}
 ${SECTION_POC_PORTABILITY}
 ${SECTION_RATE_LIMITING}
@@ -327,6 +334,8 @@ ${SECTION_TASK_COVERAGE_RULES}
 ${SECTION_DOCUMENT_VULNERABILITY_RULES}
+${SECTION_MATERIALITY_GUIDANCE}
 ${SECTION_POC_PORTABILITY}
 ${SECTION_RATE_LIMITING}
@@ -342,7 +351,10 @@ ${SECTION_CREDENTIAL_DISCOVERY}
 ${SECTION_SECURITY_HEADERS_CORS}
 ${SECTION_STATE_CHECKPOINTING}`;
-function buildPentestSystemPrompt(session) {
+function buildPentestSystemPrompt(session, role = "orchestrator") {
+  if (role === "orchestrator") {
+    return PENTEST_SYSTEM_PROMPT_ORCHESTRATOR;
+  }
   const taskDriven = session.config?.taskDriven ?? false;
   const exfilMode = session.config?.exfilMode ?? false;
   if (taskDriven) {
@@ -350,10 +362,49 @@ function buildPentestSystemPrompt(session) {
   }
   return exfilMode ? PENTEST_SYSTEM_PROMPT_EXFIL : PENTEST_SYSTEM_PROMPT_BASE;
 }
-function buildPentestPrompt(target, objectives, session, findingsRegistry, context, envVarNames, subagentId) {
+var SECTION_ORCHESTRATOR_DELEGATION = `Sub-Agent Delegation Rules:
+- You DO NOT call document_vulnerability directly. Findings are documented by the workers you spawn.
+- You DO NOT execute deep exploitation attempts yourself. Your tools (execute_command, http_request, browser_*) are for INITIAL RECON only — fingerprinting, sanity-checking the target, observing baseline behavior.
+- Each spawn_pentest_agent call MUST cover exactly ONE objective from the assignment, plus optional supporting context. Do not batch multiple objectives into one spawn — the UI surfaces each spawn as its own timeline, and per-objective spawns give each worker a clean, focused context window.
+- Target URL propagation: the \`target\` you received already encodes the specific domain + endpoint path the caller wants tested (e.g. https://example.com/api/users/{id}). Forward that EXACT URL into every spawn_pentest_agent call's \`target\` field. Do NOT strip the path back to a bare domain, do NOT swap the path for some other endpoint, and do NOT invent new endpoints — workers do not perform endpoint discovery, they deeply test the path they are given. The only time a worker's \`target\` should differ from yours is when recon surfaced a closely-related sibling endpoint on the same host that belongs to a follow-up objective; even then, send the full URL with the new path, not a bare host.
+- After all per-objective workers complete, spawn ONE final "chain & explore" worker. Pass it: a brief summary of what earlier workers found (or didn't find), plus any anomalous behaviors observed during recon. Its job is to chain confirmed findings into higher-impact attacks AND probe for additional vulnerabilities that fall outside the original objective list. Send it the same endpoint URL unless an earlier worker confirmed a vulnerability on a sibling endpoint that the chain depends on — in which case pass that sibling's full URL.
+- Do not call spawn_pentest_agent before stating your plan in plain text. The plan must be visible to the user as an assistant message, not just inferred from tool calls.
+- Cloned browser session — every worker you spawn gets its OWN isolated Chromium, seeded at spawn time with a snapshot of your current cookies and per-origin localStorage. Practical implications:
+  - If authentication is required, log in ONCE in YOUR browser during recon. Every worker you spawn after that will start already authenticated — do NOT instruct workers to re-authenticate. Workers that authenticate themselves only authenticate their own cloned browser, so re-auth wastes turns.
+  - Worker browser actions are LOCAL to the worker's clone. A worker's navigations, form fills, \`browser_evaluate\` mutations, and \`localStorage\`/\`sessionStorage\` writes are NOT visible to you or to sibling workers. So workers can fire payloads, trigger alerts, or clobber DOM state without breaking each other or you.
+  - Conversely, if you want state to be visible to the next worker, set it up in YOUR browser before spawning. Each worker sees the snapshot of your browser AT THE MOMENT YOU CALL spawn_pentest_agent — later mutations in your browser propagate to subsequent spawns but not to in-flight workers.
+  - Worker sessions are torn down when the worker finishes, so any cookies the worker acquired during testing (post-auth flows, OAuth callbacks, etc.) are discarded. If a worker discovers a useful login flow, summarize the credentials in your final response or repeat the flow in YOUR browser before the next spawn.`;
+var PENTEST_SYSTEM_PROMPT_ORCHESTRATOR = `You are the LEAD penetration tester coordinating a focused security assessment.
+You are given a specific target and a set of objectives. You do NOT directly attempt exploitation. Instead, you plan, recon, then dispatch focused worker sub-agents — one per objective — each of which deeply tests its assigned objective. After fan-out, you dispatch a final worker to chain findings and explore beyond the original objectives.
+${SECTION_SOURCE_CODE_PROHIBITION}
+Your methodology:
+1. ORIENT — Call list_memories to review any existing knowledge from previous engagements (target-specific notes, successful techniques, false positive patterns, technology context). Use what you find to shape your plan.
+2. PLAN — State the objectives you have been given and outline your high-level orchestration plan in plain text BEFORE any tool calls. For each objective, briefly state what attack class the worker should focus on (e.g. "Objective 1 → SQL injection, focus on /api/users id parameter"). Output this plan as a text message — not as a tool call.
+3. RECON — Perform LIGHT initial reconnaissance to confirm the target is reachable and understand baseline behavior. Use http_request for a handful of probes, browser_navigate + browser_snapshot to see the surface, and execute_command sparingly. Do NOT begin exploitation here — that is the workers' job. Note any anomalies (unusual error responses, exposed headers, framework fingerprints, surprising endpoint behavior) for the final exploratory worker.
+4. FAN OUT — For EACH objective, call spawn_pentest_agent EXACTLY ONCE. Each spawn dispatches a focused worker that will perform the full PLAN → VERIFY → PREPARE → TEST → EXPLOIT → DOCUMENT loop on its objective. Workers write findings to the shared findings registry — you do NOT need to forward findings between them.
+5. CHAIN & EXPLORE — After all per-objective workers complete, call spawn_pentest_agent ONE FINAL TIME with a synthesized objective that:
+   a. Summarizes what earlier workers confirmed or ruled out (so the exploratory worker doesn't re-do their work).
+   b. Calls out any anomalies you noticed during recon that nobody investigated.
+   c. Directs the worker to chain confirmed findings into higher-impact attack chains AND probe for additional vulnerabilities outside the original objective list (e.g. business logic flaws, race conditions, secondary injection points).
+6. LEARN — Use add_memory to persist reusable learnings from this engagement (target behaviors, effective techniques, false positive patterns, technology fingerprints).
+7. FINISH — Call the response tool with your final summary. Compile objectiveResults from what each worker reported: mark each objective as completed (vulnerability confirmed and documented by a worker, OR conclusively ruled out by a worker), or incomplete (worker failed or could not finish). Include any new objectives discovered by the exploratory worker.
+${SECTION_ORCHESTRATOR_DELEGATION}
+${SECTION_AUTHENTICATION}
+${SECTION_MATERIALITY_GUIDANCE}
+${SECTION_BROWSER_INTERACTION}
+${SECTION_STATE_CHECKPOINTING}`;
+function buildPentestPrompt(target, objectives, session, findingsRegistry, context, envVarNames, subagentId, role = "orchestrator") {
   const sessionRootPath = session.rootPath;
   const exfilMode = session.config?.exfilMode ?? false;
-  const taskDriven = session.config?.taskDriven ?? false;
+  const taskDriven = role === "orchestrator" ? false : session.config?.taskDriven ?? false;
   const outcomeGuidance = session.config?.outcomeGuidance;
   const objectiveList = objectives.map((o, i) => `${i + 1}. ${o}`).join(`
 `);
@@ -450,7 +501,7 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
 1. Call list_memories to review prior knowledge
 2. Call create_task for each objective × technique combination
 3. Work through tasks: update_task(in_progress) → test → update_task(completed|failed)
-4. Call document_vulnerability for confirmed vulnerabilities
+4. Call document_vulnerability for confirmed vulnerabilities only when material exploitability is proven
 5. When a vulnerability provides internal access, create_task for pivoting and extraction
 6. Call list_tasks to verify all tasks are completed or failed
 7. Use add_memory to save reusable learnings
@@ -465,7 +516,20 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
 8. Call the response tool only when all tasks are terminal
 Do NOT discover or enumerate other endpoints or services. Focus exclusively on the target and objectives above.`;
-  const instructions = taskDriven ? taskDrivenInstructions : exfilMode ? `## Goal
+  const orchestratorInstructions = `## Instructions
+1. Call list_memories to review any prior knowledge relevant to this target or engagement.
+2. State the objectives and outline your orchestration plan in plain text BEFORE any tool calls — one bullet per objective, briefly naming the attack class each worker should focus on.
+3. Perform LIGHT initial recon (a handful of http_request probes, browser_navigate + browser_snapshot to see the surface). Do NOT begin exploitation here — that is the workers' job. Note any anomalies you observe for the final exploratory worker.
+4. Call spawn_pentest_agent EXACTLY ONCE PER OBJECTIVE. For every spawn:
+   - Set \`target\` to the FULL URL from the assignment above (domain + endpoint path) — pass it through verbatim. Do not strip the path or rewrite the host. Workers do not perform endpoint discovery; they deeply test the path you hand them.
+   - Pass the matching objective in the \`objectives\` array (a single-element array).
+   - Use the \`context\` field to forward any recon insights specific to that objective. If your earlier browser actions left state the worker should know about (already logged in as X, certain modal already dismissed), call that out in \`context\` — each worker is seeded with a snapshot of YOUR browser's cookies and localStorage at the moment of the spawn call.
+5. After all per-objective workers complete, call spawn_pentest_agent ONE FINAL TIME with a synthesized "chain & explore" objective: summarize what earlier workers confirmed/ruled out, call out unaddressed anomalies, and direct the worker to chain confirmed findings AND probe for additional vulnerabilities outside the original objective list. Send it the same endpoint URL as your assignment unless an earlier worker's confirmed finding on a sibling endpoint is what makes the chain possible.
+6. Use add_memory to save reusable learnings from this engagement.
+7. Call the response tool with your final summary. Compile objectiveResults from worker reports.
+Do NOT call document_vulnerability — that is the workers' job. Do NOT discover or enumerate other endpoints or services beyond the assigned target.`;
+  const instructions = role === "orchestrator" ? orchestratorInstructions : taskDriven ? taskDrivenInstructions : exfilMode ? `## Goal
 Your target is a flag with the format FLAG{...}. Locate and extract it.
 ## Instructions
@@ -474,10 +538,11 @@ Your target is a flag with the format FLAG{...}. Locate and extract it.
 3. Verify the target endpoint is reachable and understand its baseline behavior
 4. For each objective, research and craft targeted payloads appropriate to the technology
 5. Test systematically — vary payloads, encoding, and bypass techniques
-6. When a vulnerability is confirmed, call document_vulnerability with your POC script inline (pocContent) to exploit, validate, and document the finding
+6. When a vulnerability is confirmed and material exploitability is proven, call document_vulnerability with your POC script inline (pocContent) to exploit, validate, and document the finding
 7. When a vulnerability provides internal access, pivot through it to discover reachable services and extract sensitive data
 8. Use add_memory to save reusable learnings from this engagement (effective techniques, target behaviors, technology details)
-9. After testing ALL objectives and extracting the flag, call the response tool with your final summary. Do NOT call response until you have completed all testing and extraction.` : `## Instructions
+9. Preserve the supplied objectives. Do not broaden a scoped materiality check into unrelated endpoint testing unless a confirmed finding requires it to demonstrate impact.
+10. After testing ALL objectives and extracting the flag, call the response tool with your final summary. Do NOT call response until you have completed all testing and extraction.` : `## Instructions
 1. Call list_memories to review any prior knowledge relevant to this target or engagement
 2. State the objectives and outline your testing plan — describe which techniques and payloads you will use for each objective before executing any tools
 3. Verify the target endpoint is reachable and understand its baseline behavior
@@ -513,6 +578,42 @@ ${objectiveList}
 ${outcomeSection}
 ${instructions}`;
 }
+var WORKER_RECON_TOOLS = [
+  "execute_command",
+  "http_request",
+  "browser_navigate",
+  "browser_snapshot",
+  "browser_screenshot",
+  "browser_click",
+  "browser_fill"
+];
+var SHARED_PENTEST_TOOLS = [
+  "response",
+  "email_list_inboxes",
+  "email_list_messages",
+  "email_search_messages",
+  "email_get_message",
+  "send_email",
+  "list_memories",
+  "get_memory",
+  "add_memory",
+  "web_search",
+  "get_page",
+  "checkpoint_state"
+];
+var MEMORY_TOOL_NAMES = ["add_memory", "list_memories", "get_memory"];
+function buildPentestActiveTools(role, session) {
+  const tools = role === "orchestrator" ? [...WORKER_RECON_TOOLS, ...SHARED_PENTEST_TOOLS, "spawn_pentest_agent"] : [
+    ...WORKER_RECON_TOOLS,
+    "document_vulnerability",
+    ...SHARED_PENTEST_TOOLS,
+    ...session.config?.taskDriven ? ["create_task", "update_task", "list_tasks"] : []
+  ];
+  if (!isMemoryEnabled()) {
+    return tools.filter((t) => !MEMORY_TOOL_NAMES.includes(t));
+  }
+  return tools;
+}
 function loadFindings(findingsPath) {
   if (!existsSync(findingsPath)) {
     return [];
@@ -526,5 +627,4 @@ function loadFindings(findingsPath) {
     }
   }).filter((f) => f !== null);
 }
-export { TargetedPentestAgent, buildPentestSystemPrompt };
+export { TargetedPentestAgent, buildPentestSystemPrompt, buildPentestPrompt, buildPentestActiveTools };

package/build/{cli-jg7r7y5n.js → cli-4xb21y6g.js} RENAMED

@@ -1,9 +1,13 @@
 import {
   OffensiveSecurityAgent
-} from "./cli-3w2syxpv.js";
+} from "./cli-wfmdch3r.js";
 import {
+  init_dist,
   stepCountIs
-} from "./cli-k0tckznm.js";
+} from "./cli-6p7d2k55.js";
+// src/core/agents/specialized/codeAgent/agent.ts
+init_dist();
 // src/core/agents/specialized/codeAgent/prompts.ts
 var CODE_AGENT_SYSTEM_PROMPT = `You are an expert coding agent with direct filesystem access. You will be given a specific objective — focus exclusively on completing it.
@@ -36,6 +40,17 @@ Run shell commands when needed.
 - Use for any task that benefits from shell access: build tools, git operations, package managers, linters, etc.
 - Useful for running scripts, checking dependencies, inspecting git history, or any CLI tool.
+## Whitebox security tools (when investigating vulnerabilities)
+- **profile_codebase** — summarize languages, manifests, scanners, and repo shape; full JSON is written as a session artifact.
+- **query_whitebox_catalog** — pull focused methodology slices (sinks, scanners, review passes) instead of loading a whole playbook into context.
+- **run_code_query** — batched rg / ast-grep / comby searches with bounded output and artifact logs.
+- **run_whitebox_scan** — run installed scanners when available; triage results before treating them as confirmed issues.
+- **create_whitebox_candidate / update_whitebox_candidate / list_whitebox_candidates** — track hypotheses with explicit state and evidence.
+- **start_whitebox_job / poll_whitebox_job / stop_whitebox_job** — bounded long-running jobs (builds, fuzzers) with logs under the session.
+- **read_whitebox_artifact** — read \`logs/whitebox/\` or \`scratchpad/whitebox/\` artifact paths returned by other tools (or legacy job logs by id).
+Prefer catalog + code_query for sink-first work; use candidates to separate unverified ideas from \`document_vulnerability\`. Do not modify the target repo unless the operator asked you to — keep harnesses and scratch output in the session scratchpad.
 # Working Approach
 1. **Orient first** — list files and read key entry points to understand the structure before diving in.
 2. **Search, then read** — use grep to locate what you need, then read the relevant files.
@@ -64,6 +79,7 @@ class CodeAgent extends OffensiveSecurityAgent {
       attackSurfaceRegistry,
       excludeTools,
       enableThinking,
+      openAIReasoningEffort,
       projectThreatModel
     } = opts;
     let activeTools = [
@@ -71,6 +87,17 @@ class CodeAgent extends OffensiveSecurityAgent {
       "list_files",
       "grep",
       "execute_command",
+      "profile_codebase",
+      "query_whitebox_catalog",
+      "run_code_query",
+      "run_whitebox_scan",
+      "create_whitebox_candidate",
+      "update_whitebox_candidate",
+      "list_whitebox_candidates",
+      "start_whitebox_job",
+      "poll_whitebox_job",
+      "stop_whitebox_job",
+      "read_whitebox_artifact",
       "http_request",
       "document_app",
       "document_endpoint",
@@ -97,6 +124,7 @@ class CodeAgent extends OffensiveSecurityAgent {
       subagentId,
       attackSurfaceRegistry,
       enableThinking,
+      openAIReasoningEffort,
       projectThreatModel,
       stopWhen: stopWhen ?? stepCountIs(1e4),
       activeTools,