npm - pentesting - Versions diffs - 0.72.12 → 0.73.2 - Mend

pentesting 0.72.12 → 0.73.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/README.md +31 -15
package/dist/agent-tool-HYQGTZC4.js +256 -0
package/dist/chunk-BGEXGHPB.js +11710 -0
package/dist/{chunk-OUS2TZXI.js → chunk-KBJPZDIL.js} +882 -139
package/dist/{chunk-GHJPYI4S.js → chunk-YFDJI3GO.js} +11 -1
package/dist/main.js +1206 -13249
package/dist/{persistence-UTTTBCYW.js → persistence-VFIOGTRC.js} +2 -2
package/dist/{process-registry-CCAQVJ4Y.js → process-registry-GSHEX2LT.js} +3 -1
package/dist/prompts/base.md +1 -0
package/dist/prompts/llm/analyst-system.md +7 -0
package/dist/prompts/{orchestrator.md → main-agent.md} +31 -1
package/dist/prompts/strategist-system.md +27 -0
package/package.json +7 -3

package/dist/{persistence-UTTTBCYW.js → persistence-VFIOGTRC.js} RENAMED Viewed

@@ -3,8 +3,8 @@ import {
   clearWorkspace,
   loadState,
   saveState
-} from "./chunk-OUS2TZXI.js";
-import "./chunk-GHJPYI4S.js";
+} from "./chunk-KBJPZDIL.js";
+import "./chunk-YFDJI3GO.js";
 export {
   StateSerializer,
   clearWorkspace,

package/dist/{process-registry-CCAQVJ4Y.js → process-registry-GSHEX2LT.js} RENAMED Viewed

@@ -1,6 +1,7 @@
 import {
   clearAllProcesses,
   deleteProcess,
+  getActiveProcessSummary,
   getAllProcessIds,
   getAllProcesses,
   getBackgroundProcessesMap,
@@ -11,10 +12,11 @@ import {
   hasProcess,
   logEvent,
   setProcess
-} from "./chunk-GHJPYI4S.js";
+} from "./chunk-YFDJI3GO.js";
 export {
   clearAllProcesses,
   deleteProcess,
+  getActiveProcessSummary,
   getAllProcessIds,
   getAllProcesses,
   getBackgroundProcessesMap,

package/dist/prompts/base.md CHANGED Viewed

@@ -344,6 +344,7 @@ If auto-install fails, install manually: `run_cmd("apt update && apt install -y
 | `write_file` + `run_cmd` | Build and execute custom scripts in any language |
 | `bg_process` | Shell management, listeners, servers, sniffers |
 | `add_*/update_*` | State management — your long-term memory |
+| `run_task` | **Delegate complex multi-step operations to a sub-agent** (see Task Delegation rules in main-agent.md) |
 **No limits on combining tools.** Tool missing → install or write equivalent.

package/dist/prompts/llm/analyst-system.md CHANGED Viewed

@@ -64,6 +64,13 @@ RULES:
 - Write as much detail as needed — do NOT artificially shorten. Every detail matters for strategy.
 - FILE TYPE: If the output contains HTML tags/CSS in a file expected to be binary, note "File is HTML, not binary data" in Key Findings.
+RUN_TASK OUTPUT HANDLING:
+If tool.name is run_task, treat the structured sections as the primary source of meaning:
+- Parse `[Status]` line: success / partial / failed
+- Extract actionable items from `[Summary]`, `[Findings]`, `[Loot]`, `[Sessions]`, `[Next]`
+- Do NOT complain about missing raw command output when the delegated result is already summarized
+- The delegated agent has already recorded canonical state; your job is to assess the overall outcome
 ## {REFLECTION}
 - What this output tells us: [1-line assessment]
 - Recommended next action: [1-2 specific follow-up actions]

package/dist/prompts/{orchestrator.md → main-agent.md} RENAMED Viewed

@@ -1,4 +1,4 @@
-# Strategic Orchestrator — Autonomous Operations Thinking Layer
+# Main Agent — Autonomous Execution Layer
 ## Identity
@@ -94,6 +94,36 @@ Failure is information. Extract it and adapt:
 5. Still failing → switch to different vector or target entirely
 6. Record what was tried to prevent repetition
+## Task Delegation — run_task
+**run_task spawns an autonomous sub-agent loop.** Use it when the task requires
+multiple sequential decisions that depend on each other's output.
+### MUST use `run_task` when:
+- Getting a reverse shell (listener setup → exploit → stabilise → post-exploit)
+- Exploit development that requires 3+ edit/run cycles (SQLi, SSTI, buffer overflow)
+- Credential chain: dump → crack / spray → pivot → new shell
+- Any attack that branches: if-shell-then-escalate, if-cred-then-pivot
+- Background brute-force while the main thread continues attacking elsewhere
+### Do NOT use `run_task` for:
+- Single tool calls: `web_search`, `parse_nmap`, `run_cmd`, `add_finding`
+- Simple one-off reconnaissance
+- State updates (`add_finding`, `add_loot`, `update_mission`)
+### How to call:
+```
+run_task({
+  task: "WHAT to achieve — the goal, not the method",
+  target: "IP:port or URL (optional)",
+  context: "Short context the sub-agent needs (optional)"
+})
+```
+**The sub-agent decides HOW. You decide WHAT.**
+Results come back as `[Status]`, `[Summary]`, `[Findings]`, `[Loot]`.
+After run_task completes: record key findings to canonical state if needed.
 ## Parallel Operations
 Background everything that takes >2 min or can run alongside foreground work:

package/dist/prompts/strategist-system.md CHANGED Viewed

@@ -409,3 +409,30 @@ CRITICAL RULES:
 ├─ If recon yields nothing after 10 min → still transition to vuln_analysis and probe
 └─ If stuck in a phase > 5 turns with no progress → evaluate if transition is needed
 ```
+### Rule 12: TASK DELEGATION — run_task
+```
+When the next action requires a branching or multi-step chain, explicitly frame it as a delegated objective suitable for run_task.
+INDICATORS FOR DELEGATION:
+├─ Task requires 3+ sequential tool calls with decision points
+├─ Execution path branches based on intermediate results
+├─ Complex exploit chain: SQLi → shell → privesc → pivot
+├─ Reverse shell acquisition with stabilization
+├─ Exploit development with edit/run/debug cycles
+└─ Pwn exploit development and execution
+DELEGATION FORMAT:
+"Delegate via run_task: {objective}. Context: {what agent should know}. Goal: {success criteria}."
+Examples:
+├─ "Delegate via run_task: achieve reverse shell on 10.10.10.5:4444 and stabilize it for post-exploitation."
+├─ "Delegate via run_task: exploit the confirmed SQLi on /login to extract credentials and obtain shell access."
+└─ "Delegate via run_task: develop and execute a pwn exploit for the 64-bit ELF binary."
+DO NOT DELEGATE:
+├─ Single tool calls (web_search, parse_nmap, run_cmd)
+├─ Simple reconnaissance tasks
+├─ Direct state updates (add_finding, add_loot)
+└─ Tasks requiring user interaction (ask_user)
+```

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pentesting",
-  "version": "0.72.12",
+  "version": "0.73.2",
   "description": "Autonomous Penetration Testing AI Agent",
   "type": "module",
   "main": "dist/main.js",
@@ -21,6 +21,11 @@
     "test": "mkdir -p .vitest && TMPDIR=.vitest npx vitest run && rm -rf .vitest .pentesting",
     "test:watch": "vitest",
     "lint": "tsc --noEmit",
+    "verify": "npm run test && npm run build",
+    "verify:docker": "npm run docker:local && bash test.sh",
+    "check": "npm run verify && npm run verify:docker",
+    "check:ci": "npm run verify && npm run verify:docker",
+    "check:clean": "docker system prune -af --volumes && npm run check:ci",
     "prepublishOnly": "npm run build",
     "docker:build": "docker buildx build -f Dockerfile.base --platform linux/amd64,linux/arm64 -t agnusdei1207/pentesting-base:latest --push .",
     "release": "npm run release:patch && npm run release:docker",
@@ -29,8 +34,7 @@
     "release:minor": "npm version minor && npm run build && npm run publish:token",
     "release:major": "npm version major && npm run build && npm run publish:token",
     "docker:local": "docker build -f Dockerfile -t agnusdei1207/pentesting:latest .",
-    "release:docker": "docker buildx build --no-cache -f Dockerfile --platform linux/amd64,linux/arm64 -t agnusdei1207/pentesting:latest --push .",
-    "check": "docker system prune -af --volumes && npm run test && npm run build && npm run docker:local && bash test.sh"
+    "release:docker": "docker buildx build --no-cache -f Dockerfile --platform linux/amd64,linux/arm64 -t agnusdei1207/pentesting:latest --push ."
   },
   "repository": {
     "type": "git",