npm - ghost-dragon - Versions diffs - 4.2.1 - Mend

ghost-dragon 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226) hide show

package/.github/workflows/ci.yml +23 -0
package/CHANGELOG.md +96 -0
package/README.md +193 -0
package/bootstrap.ps1 +83 -0
package/bootstrap.sh +71 -0
package/dist/agent/loop.d.ts +68 -0
package/dist/agent/loop.d.ts.map +1 -0
package/dist/agent/loop.js +135 -0
package/dist/agent/mcp.d.ts +33 -0
package/dist/agent/mcp.d.ts.map +1 -0
package/dist/agent/mcp.js +107 -0
package/dist/agent/session.d.ts +16 -0
package/dist/agent/session.d.ts.map +1 -0
package/dist/agent/session.js +55 -0
package/dist/agent/skills.d.ts +36 -0
package/dist/agent/skills.d.ts.map +1 -0
package/dist/agent/skills.js +153 -0
package/dist/agent/stack.d.ts +21 -0
package/dist/agent/stack.d.ts.map +1 -0
package/dist/agent/stack.js +158 -0
package/dist/agent/task.d.ts +21 -0
package/dist/agent/task.d.ts.map +1 -0
package/dist/agent/task.js +45 -0
package/dist/agent/tools.d.ts +44 -0
package/dist/agent/tools.d.ts.map +1 -0
package/dist/agent/tools.js +262 -0
package/dist/agent/trace.d.ts +34 -0
package/dist/agent/trace.d.ts.map +1 -0
package/dist/agent/trace.js +72 -0
package/dist/agent.d.ts +46 -0
package/dist/agent.d.ts.map +1 -0
package/dist/agent.js +103 -0
package/dist/auth.d.ts +74 -0
package/dist/auth.d.ts.map +1 -0
package/dist/auth.js +116 -0
package/dist/brain/anthropic.d.ts +19 -0
package/dist/brain/anthropic.d.ts.map +1 -0
package/dist/brain/anthropic.js +74 -0
package/dist/brain/claude-cli.d.ts +20 -0
package/dist/brain/claude-cli.d.ts.map +1 -0
package/dist/brain/claude-cli.js +79 -0
package/dist/brain/ghost-ember.d.ts +28 -0
package/dist/brain/ghost-ember.d.ts.map +1 -0
package/dist/brain/ghost-ember.js +97 -0
package/dist/brain/index.d.ts +22 -0
package/dist/brain/index.d.ts.map +1 -0
package/dist/brain/index.js +95 -0
package/dist/brain/openai-compat.d.ts +21 -0
package/dist/brain/openai-compat.d.ts.map +1 -0
package/dist/brain/openai-compat.js +119 -0
package/dist/brain/router/classify.d.ts +23 -0
package/dist/brain/router/classify.d.ts.map +1 -0
package/dist/brain/router/classify.js +160 -0
package/dist/brain/router/execute.d.ts +23 -0
package/dist/brain/router/execute.d.ts.map +1 -0
package/dist/brain/router/execute.js +84 -0
package/dist/brain/router/index.d.ts +26 -0
package/dist/brain/router/index.d.ts.map +1 -0
package/dist/brain/router/index.js +118 -0
package/dist/brain/router/routing-memory.d.ts +27 -0
package/dist/brain/router/routing-memory.d.ts.map +1 -0
package/dist/brain/router/routing-memory.js +77 -0
package/dist/brain/router/select.d.ts +32 -0
package/dist/brain/router/select.d.ts.map +1 -0
package/dist/brain/router/select.js +146 -0
package/dist/brain/router/two-hop.d.ts +23 -0
package/dist/brain/router/two-hop.d.ts.map +1 -0
package/dist/brain/router/two-hop.js +39 -0
package/dist/brain/router/verify.d.ts +37 -0
package/dist/brain/router/verify.d.ts.map +1 -0
package/dist/brain/router/verify.js +111 -0
package/dist/brain/types.d.ts +55 -0
package/dist/brain/types.d.ts.map +1 -0
package/dist/brain/types.js +16 -0
package/dist/brain/worker.d.ts +27 -0
package/dist/brain/worker.d.ts.map +1 -0
package/dist/brain/worker.js +71 -0
package/dist/commands/ai.d.ts +24 -0
package/dist/commands/ai.d.ts.map +1 -0
package/dist/commands/ai.js +137 -0
package/dist/commands/alerts.d.ts +19 -0
package/dist/commands/alerts.d.ts.map +1 -0
package/dist/commands/alerts.js +114 -0
package/dist/commands/billing.d.ts +13 -0
package/dist/commands/billing.d.ts.map +1 -0
package/dist/commands/billing.js +55 -0
package/dist/commands/chat.d.ts +22 -0
package/dist/commands/chat.d.ts.map +1 -0
package/dist/commands/chat.js +422 -0
package/dist/commands/config.d.ts +18 -0
package/dist/commands/config.d.ts.map +1 -0
package/dist/commands/config.js +136 -0
package/dist/commands/doctor.d.ts +11 -0
package/dist/commands/doctor.d.ts.map +1 -0
package/dist/commands/doctor.js +73 -0
package/dist/commands/global.d.ts +11 -0
package/dist/commands/global.d.ts.map +1 -0
package/dist/commands/global.js +253 -0
package/dist/commands/keep.d.ts +12 -0
package/dist/commands/keep.d.ts.map +1 -0
package/dist/commands/keep.js +58 -0
package/dist/commands/lifecycle.d.ts +17 -0
package/dist/commands/lifecycle.d.ts.map +1 -0
package/dist/commands/lifecycle.js +267 -0
package/dist/commands/login.d.ts +16 -0
package/dist/commands/login.d.ts.map +1 -0
package/dist/commands/login.js +234 -0
package/dist/commands/maintenance.d.ts +12 -0
package/dist/commands/maintenance.d.ts.map +1 -0
package/dist/commands/maintenance.js +76 -0
package/dist/commands/mcp.d.ts +16 -0
package/dist/commands/mcp.d.ts.map +1 -0
package/dist/commands/mcp.js +56 -0
package/dist/commands/memory.d.ts +13 -0
package/dist/commands/memory.d.ts.map +1 -0
package/dist/commands/memory.js +218 -0
package/dist/commands/osint.d.ts +14 -0
package/dist/commands/osint.d.ts.map +1 -0
package/dist/commands/osint.js +161 -0
package/dist/commands/pentest.d.ts +13 -0
package/dist/commands/pentest.d.ts.map +1 -0
package/dist/commands/pentest.js +131 -0
package/dist/commands/scale.d.ts +14 -0
package/dist/commands/scale.d.ts.map +1 -0
package/dist/commands/scale.js +191 -0
package/dist/commands/serve.d.ts +16 -0
package/dist/commands/serve.d.ts.map +1 -0
package/dist/commands/serve.js +167 -0
package/dist/commands/tui.d.ts +17 -0
package/dist/commands/tui.d.ts.map +1 -0
package/dist/commands/tui.js +138 -0
package/dist/commands/wyrm.d.ts +20 -0
package/dist/commands/wyrm.d.ts.map +1 -0
package/dist/commands/wyrm.js +274 -0
package/dist/config.d.ts +67 -0
package/dist/config.d.ts.map +1 -0
package/dist/config.js +54 -0
package/dist/index.d.ts +16 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +85 -0
package/dist/manifest.d.ts +31 -0
package/dist/manifest.d.ts.map +1 -0
package/dist/manifest.js +83 -0
package/dist/ui.d.ts +57 -0
package/dist/ui.d.ts.map +1 -0
package/dist/ui.js +174 -0
package/dist/utils.d.ts +33 -0
package/dist/utils.d.ts.map +1 -0
package/dist/utils.js +155 -0
package/dist/wyrm/mcp.d.ts +37 -0
package/dist/wyrm/mcp.d.ts.map +1 -0
package/dist/wyrm/mcp.js +137 -0
package/docs/SYSTEM-PREMORTEM.md +397 -0
package/dragon-manifest.toml +241 -0
package/dragon.py +177 -0
package/install/launchd/lk.ghosts.dragonkeep.plist +57 -0
package/install/systemd/dragonkeep.service +40 -0
package/media/dragon-silver-lockup.svg +931 -0
package/media/dragon-silver-mark.svg +931 -0
package/media/dragon-silver.png +0 -0
package/package.json +45 -0
package/specs/001-godmode/constitution.md +54 -0
package/specs/001-godmode/plan.md +30 -0
package/specs/001-godmode/spec.md +64 -0
package/specs/001-godmode/tasks.md +35 -0
package/specs/002-premortem-positioning/premortem.md +211 -0
package/src/agent/loop.ts +165 -0
package/src/agent/mcp.ts +92 -0
package/src/agent/session.ts +48 -0
package/src/agent/skills.ts +138 -0
package/src/agent/stack.ts +154 -0
package/src/agent/task.ts +55 -0
package/src/agent/tools.ts +255 -0
package/src/agent/trace.ts +76 -0
package/src/agent.ts +114 -0
package/src/auth.ts +133 -0
package/src/brain/anthropic.ts +83 -0
package/src/brain/claude-cli.ts +78 -0
package/src/brain/ghost-ember.ts +94 -0
package/src/brain/index.ts +99 -0
package/src/brain/openai-compat.ts +115 -0
package/src/brain/router/classify.ts +167 -0
package/src/brain/router/execute.ts +80 -0
package/src/brain/router/index.ts +125 -0
package/src/brain/router/routing-memory.ts +71 -0
package/src/brain/router/select.ts +156 -0
package/src/brain/router/two-hop.ts +62 -0
package/src/brain/router/verify.ts +123 -0
package/src/brain/types.ts +61 -0
package/src/brain/worker.ts +72 -0
package/src/commands/ai.ts +144 -0
package/src/commands/alerts.ts +131 -0
package/src/commands/billing.ts +59 -0
package/src/commands/chat.ts +318 -0
package/src/commands/config.ts +137 -0
package/src/commands/doctor.ts +71 -0
package/src/commands/global.ts +256 -0
package/src/commands/keep.ts +67 -0
package/src/commands/lifecycle.ts +273 -0
package/src/commands/login.ts +184 -0
package/src/commands/maintenance.ts +54 -0
package/src/commands/mcp.ts +57 -0
package/src/commands/memory.ts +229 -0
package/src/commands/osint.ts +171 -0
package/src/commands/pentest.ts +140 -0
package/src/commands/scale.ts +185 -0
package/src/commands/serve.ts +171 -0
package/src/commands/tui.ts +126 -0
package/src/commands/wyrm.ts +269 -0
package/src/config.ts +93 -0
package/src/index.ts +92 -0
package/src/manifest.ts +104 -0
package/src/ui.ts +188 -0
package/src/utils.ts +153 -0
package/src/wyrm/mcp.ts +130 -0
package/test/auth.test.ts +70 -0
package/test/brain.test.ts +39 -0
package/test/security.test.ts +104 -0
package/test/skills.test.ts +38 -0
package/test/ui.test.ts +46 -0
package/tsconfig.json +19 -0
package/worker/package-lock.json +1527 -0
package/worker/package.json +17 -0
package/worker/src/index.ts +76 -0
package/worker/tsconfig.json +15 -0
package/worker/wrangler.toml +26 -0

package/media/dragon-silver.png ADDED Viewed

Binary file

package/package.json ADDED Viewed

@@ -0,0 +1,45 @@
+{
+  "name": "ghost-dragon",
+  "version": "4.2.1",
+  "description": "Ghost Protocol's operator CLI — a Claude-Code-style coding + ops agent on any brain, with deep Wyrm memory, a 200+ skill library, the dragon stack, and any MCP server.",
+  "type": "module",
+  "bin": {
+    "dragon": "./dist/index.js"
+  },
+  "scripts": {
+    "build": "tsc",
+    "dev": "tsx src/index.ts",
+    "start": "node dist/index.js",
+    "test": "npm run build && vitest run",
+    "typecheck": "tsc --noUnusedLocals --noUnusedParameters --noEmit",
+    "link": "npm run build && npm link"
+  },
+  "keywords": [
+    "cli",
+    "ghost-protocol",
+    "dragonscale",
+    "wyrm",
+    "phantomdragon"
+  ],
+  "author": "Ryan Sebastian <ryan@ghosts.lk>",
+  "license": "Proprietary",
+  "dependencies": {
+    "@anthropic-ai/sdk": "^0.100.1",
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "boxen": "^8.0.1",
+    "chalk": "^5.6.2",
+    "commander": "^12.1.0",
+    "conf": "^13.1.0",
+    "ora": "^8.2.0"
+  },
+  "devDependencies": {
+    "@types/node": "^22.19.17",
+    "tsx": "^4.21.0",
+    "typescript": "^5.9.3",
+    "vitest": "^4.1.8"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/ghosts-lk/dragon-cli.git"
+  }
+}

package/specs/001-godmode/constitution.md ADDED Viewed

@@ -0,0 +1,54 @@
+# Dragon CLI — Constitution (v1.0 "Godmode")
+The non-negotiables for turning `dragon` into Ghost Protocol's flagship operator
+toolkit: **Claude-Code + GitHub-CLI in one**, industrial/military-grade under the
+hood, designer-grade on the surface. Every change is checked against these.
+## North star
+One terminal command — `dragon` — that an operator lives in: an AI coding/ops agent
+that edits code, runs the Ghost stack, remembers across sessions (Wyrm), and works on
+**any** brain (Claude, our free Cloudflare worker, local Ollama, our own DragonSpark, or
+any open OpenAI-compatible endpoint). It should feel like a flagship product on first
+keystroke and never lose your trust.
+## Principles
+1. **Safe by default, powerful on request.** The agent can run shell + edit files, so
+   destructive actions are gated (approve / `--auto` / per-tool allowlists). The default
+   profile cannot `rm -rf` your machine. Power is opt-in, never the default.
+2. **It always works.** No dead ends. Missing key → fall back to the free worker → local
+   Ollama. Unreachable service → clear message + path forward. A crash prints a friendly,
+   actionable error (stack only under `--debug`), never a raw trace.
+3. **Sovereign-capable.** A fully local mode (Ollama + Wyrm) where nothing leaves the
+   host. Sovereignty is a first-class option, not an afterthought — it's a selling point.
+4. **Quality is enforced, not hoped for.** Pure logic is unit-tested; commands are
+   smoke-tested; security-critical paths have adversarial tests. `tsc --noUnusedLocals`
+   stays green. CI gates merges.
+5. **Hardened like we'd pentest a client.** We DEF-CON our own tool: no command injection,
+   no path traversal, no token leakage (config 0600, secrets redacted from logs/traces),
+   bounded loops + output, SSRF/restricted-port guards on every URL.
+6. **Designer-grade surface.** The "ops console" look (stealth silver) is consistent
+   across every command — framed panels, status dots, aligned rows, chrome wordmark.
+   Output is scannable; errors are beautiful; `--json` everywhere for machines.
+7. **Quality-of-life in abundance.** First-run model picker, `dragon doctor`, shell
+   completions, self-update, resume/transcripts, `@file` attach, sane flags, fast help.
+   Every friction we hit, we file as a QoL task.
+8. **Memory + skills are the moat.** Wyrm long-term memory and the 200+ skill library are
+   wired into the agent (recall, capture, skill-as-tool). This is what no other CLI has.
+9. **Extensible.** Any MCP server, any model endpoint, custom tools. The toolkit grows
+   without forking.
+## Definition of "Godmode" (v1.0 ship gate)
+- Test suite green (unit + smoke + adversarial security tests) in CI.
+- Self-pentest findings: zero CRITICAL/HIGH open; MEDIUM triaged.
+- Global crash handler + `--debug`; every command degrades gracefully.
+- Brain: claude / worker / local / ghost / openai / **custom endpoint**, with picker +
+  graceful fallback. Fully-local sovereign mode verified.
+- Safe-by-default permission profiles for the bash/write tools.
+- `dragon doctor` green; shell completions + self-update shipped.
+- The ops-console theme applied to every command surface.
+## Non-negotiable NOs
+- No telemetry / phone-home. Traces + audit stay local unless the operator ships them.
+- No silent network calls. No secrets in logs, traces, or error output.
+- No unbounded agent loops or tool output. No auto-approve as a default.

package/specs/001-godmode/plan.md ADDED Viewed

@@ -0,0 +1,30 @@
+# Dragon CLI v1.0 "Godmode" — Plan
+Phased so that each phase lands shippable. **Phase 1 is this session's down payment.**
+## Phase 1 — Foundation: hardening + tests + first QoL  ← THIS SESSION
+- Global crash handler + `--debug` (F1, F2).
+- vitest + unit tests for security-critical pure fns (F3) + a smoke test (F4).
+- Apply self-pentest CRITICAL/HIGH fixes (F6, F7): config 0600 on all writes, secret
+  redaction, bash deny-list safe-by-default, bounded output/loops verified.
+- `dragon brains` (list providers + readiness) + custom OpenAI-compatible endpoint (F9 partial, F10).
+- CI workflow (build + tsc-noUnused + test).
+## Phase 2 — Surface + QoL polish
+- Ops-console theme rolled to every command + `--json` everywhere (F11).
+- `dragon doctor` (F12), shell completions + `dragon upgrade` (F13).
+- Permission profiles (safe/plan/auto), session resume + transcript export (F14).
+## Phase 3 — The unique moat
+- Stack-fused agent tools (scale/wyrm/pentest/keep/net as agent tools).
+- Skills-as-tools (skill_search/apply over the 200+ library).
+- MCP hub (`dragon mcp add`), sub-agents (`task` tool).
+## Phase 4 — Sovereign + commercial
+- `--sovereign` (Ollama+Wyrm only), sandboxed bash, live `dragon tui`.
+- Bundle into the DragonBrain appliance; package as the operator console product.
+## Sequencing notes
+- DragonSpark (the `ghost` brain) trains in parallel (its own repo); the `ghost` provider
+  is already wired and lights up when weights land.
+- Each phase ends green on the CI gate. Security findings never carry across a phase.

package/specs/001-godmode/spec.md ADDED Viewed

@@ -0,0 +1,64 @@
+# Dragon CLI v1.0 "Godmode" — Spec
+## Goal
+Elevate `dragon` from "a good start" to a flagship, release-grade operator toolkit:
+hardened, tested, beautiful, and uniquely Ghost Protocol.
+## Functional requirements
+### Reliability & errors
+- F1. Global crash handler: catch uncaughtException + unhandledRejection → friendly,
+  actionable message; full stack only under `--debug`/`DRAGON_DEBUG`.
+- F2. Every command degrades gracefully (no dead ends); network/auth/model failures give
+  a clear next step.
+### Quality / testing
+- F3. Unit tests (vitest) for pure logic: `isBrowsableHttpUrl`, `sanitizeApiBase`, brain
+  factory resolution, SSE/tool-call parsing, trace redaction, worker flatten.
+- F4. Smoke tests: `--help`, `config`, brain fallback, login validation — runnable in CI.
+- F5. Adversarial tests for security-critical paths (URL guard, redaction, edit-file
+  uniqueness). CI gate: build + tsc-noUnused + tests.
+### Security (self-pentest → fix)
+- F6. Address every CRITICAL/HIGH from the self-pentest. Bash/write tools safe-by-default
+  (approval gate; deny-list of obviously-destructive commands; `--auto` opt-in only).
+- F7. Token hygiene: config 0600 on every write path; secrets never in logs/traces/errors;
+  bearer/cookie sent only to the resolved, validated origin.
+- F8. Bounded everything: agent step cap, tool output cap, request timeouts.
+### Brains / model picker (incl. open source)
+- F9. Providers: claude · worker (free CF) · local (Ollama) · ghost (DragonSpark) · openai
+  · **custom** (any OpenAI-compatible base URL → OpenRouter, vLLM, LM Studio, etc.).
+- F10. First-run picker + `dragon brains` (list providers, readiness ●/○, active) +
+  `dragon config brain <p>` + per-session `--brain/--model`. Graceful fallback chain.
+### Surface / QoL
+- F11. Ops-console theme on every command (panels, status dots, `--json` everywhere).
+- F12. `dragon doctor` (brain/auth/wyrm/ollama/disk health, green/amber/red).
+- F13. Shell completions (bash/zsh/fish), `dragon upgrade` (self-update), man/`--help` polish.
+- F14. Agent QoL: session resume + transcript export, `/` slash-command help, `@file`,
+  Ctrl-C aborts a turn, permission profiles (safe/auto/plan).
+## Unique Ghost Protocol toolkit ideas (the "interesting" backlog)
+- **Stack-fused agent** — the same agent that edits code can DRIVE the Ghost stack:
+  `dragon` already wraps scale/wyrm/pentest/keep/net, so expose those as agent tools
+  ("run a pentest on X", "deploy upalis", "what changed in Wyrm?"). Coding agent + ops
+  console in one — nobody else has this.
+- **Skills-as-tools** — wire the 200+ skill library in as a `skill_search`/`skill_apply`
+  tool so the agent pulls the right playbook mid-task. The library becomes live capability.
+- **Wyrm flywheel** — every session already trains DragonSpark (traces). Surface it:
+  `dragon recall`, memory-aware answers, "what did we decide about X?".
+- **Sub-agents** — a `task` tool to fan out parallel sub-agents for big jobs (audits,
+  migrations) like Claude Code's Task.
+- **MCP hub** — connect ANY MCP server, not just Wyrm; `dragon mcp add/list`.
+- **Purple-team mode** — an agent persona that orchestrates PhantomDragon + DragonKeep for
+  authorized security work, with the audit log as evidence.
+- **Sovereign mode** — `dragon --sovereign`: Ollama + Wyrm only, airtight, for sensitive
+  repos / the DragonBrain appliance.
+- **Sandboxed bash** — optional namespace/container isolation for tool execution.
+- **Live TUI** — `dragon tui` operator dashboard (running services, recent agent actions,
+  Wyrm activity), and a `dragon` statusline.
+- **Audit everything** — local + Wyrm audit log of every agent action; exportable evidence.
+## Non-goals (v1.0)
+- A GUI (that's Dragon Console). Telemetry. Windows-first (Linux/macOS lead).

package/specs/001-godmode/tasks.md ADDED Viewed

@@ -0,0 +1,35 @@
+# Dragon CLI v1.0 "Godmode" — Tasks
+## Phase 1 — Foundation ✅ SHIPPED
+- [x] Global crash handler + `--debug`
+- [x] vitest unit/adversarial tests for security-critical pure logic (21 tests)
+- [x] smoke + CI workflow (build + tsc --noUnusedLocals + test)
+- [x] self-pentest fixes: cwd confinement, protected paths, bash always-dangerous,
+      0600 config/traces everywhere, broad redaction, credential header guard
+- [x] `dragon brains` + custom OpenAI-compatible endpoint provider
+- [x] atomic skill `industrial-grade-cli-craft`
+## Phase 2 — Surface + QoL ✅ SHIPPED
+- [x] ops-console theme (header + agent panel + brains/doctor/mcp panels; silver)
+- [x] `dragon doctor` (brain/auth/wyrm/ollama/skills/disk/perms)
+- [x] shell completions (bash/zsh/fish); `dragon upgrade` self-update
+- [x] permission profiles: `--auto` (safe in-cwd) + `--plan` (read-only) + `/plan`
+- [x] session resume (`--resume`) + transcript export (`/save`)
+## Phase 3 — Moat ✅ SHIPPED
+- [x] skills-as-tools (skill_search/skill_read over the 200+ library)
+- [x] stack-fused agent tools (stack_status + stack_run)
+- [x] MCP hub (`dragon mcp add/list/remove` → any MCP server's tools)
+- [x] sub-agents (`task` tool — read-only, no recursion)
+## Phase 4 — Sovereign + commercial ✅ (one item is per-product)
+- [x] `--sovereign` mode (local brain + Wyrm only)
+- [x] sandboxed bash — opt-in `--sandbox` via bwrap (cwd writable, rest read-only,
+      credential dirs masked; verified)
+- [x] live `dragon tui` operator dashboard (system + live trace activity, no heavy deps)
+- [x] bundle the agent into the DragonBrain appliance image (Node + dragon CLI)
+- [x] stack actions as STRUCTURED first-class tools — `stack_pentest` (PhantomDragon
+      findings.json) + `stack_keep` (DragonKeep --format json); OSINT via the MCP hub
+      (`dragon mcp add dragonnet`). Contracts mapped by parallel investigation agents.
+ALL phases shipped. v4.0 "Godmode" + v4.1 structured stack actions. Nothing open.

package/specs/002-premortem-positioning/premortem.md ADDED Viewed

@@ -0,0 +1,211 @@
+# dragon-cli — Premortem & Competitive Positioning
+> Deep analysis, applying the `agentic-cli-premortem` methodology to dragon-cli **v4.2.0**.
+> Author: Ghost Protocol · 2026-06-04. Honest by design — a premortem that flatters is theater.
+**TL;DR.** dragon-cli is a genuinely well-built agentic CLI. It is *also* competing head-on
+with two of the best-resourced dev tools on earth (Anthropic's Claude Code, GitHub's Copilot
+CLI), with a user base of **one**, while the company's actual Q2 revenue engine is the $2,499
+PTaaS product. As a general-purpose "Claude Code, but ours" it is most likely to die of
+**opportunity cost**, not a technical flaw. As an **operator console for the Ghost Protocol
+stack** — an internal force-multiplier and a sovereign, bundled differentiator for PTaaS /
+DragonBrain — it has a real, defensible wedge that neither incumbent can structurally follow.
+The recommendation is to *reframe what it's for*, not to out-feature the incumbents.
+---
+## Move 1 — The premortem (it's June 2027; dragon-cli is abandoned. Why?)
+**The one-sentence obituary, written first and honestly:**
+> *It died because every hour spent polishing a coding CLI that competes with Anthropic and
+> GitHub was an hour not spent closing PTaaS deals — and the CLI had exactly one user, so the
+> polish never returned a cent.*
+Everything below is downstream of that sentence. Ranked by `likelihood × impact`:
+| # | Cause of death | Likelihood | Impact | Verdict |
+|---|---|---|---|---|
+| 1 | **Strategic opportunity-cost** — competes with the $2,499 PTaaS focus for the only scarce resource (Ryan's hours). | High | Fatal | **FATAL** |
+| 2 | **Distribution-zero** — N=1 users; no funnel, no reason for anyone to switch from Claude Code. | High | Fatal (as a *product*) | **FATAL** |
+| 3 | **Model-quality ceiling** — default brain is rented (`claude`); the free fallback (`worker`, Llama 3.3 70B) is a flaky tool-caller; `ghost`/DragonSpark is scaffolded, not trained. Can't out-quality Anthropic. | Certain | High | **STRUCTURAL** |
+| 4 | **Solo-maintenance burden** — model APIs churn, MCP spec moves, terminals differ; the breadth already shipped (coding+ops+pentest+memory+TUI+auth backend) is now a surface that one person has to maintain. | High | High | **SERIOUS** |
+| 5 | **Security surface** — the model can run `bash` and edit files; prompt-injection + over-broad perms are now Ghost Protocol's liability. For a *security* company, one bad auto-run/breach narrative is reputation-fatal. | Medium | Fatal-if-it-lands | **GUARDED** (mitigated, see below) |
+| 6 | **Differentiation fragility** — "we have MCP / subagents / a TUI / skills" — so do both incumbents, with teams. None of those is a moat. | High | Medium | **REFRAME** |
+| 7 | **DragonSpark never ships** — the "own provider" story (`ghost` brain) is the long-term sovereignty bet; if it never trains, the differentiation leans entirely on rented models. | Medium-High | Medium | **DECOUPLE** |
+| 8 | **All-in-one sprawl** — coding + ops + pentest + memory + nano-LLM reads as unfocused; each is a front you can lose on. | Medium | Medium | **NARROW** |
+| 9 | **Untested authed e2e / single-machine** — device-flow + worker-brain only ever exercised on Ryan's box; breaks silently for any second user. | Medium | Low (at N=1) | **WATCH** |
+### Which are actually fatal, and the only honest mitigations
+- **#1 opportunity-cost (FATAL).** The only mitigation is to stop treating dragon-cli as a
+  product that must win attention, and treat it as (a) infrastructure that makes Ryan faster
+  at the work that *does* pay, and (b) a feature of the paid stack — not a standalone bet
+  fighting for mindshare. If it can't be justified on those two grounds, it should be frozen,
+  not polished. (Honors the maintenance-mode discipline and the 90-day PTaaS focus.)
+- **#2 distribution-zero (FATAL as a product).** Don't fund a funnel for it. Either it rides
+  *inside* an existing distribution (bundled with PTaaS/DragonBrain, where the customer is
+  already acquired) or it stays internal. A solo founder will not win consumer dev-tool
+  distribution against GitHub.
+- **#3 model ceiling (STRUCTURAL).** Unfixable by effort — so *design so it matters less*.
+  dragon's value must come from the axes where model quality is not the differentiator
+  (memory, stack-fusion, sovereignty, cost), and it should transparently use the best rented
+  model when quality matters. Stop comparing dragon's brain to Opus; that's a lost axis.
+- **#5 security (GUARDED → keep it that way).** This is the one fatal risk that is *already*
+  well-mitigated, and it's where the security-company DNA actually pays off: cwd-confinement,
+  protected-path blocking, bash-is-always-prompt (never auto), bwrap `--sandbox`, 0600
+  secrets, broad redaction, SSRF/restricted-port guards, header-injection guard, global crash
+  handler. **This must never regress** — it is simultaneously the biggest liability and, told
+  correctly, a *selling point* ("the agentic CLI built by pentesters, hardened like we'd
+  attack it"). The remaining gap is the untested second-machine path (#9).
+The survivable-but-not-fatal lines (#4, #6, #7, #8) all resolve into one instruction:
+**narrow the promise.** Breadth is the maintenance bill and the unfocused pitch at once.
+---
+## Move 2 — Competitive teardown (verified, not from the README)
+### ⚠️ Premise correction: GitHub Copilot CLI is **not** open source
+The working assumption was "GitHub Copilot CLI, which is open source, you can check the
+source." Verified, that is **false**, and the way it's false is the textbook trap from the
+methodology:
+- The public repo **`github/copilot-cli`** contains **documentation and issues only** — *no
+  product source*. It looks like "the source repo" and isn't.
+- The CLI ships as a **prebuilt, minified npm package `@github/copilot`** (~**301 MB**),
+  under a **proprietary "GitHub Copilot CLI" license** (the SDK piece, `@github/copilot-sdk`,
+  is the only openly-licensed sliver).
+- So the honest answer to "is it open source?" is: **No — it is closed-source,
+  source-unavailable, with a public docs/issues repo named like the product.** It is *not*
+  forkable, inspectable, or relicensable. (This is exactly why the methodology says: answer
+  with the license + what's actually in the repo, never the homepage adjective.)
+The genuinely open-source competitors in this category are **aider** (Apache-2.0) and
+**Gemini CLI** (Apache-2.0). Neither GitHub's nor Anthropic's CLI is.
+### Claude Code (Anthropic) — the quality incumbent
+- **Closed source**, Anthropic-operated. Models: **Opus 4.8 / Sonnet 4.6 / Haiku 4.5** —
+  Anthropic *is* the lab, so it sets the quality floor everyone else rents.
+- Tools: Read / Edit / Write / Bash / Glob / Grep / Agent / Skill / WebFetch / WebSearch /
+  Workflow. **Subagents** (Explore / Plan / general — no nesting), **forks**, **agent teams**,
+  **Dynamic Workflows** (deterministic multi-agent orchestration).
+- **Plan mode**; **MCP** across http/sse/stdio/ws with 3 scopes, OAuth, and Tool Search;
+  **Agent Skills** standard (commands merged into skills); **hooks** (30+ lifecycle events);
+  **plugins**; **dual memory** (CLAUDE.md + auto-memory under `~/.claude/projects/<p>/memory/`);
+  **6 permission modes** incl. classifier-gated auto + OS sandboxing.
+- **Moat:** owns the frontier model; deep, coherent agent architecture; ecosystem (skills /
+  plugins / hooks) with real adoption.
+- **Underbelly:** closed + Anthropic-locked (one vendor, one jurisdiction); **cost** (heavy
+  use runs ~$500–2,000/eng/mo); a context-window "quality tax" on very long sessions;
+  prompt-injection surface like any agent; **no on-host sovereign mode, no cross-session
+  project memory you fully own, no built-in security-stack drivers.**
+### GitHub Copilot CLI (GitHub / Microsoft) — the distribution incumbent
+- **Closed source** (see correction above). Agentic, **multi-model**, **default Claude
+  Sonnet 4.5** (so it *also* rents Anthropic's quality); MCP support; **custom agents**;
+  **`/fleet`** parallel subagents; **`/sandbox`**; `@github/copilot-sdk`.
+- **Moat:** GitHub/Microsoft distribution + brand + the place developers already are; tight
+  GitHub/repo/PR integration; effectively unlimited maintenance capacity.
+- **Underbelly:** not OSS/forkable/packageable; **cost burn** (consumes Copilot *premium
+  requests*); a **multi-file edit ceiling** reported in practice; **~90s cloud latency** on
+  some operations; **cloud-tied memory** (no local-owned project memory); no sovereignty, no
+  security-stack fusion.
+### Three-way capability matrix
+| Axis | **dragon-cli v4.2.0** | **Claude Code** | **GitHub Copilot CLI** |
+|---|---|---|---|
+| License / source | **Proprietary, but fully self-owned source** | Closed (Anthropic) | Closed (repo = docs/issues only) |
+| Default model | `claude` (Sonnet 4.6) — rented | Opus/Sonnet/Haiku — **owned** | Claude Sonnet 4.5 — rented |
+| Free / zero-key fallback | **✅ `worker` (Cloudflare Workers AI, Llama 3.3 70B)** — *flaky tool-caller* | ❌ | ❌ |
+| Fully local / sovereign | **✅ `--sovereign` (Ollama + Wyrm, no cloud)** | ❌ | ❌ |
+| Own-model roadmap | `ghost` / DragonSpark — **scaffolded, untrained** | N/A (is the lab) | N/A |
+| Cross-session memory you own | **✅ Wyrm by default (16 tools, project context)** | Partial (CLAUDE.md + auto-memory, Anthropic-side) | ❌ (cloud-tied) |
+| Skills | **✅ as live tools over 200+ GP library** | ✅ Agent Skills standard + ecosystem | Custom agents |
+| MCP | ✅ hub (stdio) | ✅ http/sse/stdio/ws, OAuth, 3 scopes, Tool Search | ✅ |
+| Subagents | ✅ read-only `task` | ✅ Explore/Plan/general + teams + workflows | ✅ `/fleet` |
+| Domain stack-fusion | **✅ `stack_pentest` (PhantomDragon) · `stack_keep` (DragonKeep) · `stack_run`** | ❌ | ❌ |
+| Sandbox / hardening | ✅ bwrap, cwd-confine, 0600, redaction, SSRF guard | ✅ OS sandbox + 6 perm modes | ✅ `/sandbox` |
+| TUI craft | **✅ alt-screen, sync-output, sub-cell gauges/sparklines** | Standard | Standard |
+| Cost control | **✅ free/local options → ~$0 floor** | $$$ ($500–2k/eng/mo) | $$ (premium requests) |
+| Maintenance capacity | **1 person** | Large team | Large team (MS-funded) |
+| Distribution / users | **N=1** | Massive | Massive (GitHub) |
+| Ecosystem maturity | New | High | High |
+---
+## Move 3 — Synthesis & decision
+### Honest scorecard
+- **dragon genuinely WINS (and can defend):**
+  1. **Sovereignty** — `--sovereign` (local Ollama + Wyrm, zero cloud) and a **free zero-key
+     fallback**. *Neither incumbent has this. Structural — they sell cloud.*
+  2. **Cross-session memory you fully own** — Wyrm, on by default, across 42 projects.
+  3. **Security-stack fusion** — it can *drive PhantomDragon and DragonKeep*. No general
+     coding CLI does or will; it's specific to this company's products.
+  4. **Cost floor ~$0** — for a bootstrapped, Sri-Lanka-cost-basis operator this is real.
+  5. **Built-by-pentesters hardening** — credible *because* of the company's DNA.
+- **dragon LOSES (and can't fix by effort):** raw model quality, ecosystem maturity, team
+  patch-cadence, distribution, polish-at-scale, breadth-of-tested-environments.
+- **Parity (a checkbox, not a moat):** MCP, subagents, skills, sandbox, plan mode, a nice
+  TUI. Having these is table stakes; they do **not** differentiate dragon from the incumbents.
+If the pitch is "a better Claude Code," every line of the scorecard that matters is a loss.
+The wins only matter if the tool is *about* those wins.
+### The wedge (one sentence)
+> **dragon-cli is the sovereign operator console for the Ghost Protocol stack** — the only
+> agentic CLI that runs fully on-host (or free), remembers across every Ghost Protocol
+> project via Wyrm, and *drives the company's own security tooling* — built and hardened by
+> the pentesters who sell PTaaS.
+That is an axis the incumbents **structurally cannot** follow: Anthropic and GitHub will
+never run fully offline, will never own Wyrm's memory, and will never ship a `stack_pentest`
+for PhantomDragon.
+### "What would have to be true" for dragon-cli to be worth continuing?
+1. It measurably makes **Ryan** faster at PTaaS/portfolio work (internal force-multiplier). ✅ plausible.
+2. It can be **bundled** with a thing that already has a buyer (PTaaS report-ops, or the
+   DragonBrain sovereign appliance) so distribution is inherited, not funded. ✅ plausible.
+3. It does **not** require winning a consumer dev-tool distribution war vs GitHub. ✅ (only if we don't try).
+4. Its security hardening **never regresses** and the second-machine path gets one real test. ✅ achievable.
+5. DragonSpark/`ghost` is **decoupled** — a nice-to-have sovereignty upgrade, never the thing
+   the value depends on. ✅ achievable.
+It does **NOT** require: a frontier model of our own, thousands of users, or out-featuring
+Anthropic. Good — those were the implausible ones.
+### Decision
+**Reframe, don't compete; freeze breadth, protect the wedge.**
+1. **Stop positioning it as a Claude-Code competitor.** Position = "sovereign operator
+   console for the Ghost Protocol stack." Every README/marketing line should lead with
+   sovereignty + Wyrm memory + stack-fusion, not "coding agent."
+2. **Bundle, don't distribute.** Ship it *with* DragonBrain (sovereign on-box) and as
+   PTaaS report/ops tooling. No standalone growth funnel — that's the PTaaS product's job.
+3. **Freeze feature breadth** for Q2. It's v4.2.0 and complete enough. New work only if it
+   (a) speeds Ryan's paid work or (b) hardens the wedge. No parity-chasing.
+4. **Guard against the two fatal-but-mitigated risks:** keep security hardening green (it's
+   the selling point *and* the liability); add one authenticated second-machine e2e test (#9).
+5. **Decouple DragonSpark** from dragon-cli's value. It's R&D upside, not a dependency.
+6. **Use it as the everyday driver** — the best proof a force-multiplier works is the founder
+   living in it.
+The premortem's verdict in one line: **dragon-cli doesn't die from being badly built — it
+dies from being aimed at the wrong target. Re-aim it at the stack it was born next to, and
+it's a moat the incumbents can't cross.**
+---
+*Methodology captured as the `agentic-cli-premortem` skill. Competitor facts verified
+2026-06-04 (licenses, npm artifacts, model backbones, tool surfaces). The "Copilot CLI is
+open source" premise was checked and corrected — it is closed-source with a docs-only public
+repo.*

package/src/agent/loop.ts ADDED Viewed

@@ -0,0 +1,165 @@
+/**
+ * The agent loop — Dragon's engine. Runs the model↔tool cycle entirely
+ * client-side so tools touch THIS machine:
+ *
+ *   user → brain.turn(tools) → [tool calls?] → execute locally → feed back → repeat
+ *                                  └ no calls → final answer, done
+ *
+ * Tools come from several sources, routed by name: local coding tools (read/edit/
+ * bash/grep…), skill playbooks (`skill_*`), the Ghost Protocol stack tools
+ * (`stack_*`), MCP hub tools, the `task` subagent, the curated Wyrm memory tools
+ * (`wyrm_*` → MCP), and the optional hosted Dragon portal (`portal_ask`). Wyrm is
+ * wired by default and the system prompt tells the model to use it as long-term memory.
+ *
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
+ */
+import type { Brain, BrainMessage, ToolCall, ToolSpec } from '../brain/types.js'
+import { getLocalTool, localToolSpecs, type ToolContext } from './tools.js'
+import type { Wyrm } from '../wyrm/mcp.js'
+import type { SkillLibrary } from './skills.js'
+import type { StackTools } from './stack.js'
+import type { McpHub } from './mcp.js'
+import type { TaskTool } from './task.js'
+/**
+ * Upper bound on brain turns per user message — a hard stop against a runaway
+ * tool loop. When it is reached the loop emits a "[stopped after N tool steps]"
+ * notice instead of asking the brain again.
+ */
+const MAX_STEPS = 60
+/**
+ * A single optional tool handed to the brain alongside the other tool sources.
+ * The loop advertises `spec` and routes any call whose name equals `spec.name`
+ * to `call`.
+ */
+export interface PortalTool {
+  /** Tool description advertised to the brain; its `name` is what routing matches on. */
+  spec: ToolSpec
+  /** Run the tool with the model-supplied arguments; resolves to the text fed back to the model. */
+  call(args: Record<string, unknown>): Promise<string>
+}
+/** Rendering callbacks the agent loop invokes while a turn streams and while tools run. */
+export interface AgentRender {
+  /**
+   * First text delta of an assistant segment (print the ◆ prefix etc).
+   * Invoked at most once per brain turn, right before that turn's first delta is forwarded.
+   */
+  onAssistantStart(): void
+  /** Receives each streamed text delta; also receives the step-limit notice when the loop gives up. */
+  onDelta(s: string): void
+  /** Invoked just before a tool call is executed, with its one-line summary. */
+  onToolStart(summary: string): void
+  /**
+   * Invoked after a tool call returns.
+   * `summary` is the same string passed to `onToolStart`; `resultPreview` is the
+   * first line of the result cut to 120 characters; `ok` is false when the
+   * result text starts with "error".
+   */
+  onToolEnd(summary: string, resultPreview: string, ok: boolean): void
+}
+/**
+ * Everything one agent run needs: the brain, the optional tool sources, and the
+ * mutable conversation. A `null` tool source is simply left out of the tool
+ * surface and of routing.
+ */
+export interface AgentDeps {
+  /** Model backend; its `turn` is called once per loop step. */
+  brain: Brain
+  /** Wyrm memory tools, or null when not wired. */
+  wyrm: Wyrm | null
+  /** Hosted portal tool, or null when unavailable. */
+  portal: PortalTool | null
+  /** Skill library exposing its own tool specs, or null. */
+  skills: SkillLibrary | null
+  /** Stack tools; their calls also receive `toolCtx`. Null when absent. */
+  stack: StackTools | null
+  /** MCP hub tools, or null. */
+  mcp: McpHub | null
+  /** The `task` tool, matched by `spec.name`, or null. */
+  task: TaskTool | null
+  /** NOTE(review): not read by the loop code in this file — presumably the working directory for callers; confirm. */
+  cwd: string
+  /** System prompt passed unchanged to every brain turn. */
+  system: string
+  /** Context handed to local tools and to stack tools when they run. */
+  toolCtx: ToolContext
+  /** Conversation history; the loop appends user, assistant and tool messages in place. */
+  messages: BrainMessage[]
+}
+/**
+ * Assemble the session's system prompt as newline-joined sections.
+ *
+ * The identity/tools preamble, the STACK section and the STYLE section are
+ * always present. SKILLS appears only for a non-zero `skills` count, MEMORY only
+ * when `wyrm` is set, PORTAL only when `portal` is set, and the recalled project
+ * context only when `primed` is a non-empty string. Each section after the
+ * preamble is preceded by one blank line.
+ *
+ * @param opts.cwd     Working directory reported to the model.
+ * @param opts.wyrm    Whether to include the Wyrm memory guidance.
+ * @param opts.portal  Whether to include the portal guidance.
+ * @param opts.brainId Identifier of the reasoning brain, reported to the model.
+ * @param opts.skills  Number of available skills; falsy omits the section.
+ * @param opts.primed  Recalled project context appended verbatim at the end.
+ * @returns The full prompt text.
+ */
+export function buildSystemPrompt(opts: { cwd: string; wyrm: boolean; portal: boolean; brainId: string; skills?: number; primed?: string | null }): string {
+  const { cwd, brainId, skills, primed } = opts
+
+  // Always-on preamble: identity, environment, base tool guidance.
+  const preamble: string[] = [
+    'You are Dragon — Ghost Protocol\'s terminal coding agent (the `dragon` CLI). You operate like a senior engineer pair: precise, autonomous, and concise.',
+    `Working directory: ${cwd}. Platform: ${process.platform}. Reasoning brain: ${brainId}.`,
+    '',
+    'TOOLS — you have real local tools (read_file, write_file, edit_file, list_dir, glob, grep, bash). Use them to DO the work, not describe it. Explore before you edit (grep/glob/read). Prefer edit_file over rewriting whole files. After code changes, build/test with bash to verify. Keep going until the task is genuinely done.',
+    'Be surgical: match the surrounding code style, make the smallest change that works, and never invent file paths — find them.',
+  ]
+
+  const stackSection: string[] = [
+    '',
+    'STACK — you ARE the `dragon` operator console for the Ghost Protocol stack (scale · wyrm · pentest · keep · net). stack_status shows what is installed/running; stack_pentest runs a PhantomDragon web scan (authorized targets only) and stack_keep runs a DragonKeep system scan — both return STRUCTURED findings; stack_run drives any other dragon subcommand. You are a coding agent AND an operator — reach for the stack when the task is ops, not code.',
+  ]
+
+  // Optional sections collapse to an empty array when their switch is off.
+  const skillsSection: string[] = skills
+    ? ['', `SKILLS — you have ${skills} reusable expert playbooks (design, security, infra, brand, project-specific). Before solving a non-trivial task from scratch, call skill_search for a relevant one and skill_read it, then apply its guidance. This institutional knowledge is your edge — reach for it first.`]
+    : []
+
+  const memorySection: string[] = opts.wyrm
+    ? [
+        '',
+        'MEMORY (Wyrm) — you have persistent cross-session memory via the wyrm_* tools, and you should use it proactively:',
+        '• Before non-trivial work, recall context with wyrm_recall / wyrm_search / wyrm_project_context.',
+        '• Capture durable decisions, lessons, and gotchas with wyrm_remember / wyrm_capture so future sessions inherit them.',
+        '• Track multi-step work as quests (wyrm_quest_add / wyrm_quest_complete). Record decisions with wyrm_decided_because.',
+        '• Reach for an existing skill via wyrm_skill_search before solving something from scratch.',
+        'Read memory freely without asking. This is what makes you better than a stateless assistant.',
+      ]
+    : []
+
+  const portalSection: string[] = opts.portal
+    ? ['', 'PORTAL — use portal_ask for the operator\'s Ghost Protocol account/licenses/catalog/services questions (it reaches the hosted account.ghosts.lk assistant). Not for coding.']
+    : []
+
+  const styleSection: string[] = [
+    '',
+    'STYLE — terse, operator-grade, GitHub-flavored markdown. No filler, no emoji. Reference code as `path:line`. State what you did, not what you\'re "about to" do.',
+  ]
+
+  const primedSection: string[] = primed
+    ? ['', '── Project context recalled from Wyrm (treat as background, verify before relying on specifics) ──', primed]
+    : []
+
+  return [
+    ...preamble,
+    ...stackSection,
+    ...skillsSection,
+    ...memorySection,
+    ...portalSection,
+    ...styleSection,
+    ...primedSection,
+  ].join('\n')
+}
+/**
+ * Collect every tool spec the brain is offered this session.
+ *
+ * Order is fixed: local tools, skills, stack, MCP hub, task, Wyrm, portal.
+ * A source that is null contributes nothing.
+ *
+ * @param deps The optional tool sources; local tools are always included.
+ * @returns A fresh array of specs in the order above.
+ */
+export function buildToolSpecs(deps: { wyrm: Wyrm | null; portal: PortalTool | null; skills: SkillLibrary | null; stack: StackTools | null; mcp: McpHub | null; task: TaskTool | null }): ToolSpec[] {
+  const { skills, stack, mcp, task, wyrm, portal } = deps
+  const surface: ToolSpec[] = []
+
+  surface.push(...localToolSpecs())
+  surface.push(...(skills?.toolSpecs() ?? []))
+  surface.push(...(stack?.specs ?? []))
+  surface.push(...(mcp?.toolSpecs() ?? []))
+  if (task) surface.push(task.spec)
+  surface.push(...(wyrm?.toolSpecs() ?? []))
+  if (portal) surface.push(portal.spec)
+
+  return surface
+}
+/**
+ * Route one tool call to the source that owns its name and return the text result.
+ *
+ * Sources are tried in order: skills, stack, MCP hub, task, Wyrm, portal, then
+ * the local tool registry. An unknown name yields an `error: unknown tool …`
+ * string.
+ *
+ * Any failure — from whichever source — is turned into an `error running …`
+ * string instead of being thrown. The caller has already recorded the
+ * assistant's tool calls in the conversation, so each call must end with a
+ * result message; letting an exception escape would leave the history with an
+ * unanswered tool call.
+ *
+ * @param call The tool call requested by the brain.
+ * @param deps Agent dependencies providing the tool sources and tool context.
+ * @returns The tool's text output, or a string starting with "error" on failure.
+ */
+async function dispatch(call: ToolCall, deps: AgentDeps): Promise<string> {
+  const { name, arguments: args } = call
+  try {
+    // `return await` keeps rejections inside this try block.
+    if (deps.skills?.handles(name)) return await deps.skills.call(name, args)
+    if (deps.stack?.handles(name)) return await deps.stack.call(name, args, deps.toolCtx)
+    if (deps.mcp?.handles(name)) return await deps.mcp.call(name, args)
+    if (deps.task && name === deps.task.spec.name) return await deps.task.call(args)
+    if (deps.wyrm?.handles(name)) return await deps.wyrm.call(name, args)
+    if (deps.portal && name === deps.portal.spec.name) return await deps.portal.call(args)
+    const tool = getLocalTool(name)
+    if (!tool) return `error: unknown tool "${name}"`
+    return await tool.run(args, deps.toolCtx)
+  } catch (e) {
+    return `error running ${name}: ${String(e instanceof Error ? e.message : e)}`
+  }
+}
+/**
+ * Produce the short one-line label shown to the user for a tool call.
+ *
+ * Local tools supply their own summary. Skills show the `query` (or `name`)
+ * argument, stack tools show their target/scan type/command, and `task` and
+ * the portal show the first 60 characters of their prompt. Everything else is
+ * labelled with the bare tool name.
+ *
+ * @param call The tool call to describe.
+ * @param deps Agent dependencies, used to identify which source owns the name.
+ * @returns The label text.
+ */
+function toolSummary(call: ToolCall, deps: AgentDeps): string {
+  const { name } = call
+  // Read an argument lazily so calls that need no arguments never touch them.
+  const arg = (key: string): unknown => (call.arguments as Record<string, unknown>)[key]
+
+  const localTool = getLocalTool(name)
+  if (localTool) {
+    return localTool.summary(call.arguments)
+  }
+
+  if (deps.skills?.handles(name)) {
+    return `${name}: ${String(arg('query') ?? arg('name') ?? '')}`
+  }
+
+  if (deps.stack?.handles(name)) {
+    switch (name) {
+      case 'stack_pentest':
+        return `pentest: ${String(arg('url') ?? '')}`
+      case 'stack_keep':
+        return `keep: ${String(arg('scan_type') ?? 'quick')}`
+      case 'stack_status':
+        return 'stack_status'
+      default:
+        return `stack: ${String(arg('command') ?? '')}`
+    }
+  }
+
+  if (name === deps.task?.spec.name) {
+    return `task: ${String(arg('task') ?? '').slice(0, 60)}`
+  }
+
+  // MCP and Wyrm tools are labelled by name; the checks stay ahead of the portal match.
+  if (deps.mcp?.handles(name)) return name
+  if (deps.wyrm?.handles(name)) return `${name}`
+
+  if (name === deps.portal?.spec.name) {
+    return `portal: ${String(arg('question') ?? '').slice(0, 60)}`
+  }
+
+  return name
+}
+/**
+ * Drive a single user message through the model↔tool cycle until it settles.
+ *
+ * The user text is appended to `deps.messages`, then the brain is asked for a
+ * turn. If the turn requests tools, each is executed in order and its result is
+ * appended as a tool message before the brain is asked again. The run ends when
+ * a turn requests no tools, or after `MAX_STEPS` turns, in which case a
+ * "[stopped after …]" notice is sent to `render.onDelta`.
+ *
+ * Mutates `deps.messages`, so the conversation carries over to the next call.
+ *
+ * @param deps     Brain, tool sources and the conversation to extend.
+ * @param userText The user's message for this run.
+ * @param render   Callbacks for streamed text and tool progress.
+ * @param signal   Abort signal forwarded to each brain turn.
+ */
+export async function runAgent(deps: AgentDeps, userText: string, render: AgentRender, signal: AbortSignal): Promise<void> {
+  const tools = buildToolSpecs(deps)
+  deps.messages.push({ role: 'user', content: userText })
+
+  let stepsLeft = MAX_STEPS
+  while (stepsLeft-- > 0) {
+    // Announce the assistant segment once, just before its first delta.
+    let announced = false
+    const forwardDelta = (chunk: string): void => {
+      if (!announced) {
+        render.onAssistantStart()
+        announced = true
+      }
+      render.onDelta(chunk)
+    }
+
+    const { text, toolCalls } = await deps.brain.turn({
+      system: deps.system,
+      messages: deps.messages,
+      tools,
+      signal,
+      onDelta: forwardDelta,
+    })
+
+    const wantsTools = toolCalls.length > 0
+    deps.messages.push({ role: 'assistant', content: text, toolCalls: wantsTools ? toolCalls : undefined })
+    if (!wantsTools) return // settled — final answer streamed
+
+    for (const call of toolCalls) {
+      const label = toolSummary(call, deps)
+      render.onToolStart(label)
+
+      const output = await dispatch(call, deps)
+      const succeeded = !output.startsWith('error')
+      const [firstLine] = output.split('\n')
+      render.onToolEnd(label, firstLine?.slice(0, 120) ?? '', succeeded)
+
+      deps.messages.push({ role: 'tool', content: output, toolCallId: call.id, toolName: call.name })
+    }
+  }
+
+  render.onDelta(`\n[stopped after ${MAX_STEPS} tool steps]`)
+}