ghost-dragon 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/.github/workflows/ci.yml +23 -0
  2. package/CHANGELOG.md +96 -0
  3. package/README.md +193 -0
  4. package/bootstrap.ps1 +83 -0
  5. package/bootstrap.sh +71 -0
  6. package/dist/agent/loop.d.ts +68 -0
  7. package/dist/agent/loop.d.ts.map +1 -0
  8. package/dist/agent/loop.js +135 -0
  9. package/dist/agent/mcp.d.ts +33 -0
  10. package/dist/agent/mcp.d.ts.map +1 -0
  11. package/dist/agent/mcp.js +107 -0
  12. package/dist/agent/session.d.ts +16 -0
  13. package/dist/agent/session.d.ts.map +1 -0
  14. package/dist/agent/session.js +55 -0
  15. package/dist/agent/skills.d.ts +36 -0
  16. package/dist/agent/skills.d.ts.map +1 -0
  17. package/dist/agent/skills.js +153 -0
  18. package/dist/agent/stack.d.ts +21 -0
  19. package/dist/agent/stack.d.ts.map +1 -0
  20. package/dist/agent/stack.js +158 -0
  21. package/dist/agent/task.d.ts +21 -0
  22. package/dist/agent/task.d.ts.map +1 -0
  23. package/dist/agent/task.js +45 -0
  24. package/dist/agent/tools.d.ts +44 -0
  25. package/dist/agent/tools.d.ts.map +1 -0
  26. package/dist/agent/tools.js +262 -0
  27. package/dist/agent/trace.d.ts +34 -0
  28. package/dist/agent/trace.d.ts.map +1 -0
  29. package/dist/agent/trace.js +72 -0
  30. package/dist/agent.d.ts +46 -0
  31. package/dist/agent.d.ts.map +1 -0
  32. package/dist/agent.js +103 -0
  33. package/dist/auth.d.ts +74 -0
  34. package/dist/auth.d.ts.map +1 -0
  35. package/dist/auth.js +116 -0
  36. package/dist/brain/anthropic.d.ts +19 -0
  37. package/dist/brain/anthropic.d.ts.map +1 -0
  38. package/dist/brain/anthropic.js +74 -0
  39. package/dist/brain/claude-cli.d.ts +20 -0
  40. package/dist/brain/claude-cli.d.ts.map +1 -0
  41. package/dist/brain/claude-cli.js +79 -0
  42. package/dist/brain/ghost-ember.d.ts +28 -0
  43. package/dist/brain/ghost-ember.d.ts.map +1 -0
  44. package/dist/brain/ghost-ember.js +97 -0
  45. package/dist/brain/index.d.ts +22 -0
  46. package/dist/brain/index.d.ts.map +1 -0
  47. package/dist/brain/index.js +95 -0
  48. package/dist/brain/openai-compat.d.ts +21 -0
  49. package/dist/brain/openai-compat.d.ts.map +1 -0
  50. package/dist/brain/openai-compat.js +119 -0
  51. package/dist/brain/router/classify.d.ts +23 -0
  52. package/dist/brain/router/classify.d.ts.map +1 -0
  53. package/dist/brain/router/classify.js +160 -0
  54. package/dist/brain/router/execute.d.ts +23 -0
  55. package/dist/brain/router/execute.d.ts.map +1 -0
  56. package/dist/brain/router/execute.js +84 -0
  57. package/dist/brain/router/index.d.ts +26 -0
  58. package/dist/brain/router/index.d.ts.map +1 -0
  59. package/dist/brain/router/index.js +118 -0
  60. package/dist/brain/router/routing-memory.d.ts +27 -0
  61. package/dist/brain/router/routing-memory.d.ts.map +1 -0
  62. package/dist/brain/router/routing-memory.js +77 -0
  63. package/dist/brain/router/select.d.ts +32 -0
  64. package/dist/brain/router/select.d.ts.map +1 -0
  65. package/dist/brain/router/select.js +146 -0
  66. package/dist/brain/router/two-hop.d.ts +23 -0
  67. package/dist/brain/router/two-hop.d.ts.map +1 -0
  68. package/dist/brain/router/two-hop.js +39 -0
  69. package/dist/brain/router/verify.d.ts +37 -0
  70. package/dist/brain/router/verify.d.ts.map +1 -0
  71. package/dist/brain/router/verify.js +111 -0
  72. package/dist/brain/types.d.ts +55 -0
  73. package/dist/brain/types.d.ts.map +1 -0
  74. package/dist/brain/types.js +16 -0
  75. package/dist/brain/worker.d.ts +27 -0
  76. package/dist/brain/worker.d.ts.map +1 -0
  77. package/dist/brain/worker.js +71 -0
  78. package/dist/commands/ai.d.ts +24 -0
  79. package/dist/commands/ai.d.ts.map +1 -0
  80. package/dist/commands/ai.js +137 -0
  81. package/dist/commands/alerts.d.ts +19 -0
  82. package/dist/commands/alerts.d.ts.map +1 -0
  83. package/dist/commands/alerts.js +114 -0
  84. package/dist/commands/billing.d.ts +13 -0
  85. package/dist/commands/billing.d.ts.map +1 -0
  86. package/dist/commands/billing.js +55 -0
  87. package/dist/commands/chat.d.ts +22 -0
  88. package/dist/commands/chat.d.ts.map +1 -0
  89. package/dist/commands/chat.js +422 -0
  90. package/dist/commands/config.d.ts +18 -0
  91. package/dist/commands/config.d.ts.map +1 -0
  92. package/dist/commands/config.js +136 -0
  93. package/dist/commands/doctor.d.ts +11 -0
  94. package/dist/commands/doctor.d.ts.map +1 -0
  95. package/dist/commands/doctor.js +73 -0
  96. package/dist/commands/global.d.ts +11 -0
  97. package/dist/commands/global.d.ts.map +1 -0
  98. package/dist/commands/global.js +253 -0
  99. package/dist/commands/keep.d.ts +12 -0
  100. package/dist/commands/keep.d.ts.map +1 -0
  101. package/dist/commands/keep.js +58 -0
  102. package/dist/commands/lifecycle.d.ts +17 -0
  103. package/dist/commands/lifecycle.d.ts.map +1 -0
  104. package/dist/commands/lifecycle.js +267 -0
  105. package/dist/commands/login.d.ts +16 -0
  106. package/dist/commands/login.d.ts.map +1 -0
  107. package/dist/commands/login.js +234 -0
  108. package/dist/commands/maintenance.d.ts +12 -0
  109. package/dist/commands/maintenance.d.ts.map +1 -0
  110. package/dist/commands/maintenance.js +76 -0
  111. package/dist/commands/mcp.d.ts +16 -0
  112. package/dist/commands/mcp.d.ts.map +1 -0
  113. package/dist/commands/mcp.js +56 -0
  114. package/dist/commands/memory.d.ts +13 -0
  115. package/dist/commands/memory.d.ts.map +1 -0
  116. package/dist/commands/memory.js +218 -0
  117. package/dist/commands/osint.d.ts +14 -0
  118. package/dist/commands/osint.d.ts.map +1 -0
  119. package/dist/commands/osint.js +161 -0
  120. package/dist/commands/pentest.d.ts +13 -0
  121. package/dist/commands/pentest.d.ts.map +1 -0
  122. package/dist/commands/pentest.js +131 -0
  123. package/dist/commands/scale.d.ts +14 -0
  124. package/dist/commands/scale.d.ts.map +1 -0
  125. package/dist/commands/scale.js +191 -0
  126. package/dist/commands/serve.d.ts +16 -0
  127. package/dist/commands/serve.d.ts.map +1 -0
  128. package/dist/commands/serve.js +167 -0
  129. package/dist/commands/tui.d.ts +17 -0
  130. package/dist/commands/tui.d.ts.map +1 -0
  131. package/dist/commands/tui.js +138 -0
  132. package/dist/commands/wyrm.d.ts +20 -0
  133. package/dist/commands/wyrm.d.ts.map +1 -0
  134. package/dist/commands/wyrm.js +274 -0
  135. package/dist/config.d.ts +67 -0
  136. package/dist/config.d.ts.map +1 -0
  137. package/dist/config.js +54 -0
  138. package/dist/index.d.ts +16 -0
  139. package/dist/index.d.ts.map +1 -0
  140. package/dist/index.js +85 -0
  141. package/dist/manifest.d.ts +31 -0
  142. package/dist/manifest.d.ts.map +1 -0
  143. package/dist/manifest.js +83 -0
  144. package/dist/ui.d.ts +57 -0
  145. package/dist/ui.d.ts.map +1 -0
  146. package/dist/ui.js +174 -0
  147. package/dist/utils.d.ts +33 -0
  148. package/dist/utils.d.ts.map +1 -0
  149. package/dist/utils.js +155 -0
  150. package/dist/wyrm/mcp.d.ts +37 -0
  151. package/dist/wyrm/mcp.d.ts.map +1 -0
  152. package/dist/wyrm/mcp.js +137 -0
  153. package/docs/SYSTEM-PREMORTEM.md +397 -0
  154. package/dragon-manifest.toml +241 -0
  155. package/dragon.py +177 -0
  156. package/install/launchd/lk.ghosts.dragonkeep.plist +57 -0
  157. package/install/systemd/dragonkeep.service +40 -0
  158. package/media/dragon-silver-lockup.svg +931 -0
  159. package/media/dragon-silver-mark.svg +931 -0
  160. package/media/dragon-silver.png +0 -0
  161. package/package.json +45 -0
  162. package/specs/001-godmode/constitution.md +54 -0
  163. package/specs/001-godmode/plan.md +30 -0
  164. package/specs/001-godmode/spec.md +64 -0
  165. package/specs/001-godmode/tasks.md +35 -0
  166. package/specs/002-premortem-positioning/premortem.md +211 -0
  167. package/src/agent/loop.ts +165 -0
  168. package/src/agent/mcp.ts +92 -0
  169. package/src/agent/session.ts +48 -0
  170. package/src/agent/skills.ts +138 -0
  171. package/src/agent/stack.ts +154 -0
  172. package/src/agent/task.ts +55 -0
  173. package/src/agent/tools.ts +255 -0
  174. package/src/agent/trace.ts +76 -0
  175. package/src/agent.ts +114 -0
  176. package/src/auth.ts +133 -0
  177. package/src/brain/anthropic.ts +83 -0
  178. package/src/brain/claude-cli.ts +78 -0
  179. package/src/brain/ghost-ember.ts +94 -0
  180. package/src/brain/index.ts +99 -0
  181. package/src/brain/openai-compat.ts +115 -0
  182. package/src/brain/router/classify.ts +167 -0
  183. package/src/brain/router/execute.ts +80 -0
  184. package/src/brain/router/index.ts +125 -0
  185. package/src/brain/router/routing-memory.ts +71 -0
  186. package/src/brain/router/select.ts +156 -0
  187. package/src/brain/router/two-hop.ts +62 -0
  188. package/src/brain/router/verify.ts +123 -0
  189. package/src/brain/types.ts +61 -0
  190. package/src/brain/worker.ts +72 -0
  191. package/src/commands/ai.ts +144 -0
  192. package/src/commands/alerts.ts +131 -0
  193. package/src/commands/billing.ts +59 -0
  194. package/src/commands/chat.ts +318 -0
  195. package/src/commands/config.ts +137 -0
  196. package/src/commands/doctor.ts +71 -0
  197. package/src/commands/global.ts +256 -0
  198. package/src/commands/keep.ts +67 -0
  199. package/src/commands/lifecycle.ts +273 -0
  200. package/src/commands/login.ts +184 -0
  201. package/src/commands/maintenance.ts +54 -0
  202. package/src/commands/mcp.ts +57 -0
  203. package/src/commands/memory.ts +229 -0
  204. package/src/commands/osint.ts +171 -0
  205. package/src/commands/pentest.ts +140 -0
  206. package/src/commands/scale.ts +185 -0
  207. package/src/commands/serve.ts +171 -0
  208. package/src/commands/tui.ts +126 -0
  209. package/src/commands/wyrm.ts +269 -0
  210. package/src/config.ts +93 -0
  211. package/src/index.ts +92 -0
  212. package/src/manifest.ts +104 -0
  213. package/src/ui.ts +188 -0
  214. package/src/utils.ts +153 -0
  215. package/src/wyrm/mcp.ts +130 -0
  216. package/test/auth.test.ts +70 -0
  217. package/test/brain.test.ts +39 -0
  218. package/test/security.test.ts +104 -0
  219. package/test/skills.test.ts +38 -0
  220. package/test/ui.test.ts +46 -0
  221. package/tsconfig.json +19 -0
  222. package/worker/package-lock.json +1527 -0
  223. package/worker/package.json +17 -0
  224. package/worker/src/index.ts +76 -0
  225. package/worker/tsconfig.json +15 -0
  226. package/worker/wrangler.toml +26 -0
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Ghost Router — the spine of the multi-model stack (ROUTER-BLUEPRINT.md §1).
3
+ *
4
+ * It IS a Brain, so the agent loop stays unchanged: every turn it classifies the
5
+ * request (intent × difficulty × stakes), selects the best {provider, model} for
6
+ * 8 GB, then DELEGATES to that underlying brain. The factory is injected as
7
+ * `resolve` to avoid a circular import with brain/index.ts.
8
+ *
9
+ * Each decision is appended to ~/.dragon/routing.jsonl (observability + the
10
+ * DragonSpark flywheel) and shown to the operator on stderr (silence with
11
+ * DRAGON_ROUTER_QUIET=1).
12
+ *
13
+ * MVP scope: single-hop selection over the resident Ollama models + Claude
14
+ * escalation. The reason→tool two-hop and llama-swap co-residency are later phases.
15
+ *
16
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
17
+ */
18
+
19
+ import { appendFileSync, mkdirSync } from 'node:fs'
20
+ import { homedir } from 'node:os'
21
+ import { join } from 'node:path'
22
+ import type { Brain, BrainTurn, TurnOpts } from '../types.js'
23
+ import { classify } from './classify.js'
24
+ import { selectTarget } from './select.js'
25
+ import { verifyReasoning } from './verify.js'
26
+ import { twoHop } from './two-hop.js'
27
+
28
/** Destination of the routing decision log; DRAGON_ROUTING_LOG overrides the default under ~/.dragon. */
const ROUTE_LOG = process.env.DRAGON_ROUTING_LOG || join(homedir(), '.dragon', 'routing.jsonl')

/**
 * Append one routing decision to ROUTE_LOG as a single JSON line, stamped with
 * the current time under `ts` (fields of `rec` are spread after it).
 *
 * Best-effort by design: any filesystem or serialisation error is swallowed so
 * that logging can never break a turn.
 *
 * @param rec decision fields to record
 */
function logDecision(rec: Record<string, unknown>): void {
  try {
    // Create the directory that actually holds ROUTE_LOG. When DRAGON_ROUTING_LOG
    // points outside ~/.dragon, creating only ~/.dragon would leave the real
    // parent missing and every append would fail silently.
    mkdirSync(join(ROUTE_LOG, '..'), { recursive: true })
    appendFileSync(ROUTE_LOG, JSON.stringify({ ts: new Date().toISOString(), ...rec }) + '\n')
  } catch { /* best-effort */ }
}
36
+
37
/**
 * Echo a dimmed router status line to stderr for the operator.
 * Does nothing when DRAGON_ROUTER_QUIET is exactly '1'; write errors are ignored.
 */
function show(line: string): void {
  const muted = process.env.DRAGON_ROUTER_QUIET === '1'
  if (!muted) {
    try {
      process.stderr.write(`\x1b[2m⟐ ${line}\x1b[0m\n`)
    } catch {
      /* ignore */
    }
  }
}
41
+
42
/** Dependencies handed to makeRouterBrain. */
export interface RouterOpts {
  /**
   * Brain factory supplied by brain/index.ts. It is injected rather than
   * imported so this module does not form a circular import.
   */
  resolve: (provider: string, model?: string) => Brain

  /**
   * Ollama base (…/v1) used for embeddings + tags/ps lookups.
   */
  localBaseURL: string
}
48
+
49
/**
 * Build the router Brain (id 'router', model 'auto').
 *
 * Every turn is classified, a target {provider, model} is selected, and the turn
 * is delegated to that brain. Depending on the selected target the turn takes
 * one of three paths, tried in this order:
 *   1. two-hop     — reasoner plans, workhorse executes (target.twoHop);
 *   2. verified    — best-of-N vote with optional escalation to Claude (target.verify);
 *   3. single call — plain delegation to the selected brain.
 * A failure in path 1 or 2 falls through to the next path instead of failing the turn.
 *
 * Environment read here: DRAGON_ROUTER_VOTES (default 3, minimum 1),
 * DRAGON_ROUTER_ESCALATE_BELOW (default 0.5), DRAGON_ROUTER_NO_ESCALATE ('1' disables
 * escalation).
 *
 * Cancellation: when `t.signal` is already aborted, an error from path 1 or 2 is
 * rethrown as-is. A cancelled turn is not a model failure, so it must not be logged
 * as one (routing-memory turns those records into penalties) and must not trigger
 * further model calls.
 *
 * @param opts injected brain factory and the local Ollama base URL
 * @returns a Brain whose `turn` routes each request
 */
export function makeRouterBrain(opts: RouterOpts): Brain {
  // One delegate per `provider:model` pair, built lazily through the injected factory.
  const cache = new Map<string, Brain>()
  const get = (provider: string, model?: string): Brain => {
    const safe = provider === 'router' ? 'local' : provider // never recurse
    const key = `${safe}:${model ?? ''}`
    let b = cache.get(key)
    if (!b) { b = opts.resolve(safe, model); cache.set(key, b) }
    return b
  }

  // Text for anything thrown, including values that are not Error instances.
  const reason = (e: unknown): string => (e instanceof Error ? e.message : String(e))

  return {
    id: 'router',
    model: 'auto',
    async turn(t: TurnOpts): Promise<BrainTurn> {
      // True once the caller has cancelled this turn.
      const cancelled = (): boolean => t.signal?.aborted === true

      const hasTools = t.tools.length > 0
      const c = await classify(opts.localBaseURL, t.messages, t.tools.length, t.signal)
      const target = await selectTarget(c, hasTools, opts.localBaseURL, t.signal)

      const label = `${target.provider}${target.model ? ':' + target.model : ''}`
      // Fields shared by every log record written for this turn.
      const base = {
        intent: c.intent, difficulty: c.difficulty, stakes: c.stakes, via: c.via,
        hasTools, provider: target.provider, model: target.model ?? null,
        swap: target.swap, penalty: target.penalty ?? 0, why: target.why,
      }

      // Reason→tool two-hop: the reasoner plans (no tools), the workhorse executes.
      if (target.twoHop && target.provider === 'local' && target.model && target.reasoner) {
        show(`router → two-hop: ${target.reasoner} plans → ${target.model} executes [${c.intent}/${c.difficulty.toFixed(2)}/${c.stakes}]`)
        try {
          const { turn, planChars } = await twoHop(opts.localBaseURL, target.reasoner, target.model, t, get)
          show(`  ↳ plan ${planChars} chars → ${turn.toolCalls.length} tool call(s)`)
          logDecision({ ...base, twoHop: true, planChars, toolCalls: turn.toolCalls.length })
          return turn
        } catch (e) {
          // A user cancel is not a two-hop failure: no penalty record, no retry.
          if (cancelled()) throw e
          show(`  ↳ two-hop failed (${reason(e)}) — single call`)
          // Log the failure so routing-memory accrues a penalty (a config where
          // two-hop keeps failing — e.g. swap OOM/timeout — adaptively backs off).
          logDecision({ ...base, twoHop: true, twoHopFailed: true })
        }
      }

      // Verified hard-reasoning path: best-of-N + vote (+ optional execution check),
      // then a CONFIDENCE CASCADE — escalate to Claude on low agreement / failed
      // execution. A model with a bad routing-memory track record escalates sooner.
      if (target.verify && target.provider === 'local' && target.model) {
        const votes = Math.max(1, parseInt(process.env.DRAGON_ROUTER_VOTES || '3', 10) || 3)
        show(`router → ${label} ×${votes} verified [${c.intent}/${c.difficulty.toFixed(2)}/${c.stakes}]`)
        try {
          const { turn, meta } = await verifyReasoning(opts.localBaseURL, target.model, t, votes)
          const execFail = !!(meta.exec?.ran && meta.exec.ok === false)
          show(`  ↳ agreement ${meta.agreement ?? 'n/a'} (${meta.votes} votes${meta.exec?.ran ? `, exec ${meta.exec.ok ? 'pass' : 'fail'}` : ''})`)

          const floor = parseFloat(process.env.DRAGON_ROUTER_ESCALATE_BELOW || '0.5') || 0.5
          const threshold = Math.min(0.85, floor + (target.penalty ?? 0) * 0.3) // worse history → escalate sooner
          const lowConf = meta.agreement != null && meta.agreement < threshold
          if ((lowConf || execFail) && process.env.DRAGON_ROUTER_NO_ESCALATE !== '1') {
            try {
              const claude = get('claude')
              show(`  ↳ ${execFail ? 'execution failed' : `low confidence (${meta.agreement} < ${threshold.toFixed(2)})`} → escalating to Claude`)
              const esc = await claude.turn(t)
              logDecision({ ...base, verify: meta, escalated: true, reason: execFail ? 'exec-fail' : 'low-agreement', threshold })
              return esc
            } catch (e) {
              // Cancelled mid-escalation → propagate; do not answer a cancelled turn.
              if (cancelled()) throw e
              /* Claude unavailable → keep the verified local answer */
            }
          }
          logDecision({ ...base, verify: meta, escalated: false })
          return turn
        } catch (e) {
          if (cancelled()) throw e
          show(`  ↳ verify failed (${reason(e)}) — single call`)
        }
      }

      show(`router → ${label} [${c.intent}/${c.difficulty.toFixed(2)}/${c.stakes}${target.swap ? ' · swap' : ''}] ${target.why}`)
      logDecision(base)
      return get(target.provider, target.model).turn(t)
    },
  }
}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Routing-memory — negative learning over the router's own history
3
+ * (ROUTER-BLUEPRINT.md §3, dragon-cli-local MVP).
4
+ *
5
+ * The router already logs every decision (+ verify agreement + exec pass/fail +
6
+ * whether it escalated) to ~/.dragon/routing.jsonl. This reads that log and, per
7
+ * (intent, model), computes a PENALTY in [0..1]: how often that model needed help
8
+ * for that kind of task (low agreement, failed execution, or had to escalate). The
9
+ * selector uses it to demote chronically-failing models, and the cascade uses it to
10
+ * escalate sooner for a model with a bad track record.
11
+ *
12
+ * Local now; promotable to the Wyrm memory substrate (the wyrm-routing-rerank
13
+ * subsystem) — same signal, durable + cross-device.
14
+ *
15
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
16
+ */
17
+
18
+ import { readFileSync } from 'node:fs'
19
+ import { homedir } from 'node:os'
20
+ import { join } from 'node:path'
21
+
22
/** Routing log to learn from; DRAGON_ROUTING_LOG overrides the default under ~/.dragon. */
const LOG = process.env.DRAGON_ROUTING_LOG || join(homedir(), '.dragon', 'routing.jsonl')
const MIN_SAMPLES = 3 // below this we have no opinion (penalty 0)
const LOW_AGREEMENT = 0.5 // verify agreement below this marks the record as bad

/** Per-(intent, model) counters: `n` records seen, `bad` of them needed help. */
interface Stat { n: number; bad: number }
type Stats = Record<string, Record<string, Stat>> // intent → model → stat

// Last computed stats plus the time they were computed.
let cache: { at: number; stats: Stats } | null = null

/**
 * Fold raw routing-log text (one JSON object per line) into per-(intent, model)
 * counters. A record counts as bad when it shows low verify agreement, a failed
 * execution check, an escalation, or a failed two-hop.
 *
 * Lines that are blank, not valid JSON, not a JSON object, or missing a string
 * `intent`/`model` are skipped. The maps have no prototype, so keys read from the
 * log (e.g. "__proto__", "constructor") can neither collide with nor write to
 * built-in object members.
 */
function tallyRoutingLog(raw: string): Stats {
  const stats: Stats = Object.create(null)
  for (const line of raw.split('\n')) {
    if (!line.trim()) continue
    let parsed: unknown
    try { parsed = JSON.parse(line) } catch { continue }
    // `null`, numbers, strings and booleans are valid JSON but not records.
    if (typeof parsed !== 'object' || parsed === null) continue
    const r = parsed as Record<string, any>
    const intent: unknown = r.intent
    const model: unknown = r.model
    if (typeof intent !== 'string' || typeof model !== 'string' || !intent || !model) continue
    const v = r.verify || {}
    const lowAgree = typeof v.agreement === 'number' && v.agreement < LOW_AGREEMENT
    const execFail = !!(v.exec && v.exec.ran && v.exec.ok === false)
    const escalated = r.escalated === true
    const twoHopFailed = r.twoHopFailed === true
    const bad = lowAgree || execFail || escalated || twoHopFailed
    const byModel = (stats[intent] ||= Object.create(null) as Record<string, Stat>)
    const s = (byModel[model] ||= { n: 0, bad: 0 })
    s.n++
    if (bad) s.bad++
  }
  return stats
}

/**
 * Read LOG and tally it. An unreadable or missing log yields empty stats; this
 * function does not throw for malformed log content.
 */
function compute(): Stats {
  let raw: string
  try { raw = readFileSync(LOG, 'utf-8') } catch { return Object.create(null) as Stats }
  return tallyRoutingLog(raw)
}
54
+
55
/** Return the tallied stats, re-reading the log only when the cached copy is 30 s old or more. */
function load(): Stats {
  const current = cache
  if (current && Date.now() - current.at < 30_000) {
    return current.stats
  }
  const rebuilt = { at: Date.now(), stats: compute() }
  cache = rebuilt
  return rebuilt.stats
}
60
+
61
/**
 * Share of logged turns, for this (intent, model) pair, that were marked bad.
 * Returns 0 when the pair is unknown or has fewer than MIN_SAMPLES records;
 * otherwise bad / n, clamped to [0, 1].
 */
export function penalty(intent: string, model: string): number {
  const byModel = load()[intent]
  const stat = byModel?.[model]
  if (!stat) return 0
  if (stat.n < MIN_SAMPLES) return 0
  const ratio = stat.bad / stat.n
  return Math.max(0, Math.min(1, ratio))
}
67
+
68
/** Expose the current (possibly cached) stats table for telemetry / `dragon` introspection. */
export function routingStats(): Stats {
  const snapshot = load()
  return snapshot
}
@@ -0,0 +1,156 @@
1
+ /**
2
+ * Ghost Router selection policy — (intent × difficulty × stakes × hasTools) → a
3
+ * concrete {provider, model}, constrained by what's actually installed and what
4
+ * fits 8 GB.
5
+ *
6
+ * Roles (Ollama tags, all overridable via env/config):
7
+ * workhorse (tool/agent turns) → mistral-nemo (verified tool-caller)
8
+ * reasoner (hard, NO tools) → vibethinker (can't tool-call → only when tools=[])
9
+ * cheap (simple chat) → qwen2.5:1.5b (fast, tiny)
10
+ * escalate (high stakes / hard) → claude (cloud, only if available)
11
+ *
12
+ * VRAM rule: only one big model fits at a time. When two candidates are equally
13
+ * acceptable, prefer the one already resident in Ollama (avoids a reload/“swap”).
14
+ * EMBER is intentionally NOT a default role yet — it earns its way in via the
15
+ * DragonSpark flywheel.
16
+ *
17
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
18
+ */
19
+
20
+ import { execSync } from 'node:child_process'
21
+ import { loadConfig } from '../../config.js'
22
+ import type { Classification } from './classify.js'
23
+ import { penalty } from './routing-memory.js'
24
+
25
+ export interface Target {
26
+ provider: string
27
+ model?: string
28
+ why: string
29
+ swap: boolean // chosen big model is not currently resident → a reload will occur
30
+ verify?: boolean // run the test-time-scaling verified path (best-of-N + vote)
31
+ penalty?: number // routing-memory: how unreliable this model has been for this intent (0..1)
32
+ twoHop?: boolean // hard tool turn → reasoner plans, then this model executes
33
+ reasoner?: string // the reasoner model for the two-hop plan step
34
+ }
35
+
36
/** Value of environment variable `k`, or `d` when it is unset or empty. */
const env = (k: string, d: string) => {
  const value = process.env[k]
  return value ? value : d
}
37
/**
 * Resolve the Ollama tag for each router role. Precedence per role:
 * environment variable, then the `brain` section of the config, then the built-in default.
 */
function roles() {
  const brainCfg = loadConfig().brain as Record<string, string> | undefined
  const workhorse = env('DRAGON_ROUTER_WORKHORSE', brainCfg?.routerWorkhorse || 'mistral-nemo')
  const reasoner = env('DRAGON_ROUTER_REASONER', brainCfg?.routerReasoner || 'vibethinker')
  const cheap = env('DRAGON_ROUTER_CHEAP', brainCfg?.routerCheap || 'qwen2.5:1.5b')
  return { workhorse, reasoner, cheap }
}
45
+
46
/** Strip a trailing `/v1` (with optional slash) and then any trailing slashes from a base URL. */
const OLLAMA = (base: string) => {
  const withoutV1 = base.replace(/\/v1\/?$/, '')
  return withoutV1.replace(/\/+$/, '')
}
47
+
48
// Last successful /api/tags result and when it was fetched.
let installedCache: { at: number; names: Set<string> } | null = null

/**
 * Names of the models Ollama reports via /api/tags, with a trailing `:latest` removed.
 * A successful answer is reused for 60 s. A non-OK response or any error yields an
 * empty set and is not cached.
 */
async function installed(base: string, signal?: AbortSignal): Promise<Set<string>> {
  const cached = installedCache
  if (cached && Date.now() - cached.at < 60_000) {
    return cached.names
  }
  try {
    const res = await fetch(`${OLLAMA(base)}/api/tags`, { signal })
    if (!res.ok) {
      // don't cache an error as "nothing installed"
      return new Set<string>()
    }
    const data = (await res.json()) as { models?: { name: string }[] }
    const names = new Set<string>()
    for (const entry of data.models || []) {
      names.add(entry.name.replace(/:latest$/, ''))
    }
    installedCache = { at: Date.now(), names }
    return names
  } catch {
    return new Set<string>()
  }
}
62
+
63
/**
 * Names of the models Ollama reports via /api/ps, with a trailing `:latest` removed.
 * Never cached; a non-OK response or any error yields an empty set.
 */
async function resident(base: string, signal?: AbortSignal): Promise<Set<string>> {
  const loaded = new Set<string>()
  try {
    const res = await fetch(`${OLLAMA(base)}/api/ps`, { signal })
    if (!res.ok) return new Set<string>()
    const data = (await res.json()) as { models?: { name: string }[] }
    const tags = (data.models || []).map((entry) => entry.name.replace(/:latest$/, ''))
    for (const tag of tags) loaded.add(tag)
    return loaded
  } catch {
    return new Set<string>()
  }
}
73
+
74
/**
 * Whether a Claude brain can be reached: either an Anthropic API key is present
 * (ANTHROPIC_API_KEY or the config's brain.keys.anthropic) or a `claude`
 * executable is found on PATH.
 *
 * @returns true when one of those is available; never throws for a missing CLI
 */
function claudeAvailable(): boolean {
  const cfg = loadConfig()
  if (process.env.ANTHROPIC_API_KEY || cfg.brain?.keys?.anthropic) return true
  // `command -v` is a POSIX shell builtin. execSync runs through cmd.exe on
  // Windows, where it does not exist, so the lookup there must use `where`.
  const probe = process.platform === 'win32' ? 'where claude' : 'command -v claude'
  try {
    execSync(probe, { stdio: 'ignore' })
    return true
  } catch {
    return false
  }
}
79
+
80
/** True when `set` holds `name` as given, or `name` with a trailing `:latest` removed. */
const has = (set: Set<string>, name: string) => {
  if (set.has(name)) return true
  return set.has(name.replace(/:latest$/, ''))
}
81
+
82
/**
 * Decide where this turn goes. Pure policy + availability; never throws.
 *
 * Rules are tried in order and the first match wins:
 *   1. opt-in stakes escalation to Claude (never for security-sensitive turns);
 *   2. reasoning with no tools → the reasoner (optionally verified);
 *   3. simple chat with no tools → the cheap model;
 *   4. everything else → the workhorse, demoted to the best-scoring installed
 *      alternative when routing-memory says the workhorse is unreliable here,
 *      optionally wrapped in a two-hop plan step;
 *   5. workhorse missing → any usable local model, else Claude, else the cheap model.
 *
 * @param c        classification of the request (intent, difficulty, stakes)
 * @param hasTools whether the turn offers tools
 * @param base     Ollama base URL used for the installed/resident lookups
 * @param signal   optional abort signal forwarded to those lookups
 * @returns the chosen target with a human-readable `why`
 */
export async function selectTarget(
  c: Classification,
  hasTools: boolean,
  base: string,
  signal?: AbortSignal,
): Promise<Target> {
  const r = roles()
  // The two lookups are independent and neither rejects, so run them together.
  const [have, live] = await Promise.all([installed(base, signal), resident(base, signal)])
  const local = (model: string, why: string): Target => ({
    provider: 'local', model, why, swap: !has(live, model),
  })
  // Installed/resident names have `:latest` stripped; compare role names the same way
  // so a role configured as `x:latest` is still recognised as `x`.
  const bare = (name: string): string => name.replace(/:latest$/, '')
  const workhorseName = bare(r.workhorse)
  const reasonerName = bare(r.reasoner)

  // 1) Stakes → escalate to Claude. PRIVACY-FIRST: this ships the turn to the cloud,
  //    so it is OPT-IN (DRAGON_ROUTER_ESCALATE_STAKES=1) and NEVER fires for
  //    security-sensitive content. `security` stakes (credentials, ssh, secrets,
  //    prod, sudo) and any ops_security intent ALWAYS stay local — we never ship
  //    secrets or target data off the box. Only financial/critical business-
  //    irreversible decisions may escalate, and only when explicitly enabled.
  //    (Low-confidence reasoning still escalates separately via the verify cascade.)
  if (
    process.env.DRAGON_ROUTER_ESCALATE_STAKES === '1' &&
    (c.stakes === 'financial' || c.stakes === 'critical') &&
    c.intent !== 'ops_security' &&
    claudeAvailable()
  ) {
    return { provider: 'claude', why: `stakes=${c.stakes} → escalate to Claude (opt-in)`, swap: false }
  }

  // 2) Reasoning with NO tools in play → the reasoning specialist (that's exactly
  //    what it's for; it can't tool-call, so only ever route here when tools=[]).
  //    Opt into the verified (best-of-N) path with DRAGON_ROUTER_VERIFY=1.
  if (!hasTools && c.intent === 'reasoning' && has(have, r.reasoner)) {
    const t = local(r.reasoner, `reasoning, no tools → ${r.reasoner}`)
    t.verify = process.env.DRAGON_ROUTER_VERIFY === '1'
    t.penalty = penalty(c.intent, r.reasoner)
    return t
  }

  // 3) Simple chat, no tools → the cheap/tiny model.
  if (!hasTools && c.intent === 'chat' && c.difficulty < 0.35 && has(have, r.cheap)) {
    return local(r.cheap, `simple chat → ${r.cheap}`)
  }

  // 4) Default workhorse for tool/agent turns (and everything else): the tool-caller.
  //    Negative learning: if the workhorse has been unreliable for this intent and a
  //    better-scoring installed alternative exists, demote to it.
  //    Two-hop: a HARD tool turn first gets a plan from the reasoner (gated).
  if (has(have, r.workhorse)) {
    const wantTwoHop =
      process.env.DRAGON_ROUTER_TWOHOP === '1' && hasTools && has(have, r.reasoner) &&
      (c.difficulty >= 0.5 || c.intent === 'reasoning' || c.intent === 'ops_security')
    const withHop = (t: Target): Target => {
      if (wantTwoHop) { t.twoHop = true; t.reasoner = r.reasoner; t.why += ` (+two-hop via ${r.reasoner})` }
      return t
    }
    const p = penalty(c.intent, r.workhorse)
    if (p >= 0.7) {
      // Scan every installed candidate and keep the lowest penalty that is strictly
      // better than the workhorse's; ties keep the earliest candidate.
      let alt: string | undefined
      let altPenalty = p
      for (const m of have) {
        if (!m || /embed/.test(m)) continue
        const name = bare(m)
        if (name === workhorseName || name === reasonerName) continue
        const candidatePenalty = penalty(c.intent, m)
        if (candidatePenalty < altPenalty) { alt = m; altPenalty = candidatePenalty }
      }
      if (alt !== undefined) {
        const t = local(alt, `routing-memory: ${r.workhorse} unreliable here (penalty ${p.toFixed(2)}) → ${alt}`)
        t.penalty = altPenalty
        return withHop(t)
      }
    }
    const t = local(r.workhorse, `${hasTools ? 'tool turn' : c.intent} → workhorse ${r.workhorse}`)
    t.penalty = p
    return withHop(t)
  }

  // 5) Workhorse missing → degrade: any resident big local model, else Claude, else cheap.
  //    Exclude the reasoner (can't tool-call) so a tool turn never lands on it.
  const usable = (m: string) => !/embed/.test(m) && bare(m) !== reasonerName
  const fallback = [...live].find(usable) || [...have].find(usable)
  if (fallback) return local(fallback, `workhorse '${r.workhorse}' not installed → ${fallback}`)
  if (claudeAvailable()) return { provider: 'claude', why: 'no local model available → Claude', swap: false }
  return { provider: 'local', model: r.cheap, why: 'last-resort cheap local', swap: true }
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Reason→tool two-hop (ROUTER-BLUEPRINT.md §1).
3
+ *
4
+ * vibethinker reasons brilliantly but CANNOT tool-call; mistral-nemo tool-calls but
5
+ * reasons less deeply. For a HARD tool turn the router splits the work:
6
+ * hop 1 — the reasoner produces a concrete PLAN (no tools), then
7
+ * hop 2 — the tool model executes that plan with the real tools.
8
+ *
9
+ * On 8 GB this is sequential (the two models can't co-reside) — Ollama swaps them,
10
+ * so it's gated behind DRAGON_ROUTER_TWOHOP=1 and only fires on hard tool turns.
11
+ *
12
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
13
+ */
14
+
15
+ import type { Brain, BrainTurn, TurnOpts } from '../types.js'
16
+ import { ollamaChat, toOllamaMessages } from './verify.js'
17
+
18
/** Suffix appended to the system prompt for hop 1: asks for a plan and forbids tool calls and code blocks. */
const PLAN_SYSTEM = [
  '\n\n── PLAN-ONLY MODE ──\n',
  'You are the analyst. Read the user request and produce a brief, concrete, numbered ',
  'PLAN to accomplish it (which tools to use, in what order, and why). Do NOT call any ',
  'tools and do NOT write code blocks — output only the plan.',
].join('')
23
+
24
/** What a two-hop run hands back to the router. */
export interface TwoHopResult {
  /** The executor's turn (hop 2). */
  turn: BrainTurn
  /** Length of the cleaned, capped plan that was injected into hop 2. */
  planChars: number
}
28
+
29
/**
 * Run one reason→tool two-hop turn: ask the reasoner for a plan, then let the
 * workhorse execute the turn with that plan appended to its system prompt.
 *
 * @param localBaseURL   Ollama base URL used for the plan call
 * @param reasonerModel  model that writes the plan (hop 1, no tools)
 * @param workhorseModel model that executes with the real tools (hop 2)
 * @param t              the original turn options
 * @param resolve        factory to build the executor brain (from router/index.ts)
 * @returns the executor's turn and the length of the injected plan
 */
export async function twoHop(
  localBaseURL: string,
  reasonerModel: string,
  workhorseModel: string,
  t: TurnOpts,
  resolve: (provider: string, model?: string) => Brain,
): Promise<TwoHopResult> {
  // hop 1 — reasoning/plan, no tools (so vibethinker never has to tool-call)
  const planMessages = toOllamaMessages(t.system + PLAN_SYSTEM, t.messages)
  const planBudget = Math.min(t.maxTokens ?? 1024, 1024)
  const rawPlan = await ollamaChat(localBaseURL, reasonerModel, planMessages, 0.4, planBudget, t.signal)
  const plan = rawPlan.trim()

  // hop 2 — execution with the real tools, plan injected as guidance.
  // Drop fenced code blocks, then any leftover fence markers, and cap the length
  // so the executor ACTS on the plan rather than engaging with it as prose.
  const withoutBlocks = plan.replace(/```[\s\S]*?```/g, '')
  const withoutFences = withoutBlocks.replace(/```/g, '')
  const cleanPlan = withoutFences.trim().slice(0, 700)

  let enriched = t.system
  if (cleanPlan) {
    enriched =
      t.system +
      '\n\n── PLAN (from the analyst) ──\n' + cleanPlan +
      '\n\nExecute this plan NOW by calling the appropriate tool. Do NOT restate or ' +
      'explain the plan — issue the tool call.'
  }

  const worker = resolve('local', workhorseModel)
  const turn = await worker.turn({ ...t, system: enriched })
  return { turn, planChars: cleanPlan.length }
}
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Verified hard-reasoning path (ROUTER-BLUEPRINT.md §2).
3
+ *
4
+ * Ollama exposes no logits, so confidence = ANSWER-AGREEMENT: sample the reasoner
5
+ * N times at spread temperatures, extract each final answer, and majority-vote.
6
+ * The agreement ratio is the router's confidence signal (→ later: escalate if low).
7
+ * For code/security candidates we additionally run EXECUTION-based verification
8
+ * (see execute.ts) and use pass/fail as a hard reward.
9
+ *
10
+ * optillm: if DRAGON_OPTILLM_URL is set we treat it as a drop-in OpenAI-compatible
11
+ * test-time-scaling proxy and let IT do the scaling in one call (you run optillm
12
+ * pointed at Ollama). Otherwise we do best-of-N here — self-contained, no extra
13
+ * service, which suits the 8 GB local-first box.
14
+ *
15
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
16
+ */
17
+
18
+ import type { BrainMessage, BrainTurn, TurnOpts } from '../types.js'
19
+ import { executeVerify, type ExecResult } from './execute.js'
20
+
21
/** How a verified answer was produced and how strongly the samples agreed. */
export interface VerifyMeta {
  /** Path that produced the answer: local best-of-N voting, or the optillm proxy. */
  via: 'self-consistency' | 'optillm'
  /** Number of samples that were collected (1 for optillm). */
  votes: number
  /** Share of samples behind the winning answer. */
  agreement: number | null // null when via=optillm (single call)
  /** Normalized answer → number of samples that gave it (self-consistency path only). */
  distribution?: Record<string, number>
  /** Whatever `executeVerify` returned for the chosen answer. */
  exec?: ExecResult | null
}
28
+
29
/**
 * Reduce a configured base URL to the server root so `/api/...` can be appended:
 * removes trailing slashes and one trailing `/v1` path segment.
 * Slashes are stripped first, so `.../v1//` loses its `/v1` as well; a final
 * pass removes any slash that was sitting in front of the `/v1`.
 */
const OLLAMA = (base: string) => base.replace(/\/+$/, '').replace(/\/v1$/, '').replace(/\/+$/, '')
30
+
31
/**
 * Flatten the agent's message history into plain `{ role, content }` chat messages.
 *
 * The system prompt always comes first. Tool results are re-labelled as user
 * messages of the form `[observed] <toolName> → <content>` and cut to 1200
 * characters (prefix included). Every other non-assistant role maps to `user`.
 *
 * @param system system prompt for the first message
 * @param messages running history to convert
 * @returns a new array; the input is not modified
 */
export function toOllamaMessages(system: string, messages: BrainMessage[]) {
  const head = { role: 'system', content: system }
  const rest = messages.map((msg): { role: string; content: string } => {
    if (msg.role === 'tool') {
      const observed = `[observed] ${msg.toolName ?? 'tool'} → ${msg.content}`
      return { role: 'user', content: observed.slice(0, 1200) }
    }
    return { role: msg.role === 'assistant' ? 'assistant' : 'user', content: msg.content }
  })
  return [head, ...rest]
}
39
+
40
/**
 * Send one non-streaming chat request to `<base>/api/chat` and return the reply text.
 *
 * @param base server base URL; normalized with `OLLAMA` before the path is appended
 * @param model model name to run
 * @param messages chat messages, serialized as-is into the request body
 * @param temperature sampling temperature (top_p is fixed at 0.95)
 * @param maxTokens sent as `options.num_predict`
 * @param signal optional abort signal forwarded to `fetch`
 * @returns `message.content` from the response, or `''` when it is absent
 * @throws Error `reasoner HTTP <status>` when the response is not ok
 */
export async function ollamaChat(base: string, model: string, messages: unknown, temperature: number, maxTokens: number, signal?: AbortSignal): Promise<string> {
  const url = OLLAMA(base) + '/api/chat'
  const payload = {
    model,
    messages,
    stream: false,
    options: { temperature, top_p: 0.95, num_predict: maxTokens },
  }
  const res = await fetch(url, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify(payload),
    signal,
  })
  if (!res.ok) throw new Error(`reasoner HTTP ${res.status}`)
  const data = (await res.json()) as { message?: { content?: string } }
  const content = data.message?.content
  return content ?? ''
}
51
+
52
/**
 * Send one non-streaming request to `<base>/chat/completions` (trailing slashes
 * on `base` are removed first) and return the first choice's text.
 *
 * @param base proxy base URL
 * @param model model name to request
 * @param messages chat messages, serialized as-is into the request body
 * @param temperature sampling temperature (top_p is fixed at 0.95)
 * @param maxTokens sent as `max_tokens`
 * @param signal optional abort signal forwarded to `fetch`
 * @returns `choices[0].message.content`, or `''` when it is absent
 * @throws Error `optillm HTTP <status>` when the response is not ok
 */
async function openaiChat(base: string, model: string, messages: unknown, temperature: number, maxTokens: number, signal?: AbortSignal): Promise<string> {
  const endpoint = base.replace(/\/+$/, '') + '/chat/completions'
  const payload = { model, messages, temperature, top_p: 0.95, max_tokens: maxTokens, stream: false }
  const res = await fetch(endpoint, {
    method: 'POST',
    headers: { 'content-type': 'application/json', authorization: 'Bearer optillm' },
    body: JSON.stringify(payload),
    signal,
  })
  if (!res.ok) throw new Error(`optillm HTTP ${res.status}`)
  const data = (await res.json()) as { choices?: { message?: { content?: string } }[] }
  const first = data.choices?.[0]
  return first?.message?.content ?? ''
}
63
+
64
/**
 * Content of the last balanced `\boxed{...}` in `text`, or null when there is none.
 * Braces are counted rather than regex-matched so nested groups such as
 * `\boxed{\frac{1}{2}}` are returned whole.
 */
function lastBoxedContent(text: string): string | null {
  const marker = '\\boxed{'
  let last: string | null = null
  let from = 0
  for (;;) {
    const start = text.indexOf(marker, from)
    if (start === -1) return last
    const open = start + marker.length
    let depth = 1
    let i = open
    while (i < text.length && depth > 0) {
      const ch = text[i]
      if (ch === '{') depth++
      else if (ch === '}') depth--
      i++
    }
    if (depth === 0) {
      last = text.slice(open, i - 1)
      from = i
    } else {
      // Unbalanced opener: skip it, a later \boxed{...} may still be complete.
      from = open
    }
  }
}

/**
 * Pull the final answer out of a model response so samples can be compared.
 *
 * Strategies, in order; within each one the LAST occurrence wins:
 *  1. `\boxed{...}` (nested braces supported),
 *  2. "answer is X" / "final answer: X" / "answer = X" — X runs to the end of the
 *     sentence or line; a dot followed by a digit is kept so decimals survive,
 *  3. the last number in the text, with thousands separators removed.
 *
 * @param text full response text
 * @returns the extracted answer (trimmed), or null when nothing matched
 */
export function extractAnswer(text: string): string | null {
  const boxed = lastBoxedContent(text)
  if (boxed !== null) return boxed.trim()

  // `is\s*:?` also swallows the colon in "answer is: 5".
  const stated = [...text.matchAll(/(?:final answer|answer)\s*(?:is\s*:?|:|=)\s*((?:[^\n.]|\.(?=\d))+)/gi)]
  const lastStated = stated[stated.length - 1]
  if (lastStated) return (lastStated[1] ?? '').trim()

  // A decimal point only counts when digits follow, so a sentence-ending "42."
  // yields "42" rather than "42.".
  const nums = text.match(/-?\d[\d,]*(?:\.\d+)?/g)
  const lastNum = nums ? nums[nums.length - 1] : undefined
  return lastNum !== undefined ? lastNum.replace(/,/g, '') : null
}
72
+
73
/** Voting key for an answer: lowercased, all whitespace removed, one trailing dot dropped. */
const norm = (a: string | null) => {
  const raw = a || ''
  const compact = raw.toLowerCase().replace(/\s+/g, '')
  return compact.replace(/\.$/, '')
}
74
+
75
/**
 * Run the reasoner with test-time scaling. Returns the chosen turn + how confident.
 *
 * With DRAGON_OPTILLM_URL set, a single call goes to that proxy and no agreement
 * is reported. Otherwise the model is sampled `votes` times, one request at a
 * time, at spread temperatures; the samples' extracted answers are tallied and
 * the first full sample that produced the most common answer is returned.
 * Ties go to the answer that was seen first.
 *
 * @param localBaseURL base URL handed to `ollamaChat`
 * @param model reasoner model (DRAGON_OPTILLM_MODEL overrides it on the proxy path)
 * @param t turn options; `system`, `messages`, `maxTokens`, `signal`, `onDelta` are used
 * @param votes number of samples to draw (at least one is always attempted)
 * @returns the chosen text as a tool-less turn, plus voting/execution metadata
 * @throws Error `reasoner produced no samples` when every sample request failed
 * @throws the abort error, unchanged, when `t.signal` cancels sampling
 */
export async function verifyReasoning(
  localBaseURL: string,
  model: string,
  t: TurnOpts,
  votes: number,
): Promise<{ turn: BrainTurn; meta: VerifyMeta }> {
  const maxTokens = t.maxTokens ?? 2048
  const messages = toOllamaMessages(t.system, t.messages)
  const optillm = process.env.DRAGON_OPTILLM_URL

  let chosen: string
  let meta: VerifyMeta

  if (optillm) {
    chosen = await openaiChat(optillm, process.env.DRAGON_OPTILLM_MODEL || model, messages, 0.7, maxTokens, t.signal)
    meta = { via: 'optillm', votes: 1, agreement: null }
  } else {
    const temps = [0.3, 0.6, 0.8, 1.0, 1.1, 0.5, 0.9].slice(0, Math.max(1, votes))
    while (temps.length < votes) temps.push(0.7) // more votes than presets → pad with 0.7
    const samples: string[] = []
    for (const temp of temps) {
      try {
        samples.push(await ollamaChat(localBaseURL, model, messages, temp, maxTokens, t.signal))
      } catch (e) {
        // Cancellation is not a failed vote: stop sampling and let the caller see it.
        if (t.signal?.aborted || (e as { name?: string })?.name === 'AbortError') throw e
        /* any other failed sample just doesn't vote */
      }
    }
    if (!samples.length) throw new Error('reasoner produced no samples')

    // Keys are model-generated text. A Map keeps answers such as "constructor" or
    // "__proto__" away from Object.prototype and preserves first-seen order.
    const tally = new Map<string, { count: number; sample: string }>()
    for (const s of samples) {
      const key = norm(extractAnswer(s))
      if (!key) continue
      const entry = tally.get(key)
      if (entry) entry.count++
      else tally.set(key, { count: 1, sample: s }) // remember the first full text for this answer
    }

    let winner: { count: number; sample: string } | undefined
    for (const entry of tally.values()) {
      if (!winner || entry.count > winner.count) winner = entry
    }

    chosen = winner ? winner.sample : samples[0] // no extractable answer → first sample
    meta = {
      via: 'self-consistency',
      votes: samples.length,
      agreement: winner ? winner.count / samples.length : 0,
      distribution: Object.fromEntries([...tally].map(([key, entry]) => [key, entry.count])),
    }
  }

  // Execution-based verification for code/security candidates (opt-in + sandboxed).
  meta.exec = await executeVerify(chosen, t.signal)

  // Nothing was streamed while sampling, so emit the chosen answer once.
  t.onDelta?.(chosen)
  return { turn: { text: chosen, toolCalls: [] }, meta }
}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Brain abstraction — the reasoning layer behind the Dragon agent.
3
+ *
4
+ * A Brain runs ONE model turn: given the system prompt, the running message
5
+ * history, and the available tool specs, it streams text deltas and returns the
6
+ * assembled text plus any tool calls the model wants executed. The agent loop
7
+ * (src/agent/loop.ts) owns the loop; the Brain owns only "talk to the model".
8
+ *
9
+ * Tool calls are normalized to a single shape across providers so the loop is
10
+ * provider-agnostic — Anthropic content blocks and OpenAI `tool_calls` both map
11
+ * onto {id,name,arguments}.
12
+ *
13
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
14
+ */
15
+
16
/** Description of one tool that is offered to the model for a turn. */
export interface ToolSpec {
  /** Tool identifier. */
  name: string
  /** Free-text description of the tool. */
  description: string
  /** JSON Schema for the arguments object. */
  parameters: Record<string, unknown>
}
22
+
23
/** A tool invocation requested by the model, in the provider-neutral shape. */
export interface ToolCall {
  /** Identifier of this call; a tool-result message points back at it via `toolCallId`. */
  id: string
  /** Name of the tool to run. */
  name: string
  /** Arguments object for the tool. */
  arguments: Record<string, unknown>
}
28
+
29
/** Who authored a message in the running history. */
export type Role =
  | 'user'
  | 'assistant'
  | 'tool'
30
+
31
/** One entry of the running message history handed to a Brain. */
export interface BrainMessage {
  /** Author of the message. */
  role: Role
  /** Natural-language text (assistant prose, user input, or a tool result string). */
  content: string
  /** Present on assistant turns that requested tools. */
  toolCalls?: ToolCall[]
  /** Present on role:'tool' — links the result to the assistant's call. */
  toolCallId?: string
  /** Name of the tool that produced the result. */
  toolName?: string
}
41
+
42
/** Outcome of one model turn. */
export interface BrainTurn {
  /** The assembled text of the turn. */
  text: string
  /** Tool calls the model wants executed; empty when there are none. */
  toolCalls: ToolCall[]
}
46
+
47
/** Everything a Brain needs to run one turn. */
export interface TurnOpts {
  /** System prompt. */
  system: string
  /** Running message history. */
  messages: BrainMessage[]
  /** Specs of the tools available for this turn. */
  tools: ToolSpec[]
  /** Receives text as it is produced; non-streaming brains call it once with the whole answer. */
  onDelta?: (s: string) => void
  /** Cancels the in-flight request. */
  signal?: AbortSignal
  /** Token budget for the reply; each brain applies its own default when omitted. */
  maxTokens?: number
}
55
+
56
/** A model backend that can run a single turn; the agent loop owns the looping. */
export interface Brain {
  /** provider id: 'claude' | 'openai' | 'local' | 'worker' */
  id: string
  /** Model identifier this brain talks to. */
  model: string
  /** Run ONE model turn and resolve with its text and requested tool calls. */
  turn(opts: TurnOpts): Promise<BrainTurn>
}
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Worker brain — Ghost Protocol's Cloudflare Workers AI (Llama 3.3 70B) as the
3
+ * agent's reasoning brain. The free, zero-key fallback: no API key, just
4
+ * `dragon login`. Tools still execute locally in the CLI; this only does
5
+ * inference, via POST /api/v1/cli/brain (one turn, non-streaming).
6
+ *
7
+ * Copyright 2026 Ghost Protocol (Pvt) Ltd. All Rights Reserved.
8
+ */
9
+
10
+ import { resolveAuth } from '../auth.js'
11
+ import type { Brain, BrainTurn, TurnOpts } from './types.js'
12
+
13
/**
 * Parse a JSON-encoded tool-arguments string.
 *
 * Never throws. Malformed JSON yields `{}`, and so does valid JSON that is not
 * an object (`null`, arrays, strings, numbers, booleans) — the declared return
 * type promises an arguments object, and callers index into it.
 */
function safeParse(s: string): Record<string, unknown> {
  let parsed: unknown
  try { parsed = JSON.parse(s) } catch { return {} }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return {}
  return parsed as Record<string, unknown>
}
16
+
17
/** The raw JSON shape the /api/v1/cli/brain endpoint returns. */
export interface WorkerResponse {
  /** The model's text; may be missing. */
  response?: string
  /** Requested tool calls; `arguments` may arrive as a JSON string or as an object. */
  tool_calls?: { name?: string; arguments?: unknown }[]
}
22
+
23
/** True for a non-null, non-array object — the only shape accepted as a record here. */
function isPlainRecord(v: unknown): v is Record<string, unknown> {
  return typeof v === 'object' && v !== null && !Array.isArray(v)
}

/**
 * Normalize the Cloudflare brain's (Llama 3.3, fp8) raw JSON into a BrainTurn.
 * Deliberately tolerant — the fp8 tool-caller is flaky: arguments arrive as a
 * JSON string OR an object OR not at all, tool_calls can be nameless/garbage,
 * and `response` can be missing. Every one of those degrades gracefully instead
 * of throwing into the agent loop. (Pure → unit-tested in test/brain.test.ts.)
 *
 * The payload is parsed JSON, so every field is re-checked at runtime instead of
 * trusting the declared type:
 *  - a null/absent payload or a non-string `response` gives empty text,
 *  - a `tool_calls` that is not an array gives no tool calls,
 *  - entries that are not objects, or have no truthy `name`, are dropped,
 *  - `arguments` that do not end up as a plain object become `{}`.
 *
 * @param data parsed response body
 * @returns the turn; tool-call ids are `wc_<index>` over the kept calls
 */
export function normalizeWorkerTurn(data: WorkerResponse): BrainTurn {
  const raw: { response?: unknown; tool_calls?: unknown } = data ?? {}
  const text = typeof raw.response === 'string' ? raw.response : ''
  const calls: unknown[] = Array.isArray(raw.tool_calls) ? raw.tool_calls : []
  const toolCalls = calls
    .filter((c): c is Record<string, unknown> => isPlainRecord(c) && Boolean(c.name))
    .map((c, i) => {
      const parsed = typeof c.arguments === 'string' ? safeParse(c.arguments) : c.arguments
      return {
        id: `wc_${i}`,
        name: String(c.name),
        // Checked here as well, so a JSON string holding e.g. "null" or "[1]" cannot leak through.
        arguments: isPlainRecord(parsed) ? parsed : {},
      }
    })
  return { text, toolCalls }
}
41
+
42
/**
 * Build the brain that talks to POST `<apiBase>/api/v1/cli/brain`.
 *
 * Each turn resolves credentials with `resolveAuth()`, sends the system prompt,
 * history and tool specs in one non-streaming request, and normalizes the reply
 * with `normalizeWorkerTurn`. `max_tokens` defaults to 1024.
 *
 * `turn` throws an Error with a user-facing message when: no credentials are
 * configured, the endpoint cannot be reached, the server answers 401 / 429 /
 * any other non-ok status, or the body of an ok response cannot be read as
 * JSON. An abort error is rethrown unchanged.
 */
export function makeWorkerBrain(): Brain {
  return {
    id: 'worker',
    model: 'cloudflare:llama-3.3-70b',
    async turn(t: TurnOpts): Promise<BrainTurn> {
      const { apiBase, headers, mode } = resolveAuth()
      if (mode === 'none') throw new Error('the Cloudflare brain needs sign-in — run `dragon login` (or use `--brain local`).')

      let res: Response
      try {
        res = await fetch(`${apiBase}/api/v1/cli/brain`, {
          method: 'POST',
          headers: { 'content-type': 'application/json', ...headers },
          body: JSON.stringify({ system: t.system, messages: t.messages, tools: t.tools, max_tokens: t.maxTokens ?? 1024 }),
          signal: t.signal,
        })
      } catch (e) {
        if ((e as { name?: string })?.name === 'AbortError') throw e
        throw new Error(`can't reach the Cloudflare brain at ${apiBase} — ${e instanceof Error ? e.message : String(e)}.`)
      }

      if (res.status === 401) throw new Error('not signed in — run `dragon login`.')
      if (res.status === 429) throw new Error('daily quota reached on the Cloudflare brain — try tomorrow, or `--brain claude`/`--brain local`.')
      if (!res.ok) throw new Error(`Cloudflare brain HTTP ${res.status}: ${(await res.text().catch(() => '')).slice(0, 200)}`)

      // An ok status does not guarantee a JSON body (proxy error page, truncated
      // reply). Report that in the same style as the other failures instead of
      // letting a bare SyntaxError escape into the agent loop.
      let payload: WorkerResponse
      try {
        payload = (await res.json()) as WorkerResponse
      } catch (e) {
        if ((e as { name?: string })?.name === 'AbortError') throw e
        throw new Error(`Cloudflare brain returned an unreadable response — ${e instanceof Error ? e.message : String(e)}.`)
      }

      const turn = normalizeWorkerTurn(payload)
      if (turn.text && t.onDelta) t.onDelta(turn.text) // non-streaming endpoint → emit the whole answer once
      return turn
    },
  }
}