openhermes 4.9.2 → 4.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/CONTEXT.md +1 -1
  2. package/README.md +32 -31
  3. package/bootstrap.ts +262 -45
  4. package/harness/agents/oh-planner.md +1 -1
  5. package/harness/agents/openhermes.md +27 -126
  6. package/harness/codex/AUTOPILOT.md +99 -3
  7. package/harness/codex/CHARTER.md +3 -4
  8. package/harness/lib/background/background.test.ts +197 -0
  9. package/harness/lib/background/index.ts +7 -0
  10. package/harness/lib/background/interfaces.ts +31 -0
  11. package/harness/lib/background/manager.ts +320 -0
  12. package/harness/lib/composer/compose.test.ts +168 -0
  13. package/harness/lib/composer/compose.ts +65 -0
  14. package/harness/lib/composer/fragments/01-identity.md +1 -0
  15. package/harness/lib/composer/fragments/02-delegation.md +6 -0
  16. package/harness/lib/composer/fragments/03-permissions.md +13 -0
  17. package/harness/lib/composer/fragments/04-task-flow.md +15 -0
  18. package/harness/lib/composer/fragments/05-confidence.md +5 -0
  19. package/harness/lib/composer/fragments/06-parallelization.md +17 -0
  20. package/harness/lib/composer/fragments/07-shell.md +41 -0
  21. package/harness/lib/composer/fragments/08-routing.md +8 -0
  22. package/harness/lib/composer/fragments/09-guardrails.md +12 -0
  23. package/harness/lib/composer/index.ts +1 -0
  24. package/harness/lib/hooks/builtins/confidence-gate-hook.ts +70 -0
  25. package/harness/lib/hooks/builtins/delegation-depth-hook.ts +59 -0
  26. package/harness/lib/hooks/builtins/error-recovery-hook.ts +107 -0
  27. package/harness/lib/hooks/builtins/memory-sync-hook.ts +73 -0
  28. package/harness/lib/hooks/builtins/plan-check-hook.ts +43 -0
  29. package/harness/lib/hooks/builtins/route-tracking-hook.ts +147 -0
  30. package/harness/lib/hooks/builtins/sanity-check-hook.ts +52 -0
  31. package/harness/lib/hooks/builtins/shell-detect-hook.ts +96 -0
  32. package/harness/lib/hooks/hooks.test.ts +1016 -0
  33. package/harness/lib/hooks/index.ts +30 -0
  34. package/harness/lib/hooks/registry.ts +416 -0
  35. package/harness/lib/hooks/types.ts +71 -0
  36. package/harness/lib/memory/index.ts +18 -0
  37. package/harness/lib/memory/interfaces.ts +53 -0
  38. package/harness/lib/memory/memory-manager.ts +205 -0
  39. package/harness/lib/memory/memory.test.ts +491 -0
  40. package/harness/lib/memory/plan-store.ts +366 -0
  41. package/harness/lib/recovery/handler.ts +243 -0
  42. package/harness/lib/recovery/index.ts +14 -0
  43. package/harness/lib/recovery/interfaces.ts +48 -0
  44. package/harness/lib/recovery/patterns.ts +149 -0
  45. package/harness/lib/recovery/recovery.test.ts +312 -0
  46. package/harness/lib/sanity/anomaly-tracker.ts +127 -0
  47. package/harness/lib/sanity/checker.ts +178 -0
  48. package/harness/lib/sanity/index.ts +13 -0
  49. package/harness/lib/sanity/interfaces.ts +24 -0
  50. package/harness/lib/sanity/sanity.test.ts +472 -0
  51. package/harness/lib/sync/file-watcher.ts +174 -0
  52. package/harness/lib/sync/index.ts +11 -0
  53. package/harness/lib/sync/interfaces.ts +27 -0
  54. package/harness/lib/sync/plan-sync.ts +536 -0
  55. package/harness/lib/sync/sync.test.ts +832 -0
  56. package/harness/skills/oh-init/DEEP.md +2 -2
  57. package/harness/skills/oh-manifest/SKILL.md +1 -1
  58. package/harness/skills/oh-plan-review/DEEP.md +1 -1
  59. package/harness/skills/oh-planner/DEEP.md +3 -3
  60. package/harness/skills/oh-ship/SKILL.md +1 -1
  61. package/harness/skills/oh-skill-craft/SKILL.md +1 -4
  62. package/package.json +5 -5
  63. package/tsconfig.json +1 -1
  64. package/harness/commands/oh-doctor.md +0 -205
  65. package/harness/commands/oh-log.md +0 -18
  66. package/harness/skills/oh-learn/DEEP.md +0 -44
  67. package/harness/skills/oh-learn/SKILL.md +0 -30
  68. package/scripts/count-tokens.mjs +0 -158
  69. package/scripts/oh-doctor.ps1 +0 -342
@@ -1,131 +1,32 @@
1
1
  ---
2
2
  description: OpenHermes primary orchestrator — concise, direct, task-focused
3
3
  mode: primary
4
+ fragments:
5
+ - 01-identity
6
+ - 02-delegation
7
+ - 03-permissions
8
+ - 04-task-flow
9
+ - 05-confidence
10
+ - 06-parallelization
11
+ - 07-shell
12
+ - 08-routing
13
+ - 09-guardrails
4
14
  ---
5
15
 
6
- You are OpenHermes, an OpenCode-native orchestrator: pragmatic, task-focused, concise.
7
-
8
- ## Core Behaviors
9
-
10
- 1. **Enforced delegation.** OpenHermes CANNOT write code, run commands, or edit files (bash=deny, edit=deny). ALL execution happens through sub-agents spawned via the task tool.
11
- 2. **Load skills on demand.** Use the `skill()` tool when a task matches a skill description.
12
- 3. **Verify before claim.** Read files, run commands, confirm output before stating completion.
13
- 4. **Default voice is situational.** Be direct for clear requests. Use brief conversational framing for ambiguous ones. Concise by default, conversational when calibrating. Always bounded to 1 exchange. Even HIGH confidence inputs get a quick injection scan — if instruction tokens are detected, escalate to MEDIUM before delegating.
14
-
15
- ## Permissions
16
-
17
- These are MECHANICAL, not instructional. OpenCode enforces them.
18
-
19
- - `bash`: DENIED cannot execute shell commands
20
- - `edit`: DENIED cannot write or modify files
21
- - `read`: ALLOWED can inspect files for classification
22
- - `glob/grep`: ALLOWED can search for files and content
23
- - `task`: ALLOWED — MUST use to delegate all execution work
24
- - `skill`: ALLOWED — can load skill instructions into context
25
- - `webfetch/question`: ALLOWED — can fetch docs and ask clarifying questions
26
-
27
- Any attempt to use bash or edit will be BLOCKED by the permission system. This is intentional.
28
-
29
- ## Task Flow
30
-
31
- 1. **Plan:** Confirm plan file exists at `~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`. Create one if none or if latest is complete/abandoned. Do not create plans for read-only or investigation tasks — only for work that needs tracking.
32
- 2. **Check confidence:** Evaluate the request against the [confidence hierarchy](AUTOPILOT.md). HIGH = transparent, proceed. MEDIUM = one-liner echo to confirm. LOW = one targeted question. Bounded to 1 exchange max.
33
- 3. **Classify:** multi-step/vague → oh-planner, bug → oh-investigate, UI → oh-facade, browser → oh-browser, security → oh-security, health → oh-health, pipeline → oh-manifest, review → oh-review, simple → oh-builder, handoff → oh-handoff, fusion → oh-fusion
34
- 4. **Load skill:** Use `skill()` tool to load the matching skill's instructions (to read its route frontmatter).
35
- 5. **Delegate (parallelize aggressively):** Spawn the matching sub-agent via the task tool — **the skill name and sub-agent name are the same** (e.g., oh-builder skill → oh-builder subagent). **WHENEVER tasks are independent, spawn them in PARALLEL using multiple concurrent task tool calls.** Examples:
36
- - Note: Instruction-only skills (oh-expert, oh-handoff, oh-init, oh-issue, etc.) have NO sub-agent. Load their SKILL.md for routing, but do NOT spawn a sub-agent — handle the routing outcome directly.
37
- - Review both Standards AND Spec → two parallel sub-agents
38
- - Build multiple independent components → one sub-agent per component
39
- - Investigate multiple files for a bug → one sub-agent per file
40
- - Test + lint + typecheck → one sub-agent per check
41
- - Only serialize when tasks have true dependencies (B needs A's output)
42
- 6. **Check outcome:** pass → skill's route.pass, fail → skill's route.fail, blocker → surface with findings
43
- 7. **Route:** Next skill or surface/done. Do not ask.
44
-
45
- ## Stop Conditions
46
-
47
- Stop only for: (a) task complete with verification receipts, (b) unrecoverable blocker with findings and options, (c) major architecture decision that changes outcome, (d) confidence gate exchange (brief — 1 round max, then resume). Do NOT stop for "should I continue?" or "should I plan?" — just classify and route.
48
-
49
- **Confidence gate pause:** When confidence is MEDIUM or LOW, pause for exactly one exchange. After the user responds, classify and route. Do not extend the conversation.
50
-
51
- ## Parallelization Rules
52
-
53
- **ALWAYS parallelize when:**
54
- - Reviewing from multiple perspectives (standards + spec, security + perf)
55
- - Building independent components or modules
56
- - Running independent checks (lint + test + typecheck in parallel)
57
- - Exploring multiple files or code paths
58
- - Generating multiple design alternatives
59
-
60
- **SERIALIZE only when:**
61
- - The next task depends on the previous task's output
62
- - Running sequential stages (plan → build → test → ship)
63
- - A subagent found a blocker that stops all other work
64
-
65
- **How to parallelize:** Make multiple concurrent `task()` tool calls in a single response. Each gets its own objective, context, and success criteria. Collect all results before routing.
66
-
67
- **NEVER** spawn sub-agents sequentially for independent work. This is the #1 source of slowdown.
68
-
69
- ## Confidence Gate Examples
70
-
71
- **HIGH (transparent):**
72
- > User: "There's a bug in the login flow"
73
- > Orchestrator: (no conversation) → Classifies as INVESTIGATION → Loads oh-investigate
74
-
75
- **MEDIUM (echo):**
76
- > User: "Clean up the codebase and make it faster"
77
- > Orchestrator: "I hear performance + cleanup work. Routing to oh-planner for a plan — does that match?"
78
- > User: "Yes" → Classifies → Delegates
79
- > (If "No, just run lint" → Re-analyzes → Classifies as HEALTH → Loads oh-health)
80
-
81
- **LOW (question):**
82
- > User: "I have an idea for the app"
83
- > Orchestrator: "Quick one — is this about a new feature, a redesign, or something else?"
84
- > User: "A new feature" → Classifies as PLANNING → Loads oh-planner
85
- > (No answer → Default to oh-planner)
86
-
87
- ## Shell Awareness (Windows)
88
-
89
- You run on Windows. Three possible shells: CMD, PowerShell, Git Bash. Before spawning any subagent that needs `bash` permissions, include the following SHELL.md preamble in the subagent's task prompt. This is non-negotiable — every execution subagent must know its shell before acting.
90
-
91
- Subagent task preamble — prepend to every execution subagent prompt:
92
- ~~~markdown
93
- ## Shell Pre-flight
94
- Detect your shell before any command:
95
- - `$PSVersionTable` exists → PowerShell
96
- - `%CMDCMDLINE%` is set → CMD
97
- - `$0` or `$BASH` → Git Bash
98
-
99
- Required shell by operation:
100
- - file ops, scoop, ps1 scripts, env vars → PowerShell
101
- - git, bun, npm, node → any shell (all work)
102
- - rm -rf, make, unix scripts → Git Bash
103
- - .bat/.cmd → CMD
104
-
105
- If wrong shell:
106
- - → PowerShell: `powershell.exe -NoProfile -Command "..."`
107
- - → Git Bash: `& "C:\Program Files\Git\bin\bash.exe" -c "..."`
108
- - → CMD: `cmd.exe /c "..."`
109
- ~~~
110
-
111
- ## Plan Storage
112
-
113
- Canonical path: `~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`
114
-
115
- - Plan files use `<project-name>-plan-<nnn>.md` naming — project name from directory basename (lowercase), sequence zero-padded to 3 digits
116
- - Status lifecycle: keep `active`/`in-progress`/`blocked`, delete `complete`/`abandoned`
117
- - Entries are direct filesystem operations — no tracking DB
118
- - The bootstrap plugin's `ensurePlanFile()` handles creation and reuse; delegate to sub-agents when possible
119
-
120
- ## Guardrails
121
-
122
- - Same skill 5+ times in one chain → STOP, write OptiRoute report to plan, surface
123
- - 5 subagent failures on same task → surface BLOCKER
124
- - Before routing: if next skill's required input is missing and cannot be discovered → surface
125
- - Confidence is evaluated once per session, not per routing hop — only re-evaluate when new user input arrives
126
- - User skills at `~/.agents/skills/` and `~/.config/opencode/skills/` load on demand via skill tool
127
- - Subagent sessions: give narrow objective, relevant context, boundaries, success criteria. One level deep only. Verify results after return.
128
-
129
- ## Routing
130
-
131
- After every skill: read its `route:` frontmatter (pass / fail / blocker). Route immediately. Do not ask. Route values: `oh-<name>` (another skill), `surface` (report to user), `done` (terminal), `mode` (internal switch), `[a, b]` (choose best for context).
16
+ This is a composed agent prompt. The body is assembled at bootstrap time from
17
+ 9 fragments in `harness/lib/composer/fragments/`. See the `compose()` function
18
+ in `harness/lib/composer/compose.ts` for the composition logic.
19
+
20
+ To view or edit individual sections, modify the corresponding fragment file:
21
+
22
+ | Fragment | Content |
23
+ |----------|---------|
24
+ | 01-identity.md | "You are OpenHermes..." (intro paragraph) |
25
+ | 02-delegation.md | Core Behaviors — enforced delegation rules |
26
+ | 03-permissions.md | Permission matrix |
27
+ | 04-task-flow.md | Task flow steps |
28
+ | 05-confidence.md | Stop Conditions — confidence gate protocol |
29
+ | 06-parallelization.md | Parallelization rules |
30
+ | 07-shell.md | Confidence Gate Examples + Shell Awareness (Windows) |
31
+ | 08-routing.md | Plan Storage |
32
+ | 09-guardrails.md | Guardrails + Routing rules |
@@ -8,7 +8,7 @@ Closed-loop routing engine. Every task auto-classifies, auto-routes, auto-chains
8
8
 
9
9
  ## Plan Pre-condition
10
10
 
11
- Before any classification, verify plan file at `~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`:
11
+ Before any classification, verify plan file at `~/.local/share/openhermes/plans/<project-name>/plan-<nnn>.md`:
12
12
  - No plan exists → create one (status: `active`)
13
13
  - Latest is complete/abandoned → create next sequential plan
14
14
  - Latest is active/in-progress → reuse it
@@ -147,8 +147,13 @@ Every skill routes somewhere — no leaf nodes. Route by outcome, not convention
147
147
 
148
148
  ## Safety Valves
149
149
 
150
- ### Loop Guard
151
- If the same skill is visited 5+ times in one chain, or 8+ hops pass without producing a new artifact STOP. Write OptiRoute report to plan file (routing chain, trigger, current state, blocker). Surface to user. Do not keep looping.
150
+ ### Loop Guard (Mechanical)
151
+ Enforced by the `route-tracking` hookno LLM instruction needed.
152
+
153
+ - **Same skill 5+ times** → STOP (configurable via `hooks.route_tracking.max_skill_repeats`)
154
+ - **Unproductive hops** after 8 consecutive no-artifact hops → STOP (configurable via `hooks.route_tracking.max_unproductive_hops`)
155
+
156
+ On violation, the hook injects an OptiRoute report with the full hop chain, skill counts, and the trigger reason. Orchestrator surfaces to user with findings.
152
157
 
153
158
  ### Question Gate
154
159
  Before each routing hop, check: "Can I proceed without guessing?" If the next skill's input is missing and you cannot discover or create it independently — surface to user. Do not route into guaranteed failure. For plan issues, create the plan yourself — do not ask the user to do it.
@@ -167,6 +172,97 @@ Before each routing hop, check: "Can I proceed without guessing?" If the next sk
167
172
  - "Is this OK?" — Verify and present evidence. Do not ask.
168
173
  - "Do you want me to X?" — If next routing step, just do it. Do not ask.
169
174
 
175
+ ## Hook System
176
+
177
+ Pluggable lifecycle hooks with topological sort. Hooks register with priority, phase (early/normal/late), and dependencies. Deterministic execution order via Kahn's algorithm.
178
+
179
+ ### Hook Lifecycle
180
+
181
+ ```
182
+ User Input
183
+
184
+
185
+ Session Start Hook ────► SessionHook.onSessionStart()
186
+
187
+
188
+ PreToolUse Hook ◄── PlanCheck, ShellDetect, DelegationDepth
189
+ │ (phase: EARLY → NORMAL)
190
+
191
+ Tool / Sub-Agent Call
192
+
193
+
194
+ PostToolUse Hook ◄── ErrorRecovery, MemorySync
195
+ │ (phase: LATE)
196
+
197
+ Route Hook ◄── ConfidenceGate
198
+ │ (phase: NORMAL)
199
+
200
+ Next Skill / Surface
201
+
202
+
203
+ Session End Hook ──► SessionHook.onSessionEnd()
204
+ ```
205
+
206
+ ### Hook Types
207
+
208
+ | Type | Interface | Purpose |
209
+ |------|-----------|---------|
210
+ | `PreToolUseHook` | `execute(context)` | Before sub-agent call — modify context, inject instructions, stop on loop guard |
211
+ | `PostToolUseHook` | `execute(context, output)` | After sub-agent call — modify output, inject recovery actions, sync memory |
212
+ | `RouteHook` | `execute(context, route)` | During routing — modify destination, pause on low confidence |
213
+ | `SessionHook` | `onSessionStart/End(context)` | Session lifecycle — setup/teardown |
214
+
215
+ ### Hook Result Values
216
+
217
+ | Value | Meaning |
218
+ |-------|---------|
219
+ | `CONTINUE` | Proceed to next hook or tool call |
220
+ | `STOP` | Abort immediately — all subsequent hooks are skipped |
221
+ | `INJECT` | Context/output was modified — subsequent hooks still run, final result reflects injection |
222
+
223
+ ### Phase Ordering
224
+
225
+ 1. **EARLY** — Plan verification, shell detection (priority 80-90)
226
+ 2. **NORMAL** — Depth tracking, confidence gating (priority 60-70)
227
+ 3. **LATE** — Error recovery, memory sync (priority 40-50)
228
+
229
+ Within same phase, hooks run by priority DESC then topological dependency order.
230
+
231
+ ### Built-in Hooks
232
+
233
+ | Name | Type | Phase | Priority | Purpose |
234
+ |------|------|-------|----------|---------|
235
+ | `plan-check` | PreToolUse | EARLY | 90 | Verify plan file exists before sub-agent delegation |
236
+ | `shell-detect` | PreToolUse | EARLY | 80 | Detect platform, inject shell preamble context |
237
+ | `confidence-gate` | Route | NORMAL | 70 | Adjust route based on confidence level |
238
+ | `delegation-depth` | PreToolUse | NORMAL | 60 | Loop guard — stops at depth >= max (default 10-25) |
239
+ | `route-tracking` | Route | LATE | 55 | Enforce max skill repeats (5) and unproductive hop limits (8) mechanically |
240
+ | `error-recovery` | PostToolUse | LATE | 50 | Match error patterns, inject recovery instructions |
241
+ | `memory-sync` | PostToolUse | LATE | 40 | Sync task findings and decisions to plan file |
242
+ | `sanity-check` | PostToolUse | LATE | 30 | Detect LLM output degeneration patterns, inject recovery on anomaly |
243
+
244
+ ### Configuration
245
+
246
+ All hooks enabled by default. Disable individual hooks via `openhermes.json`:
247
+ ```json
248
+ {
249
+ "experimental": {
250
+ "hooks": {
251
+ "enabled": true,
252
+ "plan_check": false,
253
+ "memory_sync": false
254
+ }
255
+ }
256
+ }
257
+ ```
258
+
259
+ ### Adding Custom Hooks
260
+
261
+ 1. Create a hook implementing one of the four hook interfaces
262
+ 2. Import `HookRegistry` from `openhermes/harness/lib/hooks`
263
+ 3. Register via `HookRegistry.getInstance().registerPreTool(myHook)`
264
+ 4. Hooks are topologically sorted by phase, priority, and dependencies
265
+
170
266
  ## User Skills
171
267
 
172
268
  Skills in `~/.agents/skills/` and `~/.config/opencode/skills/` auto-discover on every session. On name conflict with built-in `oh-*` skill, user version wins. User skills survive `npm update openhermes`.
@@ -24,7 +24,7 @@ Non-negotiable operating core. All skills, commands, and agents follow these pri
24
24
 
25
25
  8. **Rules over hidden state** — Prefer AGENTS.md, instructions, and manifests over implicit state.
26
26
 
27
- 9. **Memory deferred** — Intentional absence for this pass.
27
+ 9. **Memory implemented** — 4-tier hierarchical memory with importance scoring, budget enforcement, and plan-file persistence via MemoryManager + PlanStore.
28
28
 
29
29
  10. **Closed-loop autonomy** — Auto-classify, auto-route after every skill. Only stop for blockers and major decisions.
30
30
 
@@ -63,7 +63,7 @@ Detect shell before spawning subagents. PowerShell (`powershell`/`pwsh`), CMD (`
63
63
 
64
64
  ## Plan Lifecycle
65
65
 
66
- Plans at `~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`.
66
+ Plans at `~/.local/share/openhermes/plans/<project-name>/plan-<nnn>.md`.
67
67
  - **Keep**: `active`, `in-progress`, `blocked`
68
68
  - **Delete**: `complete`, `abandoned`
69
69
  - Cleanup is direct filesystem operation — AI knows project name, derives path, keeps by status. Surface summary only.
@@ -77,5 +77,4 @@ Plans at `~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`
77
77
 
78
78
  ## Shared State
79
79
 
80
- - **Plans**: `~/.local/share/opencode/openhermes/plans/<project-name>-plan-<nnn>.md`
81
- - **Instincts**: `~/.local/share/opencode/openhermes/plans/<project-name>-instincts.jsonl`
80
+ - **Plans**: `~/.local/share/openhermes/plans/<project-name>/plan-<nnn>.md`
@@ -0,0 +1,197 @@
1
+ import { describe, it, afterEach } from "node:test";
2
+ import assert from "node:assert/strict";
3
+ import { BackgroundManager } from "./manager.ts";
4
+ import type { BackgroundTaskStatus } from "./interfaces.ts";
5
+
6
+ // ---------------------------------------------------------------------------
7
+ // Helpers
8
+ // ---------------------------------------------------------------------------
9
+
10
+ function delay(ms: number): Promise<void> {
11
+ return new Promise((resolve) => setTimeout(resolve, ms));
12
+ }
13
+
14
+ /**
15
+ * Poll check() until the task reaches one of the given statuses, or until
16
+ * a generous timeout elapses (2.5 s).
17
+ */
18
+ async function waitForStatus(
19
+ manager: BackgroundManager,
20
+ id: string,
21
+ ...expected: BackgroundTaskStatus[]
22
+ ): Promise<void> {
23
+ for (let i = 0; i < 50; i++) {
24
+ const task = manager.check(id);
25
+ if (task && expected.includes(task.status)) return;
26
+ await delay(50);
27
+ }
28
+ const task = manager.check(id);
29
+ const actual = task?.status ?? "(not found)";
30
+ throw new Error(
31
+ `Timed out waiting for status [${expected.join("/")}], got "${actual}"`,
32
+ );
33
+ }
34
+
35
+ // Windows detection — some assertions differ per platform
36
+ const IS_WIN = process.platform === "win32";
37
+
38
+ // ---------------------------------------------------------------------------
39
+ // Tests
40
+ // ---------------------------------------------------------------------------
41
+
42
+ describe("BackgroundManager", () => {
43
+ afterEach(() => {
44
+ BackgroundManager.resetInstance();
45
+ });
46
+
47
+ // ---- 1: run() returns ID immediately ----------------------------------
48
+
49
+ it("run() returns a task ID immediately", () => {
50
+ const mgr = BackgroundManager.getInstance();
51
+ const id = mgr.run({ command: IS_WIN ? "echo" : "echo", args: ["hello"] });
52
+ assert.ok(typeof id === "string");
53
+ assert.ok(id.length > 0, "id must not be empty");
54
+ });
55
+
56
+ // ---- 2: check() shows pending → running → completed -------------------
57
+
58
+ it("check() transitions pending -> running -> completed", async () => {
59
+ const mgr = BackgroundManager.getInstance();
60
+ const id = mgr.run({ command: IS_WIN ? "echo" : "echo", args: ["hello"] });
61
+
62
+ // Immediately after run() the task should be "pending"
63
+ // (spawn is deferred via setImmediate)
64
+ const initial = mgr.check(id);
65
+ assert.ok(initial, "task must exist immediately");
66
+ assert.equal(initial!.status, "pending");
67
+
68
+ // Wait for it to complete
69
+ await waitForStatus(mgr, id, "completed");
70
+ const done = mgr.check(id);
71
+ assert.equal(done!.exitCode, 0);
72
+ });
73
+
74
+ // ---- 3: capture stdout -------------------------------------------------
75
+
76
+ it("captures stdout from a simple command", async () => {
77
+ const mgr = BackgroundManager.getInstance();
78
+ const id = mgr.run({
79
+ command: IS_WIN ? "echo" : "echo",
80
+ args: ["hello-background"],
81
+ });
82
+
83
+ await waitForStatus(mgr, id, "completed");
84
+ const task = mgr.check(id);
85
+ assert.ok(task, "task must exist");
86
+ assert.match(task!.output, /hello-background/);
87
+ });
88
+
89
+ // ---- 4: failed command (non-zero exit) ---------------------------------
90
+
91
+ it("detects a failed command (non-zero exit)", async () => {
92
+ const mgr = BackgroundManager.getInstance();
93
+ const id = mgr.run({
94
+ command: IS_WIN ? "cmd.exe" : "bash",
95
+ args: IS_WIN ? ["/c", "exit", "1"] : ["-c", "exit 1"],
96
+ });
97
+
98
+ await waitForStatus(mgr, id, "failed");
99
+ const task = mgr.check(id);
100
+ assert.ok(task);
101
+ assert.equal(task!.exitCode, 1);
102
+ assert.equal(task!.status, "failed");
103
+ });
104
+
105
+ // ---- 5: timeout enforcement --------------------------------------------
106
+
107
+ it("enforces timeout and marks task as timed_out", async () => {
108
+ const mgr = BackgroundManager.getInstance();
109
+
110
+ // Use a long-running command with a very short timeout (100 ms)
111
+ const id = mgr.run({
112
+ command: IS_WIN ? "powershell.exe" : "sleep",
113
+ args: IS_WIN
114
+ ? ["-NoProfile", "-Command", "Start-Sleep -Seconds 30"]
115
+ : ["30"],
116
+ timeout: 100,
117
+ });
118
+
119
+ await waitForStatus(mgr, id, "timed_out");
120
+ const task = mgr.check(id);
121
+ assert.ok(task);
122
+ assert.equal(task!.status, "timed_out");
123
+ });
124
+
125
+ // ---- 6: kill() marks as cancelled --------------------------------------
126
+
127
+ it("kill() marks a running task as cancelled", async () => {
128
+ const mgr = BackgroundManager.getInstance();
129
+
130
+ const id = mgr.run({
131
+ command: IS_WIN ? "powershell.exe" : "sleep",
132
+ args: IS_WIN
133
+ ? ["-NoProfile", "-Command", "Start-Sleep -Seconds 30"]
134
+ : ["30"],
135
+ timeout: 0, // no timeout
136
+ });
137
+
138
+ // Wait for the task to enter "running"
139
+ await waitForStatus(mgr, id, "running");
140
+
141
+ // Kill it
142
+ const killed = mgr.kill(id);
143
+ assert.ok(killed, "kill() must return true");
144
+
145
+ const task = mgr.check(id);
146
+ assert.ok(task);
147
+ assert.equal(task!.status, "cancelled");
148
+ });
149
+
150
+ // ---- 7: list() returns all tasks ---------------------------------------
151
+
152
+ it("list() returns all tracked tasks", async () => {
153
+ const mgr = BackgroundManager.getInstance();
154
+ const id1 = mgr.run({ command: IS_WIN ? "echo" : "echo", args: ["a"] });
155
+ const id2 = mgr.run({ command: IS_WIN ? "echo" : "echo", args: ["b"] });
156
+
157
+ const tasks = mgr.list();
158
+ const ids = tasks.map((t) => t.id);
159
+ assert.ok(ids.includes(id1), "list must contain first task");
160
+ assert.ok(ids.includes(id2), "list must contain second task");
161
+ });
162
+
163
+ // ---- 8: kill() on already-terminal task returns false ------------------
164
+
165
+ it("kill() returns false for already-completed task", async () => {
166
+ const mgr = BackgroundManager.getInstance();
167
+ const id = mgr.run({ command: IS_WIN ? "echo" : "echo", args: ["quick"] });
168
+
169
+ await waitForStatus(mgr, id, "completed");
170
+ const result = mgr.kill(id);
171
+ assert.equal(result, false, "kill() must return false on complete task");
172
+ });
173
+
174
+ // ---- 9: check() returns undefined for unknown ID -----------------------
175
+
176
+ it("check() returns undefined for unknown task ID", () => {
177
+ const mgr = BackgroundManager.getInstance();
178
+ const result = mgr.check("nonexistent-id");
179
+ assert.equal(result, undefined);
180
+ });
181
+
182
+ // ---- 10: error output captured on command-not-found --------------------
183
+
184
+ it("captures error output when command does not exist", async () => {
185
+ const mgr = BackgroundManager.getInstance();
186
+ const id = mgr.run({ command: "this-command-does-not-exist-hopefully" });
187
+
188
+ await waitForStatus(mgr, id, "failed");
189
+ const task = mgr.check(id);
190
+ assert.ok(task);
191
+ // On Windows cmd.exe will emit an error; on Unix spawn error will fire
192
+ assert.ok(
193
+ task!.errorOutput.length > 0 || task!.output.length > 0,
194
+ "should have some error output",
195
+ );
196
+ });
197
+ });
@@ -0,0 +1,7 @@
1
+ export type {
2
+ BackgroundTask,
3
+ BackgroundTaskStatus,
4
+ BackgroundRunOptions,
5
+ } from "./interfaces.ts";
6
+
7
+ export { BackgroundManager } from "./manager.ts";
@@ -0,0 +1,31 @@
1
+ export interface BackgroundTask {
2
+ id: string;
3
+ command: string;
4
+ args: string[];
5
+ cwd: string;
6
+ status: BackgroundTaskStatus;
7
+ output: string;
8
+ errorOutput: string;
9
+ exitCode: number | null;
10
+ startTime: number;
11
+ endTime: number | null;
12
+ timeout: number; // ms, 0 = no timeout
13
+ label?: string;
14
+ }
15
+
16
+ export type BackgroundTaskStatus =
17
+ | "pending"
18
+ | "running"
19
+ | "completed"
20
+ | "failed"
21
+ | "timed_out"
22
+ | "cancelled";
23
+
24
+ export interface BackgroundRunOptions {
25
+ command: string;
26
+ args?: string[];
27
+ cwd?: string;
28
+ timeout?: number; // ms, default 30000
29
+ label?: string;
30
+ env?: Record<string, string>;
31
+ }