npm - openhermes - Versions diffs - 4.0.0 → 4.1.0 - Mend

openhermes 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +5 -5
package/{bootstrap.mjs → bootstrap.ts} +53 -28
package/harness/codex/ROUTING.md +3 -2
package/harness/instructions/RUNTIME.md +2 -1
package/harness/skills/oh-builder/SKILL.md +15 -9
package/harness/skills/oh-gauntlet/SKILL.md +1 -1
package/harness/skills/oh-init/SKILL.md +141 -8
package/harness/skills/oh-investigate/SKILL.md +47 -8
package/harness/skills/oh-learn/SKILL.md +72 -8
package/harness/skills/oh-manifest/SKILL.md +40 -1
package/harness/skills/oh-planner/SKILL.md +3 -1
package/harness/skills/oh-review/SKILL.md +1 -1
package/index.ts +3 -0
package/lib/{harness-resolver.mjs → harness-resolver.ts} +15 -11
package/lib/{logger.mjs → logger.ts} +21 -14
package/package.json +11 -8
package/tsconfig.json +16 -0
package/harness/instructions/CONVENTIONS.md +0 -206
package/index.mjs +0 -3
package/test/plugins-behavioral.test.mjs +0 -64
package/test/plugins.test.mjs +0 -62

package/README.md CHANGED Viewed

@@ -55,7 +55,7 @@ These seven form a pipeline: **think → plan → build → test → ship → se
 | oh-issue | Break plans into vertical-slice issues |
 | oh-prd | Write structured PRDs |
 | oh-caveman | Ultra-compressed response mode |
-| oh-freeze | Freeze dependencies |
+| oh-freeze | Restrict file edits to a specific directory |
 | oh-learn | Learn patterns from the codebase |
 | oh-guard | Safety confirmations for destructive operations |
 | oh-skills-link | Verify skills discovery |
@@ -111,14 +111,14 @@ openhermes-pkg/
 ├── AGENTS.md              # Skill/command/agent inventory
 ├── CONTEXT.md             # Shared language
 ├── ETHOS.md               # Operating principles
-├── bootstrap.mjs          # Plugin loader — registers everything
-├── index.mjs              # Package entrypoint
+├── bootstrap.ts           # Plugin loader — registers everything
+├── index.ts               # Package entrypoint
 ├── harness/
 │   ├── agents/            # Agent manifests (OpenHermes)
 │   ├── codex/             # CONSTITUTION.md
 │   ├── commands/          # Slash command manifests (/oh-doctor)
-│   ├── instructions/      # RUNTIME.md, CONVENTIONS.md
-│       └── skills/            # 25 skill SKILL.md files
+│   ├── instructions/      # RUNTIME.md
+│   └── skills/            # 25 skill SKILL.md files
 └── test/
 ```

package/{bootstrap.mjs → bootstrap.ts} RENAMED Viewed

@@ -1,8 +1,9 @@
 import path from "node:path"
 import fs from "node:fs"
 import { fileURLToPath } from "node:url"
-import { createLogger } from "./lib/logger.mjs"
-import { getHarnessDir, setHarnessRootForTest, resolveHarnessRoot } from "./lib/harness-resolver.mjs"
+import type { Plugin } from "@opencode-ai/plugin"
+import { createLogger } from "./lib/logger.ts"
+import { getHarnessDir, setHarnessRootForTest, resolveHarnessRoot } from "./lib/harness-resolver.ts"
 const log = createLogger("bootstrap")
 const __dirname = path.dirname(fileURLToPath(import.meta.url))
@@ -11,8 +12,8 @@ const OPENHERMES_AGENT = "OpenHermes"
 export { resolveHarnessRoot, setHarnessRootForTest, getHarnessDir }
-function parseFrontmatter(raw) {
-  const frontmatter = {}
+function parseFrontmatter(raw: string | undefined): Record<string, string> {
+  const frontmatter: Record<string, string> = {}
   if (!raw) return frontmatter
   for (const line of raw.split(/\r?\n/)) {
     const idx = line.indexOf(":")
@@ -24,16 +25,25 @@ function parseFrontmatter(raw) {
   return frontmatter
 }
-function readMarkdownDocument(filePath) {
+interface MarkdownDocument {
+  frontmatter: Record<string, string>
+  body: string
+}
+function readMarkdownDocument(filePath: string): MarkdownDocument | null {
   if (!fs.existsSync(filePath)) return null
   const source = fs.readFileSync(filePath, "utf8")
-  const match = source.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/)
+  const match = source.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/)
   const frontmatter = parseFrontmatter(match?.[1] ?? "")
   const body = (match ? match[2] : source).trim()
   return { frontmatter, body }
 }
-function readMarkdownDirectory(dir) {
+interface DirEntry extends MarkdownDocument {
+  name: string
+}
+function readMarkdownDirectory(dir: string): DirEntry[] {
   if (!fs.existsSync(dir)) return []
   return fs.readdirSync(dir)
     .filter(name => name.endsWith(".md") && name.toLowerCase() !== "readme.md")
@@ -43,13 +53,21 @@ function readMarkdownDirectory(dir) {
       const document = readMarkdownDocument(filePath)
       return document ? { name: path.basename(name, ".md"), ...document } : null
     })
-    .filter(Boolean)
+    .filter((e): e is DirEntry => e !== null)
+}
+interface CommandDef {
+  description: string
+  template: string
+  agent?: string
+  model?: string
+  subtask?: boolean
 }
-function commandDefinitions(dir) {
-  const commands = {}
+function commandDefinitions(dir: string): Record<string, CommandDef> {
+  const commands: Record<string, CommandDef> = {}
   for (const doc of readMarkdownDirectory(dir)) {
-    const command = {
+    const command: CommandDef = {
       description: doc.frontmatter.description || `OpenHermes command ${doc.name}`,
       template: doc.body,
     }
@@ -61,8 +79,14 @@ function commandDefinitions(dir) {
   return commands
 }
-function agentDefinitions(dir) {
-  const agents = {}
+interface AgentDef {
+  description: string
+  mode: string
+  prompt: string
+}
+function agentDefinitions(dir: string): Record<string, AgentDef> {
+  const agents: Record<string, AgentDef> = {}
   for (const doc of readMarkdownDirectory(dir)) {
     const name = doc.name === "openhermes" ? OPENHERMES_AGENT : doc.name
     agents[name] = {
@@ -74,7 +98,7 @@ function agentDefinitions(dir) {
   return agents
 }
-function uniqueStrings(existing = [], additions = []) {
+function uniqueStrings(existing: string[] = [], additions: string[] = []): string[] {
   const seen = new Set(existing.filter(Boolean))
   const merged = [...existing]
   for (const item of additions) {
@@ -85,11 +109,11 @@ function uniqueStrings(existing = [], additions = []) {
   return merged
 }
-function readText(filePath) {
+function readText(filePath: string): string {
   return fs.existsSync(filePath) ? fs.readFileSync(filePath, "utf8") : ""
 }
-function buildBootstrapContent(hDir) {
+function buildBootstrapContent(hDir: string): string {
   const parts = [
     `<${BOOTSTRAP_MARKER}>`,
     `You are OpenHermes.`,
@@ -111,7 +135,15 @@ function buildBootstrapContent(hDir) {
   return parts.join("\n\n")
 }
-export const BootstrapPlugin = async () => {
+interface OpenHermesConfig {
+  skills?: { paths?: string[] }
+  command?: Record<string, unknown>
+  agent?: Record<string, unknown>
+  instructions?: string[]
+  default_agent?: string
+}
+export const BootstrapPlugin: Plugin = async () => {
   const hDir = getHarnessDir()
   const skillsDir = path.join(hDir, "skills")
   const commandsDir = path.join(hDir, "commands")
@@ -119,7 +151,7 @@ export const BootstrapPlugin = async () => {
   const bootstrapContent = buildBootstrapContent(hDir)
   return {
-    config: async (config) => {
+    config: async (config: OpenHermesConfig) => {
       config.skills = config.skills || {}
       config.skills.paths = uniqueStrings(config.skills.paths || [], [skillsDir])
@@ -149,16 +181,9 @@ export const BootstrapPlugin = async () => {
       }
       config.default_agent = OPENHERMES_AGENT
-      config.instructions = uniqueStrings(config.instructions || [], [
-        path.join(hDir, "codex", "CONSTITUTION.md"),
-        path.join(hDir, "instructions", "RUNTIME.md"),
-        path.join(__dirname, "CONTEXT.md"),
-        path.join(__dirname, "ETHOS.md"),
-      ])
     },
-    "experimental.chat.messages.transform": async (_input, output) => {
+    "experimental.chat.messages.transform": async (_input: unknown, output: { messages?: Array<{ info?: { role?: string }; parts?: Array<{ text?: string }> }> }) => {
       try {
         if (!output.messages?.length) return
         const firstUser = output.messages.find(m => m?.info?.role === "user")
@@ -166,8 +191,8 @@ export const BootstrapPlugin = async () => {
         if (firstUser.parts.some(p => p.text?.includes(BOOTSTRAP_MARKER))) return
         const ref = firstUser.parts[0]
         firstUser.parts.unshift({ ...ref, type: "text", text: bootstrapContent })
-      } catch (err) {
-        log.error("transform error:", err?.message)
+      } catch (err: unknown) {
+        log.error("transform error:", (err as Error)?.message)
       }
     },
   }

package/harness/codex/ROUTING.md CHANGED Viewed

@@ -17,6 +17,7 @@ If a skill has no explicit route for an outcome, the fallback is always **surfac
 ## Canonical routing table
 ### Workflow skills
+*Includes oh-doctor (command, not skill) for diagnostic routing.*
 | Skill | pass | fail | blocker |
 |-------|------|------|---------|
@@ -49,8 +50,8 @@ If a skill has no explicit route for an outcome, the fallback is always **surfac
 | **oh-triage** | → oh-issue or oh-handoff | → oh-expert (clarify) | surface |
 | **oh-retro** | → oh-planner (next cycle) | → oh-handoff (if blocked) | surface |
 | **oh-handoff** | → [end of session — intended terminal] | → [surface blocker] | surface |
-| **oh-skillcraft** | → oh-skills-link (verify discovery) | → oh-expert (diagnose) | surface |
-| **oh-skills-link** | → [report link status] | → oh-skillcraft (fix skill) | surface |
+| **oh-skill-craft** | → oh-skills-link (verify discovery) | → oh-expert (diagnose) | surface |
+| **oh-skills-link** | → [report link status] | → oh-skill-craft (fix skill) | surface |
 | **oh-skills-list** | → [done — read-only] | → [surface issue] | surface |
 ### Mode skills (no routing — mode switches)

package/harness/instructions/RUNTIME.md CHANGED Viewed

@@ -42,6 +42,7 @@ Key skills:
 - `.opencode/plan.md` — produced by oh-planner, consumed by oh-builder and oh-manifest
 - `.opencode/work-log.md` — progress tracking across subagent delegations
 - `.opencode/todo.md` — task tracking for multi-step work
+- `.opencode/instincts.jsonl` — behavioral patterns (trigger-action-confidence) extracted by oh-learn. On session start, read the highest-confidence entries (≥0.7) into context so past patterns inform current work. This is not durable state — it is an opt-in config that grows organically.
 **Bootstrap**: `harness/codex/CONSTITUTION.md`, this file, `CONTEXT.md`, and `ETHOS.md` are injected into the first user message so the agent starts with the same operating model every session.
@@ -50,5 +51,5 @@ Key skills:
 ## Conventions
 Security, coding style, testing, and orchestration standards:
-- See `CONVENTIONS.md` for the shared baseline.
+- For coding conventions, see the Constitution.
 - Skills provide the detailed walkthroughs for specialized workflows.

package/harness/skills/oh-builder/SKILL.md CHANGED Viewed

@@ -23,15 +23,21 @@ The ALL-arounder builder. Merges prototyping, TDD, implementation from plan, and
 ### Mode A: Prototype (exploratory)
 When you need to answer a question before committing.
-1. Determine what question the prototype answers (data model, state flow, UI direction)
-2. Build minimal — just enough to answer the question
-3. Let user play with it
-4. Collect feedback
-5. Decide: discard, iterate, or promote
-**Sub-modes:**
-- **Terminal** — for state/business logic questions
-- **UI** — several radical design variations from one route
+**Pick a branch based on the question being asked:**
+- **"Does this logic / state model feel right?"** → **Terminal branch.** Build a tiny interactive terminal app that pushes the state machine through cases that are hard to reason about on paper.
+- **"What should this look like?"** → **UI branch.** Generate several radically different visual variations, switchable via a URL param or floating control bar.
+If the question is genuinely ambiguous, default to whichever branch better matches the surrounding code (backend module → terminal, page/component → UI) and state the assumption.
+**Rules that apply to both branches:**
+1. **Throwaway from day one, clearly marked.** Name it so a casual reader sees it's a prototype.
+2. **One command to run.** Whatever the project's task runner supports — `pnpm <name>`, `bun <path>`, etc.
+3. **No persistence by default.** State lives in memory. If the question involves a database, hit a scratch DB with a clear "PROTOTYPE — wipe me" name.
+4. **Skip the polish.** No tests, no error handling beyond what makes it runnable. The point is to learn and then delete.
+5. **Surface the state.** After every action (terminal) or on every variant switch (UI), show the full relevant state so the user sees what changed.
+6. **Delete or absorb when done.** The answer is the only thing worth keeping. Capture it in a commit, ADR, or note — then delete the prototype code.
 ### Mode B: TDD (test-first implementation)
 When building production code from a plan or spec. Red-green-refactor with vertical tracer bullets.

package/harness/skills/oh-gauntlet/SKILL.md CHANGED Viewed

@@ -34,7 +34,7 @@ If tests are missing or weak, flag what should be added. Do not add them here
 Spawn two sub-agents simultaneously:
-**Standards sub-agent:** Read the repo's documented standards (CONTEXT.md, AGENTS.md, eslint config, ADRs, STYLE.md, CONVENTIONS.md). Then read the diff. Report every place the diff violates a documented standard. Cite the standard source. Distinguish hard violations from judgement calls.
+**Standards sub-agent:** Read the repo's documented standards (CONTEXT.md, AGENTS.md, eslint config, ADRs). Then read the diff. Report every place the diff violates a documented standard. Cite the standard source. Distinguish hard violations from judgement calls.
 **Spec sub-agent:** Read the spec source (plan.md, issue, PRD, or user's description). Then read the diff. Report: (a) requirements that are missing or partial, (b) scope creep (behavior not asked for), (c) requirements that look implemented but wrong. Quote the spec.

package/harness/skills/oh-init/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: oh-init
-description: "Initialize project for agent-assisted development: scaffold CONTEXT.md, AGENTS.md, docs/adr/, configure issue tracker and triage labels."
+description: "Initialize project for OpenHermes takeover: scaffold .opencode/ runtime skeleton, wire AGENTS.md, configure domain docs, issue tracker, and triage labels."
 tier: 2
 triggers:
   - "init project"
@@ -8,15 +8,144 @@ triggers:
   - "initialize"
   - "onboard"
   - "scaffold"
+  - "takeover"
 ---
 # oh-init
-Per-repo setup for agent-assisted development. Run once per repo. Walks through configuration decisions one at a time.
+Per-repo setup for OpenHermes-assisted development. Run once per repo. Wires the `.opencode/` runtime skeleton, connects `AGENTS.md` to the orchestrator, then walks through domain/issue configuration decisions one at a time.
+Complements OpenCode's built-in `/init` command (which creates `AGENTS.md` with project build/test/architecture notes). Run oh-init after `/init` or instead of it — the two serve different layers.
 ## Process
-### 1. Issue Tracker
+### Phase 0: Check Existing State
+Before writing anything, detect what already exists:
+- ☐ `.opencode/` directory present?
+- ☐ `.opencode/plan.md` exists?
+- ☐ `.opencode/todo.md` exists?
+- ☐ `.opencode/work-log.md` exists?
+- ☐ `.opencode/instincts.jsonl` exists?
+- ☐ `AGENTS.md` exists? (If yes, was it created by OpenCode `/init` or manually?)
+- ☐ `opencode.json` / `opencode.jsonc` present?
+Report findings. If everything exists, offer to skip or verify and exit.
+### Phase 1: .opencode/ Runtime Skeleton
+Create `.opencode/` directory if missing. Scaffold shared state files:
+**`.opencode/plan.md`** — working plan for the current session. Uses the same format as the plans archived in the global plan directory (`%USERPROFILE%/.config/opencode/task/<project>-plan-<nnn>.md`). When a plan is completed, copy it to the global directory with sequenced naming so it becomes a permanent archive.
+```markdown
+# PLAN: <project-name>
+Plan ID: <project-name>-plan-<nnn>
+Project: <project-name>
+Status: active
+Created: <local-date-time>
+Updated: <local-date-time>
+Project Path: <absolute-project-path>
+Plan Path: .opencode/plan.md
+Objective: <short objective>
+## Current State
+## Assumptions
+## Tasks
+- [ ] Task 1
+  - [ ] Subtask 1.1
+## Active Task
+## Subagents
+| Agent | Purpose | Status | Findings |
+|---|---|---|---|
+## Completed
+## Blockers
+- None
+## Validation
+- [ ] Static checks
+- [ ] Formatting checks
+- [ ] Type checks
+- [ ] Unit tests
+- [ ] Integration checks
+- [ ] Manual verification
+## Decisions
+## Notes
+```
+**`.opencode/todo.md`** — task tracking for multi-step work (start empty).
+**`.opencode/work-log.md`** — progress tracking across subagent delegations:
+```markdown
+# Work Log
+## <date> — <description>
+- Started: <time>
+- Completed: <task>
+- Next: <next task>
+```
+**`.opencode/instincts.jsonl`** — behavioral pattern store for oh-learn (start as empty file). Will grow organically as the agent extracts patterns from sessions.
+### Phase 2: AGENTS.md Wiring
+Check if AGENTS.md exists:
+**If AGENTS.md does not exist:**
+Create it with OpenHermes orchestrator header + prompts for project info:
+```markdown
+# <project-name>
+OpenHermes is the primary orchestrator. All routing, planning, and delegation flows through oh-* skills.
+## Project Context
+- **Language**: <fill in>
+- **Package manager**: <fill in>
+- **Build command**: <fill in>
+- **Test command**: <fill in>
+- **Lint/type check**: <fill in>
+## Key Directives
+- Plan first. Write to `.opencode/plan.md` before multi-file changes.
+- Verify before claiming success. Read files, run commands, confirm output.
+- Delegate substantive work to subagents — main context orchestrates.
+- Use oh-* skills on demand. Load via OpenCode's skill tool when relevant.
+- Shared state lives in `.opencode/` (plan.md, todo.md, work-log.md, instincts.jsonl).
+```
+Then ask the user to fill in the Project Context fields. Offer to auto-detect from package manifests.
+**If AGENTS.md exists** (e.g., created by OpenCode `/init`):
+Append an `## OpenHermes Orchestrator` section to the end:
+```markdown
+## OpenHermes Orchestrator
+OpenHermes is the primary orchestrator for this session.
+- **Orchestrator**: OpenHermes — hub-and-spoke routing through oh-* skills
+- **Plan**: `.opencode/plan.md` — always check before starting work
+- **Shared state**: `.opencode/todo.md`, `.opencode/work-log.md`, `.opencode/instincts.jsonl`
+- **Verify before claim**: read files, run commands, confirm output
+- **Delegate**: subagents for implementation, main context orchestrates
+```
+### Phase 3: Issue Tracker
 Detect the git hosting platform:
 - **GitHub** — `gh` CLI
 - **GitLab** — `glab` CLI
@@ -25,7 +154,7 @@ Detect the git hosting platform:
 Confirm with the user. Write the result to `docs/agents/issue-tracker.md`.
-### 2. Triage Labels
+### Phase 4: Triage Labels
 The `triage` skill uses these label strings to move issues through a state machine:
 - `needs-triage` — maintainer needs to evaluate
 - `needs-info` — waiting on reporter
@@ -35,7 +164,7 @@ The `triage` skill uses these label strings to move issues through a state machi
 If the repo already has different label names, map them. Write to `docs/agents/triage-labels.md`.
-### 3. Domain Docs
+### Phase 5: Domain Docs
 Configure how the project organizes domain language:
 - **Single-context** — one `CONTEXT.md` + `docs/adr/` at repo root
 - **Multi-context** — `CONTEXT-MAP.md` pointing to per-context files
@@ -44,7 +173,7 @@ Scaffold `CONTEXT.md` with project name, domain description, and placeholder glo
 Write to `docs/agents/domain.md`.
-### 4. Agent Skills Block
+### Phase 6: Agent Skills Block
 Add a `## Agent skills` section to `AGENTS.md` (or `CLAUDE.md` if it exists):
 ```markdown
@@ -60,14 +189,18 @@ Add a `## Agent skills` section to `AGENTS.md` (or `CLAUDE.md` if it exists):
 <summary>. See docs/agents/domain.md.
 ```
-### 5. Decision Record
-Record: "oh-init completed for project \<name\> on \<date\>."
+### Phase 7: Decision Record
+Record: "oh-init completed for project <name> on <date>."
 ## Anti-patterns
 - Running init without understanding the project domain
 - Scaffolding CONTEXT.md without populating any terms
 - Creating ADR directory but never writing ADRs
 - Creating both AGENTS.md and CLAUDE.md — edit the one that exists
+- Overwriting an existing AGENTS.md created by OpenCode `/init` (append instead)
+- Scaffolding `.opencode/` files that already exist (check first, skip duplicates)
+- Empty instinct file never getting populated (run oh-learn extract periodically)
+- Never archiving completed plans to the global task directory (completed plans rot in `.opencode/` instead of becoming permanent records)
 ## Routing

package/harness/skills/oh-investigate/SKILL.md CHANGED Viewed

@@ -8,14 +8,52 @@ description: "Systematic bug diagnosis with root cause investigation"
 ## When to Use
 When a bug is reported, a test fails, or unexpected behavior occurs. Use this before attempting any fix.
-## Workflow
-1. **Reproduce** — get a reliable reproduction case (script, test, or steps)
-2. **Minimise** — strip away unrelated code until the minimal reproduction remains
-3. **Hypothesise** — list possible root causes, rank by likelihood
-4. **Instrument** — add logging, assertions, or debug output to test hypothesis
-5. **Fix** — implement the smallest correct change addressing root cause
-6. **Regression test** — verify fix doesn't break existing behavior
-7. **Document** — log the root cause and fix in the handoff, issue, or docs that are actually in scope
+## Phase 0 — Build a feedback loop
+**This is the actual skill. Everything else is mechanical.**
+If you have a fast, deterministic, agent-runnable pass/fail signal for the bug, you will find the cause — bisection, hypothesis-testing, and instrumentation are just consuming that signal. If you don't have one, no amount of staring at code will save you.
+Spend disproportionate effort here. **Be aggressive. Be creative. Refuse to give up.**
+### Ways to construct a feedback loop (try in this order)
+1. **Failing test** at whatever seam reaches the bug.
+2. **Curl / HTTP script** against a running dev server.
+3. **CLI invocation** with a fixture input, diffing stdout against a known-good snapshot.
+4. **Headless browser script** — drive the UI, assert on DOM/console/network.
+5. **Replay a captured trace** — save a real payload/event log, replay it in isolation.
+6. **Throwaway harness** — minimal subset of the system exercising the bug code path with a single call.
+7. **Property / fuzz loop** — run 1000 random inputs, look for the failure mode.
+8. **Bisection harness** — automate "boot at state X, check, repeat" so you can `git bisect run` it.
+9. **Differential loop** — run same input through old-version vs new-version, diff outputs.
+10. **HITL (human-in-the-loop) script** — last resort. Drive a human with a structured loop.
+### Iterate on the loop itself
+- Can I make it faster? (Cache setup, skip unrelated init, narrow the scope.)
+- Can I make the signal sharper? (Assert on the specific symptom, not "didn't crash".)
+- Can I make it more deterministic? (Pin time, seed RNG, isolate filesystem.)
+A 30-second flaky loop is barely better than no loop. A 2-second deterministic loop is a debugging superpower.
+### Non-deterministic bugs
+The goal is not a clean repro but a **higher reproduction rate**. Loop the trigger 100×, parallelise, add stress, narrow timing windows. A 50%-flake bug is debuggable; 1% is not.
+### When you genuinely cannot build a loop
+Stop and say so explicitly. List what you tried. Do **not** proceed to hypothesise without a loop.
+## Workflow (consumes the loop)
+1. **Reproduce** — run the loop, confirm the bug appears. The loop must match the user's described failure, not a different nearby failure.
+2. **Minimise** — strip away unrelated code until the minimal reproduction remains.
+3. **Hypothesise** — generate 3–5 ranked falsifiable hypotheses before testing any. Each must state a prediction: "If X is the cause, then changing Y will make the bug disappear".
+4. **Instrument** — one probe per hypothesis. Change one variable at a time. Tag every debug log with a unique prefix (e.g. `[DEBUG-a4f2]`) for easy cleanup.
+5. **Fix** — write the regression test at a correct seam first. Watch it fail. Apply the smallest correct change. Watch it pass. Re-run the Phase 0 loop against the original scenario.
+6. **Regression test** — verify fix doesn't break existing behavior. If no correct seam exists for a regression test, that itself is a finding — flag the architecture gap.
+7. **Document** — log the root cause and fix in the handoff, issue, or relevant docs. State which hypothesis was correct so the next debugger learns.
 ## Iron Law
 No fixes without root cause. Surface-level fixes compound into technical debt.
@@ -25,6 +63,7 @@ No fixes without root cause. Surface-level fixes compound into technical debt.
 - Changing code without reproducing the bug first
 - "Shotgun" debugging — changing multiple things hoping one sticks
 - Not documenting root cause for future reference
+- Proceeding to hypothesise without a feedback loop
 ## Routing

package/harness/skills/oh-learn/SKILL.md CHANGED Viewed

@@ -1,28 +1,92 @@
 ---
 name: oh-learn
-description: "Review, search, prune, and export session learnings"
+description: "Extract, evolve, and promote session learnings as instincts. Review, search, prune, export."
 ---
 # oh-learn
+Learning engine for the harness. Distills patterns from sessions into **instincts** (trigger-action pairs with confidence), clusters them into skill candidates, and graduates high-signal patterns from project to global scope.
+## Instinct Data Model
+Every learning stored as one JSONL line in `.opencode/instincts.jsonl`:
+```json
+{ "trigger": "situation pattern", "action": "recommended response", "confidence": 0.5, "applications": 1, "successes": 1, "category": "coding", "source": "oh-learn:extract", "ts": "2026-05-15T12:00:00Z" }
+```
+**Rules:**
+- **Trigger** — specific, matchable situation. *Not* general advice.
+- **Action** — executable response. *Not* a belief.
+- **Confidence** — starts at 0.5, increases by 0.05 per successful application, decays by 0.02 per day without use.
+- **Category** — one of: `coding`, `testing`, `security`, `git`, `planning`, `orchestration`, `debugging`, `ux`.
 ## When to Use
-To review what the agent has learned across sessions, search for specific patterns, prune stale knowledge, or export learnings for documentation.
-## Workflow
-1. **Review** — show recent learnings with context
-2. **Search** — find learnings matching specific topics or patterns
-3. **Prune** — remove stale, redundant, or superseded learnings
-4. **Export** — format learnings for documentation or sharing
+After completing a significant piece of work, at session handoff, or when you notice the same pattern repeat 2+ times in one session. Also on explicit user request.
+## Workflows
+### Extract
+Mine the current session for reusable patterns.
+1. Scan recent conversation + code changes for repeated decision patterns
+2. For each distinct pattern write an instinct: trigger, action, confidence=0.5, category
+3. Read existing `.opencode/instincts.jsonl`, check for near-duplicate triggers
+4. If duplicate found: merge — `confidence = max(existing, 0.8 × new)`, increment applications
+5. If new: append line to file
+**Good instinct:** trigger=`"tsc --noEmit shows 10+ errors after batch edit"`, action=`"Fix errors one at a time, re-running tsc after each, rather than batch-fixing"`, category=`"debugging"`
+**Bad instinct:** `"Write clean code"` — too vague to trigger on.
+### Evolve
+Cluster related instincts into skill/command/agent candidates.
+1. Read all instincts from `.opencode/instincts.jsonl`
+2. Group by `category`, then by trigger topic similarity
+3. **If cluster ≥ 5 instincts AND avg confidence ≥ 0.7** → generate `oh-skill-craft` spec for a new skill
+4. **If cluster 3-4 instincts with confidence ≥ 0.8** → suggest update to existing skill
+5. Output candidate summary with trigger list and extracted core pattern
+### Promote
+Graduate high-confidence instincts from project to global scope.
+1. Scan `.opencode/instincts.jsonl` for instincts with `confidence >= 0.85 AND applications >= 10`
+2. Filter out project-specific patterns (reference paths, local APIs, domain terms)
+3. Append filtered candidates to `%USERPROFILE%\.config\opencode\instincts.jsonl` (global)
+4. Tag promoted instincts with `"promoted": true` in project file
+5. Report: "Promoted N instincts to global scope"
+### Review
+Show instinct summary: total count, confidence distribution, category breakdown, recently promoted.
+### Search
+Find instincts by topic, trigger fragment, category, or confidence range.
+### Prune
+Remove instincts stale for 30+ days with confidence < 0.3, or superseded by a higher-confidence instinct covering the same trigger.
+### Export
+Serialize instincts to portable JSON for sharing across projects or teams:
+```json
+{ "version": 1, "exported": "2026-05-15T12:00:00Z", "instincts": [...] }
+```
 ## Anti-patterns
 - Hoarding every observation (most things aren't learnings)
 - Never pruning (stale knowledge is worse than no knowledge)
 - Storing what, not why (context-less facts are forgettable)
+- Over-promoting: not every pattern is globally useful
+- Extracting without applying: instincts that never trigger are noise
+- Ignoring confidence: treating all instincts as equally reliable
 ## Routing
 | Outcome | Route |
 |---------|-------|
-| pass | → [done — read-only report] |
+| pass | → [done — report summary] |
 | fail | → [surface gaps to user] |
 | blocker | → surface to user |

package/harness/skills/oh-manifest/SKILL.md CHANGED Viewed

@@ -15,10 +15,21 @@ triggers:
 # oh-manifest
-Full build orchestration loop. Runs planner → builder → verify → repeat until done or a blocker is surfaced. Uses gstack decision principles to auto-resolve intermediate questions. Only interrupts the user for genuine blockers.
+Full build orchestration loop. Runs pre-flight checks → planner → builder → verify → repeat until done or a blocker is surfaced. Uses decision principles to auto-resolve intermediate questions. Only interrupts the user for genuine blockers.
 ## Pipeline
+### Phase 0: Pre-Flight
+Before any work begins, ALL of these MUST pass:
+- ☐ **Quality baseline** — existing tests pass (if any). Capture output for before/after comparison.
+- ☐ **Rollback path** — clean `git stash` or a committed state you can return to.
+- ☐ **Branch isolation** — confirm you are on a working branch, not main/master.
+- ☐ **Scope documented** — plan or task description exists and is unambiguous.
+If any check fails → **STOP**. Report which check failed and why. Do not proceed to Step 1 until the blocker is resolved.
 ### Step 1: Plan
 - If `.opencode/plan.md` exists, load and verify it is current
 - If not, run `oh-planner` (Mode A, B, or C depending on context)
@@ -43,6 +54,32 @@ Full build orchestration loop. Runs planner → builder → verify → repeat un
 - Phase failed and cannot be fixed → BLOCKER (surface to user with context)
 - Phase passed but new work discovered → add to plan, continue loop
+## Loop Patterns
+Select a pattern based on the nature of the work:
+| Pattern | Use When | Behavior |
+|---------|----------|----------|
+| **sequential** | Normal feature work | One phase at a time, verify each before next |
+| **continuous-pr** | Multi-step refactors | Each phase is its own PR — commit, push, PR per phase |
+| **infinite** | Watch mode, CI repair | Continue until external stop signal or budget exhausted |
+| **rfc-dag** | Complex dependency chains | Resolve phase ordering by DAG; parallelize independent branches |
+Default is **sequential**. Switch patterns only when the work structure demands it.
+## Escalation Triggers
+These conditions cause the loop to **pause** and surface to the user:
+| Trigger | Condition | Action |
+|---------|-----------|--------|
+| **Stall** | 2 consecutive checkpoints with zero measurable progress | Pause. Report what was attempted, what blocked. |
+| **Retry storm** | Same error message 3+ times in the loop | Stop retrying. Surface error with attempted fixes. |
+| **Cost drift** | Cumulative changes exceed scope documented in pre-flight | Pause. Show diff between planned and actual scope. |
+| **Quality regression** | Verify phase scores lower than pre-flight baseline | Pause. Report degraded metrics. Do not push through. |
+These are not optional suggestions. When a trigger fires, the loop **must** pause and report.
 ## Decision Principles
 Auto-resolve these without asking the user:
@@ -69,11 +106,13 @@ When a blocker is encountered:
 4. **Wait for user decision** before continuing
 ## Anti-patterns
+- Skipping pre-flight (every loop needs a baseline and a rollback plan)
 - Auto-deciding premises (fundamental assumptions need user input)
 - Pushing through blockers (surface immediately, don't try 5 workarounds silently)
 - Skipping verification (verify every phase, not just the final result)
 - Parallelizing dependent phases (respect the dependency order in plan.md)
 - Forgetting to update plan.md with completion status
+- Ignoring escalation triggers (stall means pause, not try harder)
 ## Routing

package/harness/skills/oh-planner/SKILL.md CHANGED Viewed

@@ -80,7 +80,9 @@ Never auto-decide: premises (need human judgment) or cases where both the plan a
 ## Plan Artifact
-Output goes in `.opencode/plan.md` with this structure (matching the global AGENTS.md schema):
+Output goes in `.opencode/plan.md` (per-project, overwritten each session), using the structure shown below (matching the global AGENTS.md schema).
+**Then save a copy** to `%USERPROFILE%/.config/opencode/task/<project-name>-plan-<nnn>.md` (global, incrementing, persistent) per AGENTS.md persistent plan rules.
 ```markdown
 # PLAN: <project-name>

package/harness/skills/oh-review/SKILL.md CHANGED Viewed

@@ -45,7 +45,7 @@ Collect all files documenting how code should be written:
 - AGENTS.md, CLAUDE.md, CONTRIBUTING.md
 - CONTEXT.md, ADRs
 - eslint/biome/prettier config (note tool-enforced ones — don't re-check)
-- Any STYLE.md, STANDARDS.md, STYLEGUIDE.md
 ### 4. Spawn Both Sub-Agents (parallel)

package/index.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import { BootstrapPlugin } from "./bootstrap.ts"
+export default BootstrapPlugin

package/lib/{harness-resolver.mjs → harness-resolver.ts} RENAMED Viewed

@@ -1,6 +1,5 @@
 // Shared harness directory resolver — canonical implementation.
-// Extracted from bootstrap.mjs to eliminate DRY violation with goal-tracker.mjs.
-// Both consumers import from here.
+// Extracted from bootstrap.ts. Both bootstrap.ts and tests import from here.
 import path from "node:path"
 import fs from "node:fs"
@@ -9,14 +8,14 @@ import { fileURLToPath } from "node:url"
 const __dirname = path.dirname(fileURLToPath(import.meta.url))
 const PKG_DIR = path.resolve(__dirname, "..")
-const REQUIRED_HARNESS_FILES = [
+const REQUIRED_HARNESS_FILES: ReadonlyArray<readonly string[]> = [ // each entry: path segments joined onto a candidate harness root
   ["codex", "CONSTITUTION.md"],
   ["instructions", "RUNTIME.md"],
   ["skills", "oh-plan", "SKILL.md"],
 ]
-function ancestorDirs(start, limit = 6) {
-  const dirs = []
+function ancestorDirs(start: string, limit = 6): string[] {
+  const dirs: string[] = []
   let current = path.resolve(start)
   for (let i = 0; i < limit; i++) {
     dirs.push(current)
@@ -27,7 +26,7 @@ function ancestorDirs(start, limit = 6) {
   return dirs
 }
-function buildHarnessCandidates(currentDir, execPath, cwd) {
+function buildHarnessCandidates(currentDir: string, execPath: string, cwd: string): string[] {
   const roots = [path.resolve(currentDir, "harness")]
   const seen = new Set(roots)
@@ -50,7 +49,7 @@ function buildHarnessCandidates(currentDir, execPath, cwd) {
   return roots
 }
-function hasRequiredHarnessFiles(root) {
+function hasRequiredHarnessFiles(root: string): boolean {
   return REQUIRED_HARNESS_FILES.every(parts => fs.existsSync(path.join(root, ...parts)))
 }
@@ -59,7 +58,12 @@ export function resolveHarnessRoot({
   execPath = process.execPath,
   cwd = process.cwd(),
   candidateRoots,
-} = {}) {
+}: {
+  currentDir?: string
+  execPath?: string
+  cwd?: string
+  candidateRoots?: string[]
+} = {}): string {
   const roots = candidateRoots ?? buildHarnessCandidates(currentDir, execPath, cwd)
   for (const root of roots) {
     if (hasRequiredHarnessFiles(root)) return root
@@ -67,11 +71,11 @@ export function resolveHarnessRoot({
   return path.resolve(currentDir, "harness")
 }
-let _harnessDir
+let _harnessDir: string | undefined
-export function setHarnessRootForTest(dir) { _harnessDir = dir }
+export function setHarnessRootForTest(dir: string | undefined): void { _harnessDir = dir }
-export function getHarnessDir() {
+export function getHarnessDir(): string {
   if (!_harnessDir) _harnessDir = resolveHarnessRoot()
   return _harnessDir
 }

package/lib/{logger.mjs → logger.ts} RENAMED Viewed

@@ -2,34 +2,41 @@ import path from "node:path"
 import os from "node:os"
 import fs from "node:fs"
-const LEVELS = { debug: 0, info: 1, warn: 2, error: 3 }
+export interface Logger {
+  debug: (...args: unknown[]) => void
+  info: (...args: unknown[]) => void
+  warn: (...args: unknown[]) => void
+  error: (...args: unknown[]) => void
+}
+const LEVELS: Record<string, number> = { debug: 0, info: 1, warn: 2, error: 3 }
-const CURRENT_LEVEL = LEVELS[process.env.OPENCODE_LOG_LEVEL?.trim().toLowerCase()] ?? (process.env.OPENHERMES_LOG_LEVEL?.trim().toLowerCase() === "debug" ? LEVELS.debug : LEVELS.warn)
+const CURRENT_LEVEL = LEVELS[process.env.OPENCODE_LOG_LEVEL?.trim().toLowerCase() ?? ""] ?? (process.env.OPENHERMES_LOG_LEVEL?.trim().toLowerCase() === "debug" ? LEVELS.debug : LEVELS.warn) // "" is not a level name, so an unset variable falls through to the default
 const LOG_DIR = path.join(os.homedir(), ".local", "share", "opencode", "log")
 const LOG_FILE = path.join(LOG_DIR, "openhermes.log")
-function ts() {
+function ts(): string {
   const d = new Date()
   return `${d.getFullYear()}-${(d.getMonth()+1).toString().padStart(2,"0")}-${d.getDate().toString().padStart(2,"0")} ${d.getHours().toString().padStart(2,"0")}:${d.getMinutes().toString().padStart(2,"0")}:${d.getSeconds().toString().padStart(2,"0")}.${d.getMilliseconds().toString().padStart(3,"0")}`
 }
-function formatArgs(args) {
+function formatArgs(args: unknown[]): string {
   return args.map(a => {
     if (a === null) return "null"
     if (a === undefined) return "undefined"
     if (typeof a === "object") {
-      try { return a?.message || JSON.stringify(a) } catch { return String(a) }
+      try { return (a as Error)?.message || JSON.stringify(a) } catch { return String(a) }
     }
     return String(a)
   }).join(" ")
 }
-function shouldLog(levelName) {
+function shouldLog(levelName: string): boolean {
   return LEVELS[levelName] >= CURRENT_LEVEL
 }
-let _fd = null
-function getFd() {
+let _fd: number | null = null
+function getFd(): number {
   if (_fd) return _fd
   try {
     fs.mkdirSync(LOG_DIR, { recursive: true })
@@ -40,10 +47,10 @@ function getFd() {
   return _fd
 }
-export function createLogger(name) {
+export function createLogger(name: string): Logger {
   const prefix = `[openhermes:${name}]`
-  function emit(levelName, ...args) {
+  function emit(levelName: string, ...args: unknown[]): void {
     if (!shouldLog(levelName)) return
     const fd = getFd()
     if (fd < 0) return
@@ -52,11 +59,11 @@ export function createLogger(name) {
   }
   return {
-    debug: (...args) => emit("debug", ...args),
-    info: (...args) => emit("info", ...args),
-    warn: (...args) => emit("warn", ...args),
-    error: (...args) => emit("error", ...args),
+    debug: (...args: unknown[]) => emit("debug", ...args),
+    info: (...args: unknown[]) => emit("info", ...args),
+    warn: (...args: unknown[]) => emit("warn", ...args),
+    error: (...args: unknown[]) => emit("error", ...args),
   }
 }
-export const rootLogger = createLogger("root")
+export const rootLogger: Logger = createLogger("root")

package/package.json CHANGED Viewed

@@ -1,24 +1,27 @@
 {
   "name": "openhermes",
-  "version": "4.0.0",
+  "version": "4.1.0",
   "description": "OpenCode-native skills, commands, and rules orchestration for OpenHermes.",
   "type": "module",
   "license": "MIT",
-  "main": "./index.mjs",
+  "engines": {
+    "bun": ">=1.0"
+  },
+  "main": "./index.ts",
   "dependencies": {
     "@opencode-ai/plugin": "1.14.46"
   },
   "exports": {
-    ".": "./index.mjs",
-    "./bootstrap": "./bootstrap.mjs"
+    ".": "./index.ts",
+    "./bootstrap": "./bootstrap.ts"
   },
   "files": [
-    "index.mjs",
-    "bootstrap.mjs",
+    "index.ts",
+    "bootstrap.ts",
+    "tsconfig.json",
     "ETHOS.md",
     "CONTEXT.md",
     "lib/",
-    "test/",
     "harness/codex/",
     "harness/instructions/",
     "harness/skills/",
@@ -26,7 +29,7 @@
     "harness/agents/"
   ],
   "scripts": {
-    "test": "node --test test/*.test.mjs"
+    "test": "bun test test/*.test.ts"
   },
   "keywords": [
     "opencode",

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "strict": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "allowImportingTsExtensions": true,
+    "types": ["node"]
+  },
+  "include": ["index.ts", "bootstrap.ts", "lib/**/*.ts", "test/**/*.ts"]
+}

package/harness/instructions/CONVENTIONS.md DELETED Viewed

@@ -1,206 +0,0 @@
-# OpenHermes — Coding Conventions & Operational Guidelines
-OpenHermes coding conventions and operational guidelines. Shared baseline for all subagents and skills.
-## Security Guidelines (CRITICAL)
-### Mandatory Pre-Commit Checks
-- [ ] No hardcoded secrets (API keys, passwords, tokens)
-- [ ] All user inputs validated
-- [ ] SQL injection prevention (parameterized queries)
-- [ ] XSS prevention (sanitized output)
-- [ ] CSRF protection enabled
-- [ ] Authentication/authorization verified
-- [ ] Rate limiting on all endpoints
-- [ ] Error messages don't leak sensitive data
-### Secret Management
-```typescript
-// NEVER: Hardcoded secrets
-const apiKey = "sk-proj-xxxxx"
-// ALWAYS: Environment variables
-const apiKey = process.env.OPENAI_API_KEY
-if (!apiKey) throw new Error('OPENAI_API_KEY not configured')
-```
-### Security Response Protocol
-If security issue found:
-1. STOP immediately
-2. Use `security-reviewer` subagent
-3. Fix CRITICAL issues before continuing
-4. Rotate any exposed secrets
-5. Review entire codebase for similar issues
----
-## Coding Style
-### Immutability (CRITICAL)
-ALWAYS create new objects, NEVER mutate:
-```javascript
-// WRONG: Mutation
-function updateUser(user, name) {
-  user.name = name; return user
-}
-// CORRECT: Immutability
-function updateUser(user, name) {
-  return { ...user, name }
-}
-```
-### File Organization
-MANY SMALL FILES > FEW LARGE FILES:
-- High cohesion, low coupling
-- 200-400 lines typical, 800 max
-- Extract utilities from large components
-- Organize by feature/domain, not by type
-### Error Handling
-```typescript
-try {
-  const result = await riskyOperation()
-  return result
-} catch (error) {
-  console.error('Operation failed:', error)
-  throw new Error('Detailed user-friendly message')
-}
-```
-### Input Validation
-```typescript
-import { z } from 'zod'
-const schema = z.object({
-  email: z.string().email(),
-  age: z.number().int().min(0).max(150)
-})
-const validated = schema.parse(input)
-```
-### Code Quality Checklist
-Before marking work complete:
-- [ ] Code is readable and well-named
-- [ ] Functions are small (<50 lines)
-- [ ] Files are focused (<800 lines)
-- [ ] No deep nesting (>4 levels)
-- [ ] Proper error handling
-- [ ] No console.log statements
-- [ ] No hardcoded values
-- [ ] No mutation (immutable patterns used)
----
-## Testing Requirements
-### Minimum Test Coverage: 80%
-Test Types (ALL required):
-1. **Unit Tests** — Individual functions, utilities, components
-2. **Integration Tests** — API endpoints, database operations
-3. **E2E Tests** — Critical user flows (Playwright)
-### TDD Workflow
-MANDATORY workflow:
-1. Write test first (RED)
-2. Run test — it should FAIL
-3. Write minimal implementation (GREEN)
-4. Run test — it should PASS
-5. Refactor (IMPROVE)
-6. Verify coverage (80%+)
----
-## Subagent Orchestration
-| Subagent | Purpose | When to Use |
-|----------|---------|-------------|
-| planner | Implementation planning | Complex features, refactoring |
-| architect | System design | Architectural decisions |
-| tdd-guide | Test-driven development | New features, bug fixes |
-| code-reviewer | Code review | After writing code |
-| security-reviewer | Security analysis | Before commits |
-| build-error-resolver | Fix build errors | When build fails |
-| e2e-runner | E2E testing | Critical user flows |
-| refactor-cleaner | Dead code cleanup | Code maintenance |
-| doc-updater | Documentation | Updating docs |
-| docs-lookup | Live doc queries | API questions |
-| review-go | Go code review | Go projects |
-| build-go | Go build errors | Go build failures |
-| review-database | Database optimization | SQL, schema design |
-| review-rust | Rust code review | Rust projects |
-| build-rust | Rust build errors | Rust build failures |
-| review-python | Python code review | Python projects |
-| review-java | Java/Spring review | Java projects |
-| build-java | Java build errors | Java build failures |
-| review-kotlin | Kotlin/Android review | Kotlin projects |
-| build-kotlin | Kotlin build errors | Kotlin build failures |
-| review-cpp | C++ review | C++ projects |
-| build-cpp | C++ build errors | C++ build failures |
-| loop-operator | Autonomous loops | Iterative workflows |
-### Immediate Subagent Usage
-No user prompt needed:
-1. Complex feature requests — Use `planner`
-2. Code just written/modified — Use `code-reviewer`
-3. Bug fix or new feature — Use `tdd-guide`
-4. Architectural decision — Use `architect`
----
-## Performance
-### Model Selection Strategy
-**Haiku** (lightweight): deterministic changes, simple code gen, worker agents
-**Sonnet** (default): main development, multi-agent orchestration, complex coding
-**Opus** (deep reasoning): architecture decisions, security review, ambiguous requirements
-### Context Window Management
-Avoid last 20% of context window for:
-- Large-scale refactoring
-- Feature implementation spanning multiple files
-- Debugging complex interactions
----
-## Git Workflow
-### Commit Message Format
-```
-<type>: <description>
-```
-Types: feat, fix, refactor, docs, test, chore, perf, ci
-### Feature Implementation Workflow
-1. **Plan** — Use `planner` to create plan with risks and phases
-2. **TDD** — Use `tdd-guide` for red-green-refactor cycle
-3. **Code Review** — Use `code-reviewer` immediately after writing
-4. **Security** — Use `security-reviewer` before commits
-5. **Commit** — Follow conventional commits format
----
-## Success Metrics
-You are successful when:
-- All tests pass (80%+ coverage)
-- No security vulnerabilities
-- Code is readable and maintainable
-- Performance is acceptable
-- User requirements are met

package/index.mjs DELETED Viewed

@@ -1,3 +0,0 @@
-import { BootstrapPlugin } from "./bootstrap.mjs"
-export default BootstrapPlugin

package/test/plugins-behavioral.test.mjs DELETED Viewed

@@ -1,64 +0,0 @@
-import { describe, it, before } from "node:test"
-import assert from "node:assert/strict"
-import path from "node:path"
-import { fileURLToPath } from "node:url"
-const __dirname = path.dirname(fileURLToPath(import.meta.url))
-describe("BootstrapPlugin behavior", () => {
-  let mod
-  before(async () => {
-    mod = await import("../bootstrap.mjs")
-  })
-  it("registers package-local skills, commands, agents, and instructions", async () => {
-    const plugin = await mod.BootstrapPlugin({ directory: __dirname })
-    const config = { skills: { paths: [] }, command: {}, agent: {}, instructions: [] }
-    await plugin.config(config)
-    assert.ok(config.skills.paths.some(p => p.endsWith(path.join("harness", "skills"))))
-    assert.ok(config.command["oh-doctor"])
-    assert.ok(config.agent.OpenHermes)
-    assert.equal(config.default_agent, "OpenHermes")
-    assert.ok(config.instructions.some(p => p.endsWith(path.join("harness", "codex", "CONSTITUTION.md"))))
-    assert.ok(config.instructions.some(p => p.endsWith(path.join("harness", "instructions", "RUNTIME.md"))))
-  })
-  it("loads markdown manifests into command and agent config", async () => {
-    const plugin = await mod.BootstrapPlugin({ directory: __dirname })
-    const config = { skills: { paths: [] }, command: {}, agent: {}, instructions: [] }
-    await plugin.config(config)
-    assert.match(config.command["oh-doctor"].template, /Inspect the current OpenHermes\/OpenCode setup/)
-    assert.equal(config.command["oh-doctor"].agent, "OpenHermes")
-    assert.match(config.agent.OpenHermes.prompt, /You are OpenHermes, the primary orchestrator/)
-    assert.equal(config.agent.OpenHermes.mode, "primary")
-  })
-  it("injects bootstrap text only once", async () => {
-    const plugin = await mod.BootstrapPlugin({ directory: __dirname })
-    const output = {
-      messages: [
-        {
-          info: { role: "user" },
-          parts: [
-            { type: "text", text: "actual user request" },
-          ],
-        },
-      ],
-    }
-    await plugin["experimental.chat.messages.transform"]({}, output)
-    await plugin["experimental.chat.messages.transform"]({}, output)
-    assert.match(output.messages[0].parts[0].text, /OPENHERMES_BOOTSTRAP/)
-    assert.match(output.messages[0].parts[1].text, /actual user request/)
-    assert.equal(
-      output.messages[0].parts.filter(part => typeof part.text === "string" && part.text.includes("OPENHERMES_BOOTSTRAP")).length,
-      1,
-    )
-  })
-})

package/test/plugins.test.mjs DELETED Viewed

@@ -1,62 +0,0 @@
-import { describe, it, before } from "node:test"
-import assert from "node:assert/strict"
-import fs from "node:fs"
-import os from "node:os"
-import path from "node:path"
-describe("plugin exports", () => {
-  it("index.mjs default exports plugin", async () => {
-    const pkg = await import("../index.mjs")
-    assert.ok(typeof pkg.default === "function")
-  })
-  it("bootstrap.mjs exports BootstrapPlugin", async () => {
-    const mod = await import("../bootstrap.mjs")
-    assert.ok(typeof mod.BootstrapPlugin === "function")
-  })
-})
-describe("bootstrap helpers", () => {
-  let mod
-  before(async () => {
-    mod = await import("../bootstrap.mjs")
-  })
-  it("re-exports harness resolver helpers", async () => {
-    const { resolveHarnessRoot, setHarnessRootForTest, getHarnessDir } = mod
-    assert.ok(typeof resolveHarnessRoot === "function")
-    assert.ok(typeof setHarnessRootForTest === "function")
-    assert.ok(typeof getHarnessDir === "function")
-  })
-  it("resolveHarnessRoot picks complete harness root", async () => {
-    const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openhermes-harness-"))
-    const badRoot = path.join(tmpRoot, "bad")
-    const goodRoot = path.join(tmpRoot, "good")
-    fs.mkdirSync(path.join(badRoot, "codex"), { recursive: true })
-    fs.writeFileSync(path.join(badRoot, "codex", "CONSTITUTION.md"), "# incomplete\n")
-    const requiredFiles = [
-      ["codex", "CONSTITUTION.md"],
-      ["instructions", "RUNTIME.md"],
-      ["skills", "oh-plan", "SKILL.md"],
-    ]
-    for (const parts of requiredFiles) {
-      const filePath = path.join(goodRoot, ...parts)
-      fs.mkdirSync(path.dirname(filePath), { recursive: true })
-      fs.writeFileSync(filePath, "ok\n")
-    }
-    const resolved = mod.resolveHarnessRoot({ candidateRoots: [badRoot, goodRoot] })
-    assert.equal(resolved, goodRoot)
-  })
-  it("setHarnessRootForTest overrides harness resolution", async () => {
-    mod.setHarnessRootForTest("/custom/harness")
-    assert.equal(mod.getHarnessDir(), "/custom/harness")
-    mod.setHarnessRootForTest(undefined)
-  })
-})