@loicngr/kobo 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/AGENTS.md +29 -0
  2. package/README.md +146 -4
  3. package/dist/mcp-server/kobo-tasks-server.js +27 -0
  4. package/dist/server/index.js +2 -0
  5. package/dist/server/routes/health.js +14 -0
  6. package/dist/server/routes/voice.js +149 -0
  7. package/dist/server/routes/workspaces.js +33 -9
  8. package/dist/server/services/agent/engines/claude-code/capabilities.js +7 -0
  9. package/dist/server/services/agent/engines/codex/capabilities.js +18 -0
  10. package/dist/server/services/agent/engines/codex/client.js +36 -0
  11. package/dist/server/services/agent/engines/codex/engine.js +276 -0
  12. package/dist/server/services/agent/engines/codex/event-mapper.js +473 -0
  13. package/dist/server/services/agent/engines/codex/jsonrpc/peer.js +60 -0
  14. package/dist/server/services/agent/engines/codex/jsonrpc/transport.js +31 -0
  15. package/dist/server/services/agent/engines/codex/options-builder.js +81 -0
  16. package/dist/server/services/agent/engines/codex/protocol/types.js +11 -0
  17. package/dist/server/services/agent/engines/codex/server-requests.js +99 -0
  18. package/dist/server/services/agent/engines/codex/spawn.js +27 -0
  19. package/dist/server/services/agent/engines/registry.js +2 -0
  20. package/dist/server/services/agent/orchestrator.js +1 -1
  21. package/dist/server/services/settings-service.js +125 -6
  22. package/dist/server/services/transcription-service.js +206 -0
  23. package/dist/server/utils/paths.js +7 -0
  24. package/dist/shared/codex-models.js +43 -0
  25. package/package.json +13 -10
  26. package/src/client/dist/spa/assets/ActivityFeed-CPZdjJpH.js +8 -0
  27. package/src/client/dist/spa/assets/{ActivityFeed-tE4LVYck.css → ActivityFeed-WjiQ9716.css} +1 -1
  28. package/src/client/dist/spa/assets/{ClosePopup-D_UAdwkA.js → ClosePopup-C5JlH6Hy.js} +1 -1
  29. package/src/client/dist/spa/assets/CreatePage-CdfbFlXf.js +2 -0
  30. package/src/client/dist/spa/assets/CreatePage-ZyBHUbl0.css +1 -0
  31. package/src/client/dist/spa/assets/{DiffViewer-CblFgn8w.js → DiffViewer-DkiP6nWz.js} +3 -3
  32. package/src/client/dist/spa/assets/HealthPage-BHGZJTgS.js +1 -0
  33. package/src/client/dist/spa/assets/{MainLayout-DhaYycak.js → MainLayout-C0tClQZl.js} +17 -17
  34. package/src/client/dist/spa/assets/{MainLayout-drolsINz.css → MainLayout-DKnTGN_Q.css} +1 -1
  35. package/src/client/dist/spa/assets/{QBadge-DWH42dbo.js → QBadge-C7r6oPSi.js} +1 -1
  36. package/src/client/dist/spa/assets/{QBtn-a6jxWjmW.js → QBtn-DEuWKHbR.js} +1 -1
  37. package/src/client/dist/spa/assets/{QCheckbox-D5jfsxLV.js → QCheckbox-BvHfXBFY.js} +1 -1
  38. package/src/client/dist/spa/assets/{QChip-ByxK0Tuf.js → QChip-erWIZgxW.js} +1 -1
  39. package/src/client/dist/spa/assets/{QExpansionItem-CH1ipL9n.js → QExpansionItem-CW6sPoP9.js} +1 -1
  40. package/src/client/dist/spa/assets/QIcon-qfJNZLIW.js +1 -0
  41. package/src/client/dist/spa/assets/{QInput-Cm5-AGQ4.js → QInput-DCJEwE8V.js} +1 -1
  42. package/src/client/dist/spa/assets/{QItemLabel-DrTxqTqV.js → QItemLabel-CHkgkZVj.js} +1 -1
  43. package/src/client/dist/spa/assets/{QItemSection-5YpFpPDm.js → QItemSection-CQUDd0Vg.js} +1 -1
  44. package/src/client/dist/spa/assets/{QList-D0FtnQJI.js → QList-BbnN_oNX.js} +1 -1
  45. package/src/client/dist/spa/assets/{QMenu-B4xMxMGd.js → QMenu-CaVfoMu6.js} +1 -1
  46. package/src/client/dist/spa/assets/{QPage-DFi3K093.js → QPage-Co2h9wd_.js} +1 -1
  47. package/src/client/dist/spa/assets/{QRadio-B3aKjCVu.js → QRadio-DJxOyOA3.js} +1 -1
  48. package/src/client/dist/spa/assets/QSpace-DKIph84L.js +1 -0
  49. package/src/client/dist/spa/assets/{QSpinnerDots-CszPQQ9J.js → QSpinnerDots-Bfl2RMy4.js} +1 -1
  50. package/src/client/dist/spa/assets/{QTabPanels-D2ks0UIA.js → QTabPanels-E66qDYmr.js} +1 -1
  51. package/src/client/dist/spa/assets/{QToggle-1-N9qWq4.js → QToggle-DNOTC_3a.js} +1 -1
  52. package/src/client/dist/spa/assets/{QTooltip-fDNzBEfN.js → QTooltip-DYey0zHV.js} +1 -1
  53. package/src/client/dist/spa/assets/{SearchPage-cZTwP4Lf.js → SearchPage-BaI3iU58.js} +1 -1
  54. package/src/client/dist/spa/assets/SettingsPage-BqBOQKeM.js +9 -0
  55. package/src/client/dist/spa/assets/SettingsPage-Zeu2cZqi.css +1 -0
  56. package/src/client/dist/spa/assets/{TouchPan-DoE24Io3.js → TouchPan-DQILDzd3.js} +1 -1
  57. package/src/client/dist/spa/assets/WorkspacePage-C9eT5LAo.css +1 -0
  58. package/src/client/dist/spa/assets/WorkspacePage-DqMyUSFG.js +4 -0
  59. package/src/client/dist/spa/assets/{build-path-tree-B1Lvvqto.js → build-path-tree-BpcCBm9A.js} +1 -1
  60. package/src/client/dist/spa/assets/{cssMode-BFLYiiEw.js → cssMode-BaeNVqUm.js} +1 -1
  61. package/src/client/dist/spa/assets/{documents-kx0vLfSG.js → documents-soWtna0O.js} +1 -1
  62. package/src/client/dist/spa/assets/{editor.api-2asmmhth.js → editor.api-DMLl_PBy.js} +1 -1
  63. package/src/client/dist/spa/assets/{editor.main-ChCYZyez.js → editor.main-D2pRsQAX.js} +3 -3
  64. package/src/client/dist/spa/assets/{AutoLoopChip-w8D77bI5.js → engineFeatures-RffgP255.js} +1 -1
  65. package/src/client/dist/spa/assets/{expand-template-CXQFkQOJ.js → expand-template-z2wIJOD2.js} +1 -1
  66. package/src/client/dist/spa/assets/{formatters-DCAQ6ANJ.js → formatters-guwb-rzl.js} +1 -1
  67. package/src/client/dist/spa/assets/{freemarker2-BaBL9E9G.js → freemarker2-Bh6ItnVy.js} +1 -1
  68. package/src/client/dist/spa/assets/{handlebars-BxDour4L.js → handlebars-D8OXeysi.js} +1 -1
  69. package/src/client/dist/spa/assets/{html-C6hnkfIL.js → html-9Y1AHhvw.js} +1 -1
  70. package/src/client/dist/spa/assets/{htmlMode-9zT3-dmz.js → htmlMode-z00se0fQ.js} +1 -1
  71. package/src/client/dist/spa/assets/i18n-C-VMW7h5.js +1 -0
  72. package/src/client/dist/spa/assets/index-BLlWqEZC.js +2 -0
  73. package/src/client/dist/spa/assets/{javascript-C3YjvKbE.js → javascript-D0LSb7WU.js} +1 -1
  74. package/src/client/dist/spa/assets/{jsonMode-DcJDgMzf.js → jsonMode-BSmyaoX3.js} +1 -1
  75. package/src/client/dist/spa/assets/{liquid-CsT8SjJM.js → liquid-BsY5UXNl.js} +1 -1
  76. package/src/client/dist/spa/assets/{mdx-CT3yVSyc.js → mdx-BUcXih4e.js} +1 -1
  77. package/src/client/dist/spa/assets/{monaco.contribution-DKGNz1oQ.js → monaco.contribution-DrpufOT3.js} +2 -2
  78. package/src/client/dist/spa/assets/{notifications-OnPq4FrH.js → notifications-C255ApfS.js} +1 -1
  79. package/src/client/dist/spa/assets/permissionModes-BocOmzU8.js +1 -0
  80. package/src/client/dist/spa/assets/{purify.es-CPieV82n.js → purify.es-aV6SU8N4.js} +1 -1
  81. package/src/client/dist/spa/assets/{python-Ca5miKgj.js → python-C0PoB7M8.js} +1 -1
  82. package/src/client/dist/spa/assets/{razor-7qzusGRc.js → razor-Bu0-fwxD.js} +1 -1
  83. package/src/client/dist/spa/assets/{render-chat-markdown-Bqq2G-yI.js → render-chat-markdown-DALCdDVE.js} +1 -1
  84. package/src/client/dist/spa/assets/runtime-core.esm-bundler-9Z0QAO_7.js +1 -0
  85. package/src/client/dist/spa/assets/{tsMode-BdvO8jZ2.js → tsMode-Blc1d2dp.js} +1 -1
  86. package/src/client/dist/spa/assets/{typescript-BfVNzhgs.js → typescript-CV4ME9fo.js} +1 -1
  87. package/src/client/dist/spa/assets/{use-checkbox-D7zmRxGI.js → use-checkbox-y_fOkYZN.js} +1 -1
  88. package/src/client/dist/spa/assets/{use-id-CuaR1RiE.js → use-id-_7wiRcgb.js} +1 -1
  89. package/src/client/dist/spa/assets/{use-panel-D-8nAQns.js → use-panel-DCPiSURS.js} +1 -1
  90. package/src/client/dist/spa/assets/use-quasar-DQYS47mh.js +1 -0
  91. package/src/client/dist/spa/assets/{vue-i18n-BcfTCFFS.js → vue-i18n-DI-gS-CC.js} +1 -1
  92. package/src/client/dist/spa/assets/{xml-DGNXGqXL.js → xml-DLYRBBbI.js} +1 -1
  93. package/src/client/dist/spa/assets/{yaml-CtAtOyt5.js → yaml-QIBjI5Dl.js} +1 -1
  94. package/src/client/dist/spa/index.html +12 -12
  95. package/src/mcp-server/kobo-tasks-server.ts +27 -0
  96. package/src/client/dist/spa/assets/ActivityFeed-BboSPm4b.js +0 -7
  97. package/src/client/dist/spa/assets/CreatePage-BDObLDJc.js +0 -2
  98. package/src/client/dist/spa/assets/CreatePage-DssmsAsV.css +0 -1
  99. package/src/client/dist/spa/assets/HealthPage-CBSw7e5q.js +0 -1
  100. package/src/client/dist/spa/assets/QIcon-BJuyqdsT.js +0 -1
  101. package/src/client/dist/spa/assets/QSpace-CLtL3aPy.js +0 -1
  102. package/src/client/dist/spa/assets/SettingsPage-C1efO0VM.js +0 -1
  103. package/src/client/dist/spa/assets/SettingsPage-CMyeQ9_u.css +0 -1
  104. package/src/client/dist/spa/assets/WorkspacePage-3jcof896.js +0 -4
  105. package/src/client/dist/spa/assets/WorkspacePage-CCtIrBiR.css +0 -1
  106. package/src/client/dist/spa/assets/i18n-CLY0XI9-.js +0 -1
  107. package/src/client/dist/spa/assets/index-D6wj_wQ9.js +0 -2
  108. package/src/client/dist/spa/assets/models-BsjWUKqM.js +0 -1
  109. package/src/client/dist/spa/assets/runtime-core.esm-bundler-C3IgBgY5.js +0 -1
  110. package/src/client/dist/spa/assets/use-quasar-Sdcq6zzV.js +0 -1
package/AGENTS.md CHANGED
@@ -171,6 +171,35 @@ When adding features touching `notion-service.ts`, remember: **no token = no fea
171
171
 
172
172
  See the "Notion integration" section of the README for the end-user setup guide.
173
173
 
174
+ ### Agent engines
175
+
176
+ Two engines live under `src/server/services/agent/engines/`, both implementing the `AgentEngine` contract in `types.ts`:
177
+
178
+ **Claude Code** (`claude-code/`) — uses `@anthropic-ai/claude-agent-sdk` (in-process async iterator). Spawns no subprocess. Auth via `~/.claude.json` or `ANTHROPIC_API_KEY` env var.
179
+
180
+ **OpenAI Codex** (`codex/`) — uses the **`codex app-server` JSON-RPC protocol** (line-delimited JSON over stdio with a long-lived `codex` subprocess). The engine layers are:
181
+ - `jsonrpc/transport.ts` + `jsonrpc/peer.ts` — generic JSON-RPC 2.0 stdio peer (request correlation, notifications, server-initiated requests)
182
+ - `client.ts` — typed `AppServerClient` wrapping the peer (initialize / thread.start / thread.resume / turn.start / turn.interrupt)
183
+ - `protocol/types.ts` — hand-written subset of the Codex v2 protocol types (camelCase field names — `agentMessage`, `commandExecution`, etc.). The full canonical bindings are generated by `codex app-server generate-ts` if the protocol drifts.
184
+ - `event-mapper.ts` — translates app-server notifications (`item/started`, `item/completed`, `item/agentMessage/delta`, `turn/completed`, `thread/tokenUsage/updated`, `account/rateLimits/updated`, `error`) into the Kōbō `AgentEvent` union
185
+ - `server-requests.ts` — handles server-initiated approval/elicitation requests (`item/commandExecution/requestApproval`, `item/fileChange/requestApproval`, `item/tool/requestUserInput`, `item/permissions/requestApproval`, plus v1 legacy aliases `execCommandApproval` / `applyPatchApproval`)
186
+ - `engine.ts` — `createCodexEngine()` factory wiring everything into `AgentEngine`
187
+ - `spawn.ts` — locates the `codex` binary via `@openai/codex` dependency and spawns `codex app-server`
188
+
189
+ Auth: delegated to the `codex` CLI which reads `OPENAI_API_KEY` from env or `~/.codex/auth.json`. Kōbō ships no Codex credentials. The `@openai/codex` package (binary) is a direct dependency.
190
+
191
+ Background: the engine was migrated from `@openai/codex-sdk` (one-shot `codex exec`) to `codex app-server` in May 2026 to unlock features the SDK didn't surface (sub-agent visibility, interactive approvals, `request_user_input`, structured rate limits). The original migration plan and wire-capture notes live at `docs/superpowers/plans/2026-05-11-codex-app-server-migration.md` and `2026-05-11-codex-app-server-wire-capture.md`.
192
+
193
+ **Protocol gotchas worth remembering** (post-migration findings):
194
+
195
+ - **`experimentalApi: true` is mandatory in the `initialize` handshake.** Without it, any turn using experimental fields — most importantly `turn/start.collaborationMode` — is rejected with `-32600: requires experimentalApi capability`. See `client.ts:connect()`.
196
+ - **`collaborationMode` is sticky server-side.** Once a turn has run in `mode: 'plan'`, every subsequent turn on the same thread stays in plan until we explicitly send `mode: 'default'` again. The engine therefore always emits the field on `turn/start` — never omits it — so a Plan → Bypass switch actually takes effect. Mapping: Kōbō `plan` → `plan`, every other Kōbō mode → `default`. Plan mode is the only one that unlocks Codex's internal `request_user_input` tool.
197
+ - **Permission mode and collaboration mode are independent.** Sandbox + approvalPolicy control *what the agent may do at OS level* (read-only / workspace-write, never / on-request / unless-trusted). `collaborationMode` is a separate session-level flag that gates internal Codex behaviour (notably interactive Q&A). Kōbō hides both behind a single "permission mode" selector and maps them together.
198
+ - **Sub-agents map to `collabAgentToolCall`.** Codex's analogue of Claude's Task tool is `collabAgentToolCall` (`spawnAgent` / `sendInput` / `resumeAgent` / `wait` / `closeAgent`). The mapper emits **both** a `tool:call` named `Task` (chat card) and a `subagent:progress` event (right-hand panel) per call — the same dual emission the Claude engine does. See `event-mapper.ts` `handleItemStarted` / `handleItemCompleted` for the `collabAgentToolCall` branch.
199
+ - **`fileChange` items carry a unified-diff blob.** The protocol shape is `{ path, kind: PatchChangeKind, diff: string }` per change; `kind` is a discriminated union, not a string. The mapper flattens the first change into a Claude-style Edit input (`{ file_path, diff, change_kind, move_path? }`) so the existing `ToolCallItem` renderer picks it up. The client parses the unified diff into `DiffLine[]` via `parseUnifiedDiff` in `inline-diff.ts`.
200
+ - **Streaming bursts trip auto-scroll.** Codex emits one `message:text` event per token-delta (50-200 per message), versus Claude which emits ~1 per content block. The naive `eventCount` watcher in `ActivityFeed.vue` triggered an animated `scrollToBottom(180)` per event, causing stacked animations and visible jank. The fix coalesces requests through `requestAnimationFrame` and only animates the *first* scroll after a quiet period — subsequent scrolls during a burst snap instantly.
201
+ - **MCP tools need `default_tools_approval_mode: 'auto'` in `config.mcp_servers`.** Without it, Codex flags every MCP tool call as needing user approval ("user cancelled MCP tool call"). Kōbō trusts every tool it spawns, so the options-builder pre-approves the namespace.
202
+
174
203
  ## Code conventions
175
204
 
176
205
  **Service layer** throws descriptive errors; the route layer catches and maps to HTTP status codes. Error messages follow the pattern `` `Workspace '${id}' not found` `` / `` `... is already archived` ``.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Kōbō
2
2
 
3
- > **Kōbō** (工房) — Japanese for *workshop*. A multi-workspace agent manager for [Claude Code](https://claude.com/claude-code).
3
+ > **Kōbō** (工房) — Japanese for *workshop*. A multi-workspace agent manager for [Claude Code](https://claude.com/claude-code) and [OpenAI Codex](https://developers.openai.com/codex/) *(Codex support is still experimental — see [the section below](#openai-codex-integration))*.
4
4
 
5
5
  > [!NOTE]
6
6
  > 🚧 **Active development** — breaking changes may still land on `develop`. The database layer ships with forward-only migrations and a timestamped pre-migration backup of `kobo.db` before any schema change, so upgrades preserve your data even across invasive refactors.
@@ -12,7 +12,7 @@ Think of it as an apprentice's hall: you hand out missions, each apprentice sets
12
12
  ## Features
13
13
 
14
14
  - **Isolated git worktrees** — every workspace runs on its own branch in its own directory, with a configurable global worktrees root for new workspaces, so concurrent Claude sessions never step on each other
15
- - **Pluggable agent engine** — Kōbō talks to agents through an `AgentEngine` contract with a normalised `AgentEvent` stream (`src/server/services/agent/engines/`). The `claude-code` engine runs on the official [`@anthropic-ai/claude-agent-sdk`](https://github.com/anthropics/claude-agent-sdk-typescript); adding a second runtime (e.g. Codex) only requires a new adapter, not a rewrite of the UI or orchestration layer
15
+ - **Pluggable agent engine — two runtimes shipped** — Kōbō talks to agents through an `AgentEngine` contract with a normalised `AgentEvent` stream (`src/server/services/agent/engines/`). The `claude-code` engine runs on the official [`@anthropic-ai/claude-agent-sdk`](https://github.com/anthropics/claude-agent-sdk-typescript); the `codex` engine speaks the [`codex app-server`](https://github.com/openai/codex) JSON-RPC protocol over stdio with the official [`@openai/codex`](https://www.npmjs.com/package/@openai/codex) binary. Engine is chosen per-workspace at creation time, with a single normalised UI (sub-agents, tool calls, todos, reasoning, permission modes, MCP servers, auto-loop) covering both. Adding a third runtime only requires a new adapter, not a rewrite of the UI or orchestration layer
16
16
  - **Interactive `AskUserQuestion`** — when the agent invokes `AskUserQuestion`, Kōbō pauses the session via the SDK's `defer` pattern, surfaces a question panel in the UI, and resumes the agent once the user answers. The session does not occupy any resources while it waits
17
17
  - **Rich chat feed** — live streaming text, thinking blocks, inline tool calls with expandable diffs for Edit/Write, per-turn session cards, markdown rendering, jump-to-previous-user-message button, and infinite scroll-up over persisted history
18
18
  - **Task & acceptance criteria tracking** — the agent reports progress through a dedicated MCP server (`kobo-tasks`) that reads and updates tasks directly from the SQLite database
@@ -27,7 +27,7 @@ Think of it as an apprentice's hall: you hand out missions, each apprentice sets
27
27
  - **Prompt templates** — personal library of reusable prompts with variable substitution (`{working_branch}`, `{commit_count}`, etc.), insertable from the chat input via `/` autocomplete; editable in Settings > Templates
28
28
  - **Favorites and tags** — pin workspaces to the top via right-click favourite, organise with per-workspace tags filterable from the sidebar; a global tag catalogue keeps colours consistent across workspaces
29
29
  - **Health panel + config export/import** — inspect backend health (agent sessions, migration state, dev servers, DB size) and roundtrip your Kōbō config (settings, templates, skills) between machines via JSON
30
- - **Account-level quota panel** — a colored mini-bar badge in the chat footer shows the current Claude Code 5-hour and 7-day usage, fed by a backend service that polls Anthropic's OAuth usage endpoint every 60 seconds. Click to open a popover with full bars, reset times, a "Refresh now" button, and a one-click jump to the Stats tab. Pluggable per-provider (Codex-ready), persisted in SQLite so the badge is populated on cold start, and account-level so it's the same across workspaces sharing the same engine
30
+ - **Account-level quota panel** — a colored mini-bar badge in the chat footer shows the current Claude Code 5-hour and 7-day usage (Claude workspaces) or live Codex rate-limit buckets (Codex workspaces, driven by the structured `account/rateLimits/updated` app-server notification). Click to open a popover with full bars, reset times, a "Refresh now" button, and a one-click jump to the Stats tab. Pluggable per-provider, persisted in SQLite so the badge is populated on cold start, and account-level so it's the same across workspaces sharing the same engine
31
31
  - **Resizable right drawer** — drag-to-resize horizontally and vertically, with tab state and split ratio persisted to localStorage
32
32
  - **Soft interrupt** — pause an agent mid-execution (SIGINT, like pressing Escape in Claude Code) without killing the process; the agent stops the current tool and waits for the next message
33
33
  - **Archive instead of delete** — soft-remove workspaces without losing the worktree, branches, or history; unarchive restores the exact pre-archive state
@@ -51,7 +51,9 @@ Think of it as an apprentice's hall: you hand out missions, each apprentice sets
51
51
  ### Prerequisites
52
52
 
53
53
  - Node.js ≥ 20
54
- - [Claude Code](https://claude.com/claude-code) authenticated via `claude /login` once. The `claude` CLI is **no longer required at runtime** — Kōbō embeds the official [`@anthropic-ai/claude-agent-sdk`](https://github.com/anthropics/claude-agent-sdk-typescript), which reuses the same login.
54
+ - At least one agent runtime, authenticated:
55
+ - [Claude Code](https://claude.com/claude-code) — `claude /login` once. The `claude` CLI is **no longer required at runtime** — Kōbō embeds the official [`@anthropic-ai/claude-agent-sdk`](https://github.com/anthropics/claude-agent-sdk-typescript), which reuses the same login.
56
+ - [OpenAI Codex](https://developers.openai.com/codex/) — `codex login` once, or export `OPENAI_API_KEY` in the env. See [OpenAI Codex integration](#openai-codex-integration). Workspaces pick an engine at creation time, so you only need to set up the one(s) you use.
55
57
  - Git
56
58
  - Optional: Docker (if you configure per-workspace dev servers)
57
59
  - Optional: `gh` CLI (if you use the PR automation)
@@ -155,6 +157,146 @@ If you need to pin a specific version of the Notion MCP server, use a fork, or a
155
157
 
156
158
  Without a valid token configured, the Notion import field in the workspace creation form will return an error when you click **Refresh** or submit a Notion URL — the rest of Kōbō (workspaces, agents, tasks, Git integration) keeps working independently.
157
159
 
160
+ ## OpenAI Codex integration
161
+
162
+ > [!WARNING]
163
+ > 🧪 **Experimental** — the Codex engine has shipped but is still maturing. The Claude Code engine remains the primary, battle-tested path. Expect occasional rough edges on Codex-only flows (tool rendering for less common item types, sub-agent interactions, edge cases around `collaborationMode` and approval prompts). Bugs and feedback welcome on the issue tracker.
164
+
165
+ Kōbō ships a second agent engine that runs on top of the official **OpenAI Codex** CLI. Pick `OpenAI Codex` in the engine selector when you create a workspace and the agent talks to the `codex` binary instead of Claude Code, with the same UI surface: streaming text, reasoning blocks, tool cards (Bash, Edit, Read, WebSearch, MCP tools, ImageGeneration), sub-agents (Codex's `collabAgentToolCall` family is mapped onto the same `Task` panel Claude's Task tool feeds), todo list, permission modes, interactive approvals, structured rate limits, auto-loop. **This feature is opt-in and requires you to authenticate the `codex` CLI separately from Claude Code** — Kōbō ships no OpenAI credentials.
166
+
167
+ Under the hood, Kōbō spawns a long-lived `codex app-server` subprocess per workspace and speaks the [Codex app-server JSON-RPC protocol](https://github.com/openai/codex/tree/main/codex-rs/app-server) over stdio. The `codex` binary is pulled in via the [`@openai/codex`](https://www.npmjs.com/package/@openai/codex) npm package, which is a direct dependency — no separate install required.
168
+
169
+ ### Authenticating the Codex CLI
170
+
171
+ Two paths, pick one:
172
+
173
+ 1. **`codex login` (recommended)** — run `codex login` once. The CLI writes a token to `~/.codex/auth.json` which Kōbō's spawned `codex app-server` reuses automatically:
174
+
175
+ ```bash
176
+ codex login
177
+ ```
178
+
179
+ 2. **`OPENAI_API_KEY` env var** — set the variable before launching Kōbō:
180
+
181
+ ```bash
182
+ OPENAI_API_KEY=sk-your-key-here PORT=9999 npx @loicngr/kobo@latest
183
+ ```
184
+
185
+ Kōbō does not store or proxy the key. If you change the credential or revoke it, Kōbō follows automatically on the next session start.
186
+
187
+ ### Permission modes (Codex)
188
+
189
+ Kōbō's four permission modes (`plan` / `bypass` / `strict` / `interactive`) map to Codex's `sandbox` + `approvalPolicy` pair, plus a separate `collaborationMode` flag that gates interactive questions:
190
+
191
+ | Kōbō mode | Codex sandbox | Codex approvalPolicy | Codex collaborationMode | Effect |
192
+ |---|---|---|---|---|
193
+ | `plan` | `read-only` | `never` | `plan` | Read-only sandbox + the agent can ask interactive questions (`request_user_input`) |
194
+ | `bypass` | `workspace-write` | `never` | `default` | Full autonomy in the worktree, no approvals |
195
+ | `strict` | `workspace-write` | `on-request` | `default` | Writes allowed, approval prompted on sensitive commands |
196
+ | `interactive` | `workspace-write` | `unless-trusted` | `default` | Writes allowed, approval prompted on every untrusted action |
197
+
198
+ Interactive Q&A (`request_user_input`) is only available in `plan` — this is a constraint of Codex itself, not Kōbō. The typical workflow is: brainstorm in `plan` until the agent has the context it needs, then switch to `bypass`/`strict` for execution.
199
+
200
+ ### Models and reasoning effort
201
+
202
+ The Codex engine exposes the OpenAI model catalogue (`gpt-5-codex`, `gpt-5.4`, `o4-mini`, `o3`) and the standard reasoning-effort scale (`auto` / `minimal` / `low` / `medium` / `high` / `xhigh`). Both selectors switch automatically when you flip the workspace's engine.
203
+
204
+ ### Sub-agents
205
+
206
+ When the Codex agent uses its `spawnAgent` collab tool, Kōbō renders a **Task** card in the chat (like Claude's Task tool) and a live entry in the **SUB-AGENTS** panel of the right drawer — same plumbing the Claude engine uses. The same panel is hidden for engines that don't expose sub-agents.
207
+
208
+ ### MCP servers
209
+
210
+ The `kobo-tasks` MCP server (and any other MCP server you configure on the workspace) is plumbed into Codex through the standard `config.mcp_servers` entry. Tool calls under those servers are pre-approved (`default_tools_approval_mode: 'auto'`) so the agent doesn't get blocked on every call.
211
+
212
+ ### When the binary is missing
213
+
214
+ Without a working `codex` install or a valid credential, a `codex`-engine workspace fails with a clear error on its first turn and transitions to `error` status. The rest of Kōbō (Claude-engine workspaces, tasks, Git, dev servers) keeps working independently.
215
+
216
+ ## Voice transcription (local Whisper)
217
+
218
+ Kōbō supports local voice transcription with push-to-talk in both:
219
+
220
+ - `WorkspacePage` (chat input)
221
+ - `CreatePage` (workspace instructions textarea)
222
+
223
+ ### Requirements
224
+
225
+ - `whisper-cli` from [`whisper.cpp`](https://github.com/ggml-org/whisper.cpp)
226
+ - `ffmpeg`
227
+ - `cmake` (required to build `whisper.cpp` from source)
228
+ - At least one Whisper model downloaded from **Settings → Voice**
229
+
230
+ ### Install `whisper.cpp` (Linux/macOS)
231
+
232
+ ```bash
233
+ git clone https://github.com/ggml-org/whisper.cpp.git
234
+ cd whisper.cpp
235
+ cmake -B build
236
+ cmake --build build -j
237
+ ```
238
+
239
+ This usually produces `build/bin/whisper-cli`.
240
+
241
+ You can also download a prebuilt archive from the `whisper.cpp` releases page (for example: <https://github.com/ggml-org/whisper.cpp/releases/tag/v1.8.4>) and point Kōbō to the extracted `whisper-cli` binary path.
242
+
243
+ ### Install `ffmpeg`
244
+
245
+ Ubuntu / Debian:
246
+
247
+ ```bash
248
+ sudo apt update
249
+ sudo apt install -y cmake build-essential ffmpeg
250
+ ```
251
+
252
+ Windows:
253
+
254
+ - Install `ffmpeg` (for example via Chocolatey: `choco install ffmpeg`, or via Scoop: `scoop install ffmpeg`)
255
+ - Verify in PowerShell:
256
+
257
+ ```powershell
258
+ where ffmpeg
259
+ ffmpeg -version
260
+ ```
261
+
262
+ ### Windows notes for `whisper.cpp`
263
+
264
+ Install CMake and Visual Studio Build Tools (C/C++), then build `whisper.cpp` (or use a prebuilt `whisper-cli`), then verify:
265
+
266
+ ```powershell
267
+ where whisper-cli
268
+ whisper-cli -h
269
+ ```
270
+
271
+ ### Configure in Kōbō
272
+
273
+ Open **Settings → Voice**:
274
+
275
+ - Enable voice transcription
276
+ - Optionally set:
277
+ - **Whisper binary path (optional)**
278
+ - **ffmpeg binary path (optional)**
279
+ - If left empty, Kōbō falls back to:
280
+ - `whisper-cli` from `PATH` (or `WHISPER_CPP_COMMAND` if set)
281
+ - `ffmpeg` from `PATH`
282
+ - Download a model (e.g. `base`) and select it as active
283
+
284
+ The Voice panel shows runtime status (`ready/missing`) for both Whisper and ffmpeg so setup issues are visible immediately.
285
+
286
+ ### Advanced voice parameters
287
+
288
+ Kōbō exposes additional transcription settings in **Settings → Voice**:
289
+
290
+ - **Temperature** (`0..1`) — decoding stability vs flexibility
291
+ - **Initial prompt** — optional context/jargon for better recognition
292
+ - **Translate to English** — translate non-English speech to English
293
+ - **Suppress non-speech tokens** — reduce non-speech artifacts in output
294
+
295
+ Recommended defaults by model:
296
+
297
+ - `tiny` / `base` → `0.1`
298
+ - `small` / `medium` / `large-v3` → `0.2`
299
+
158
300
  ## Sentry integration
159
301
 
160
302
  Kōbō can turn a Sentry issue into a dedicated "fix workspace" — you paste the issue URL at workspace creation and Kōbō extracts the stacktrace, culprit, tags, offending spans and extra context, writes them as a local markdown file inside the worktree (`.ai/thoughts/SENTRY-<id>.md`), and primes the Claude agent with a TDD fix workflow that points at that file. The agent also keeps access to the Sentry MCP tools (`search_issue_events`, `get_issue_tag_values`, `get_sentry_resource`) so it can dig deeper on its own. **This feature is opt-in and reuses the Sentry MCP configuration you already have for Claude Code** — Kōbō does not manage a Sentry token separately.
@@ -119,6 +119,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
119
119
  name: 'list_tasks',
120
120
  description: 'CALL FIRST on any non-trivial turn to know what the user wants done and what is already completed. Returns every task and acceptance criterion for the current workspace with its id and status. Re-call periodically (before marking something done, or after the user asks for a status) to stay in sync with user-added or external updates.',
121
121
  inputSchema: { type: 'object', properties: {}, required: [] },
122
+ annotations: { readOnlyHint: true, openWorldHint: false },
122
123
  },
123
124
  {
124
125
  name: 'mark_task_done',
@@ -130,11 +131,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
130
131
  },
131
132
  required: ['task_id'],
132
133
  },
134
+ annotations: { destructiveHint: false, openWorldHint: false },
133
135
  },
134
136
  {
135
137
  name: 'mark_auto_loop_ready',
136
138
  description: 'CALL ONLY at the end of a `/kobo-prep-autoloop` grooming session, once all tasks look atomic and implementable in one session. Flips a flag on the workspace that unlocks the auto-loop toggle in the UI. Do NOT call during normal sessions.',
137
139
  inputSchema: { type: 'object', properties: {}, required: [] },
140
+ annotations: { destructiveHint: false, openWorldHint: false },
138
141
  },
139
142
  {
140
143
  name: 'create_task',
@@ -150,6 +153,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
150
153
  },
151
154
  required: ['title'],
152
155
  },
156
+ annotations: { destructiveHint: false, openWorldHint: false },
153
157
  },
154
158
  {
155
159
  name: 'update_task',
@@ -171,6 +175,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
171
175
  },
172
176
  required: ['task_id'],
173
177
  },
178
+ annotations: { destructiveHint: false, openWorldHint: false },
174
179
  },
175
180
  {
176
181
  name: 'delete_task',
@@ -182,11 +187,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
182
187
  },
183
188
  required: ['task_id'],
184
189
  },
190
+ annotations: { destructiveHint: false, openWorldHint: false },
185
191
  },
186
192
  {
187
193
  name: 'get_workspace_info',
188
194
  description: 'CALL EARLY in a session to confirm project path, working/source branch, worktree path, model, and notion link. Cheap read — useful when the user refers to "this workspace" or when you need the worktree path to locate files.',
189
195
  inputSchema: { type: 'object', properties: {}, required: [] },
196
+ annotations: { readOnlyHint: true, openWorldHint: false },
190
197
  },
191
198
  {
192
199
  name: 'set_workspace_agent_description',
@@ -201,6 +208,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
201
208
  },
202
209
  required: ['description'],
203
210
  },
211
+ annotations: { destructiveHint: false, openWorldHint: false },
204
212
  },
205
213
  {
206
214
  name: 'cron_create',
@@ -232,6 +240,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
232
240
  },
233
241
  required: ['expression', 'prompt'],
234
242
  },
243
+ annotations: { destructiveHint: false, openWorldHint: false },
235
244
  },
236
245
  {
237
246
  name: 'cron_delete',
@@ -243,16 +252,19 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
243
252
  },
244
253
  required: ['id'],
245
254
  },
255
+ annotations: { destructiveHint: false, openWorldHint: false },
246
256
  },
247
257
  {
248
258
  name: 'cron_list',
249
259
  description: 'List all crons currently armed on THIS workspace, including their next and last fire times.',
250
260
  inputSchema: { type: 'object', properties: {}, additionalProperties: false },
261
+ annotations: { readOnlyHint: true, openWorldHint: false },
251
262
  },
252
263
  {
253
264
  name: 'get_git_info',
254
265
  description: 'CALL BEFORE creating a PR, committing in batches, or reporting progress to the user. Returns commit count ahead of source, files changed, insertions/deletions, and existing PR URL if any.',
255
266
  inputSchema: { type: 'object', properties: {}, required: [] },
267
+ annotations: { readOnlyHint: true, openWorldHint: false },
256
268
  },
257
269
  {
258
270
  name: 'set_workspace_status',
@@ -268,26 +280,31 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
268
280
  },
269
281
  required: ['status'],
270
282
  },
283
+ annotations: { destructiveHint: false, openWorldHint: false },
271
284
  },
272
285
  {
273
286
  name: 'get_notion_ticket',
274
287
  description: 'CALL when the user references "the ticket", "the Notion page", or when you need the source-of-truth text for the mission. Returns the Notion URL + locally-extracted ticket content from .ai/thoughts/.',
275
288
  inputSchema: { type: 'object', properties: {}, required: [] },
289
+ annotations: { readOnlyHint: true, openWorldHint: false },
276
290
  },
277
291
  {
278
292
  name: 'get_dev_server_status',
279
293
  description: 'CALL BEFORE asking the user whether the app is running, or when your change is dev-server-sensitive. Returns running/stopped/starting/error + URL, port, container names.',
280
294
  inputSchema: { type: 'object', properties: {}, required: [] },
295
+ annotations: { readOnlyHint: true, openWorldHint: false },
281
296
  },
282
297
  {
283
298
  name: 'start_dev_server',
284
299
  description: 'CALL WHEN the user asks you to test the running app and the dev server is stopped.',
285
300
  inputSchema: { type: 'object', properties: {}, required: [] },
301
+ annotations: { destructiveHint: false, openWorldHint: false },
286
302
  },
287
303
  {
288
304
  name: 'stop_dev_server',
289
305
  description: 'CALL WHEN the user explicitly asks to stop the dev server, or before destructive operations that require a clean boot.',
290
306
  inputSchema: { type: 'object', properties: {}, required: [] },
307
+ annotations: { destructiveHint: false, openWorldHint: false },
291
308
  },
292
309
  {
293
310
  name: 'get_dev_server_logs',
@@ -299,11 +316,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
299
316
  },
300
317
  required: [],
301
318
  },
319
+ annotations: { readOnlyHint: true, openWorldHint: false },
302
320
  },
303
321
  {
304
322
  name: 'list_workspace_images',
305
323
  description: 'CALL WHEN the user mentions "the screenshot", "the attached image", or when you need to reference a previously-uploaded image. Returns uid, originalName, relativePath, createdAt for every image in .ai/images/.',
306
324
  inputSchema: { type: 'object', properties: {}, required: [] },
325
+ annotations: { readOnlyHint: true, openWorldHint: false },
307
326
  },
308
327
  {
309
328
  name: 'get_settings',
@@ -318,12 +337,14 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
318
337
  },
319
338
  required: [],
320
339
  },
340
+ annotations: { readOnlyHint: true, openWorldHint: false },
321
341
  },
322
342
  // ── Knowledge / context tools ─────────────────────────────────────────────
323
343
  {
324
344
  name: 'list_documents',
325
345
  description: 'CALL EARLY on a new session to discover plans, specs, and thoughts previously written for this workspace. Recursively lists every .md under docs/plans/, docs/superpowers/, and .ai/thoughts/. Before writing a new plan, check if one already exists.',
326
346
  inputSchema: { type: 'object', properties: {}, required: [] },
347
+ annotations: { readOnlyHint: true, openWorldHint: false },
327
348
  },
328
349
  {
329
350
  name: 'read_document',
@@ -338,6 +359,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
338
359
  },
339
360
  required: ['path'],
340
361
  },
362
+ annotations: { readOnlyHint: true, openWorldHint: false },
341
363
  },
342
364
  {
343
365
  name: 'log_thought',
@@ -354,6 +376,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
354
376
  },
355
377
  required: ['title', 'content'],
356
378
  },
379
+ annotations: { destructiveHint: false, openWorldHint: false },
357
380
  },
358
381
  {
359
382
  name: 'search_codebase',
@@ -375,11 +398,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
375
398
  },
376
399
  required: ['query'],
377
400
  },
401
+ annotations: { readOnlyHint: true, openWorldHint: false },
378
402
  },
379
403
  {
380
404
  name: 'get_session_usage',
381
405
  description: 'CALL when you need to self-regulate on long missions — returns token/cost totals for the workspace lifetime and for the currently running agent_session. Useful before spawning heavy subagents or deep reasoning on already-expensive sessions.',
382
406
  inputSchema: { type: 'object', properties: {}, required: [] },
407
+ annotations: { readOnlyHint: true, openWorldHint: false },
383
408
  },
384
409
  {
385
410
  name: 'schedule_wakeup',
@@ -402,11 +427,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
402
427
  },
403
428
  required: ['delaySeconds', 'prompt'],
404
429
  },
430
+ annotations: { destructiveHint: false, openWorldHint: false },
405
431
  },
406
432
  {
407
433
  name: 'cancel_wakeup',
408
434
  description: 'CALL to cancel any pending wakeup on this workspace (e.g. the condition you were waiting on resolved early, or you decided not to continue). Idempotent — safe to call when nothing is pending.',
409
435
  inputSchema: { type: 'object', properties: {}, required: [] },
436
+ annotations: { destructiveHint: false, openWorldHint: false },
410
437
  },
411
438
  ],
412
439
  }));
@@ -19,6 +19,7 @@ import sentryRouter from './routes/sentry.js';
19
19
  import settingsRouter from './routes/settings.js';
20
20
  import templatesRouter from './routes/templates.js';
21
21
  import usageRoutes from './routes/usage.js';
22
+ import voiceRouter from './routes/voice.js';
22
23
  import workspacesRouter from './routes/workspaces.js';
23
24
  import { getAvailableSkills, reconcileOrphanSessions, restoreRetryCountsFromDb, sendMessage, setBackendPort, startAgent, startWatchdog, stopAgent, stopWatchdog, } from './services/agent/orchestrator.js';
24
25
  import * as autoLoopService from './services/auto-loop-service.js';
@@ -83,6 +84,7 @@ app.route('/api/search', searchRouter);
83
84
  app.route('/api/health', healthRouter);
84
85
  app.route('/api/engines', enginesRouter);
85
86
  app.route('/api/migration', migrationRouter);
87
+ app.route('/api/voice', voiceRouter);
86
88
  // Skills endpoint
87
89
  app.get('/api/skills', (c) => c.json(getAvailableSkills()));
88
90
  const PORT = parseInt(process.env.SERVER_PORT || process.env.PORT || '3000', 10);
@@ -3,6 +3,7 @@ import fs from 'node:fs';
3
3
  import { Hono } from 'hono';
4
4
  import { getDb } from '../db/index.js';
5
5
  import { SCHEMA_VERSION } from '../db/migrations.js';
6
+ import { resolveCodexBinary } from '../services/agent/engines/codex/spawn.js';
6
7
  import { getGlobalSettings, getProjectSettings, SETTINGS_SCHEMA_VERSION } from '../services/settings-service.js';
7
8
  import { getDbPath, getKoboHome } from '../utils/paths.js';
8
9
  import { slugifyProjectName } from '../utils/project-slug.js';
@@ -19,6 +20,18 @@ function checkClaudeCli() {
19
20
  return { available: false, version: null };
20
21
  }
21
22
  }
23
/**
 * Probes the Codex CLI by synchronously running `<binary> --version`.
 *
 * @returns {{ available: boolean, version: string | null }} `available` is
 *   true only when the process could be spawned and exited with status 0.
 *   `version` is the trimmed stdout of the probe, or null when stdout is
 *   empty or the probe failed.
 */
function checkCodexCli() {
    const unavailable = { available: false, version: null };
    try {
        const probe = spawnSync(resolveCodexBinary(), ['--version'], { encoding: 'utf-8' });
        const probeFailed = Boolean(probe.error) || probe.status !== 0;
        if (probeFailed) {
            return unavailable;
        }
        const reportedVersion = (probe.stdout ?? '').trim();
        return { available: true, version: reportedVersion || null };
    }
    catch {
        // Resolving or spawning the binary threw: report the CLI as unavailable.
        return unavailable;
    }
}
22
35
  function isProcessAlive(pid) {
23
36
  try {
24
37
  process.kill(pid, 0);
@@ -142,6 +155,7 @@ app.get('/report', (c) => {
142
155
  },
143
156
  settings: { schemaVersion: SETTINGS_SCHEMA_VERSION },
144
157
  claudeCli: checkClaudeCli(),
158
+ codexCli: checkCodexCli(),
145
159
  workspaces: {
146
160
  total: settingsRow.n,
147
161
  archived: archivedRow.n,
@@ -0,0 +1,149 @@
1
+ import { Hono } from 'hono';
2
+ import * as settingsService from '../services/settings-service.js';
3
+ import * as transcriptionService from '../services/transcription-service.js';
4
+ import * as workspaceService from '../services/workspace-service.js';
5
// Router that carries every voice endpoint declared in this module.
const app = new Hono();
// Upper bound, in bytes, for an uploaded audio clip (10 MiB).
const MAX_AUDIO_SIZE = 10 * 1024 ** 2;
// MIME types an uploaded clip may declare.
const ALLOWED_AUDIO_MIME = new Set([
    'audio/webm',
    'audio/ogg',
    'audio/wav',
    'audio/mpeg',
    'audio/mp4',
]);
// Shape of an explicit language value: ASCII letters and hyphens only.
const LANGUAGE_RE = /^[a-z-]+$/i;
9
/**
 * Duck-type check for values that carry the three fields the route handlers
 * read from a voice error: a string `message`, a string `code` and a numeric
 * `status`.
 *
 * @param {unknown} err - Value caught by a handler.
 * @returns {boolean} True when `err` is a non-null object with all three fields.
 */
function isVoiceLikeError(err) {
    if (typeof err !== 'object' || err === null) {
        return false;
    }
    const requiredFields = [
        ['message', 'string'],
        ['code', 'string'],
        ['status', 'number'],
    ];
    return requiredFields.every(([field, expectedType]) => typeof err[field] === expectedType);
}
15
/**
 * Narrows an error's status to the two HTTP codes the voice routes answer
 * with: 400 is passed through, every other value becomes 500.
 *
 * @param {unknown} status - Status carried by the caught error.
 * @returns {400 | 500} HTTP status for the response.
 */
function toVoiceHttpStatus(status) {
    if (status === 400) {
        return 400;
    }
    return 500;
}
18
/**
 * Validates a multipart transcription request and forwards the audio to
 * `transcriptionService.transcribeAudio`.
 *
 * Expected body fields:
 *   - `audio`: the uploaded clip (must be a `File`).
 *   - `language`: optional; blank or missing means `'auto'`.
 *
 * Validation failures are answered directly with a 400 JSON response
 * (`LANGUAGE_INVALID` or `MIC_AUDIO_INVALID`). Errors thrown while parsing
 * the body or transcribing propagate to the caller.
 *
 * @param c - Hono request context.
 * @param config - Transcription options: `modelName`, `temperature`,
 *   `prompt`, `translateToEnglish`, `suppressNonSpeechTokens`.
 * @returns The JSON response (validation error or transcription result).
 */
async function parseAndTranscribeFromBody(c, config) {
    const body = await c.req.parseBody();
    const audio = body.audio;
    const languageRaw = body.language;
    const language = typeof languageRaw === 'string' && languageRaw.trim().length > 0 ? languageRaw.trim() : 'auto';
    if (language !== 'auto' && (!LANGUAGE_RE.test(language) || language.length > 16)) {
        return c.json({ error: `Invalid language '${language}'`, code: 'LANGUAGE_INVALID' }, 400);
    }
    if (!audio || !(audio instanceof File)) {
        return c.json({ error: 'Missing audio field in multipart body', code: 'MIC_AUDIO_INVALID' }, 400);
    }
    // Compare only the "type/subtype" part: recorders commonly report
    // parameterized types such as `audio/webm;codecs=opus`, which would never
    // match the allow-list verbatim.
    const mimeType = audio.type.split(';', 1)[0].trim().toLowerCase();
    if (!ALLOWED_AUDIO_MIME.has(mimeType)) {
        return c.json({ error: `Unsupported audio type '${audio.type}'`, code: 'MIC_AUDIO_INVALID' }, 400);
    }
    // Enforce the size limit on the declared byte length so an empty or
    // oversized upload is rejected before it is copied into a Buffer.
    if (audio.size === 0 || audio.size > MAX_AUDIO_SIZE) {
        return c.json({ error: 'Invalid audio size', code: 'MIC_AUDIO_INVALID' }, 400);
    }
    const buffer = Buffer.from(await audio.arrayBuffer());
    const result = await transcriptionService.transcribeAudio({
        audioBuffer: buffer,
        modelName: config.modelName,
        language,
        temperature: config.temperature,
        prompt: config.prompt,
        translateToEnglish: config.translateToEnglish,
        suppressNonSpeechTokens: config.suppressNonSpeechTokens,
    });
    return c.json(result);
}
47
// GET /models: responds with whatever transcriptionService.listVoiceModels() returns.
app.get('/models', (c) => {
    try {
        const models = transcriptionService.listVoiceModels();
        return c.json(models);
    }
    catch (failure) {
        // Any failure is answered with a 500 that carries only the error text.
        const error = failure instanceof Error ? failure.message : String(failure);
        return c.json({ error }, 500);
    }
});
56
// GET /runtime: responds with the result of transcriptionService.getVoiceRuntimeStatus().
app.get('/runtime', async (c) => {
    try {
        const runtimeStatus = await transcriptionService.getVoiceRuntimeStatus();
        return c.json(runtimeStatus);
    }
    catch (failure) {
        // Any failure is answered with a 500 tagged VOICE_RUNTIME_CHECK_FAILED.
        const error = failure instanceof Error ? failure.message : String(failure);
        return c.json({ error, code: 'VOICE_RUNTIME_CHECK_FAILED' }, 500);
    }
});
66
// POST /models/:name/download: downloads the named voice model and answers
// 201 with the service's result.
app.post('/models/:name/download', async (c) => {
    try {
        const modelName = c.req.param('name');
        const downloaded = await transcriptionService.downloadVoiceModel(modelName);
        return c.json(downloaded, 201);
    }
    catch (failure) {
        const isVoiceFailure = failure instanceof transcriptionService.VoiceError || isVoiceLikeError(failure);
        if (!isVoiceFailure) {
            // Unrecognised failure: generic 500 with the download-specific code.
            const error = failure instanceof Error ? failure.message : String(failure);
            return c.json({ error, code: 'MODEL_DOWNLOAD_FAILED' }, 500);
        }
        // Voice errors keep their own message and code; status is narrowed to 400/500.
        return c.json({ error: failure.message, code: failure.code }, toVoiceHttpStatus(failure.status));
    }
});
80
// DELETE /models/:name: deletes the named voice model and answers 204 with no body.
app.delete('/models/:name', (c) => {
    try {
        transcriptionService.deleteVoiceModel(c.req.param('name'));
        return c.body(null, 204);
    }
    catch (failure) {
        const isVoiceFailure = failure instanceof transcriptionService.VoiceError || isVoiceLikeError(failure);
        if (!isVoiceFailure) {
            // Unrecognised failure: generic 500 with the delete-specific code.
            const error = failure instanceof Error ? failure.message : String(failure);
            return c.json({ error, code: 'MODEL_DELETE_FAILED' }, 500);
        }
        // Voice errors keep their own message and code; status is narrowed to 400/500.
        return c.json({ error: failure.message, code: failure.code }, toVoiceHttpStatus(failure.status));
    }
});
94
// POST /workspaces/:id/transcribe: transcribes an uploaded clip for an
// existing workspace, using the voice options from the global settings.
app.post('/workspaces/:id/transcribe', async (c) => {
    try {
        const workspaceId = c.req.param('id');
        if (!workspaceService.getWorkspace(workspaceId)) {
            return c.json({ error: `Workspace '${workspaceId}' not found` }, 404);
        }
        const settings = settingsService.getGlobalSettings();
        // The feature must be switched on and a model selected before any upload is read.
        if (!settings.voiceEnabled) {
            return c.json({ error: 'Voice transcription is disabled', code: 'VOICE_DISABLED' }, 400);
        }
        if (!settings.voiceModel) {
            return c.json({ error: 'No voice model configured', code: 'MODEL_NOT_CONFIGURED' }, 400);
        }
        const transcriptionConfig = {
            modelName: settings.voiceModel,
            temperature: settings.voiceTemperature,
            prompt: settings.voicePrompt,
            translateToEnglish: settings.voiceTranslateToEnglish,
            suppressNonSpeechTokens: settings.voiceSuppressNonSpeechTokens,
        };
        return await parseAndTranscribeFromBody(c, transcriptionConfig);
    }
    catch (failure) {
        const isVoiceFailure = failure instanceof transcriptionService.VoiceError || isVoiceLikeError(failure);
        if (!isVoiceFailure) {
            // Unrecognised failure: generic 500 with the transcription-specific code.
            const error = failure instanceof Error ? failure.message : String(failure);
            return c.json({ error, code: 'TRANSCRIPTION_FAILED' }, 500);
        }
        // Voice errors keep their own message and code; status is narrowed to 400/500.
        return c.json({ error: failure.message, code: failure.code }, toVoiceHttpStatus(failure.status));
    }
});
123
// Draft transcription endpoint used before a workspace exists (Create page).
// POST /transcribe: same checks and options as the workspace-scoped route,
// minus the workspace lookup.
app.post('/transcribe', async (c) => {
    try {
        const global = settingsService.getGlobalSettings();
        // The feature must be switched on and a model selected before any upload is read.
        if (!global.voiceEnabled) {
            return c.json({ error: 'Voice transcription is disabled', code: 'VOICE_DISABLED' }, 400);
        }
        if (!global.voiceModel) {
            return c.json({ error: 'No voice model configured', code: 'MODEL_NOT_CONFIGURED' }, 400);
        }
        return await parseAndTranscribeFromBody(c, {
            modelName: global.voiceModel,
            temperature: global.voiceTemperature,
            prompt: global.voicePrompt,
            translateToEnglish: global.voiceTranslateToEnglish,
            suppressNonSpeechTokens: global.voiceSuppressNonSpeechTokens,
        });
    }
    catch (err) {
        // Accept duck-typed voice errors as well as VoiceError instances, as the
        // other voice routes do, so such an error keeps its own code and status
        // instead of collapsing into a generic 500.
        if (err instanceof transcriptionService.VoiceError || isVoiceLikeError(err)) {
            return c.json({ error: err.message, code: err.code }, toVoiceHttpStatus(err.status));
        }
        const message = err instanceof Error ? err.message : String(err);
        return c.json({ error: message, code: 'TRANSCRIPTION_FAILED' }, 500);
    }
});
149
+ export default app;