@otto-assistant/otto 0.1.2 → 0.7.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (638)
  1. package/bin.js +2 -0
  2. package/dist/agent-model.e2e.test.js +755 -0
  3. package/dist/ai-tool-to-genai.js +233 -0
  4. package/dist/ai-tool-to-genai.test.js +267 -0
  5. package/dist/ai-tool.js +6 -0
  6. package/dist/anthropic-account-identity.js +62 -0
  7. package/dist/anthropic-account-identity.test.js +38 -0
  8. package/dist/anthropic-auth-plugin.js +917 -0
  9. package/dist/anthropic-auth-state.js +303 -0
  10. package/dist/anthropic-auth-state.test.js +150 -0
  11. package/dist/bin.js +152 -0
  12. package/dist/btw-prefix-detection.js +17 -0
  13. package/dist/btw-prefix-detection.test.js +63 -0
  14. package/dist/channel-management.js +259 -0
  15. package/dist/cli-parsing.test.js +142 -0
  16. package/dist/cli-send-thread.e2e.test.js +353 -0
  17. package/dist/cli-telegram-options.test.js +99 -0
  18. package/dist/cli.js +4210 -568
  19. package/dist/commands/abort.js +65 -0
  20. package/dist/commands/action-buttons.js +245 -0
  21. package/dist/commands/add-dir.js +124 -0
  22. package/dist/commands/add-dir.test.js +126 -0
  23. package/dist/commands/add-project.js +113 -0
  24. package/dist/commands/agent.js +355 -0
  25. package/dist/commands/ask-question.js +320 -0
  26. package/dist/commands/ask-question.test.js +92 -0
  27. package/dist/commands/btw.js +121 -0
  28. package/dist/commands/cli-commands-group-a.test.js +728 -0
  29. package/dist/commands/cli-commands-group-b.test.js +695 -0
  30. package/dist/commands/compact.js +120 -0
  31. package/dist/commands/context-usage.js +140 -0
  32. package/dist/commands/create-new-project.js +130 -0
  33. package/dist/commands/diff.js +63 -0
  34. package/dist/commands/discord-commands-group-a.test.js +621 -0
  35. package/dist/commands/discord-commands-group-b.test.js +595 -0
  36. package/dist/commands/discord-commands-group-c.test.js +739 -0
  37. package/dist/commands/file-upload.js +275 -0
  38. package/dist/commands/fork-subagent.js +177 -0
  39. package/dist/commands/fork.js +262 -0
  40. package/dist/commands/gemini-apikey.js +70 -0
  41. package/dist/commands/login.js +887 -0
  42. package/dist/commands/mcp.js +239 -0
  43. package/dist/commands/memory-snapshot.js +24 -0
  44. package/dist/commands/mention-mode.js +44 -0
  45. package/dist/commands/merge-worktree.js +162 -0
  46. package/dist/commands/model-variant.js +366 -0
  47. package/dist/commands/model.js +794 -0
  48. package/dist/commands/new-worktree.js +465 -0
  49. package/dist/commands/paginated-select.js +57 -0
  50. package/dist/commands/permissions.js +274 -0
  51. package/dist/commands/queue.js +223 -0
  52. package/dist/commands/remove-project.js +115 -0
  53. package/dist/commands/restart-opencode-server.js +127 -0
  54. package/dist/commands/resume.js +149 -0
  55. package/dist/commands/run-command.js +79 -0
  56. package/dist/commands/screenshare.js +303 -0
  57. package/dist/commands/screenshare.test.js +20 -0
  58. package/dist/commands/session-id.js +78 -0
  59. package/dist/commands/session.js +176 -0
  60. package/dist/commands/share.js +80 -0
  61. package/dist/commands/tasks.js +205 -0
  62. package/dist/commands/thread-deletion-sync.js +50 -0
  63. package/dist/commands/types.js +2 -0
  64. package/dist/commands/undo-redo.js +305 -0
  65. package/dist/commands/unset-model.js +139 -0
  66. package/dist/commands/upgrade.js +48 -0
  67. package/dist/commands/user-command.js +155 -0
  68. package/dist/commands/verbosity.js +125 -0
  69. package/dist/commands/vscode.js +269 -0
  70. package/dist/commands/worktree-settings.js +43 -0
  71. package/dist/commands/worktrees.js +468 -0
  72. package/dist/condense-memory.js +33 -0
  73. package/dist/config.js +100 -255
  74. package/dist/context-awareness-plugin.js +340 -0
  75. package/dist/context-awareness-plugin.test.js +126 -0
  76. package/dist/critique-utils.js +95 -0
  77. package/dist/database.js +1355 -0
  78. package/dist/db.js +260 -0
  79. package/dist/db.test.js +138 -0
  80. package/dist/debounce-timeout.js +28 -0
  81. package/dist/debounced-process-flush.js +77 -0
  82. package/dist/discord-bot.js +1124 -0
  83. package/dist/discord-command-registration.js +567 -0
  84. package/dist/discord-urls.js +82 -0
  85. package/dist/discord-utils.js +616 -0
  86. package/dist/discord-utils.test.js +134 -0
  87. package/dist/errors.js +157 -0
  88. package/dist/escape-backticks.test.js +429 -0
  89. package/dist/event-stream-real-capture.e2e.test.js +533 -0
  90. package/dist/eventsource-parser.test.js +327 -0
  91. package/dist/exec-async.js +26 -0
  92. package/dist/external-opencode-sync.js +480 -0
  93. package/dist/format-tables.js +491 -0
  94. package/dist/format-tables.test.js +478 -0
  95. package/dist/forum-sync/config.js +79 -0
  96. package/dist/forum-sync/discord-operations.js +154 -0
  97. package/dist/forum-sync/index.js +5 -0
  98. package/dist/forum-sync/markdown.js +113 -0
  99. package/dist/forum-sync/sync-to-discord.js +417 -0
  100. package/dist/forum-sync/sync-to-files.js +190 -0
  101. package/dist/forum-sync/types.js +53 -0
  102. package/dist/forum-sync/watchers.js +307 -0
  103. package/dist/gateway-proxy-reconnect.e2e.test.js +394 -0
  104. package/dist/gateway-proxy.e2e.test.js +485 -0
  105. package/dist/genai-worker-wrapper.js +111 -0
  106. package/dist/genai-worker.js +311 -0
  107. package/dist/genai.js +232 -0
  108. package/dist/generated/browser.js +17 -0
  109. package/dist/generated/client.js +37 -0
  110. package/dist/generated/commonInputTypes.js +10 -0
  111. package/dist/generated/enums.js +58 -0
  112. package/dist/generated/internal/class.js +49 -0
  113. package/dist/generated/internal/prismaNamespace.js +254 -0
  114. package/dist/generated/internal/prismaNamespaceBrowser.js +224 -0
  115. package/dist/generated/models/bot_api_keys.js +1 -0
  116. package/dist/generated/models/bot_tokens.js +1 -0
  117. package/dist/generated/models/channel_agents.js +1 -0
  118. package/dist/generated/models/channel_directories.js +1 -0
  119. package/dist/generated/models/channel_mention_mode.js +1 -0
  120. package/dist/generated/models/channel_models.js +1 -0
  121. package/dist/generated/models/channel_verbosity.js +1 -0
  122. package/dist/generated/models/channel_worktrees.js +1 -0
  123. package/dist/generated/models/forum_sync_configs.js +1 -0
  124. package/dist/generated/models/global_models.js +1 -0
  125. package/dist/generated/models/ipc_requests.js +1 -0
  126. package/dist/generated/models/part_messages.js +1 -0
  127. package/dist/generated/models/scheduled_tasks.js +1 -0
  128. package/dist/generated/models/session_agents.js +1 -0
  129. package/dist/generated/models/session_events.js +1 -0
  130. package/dist/generated/models/session_models.js +1 -0
  131. package/dist/generated/models/session_start_sources.js +1 -0
  132. package/dist/generated/models/thread_sessions.js +1 -0
  133. package/dist/generated/models/thread_worktrees.js +1 -0
  134. package/dist/generated/models.js +1 -0
  135. package/dist/heap-monitor.js +122 -0
  136. package/dist/hrana-server.js +251 -0
  137. package/dist/hrana-server.test.js +370 -0
  138. package/dist/html-actions.js +123 -0
  139. package/dist/html-actions.test.js +70 -0
  140. package/dist/html-components.js +117 -0
  141. package/dist/html-components.test.js +34 -0
  142. package/dist/image-optimizer-plugin.js +153 -0
  143. package/dist/image-utils.js +112 -0
  144. package/dist/interaction-handler.js +420 -0
  145. package/dist/ipc-polling.js +327 -0
  146. package/dist/ipc-tools-plugin.js +193 -0
  147. package/dist/ipc-utils.js +18 -0
  148. package/dist/limit-heading-depth.js +25 -0
  149. package/dist/limit-heading-depth.test.js +105 -0
  150. package/dist/logger.js +171 -0
  151. package/dist/markdown.js +342 -0
  152. package/dist/markdown.test.js +264 -0
  153. package/dist/memory-overview-plugin.js +128 -0
  154. package/dist/message-finish-field.e2e.test.js +168 -0
  155. package/dist/message-formatting.js +415 -0
  156. package/dist/message-formatting.test.js +115 -0
  157. package/dist/message-preprocessing.js +359 -0
  158. package/dist/onboarding-tutorial.js +163 -0
  159. package/dist/onboarding-welcome.js +37 -0
  160. package/dist/openai-realtime.js +224 -0
  161. package/dist/opencode-command-detection.js +65 -0
  162. package/dist/opencode-command-detection.test.js +240 -0
  163. package/dist/opencode-command.js +131 -0
  164. package/dist/opencode-command.test.js +48 -0
  165. package/dist/opencode-interrupt-plugin.js +388 -0
  166. package/dist/opencode-interrupt-plugin.test.js +463 -0
  167. package/dist/opencode.js +1117 -0
  168. package/dist/otto/branding.js +22 -0
  169. package/dist/otto/index.js +21 -0
  170. package/dist/otto-digital-twin.e2e.test.js +161 -0
  171. package/dist/otto-opencode-plugin-loading.e2e.test.js +94 -0
  172. package/dist/otto-opencode-plugin.js +21 -0
  173. package/dist/otto-opencode-plugin.test.js +98 -0
  174. package/dist/parse-permission-rules.test.js +117 -0
  175. package/dist/patch-text-parser.js +97 -0
  176. package/dist/plugin-logger.js +68 -0
  177. package/dist/privacy-sanitizer.js +105 -0
  178. package/dist/queue-advanced-abort.e2e.test.js +293 -0
  179. package/dist/queue-advanced-action-buttons.e2e.test.js +206 -0
  180. package/dist/queue-advanced-e2e-setup.js +790 -0
  181. package/dist/queue-advanced-footer.e2e.test.js +481 -0
  182. package/dist/queue-advanced-model-switch.e2e.test.js +299 -0
  183. package/dist/queue-advanced-permissions-typing.e2e.test.js +179 -0
  184. package/dist/queue-advanced-question.e2e.test.js +261 -0
  185. package/dist/queue-advanced-typing-interrupt.e2e.test.js +114 -0
  186. package/dist/queue-advanced-typing.e2e.test.js +153 -0
  187. package/dist/queue-drain-after-interactive-ui.e2e.test.js +119 -0
  188. package/dist/queue-interrupt-drain.e2e.test.js +135 -0
  189. package/dist/queue-question-select-drain.e2e.test.js +256 -0
  190. package/dist/runtime-idle-sweeper.js +52 -0
  191. package/dist/runtime-lifecycle.e2e.test.js +514 -0
  192. package/dist/sentry.js +23 -0
  193. package/dist/session-handler/agent-utils.js +67 -0
  194. package/dist/session-handler/event-stream-state.js +475 -0
  195. package/dist/session-handler/event-stream-state.test.js +632 -0
  196. package/dist/session-handler/model-utils.js +147 -0
  197. package/dist/session-handler/opencode-session-event-log.js +94 -0
  198. package/dist/session-handler/thread-runtime-state.js +131 -0
  199. package/dist/session-handler/thread-session-runtime.js +3390 -0
  200. package/dist/session-handler.js +9 -0
  201. package/dist/session-search.js +100 -0
  202. package/dist/session-search.test.js +40 -0
  203. package/dist/session-title-rename.test.js +92 -0
  204. package/dist/skill-filter.js +31 -0
  205. package/dist/skill-filter.test.js +65 -0
  206. package/dist/startup-service.js +153 -0
  207. package/dist/startup-time.e2e.test.js +296 -0
  208. package/dist/store.js +19 -0
  209. package/dist/subagent-rate-limit-plugin.js +175 -0
  210. package/dist/system-message.js +702 -0
  211. package/dist/system-message.test.js +697 -0
  212. package/dist/task-runner.js +530 -0
  213. package/dist/task-schedule.js +213 -0
  214. package/dist/task-schedule.test.js +71 -0
  215. package/dist/test-utils.js +313 -0
  216. package/dist/thinking-utils.js +35 -0
  217. package/dist/thread-message-queue.e2e.test.js +1111 -0
  218. package/dist/tools.js +357 -0
  219. package/dist/undo-redo.e2e.test.js +161 -0
  220. package/dist/unnest-code-blocks.js +146 -0
  221. package/dist/unnest-code-blocks.test.js +673 -0
  222. package/dist/upgrade.js +156 -0
  223. package/dist/utils.js +172 -0
  224. package/dist/utils.test.js +130 -0
  225. package/dist/voice-attachment.js +34 -0
  226. package/dist/voice-handler.js +646 -0
  227. package/dist/voice-message.e2e.test.js +1021 -0
  228. package/dist/voice.js +456 -0
  229. package/dist/voice.test.js +235 -0
  230. package/dist/wait-session.js +171 -0
  231. package/dist/websockify.js +69 -0
  232. package/dist/worker-types.js +4 -0
  233. package/dist/worktree-lifecycle.e2e.test.js +311 -0
  234. package/dist/worktree-utils.js +3 -0
  235. package/dist/worktrees.js +991 -0
  236. package/dist/worktrees.test.js +415 -0
  237. package/dist/xml.js +92 -0
  238. package/dist/xml.test.js +32 -0
  239. package/package.json +90 -38
  240. package/schema.prisma +303 -0
  241. package/skills/batch/SKILL.md +87 -0
  242. package/skills/critique/SKILL.md +112 -0
  243. package/skills/egaki/SKILL.md +100 -0
  244. package/skills/errore/SKILL.md +647 -0
  245. package/skills/event-sourcing-state/SKILL.md +252 -0
  246. package/skills/goke/SKILL.md +38 -0
  247. package/skills/jitter/EDITOR.md +219 -0
  248. package/skills/jitter/EXPORT-INTERNALS.md +309 -0
  249. package/skills/jitter/SKILL.md +158 -0
  250. package/skills/jitter/jitter-clipboard.json +1042 -0
  251. package/skills/jitter/package.json +14 -0
  252. package/skills/jitter/tsconfig.json +15 -0
  253. package/skills/jitter/utils/actions.ts +212 -0
  254. package/skills/jitter/utils/export.ts +114 -0
  255. package/skills/jitter/utils/index.ts +141 -0
  256. package/skills/jitter/utils/snapshot.ts +154 -0
  257. package/skills/jitter/utils/traverse.ts +246 -0
  258. package/skills/jitter/utils/types.ts +279 -0
  259. package/skills/jitter/utils/wait.ts +133 -0
  260. package/skills/lintcn/SKILL.md +873 -0
  261. package/skills/manual-kimaki-upstream-adapt/SKILL.md +114 -0
  262. package/skills/new-skill/SKILL.md +237 -0
  263. package/skills/npm-package/SKILL.md +617 -0
  264. package/skills/opensrc/SKILL.md +78 -0
  265. package/skills/otto-publish/SKILL.md +61 -0
  266. package/skills/playwriter/SKILL.md +35 -0
  267. package/skills/profano/SKILL.md +16 -0
  268. package/skills/proxyman/SKILL.md +215 -0
  269. package/skills/security-review/SKILL.md +208 -0
  270. package/skills/sigillo/SKILL.md +101 -0
  271. package/skills/simplify/SKILL.md +58 -0
  272. package/skills/spiceflow/SKILL.md +28 -0
  273. package/skills/termcast/SKILL.md +945 -0
  274. package/skills/tuistory/SKILL.md +98 -0
  275. package/skills/usecomputer/SKILL.md +264 -0
  276. package/skills/x-articles/SKILL.md +554 -0
  277. package/skills/zele/SKILL.md +49 -0
  278. package/skills/zustand-centralized-state/SKILL.md +1004 -0
  279. package/src/agent-model.e2e.test.ts +979 -0
  280. package/src/ai-tool-to-genai.test.ts +296 -0
  281. package/src/ai-tool-to-genai.ts +283 -0
  282. package/src/ai-tool.ts +39 -0
  283. package/src/anthropic-account-identity.test.ts +52 -0
  284. package/src/anthropic-account-identity.ts +77 -0
  285. package/src/anthropic-auth-plugin.ts +1139 -0
  286. package/src/anthropic-auth-state.test.ts +187 -0
  287. package/src/anthropic-auth-state.ts +386 -0
  288. package/src/bin.ts +182 -0
  289. package/src/btw-prefix-detection.test.ts +73 -0
  290. package/src/btw-prefix-detection.ts +23 -0
  291. package/src/channel-management.ts +376 -0
  292. package/src/cli-parsing.test.ts +197 -0
  293. package/src/cli-send-thread.e2e.test.ts +463 -0
  294. package/src/cli-telegram-options.test.ts +114 -0
  295. package/src/cli.ts +5718 -580
  296. package/src/commands/abort.ts +89 -0
  297. package/src/commands/action-buttons.ts +364 -0
  298. package/src/commands/add-dir.test.ts +154 -0
  299. package/src/commands/add-dir.ts +175 -0
  300. package/src/commands/add-project.ts +149 -0
  301. package/src/commands/agent.ts +496 -0
  302. package/src/commands/ask-question.test.ts +111 -0
  303. package/src/commands/ask-question.ts +455 -0
  304. package/src/commands/btw.ts +184 -0
  305. package/src/commands/cli-commands-group-a.test.ts +837 -0
  306. package/src/commands/cli-commands-group-b.test.ts +800 -0
  307. package/src/commands/compact.ts +157 -0
  308. package/src/commands/context-usage.ts +199 -0
  309. package/src/commands/create-new-project.ts +190 -0
  310. package/src/commands/diff.ts +91 -0
  311. package/src/commands/discord-commands-group-a.test.ts +751 -0
  312. package/src/commands/discord-commands-group-b.test.ts +648 -0
  313. package/src/commands/discord-commands-group-c.test.ts +882 -0
  314. package/src/commands/file-upload.ts +389 -0
  315. package/src/commands/fork-subagent.ts +263 -0
  316. package/src/commands/fork.ts +386 -0
  317. package/src/commands/gemini-apikey.ts +104 -0
  318. package/src/commands/login.ts +1175 -0
  319. package/src/commands/mcp.ts +307 -0
  320. package/src/commands/memory-snapshot.ts +30 -0
  321. package/src/commands/mention-mode.ts +68 -0
  322. package/src/commands/merge-worktree.ts +226 -0
  323. package/src/commands/model-variant.ts +485 -0
  324. package/src/commands/model.ts +1078 -0
  325. package/src/commands/new-worktree.ts +645 -0
  326. package/src/commands/paginated-select.ts +81 -0
  327. package/src/commands/permissions.ts +397 -0
  328. package/src/commands/queue.ts +293 -0
  329. package/src/commands/remove-project.ts +155 -0
  330. package/src/commands/restart-opencode-server.ts +162 -0
  331. package/src/commands/resume.ts +230 -0
  332. package/src/commands/run-command.ts +123 -0
  333. package/src/commands/screenshare.test.ts +30 -0
  334. package/src/commands/screenshare.ts +366 -0
  335. package/src/commands/session-id.ts +109 -0
  336. package/src/commands/session.ts +227 -0
  337. package/src/commands/share.ts +106 -0
  338. package/src/commands/tasks.ts +293 -0
  339. package/src/commands/thread-deletion-sync.ts +80 -0
  340. package/src/commands/types.ts +25 -0
  341. package/src/commands/undo-redo.ts +386 -0
  342. package/src/commands/unset-model.ts +174 -0
  343. package/src/commands/upgrade.ts +59 -0
  344. package/src/commands/user-command.ts +198 -0
  345. package/src/commands/verbosity.ts +173 -0
  346. package/src/commands/vscode.ts +342 -0
  347. package/src/commands/worktree-settings.ts +70 -0
  348. package/src/commands/worktrees.ts +645 -0
  349. package/src/condense-memory.ts +36 -0
  350. package/src/config.ts +103 -339
  351. package/src/context-awareness-plugin.test.ts +144 -0
  352. package/src/context-awareness-plugin.ts +469 -0
  353. package/src/critique-utils.ts +139 -0
  354. package/src/database.ts +1949 -0
  355. package/src/db.test.ts +162 -0
  356. package/src/db.ts +295 -0
  357. package/src/debounce-timeout.ts +43 -0
  358. package/src/debounced-process-flush.ts +104 -0
  359. package/src/discord-bot.ts +1505 -0
  360. package/src/discord-command-registration.ts +752 -0
  361. package/src/discord-urls.ts +89 -0
  362. package/src/discord-utils.test.ts +153 -0
  363. package/src/discord-utils.ts +846 -0
  364. package/src/errors.ts +201 -0
  365. package/src/escape-backticks.test.ts +469 -0
  366. package/src/event-stream-real-capture.e2e.test.ts +692 -0
  367. package/src/eventsource-parser.test.ts +351 -0
  368. package/src/exec-async.ts +35 -0
  369. package/src/external-opencode-sync.ts +685 -0
  370. package/src/format-tables.test.ts +515 -0
  371. package/src/format-tables.ts +718 -0
  372. package/src/forum-sync/config.ts +92 -0
  373. package/src/forum-sync/discord-operations.ts +241 -0
  374. package/src/forum-sync/index.ts +9 -0
  375. package/src/forum-sync/markdown.ts +172 -0
  376. package/src/forum-sync/sync-to-discord.ts +595 -0
  377. package/src/forum-sync/sync-to-files.ts +294 -0
  378. package/src/forum-sync/types.ts +175 -0
  379. package/src/forum-sync/watchers.ts +454 -0
  380. package/src/gateway-proxy-reconnect.e2e.test.ts +523 -0
  381. package/src/gateway-proxy.e2e.test.ts +644 -0
  382. package/src/genai-worker-wrapper.ts +164 -0
  383. package/src/genai-worker.ts +386 -0
  384. package/src/genai.ts +321 -0
  385. package/src/generated/browser.ts +114 -0
  386. package/src/generated/client.ts +138 -0
  387. package/src/generated/commonInputTypes.ts +770 -0
  388. package/src/generated/enums.ts +98 -0
  389. package/src/generated/internal/class.ts +384 -0
  390. package/src/generated/internal/prismaNamespace.ts +2394 -0
  391. package/src/generated/internal/prismaNamespaceBrowser.ts +327 -0
  392. package/src/generated/models/bot_api_keys.ts +1288 -0
  393. package/src/generated/models/bot_tokens.ts +1700 -0
  394. package/src/generated/models/channel_agents.ts +1256 -0
  395. package/src/generated/models/channel_directories.ts +1859 -0
  396. package/src/generated/models/channel_mention_mode.ts +1300 -0
  397. package/src/generated/models/channel_models.ts +1288 -0
  398. package/src/generated/models/channel_verbosity.ts +1228 -0
  399. package/src/generated/models/channel_worktrees.ts +1300 -0
  400. package/src/generated/models/forum_sync_configs.ts +1452 -0
  401. package/src/generated/models/global_models.ts +1288 -0
  402. package/src/generated/models/ipc_requests.ts +1485 -0
  403. package/src/generated/models/part_messages.ts +1302 -0
  404. package/src/generated/models/scheduled_tasks.ts +2320 -0
  405. package/src/generated/models/session_agents.ts +1086 -0
  406. package/src/generated/models/session_events.ts +1439 -0
  407. package/src/generated/models/session_models.ts +1114 -0
  408. package/src/generated/models/session_start_sources.ts +1408 -0
  409. package/src/generated/models/thread_sessions.ts +1781 -0
  410. package/src/generated/models/thread_worktrees.ts +1356 -0
  411. package/src/generated/models.ts +30 -0
  412. package/src/heap-monitor.ts +152 -0
  413. package/src/hrana-server.test.ts +434 -0
  414. package/src/hrana-server.ts +299 -0
  415. package/src/html-actions.test.ts +87 -0
  416. package/src/html-actions.ts +174 -0
  417. package/src/html-components.test.ts +38 -0
  418. package/src/html-components.ts +181 -0
  419. package/src/image-optimizer-plugin.ts +194 -0
  420. package/src/image-utils.ts +149 -0
  421. package/src/interaction-handler.ts +610 -0
  422. package/src/ipc-polling.ts +427 -0
  423. package/src/ipc-tools-plugin.ts +236 -0
  424. package/src/ipc-utils.ts +29 -0
  425. package/src/limit-heading-depth.test.ts +116 -0
  426. package/src/limit-heading-depth.ts +26 -0
  427. package/src/logger.ts +215 -0
  428. package/src/markdown.test.ts +315 -0
  429. package/src/markdown.ts +410 -0
  430. package/src/memory-overview-plugin.ts +163 -0
  431. package/src/message-finish-field.e2e.test.ts +195 -0
  432. package/src/message-formatting.test.ts +126 -0
  433. package/src/message-formatting.ts +535 -0
  434. package/src/message-preprocessing.ts +488 -0
  435. package/src/onboarding-tutorial.ts +167 -0
  436. package/src/onboarding-welcome.ts +49 -0
  437. package/src/openai-realtime.ts +358 -0
  438. package/src/opencode-command-detection.test.ts +307 -0
  439. package/src/opencode-command-detection.ts +76 -0
  440. package/src/opencode-command.test.ts +70 -0
  441. package/src/opencode-command.ts +191 -0
  442. package/src/opencode-interrupt-plugin.test.ts +682 -0
  443. package/src/opencode-interrupt-plugin.ts +507 -0
  444. package/src/opencode.ts +1453 -0
  445. package/src/otto/branding.ts +23 -0
  446. package/src/otto/index.ts +22 -0
  447. package/src/otto-digital-twin.e2e.test.ts +199 -0
  448. package/src/otto-opencode-plugin-loading.e2e.test.ts +117 -0
  449. package/src/otto-opencode-plugin.test.ts +108 -0
  450. package/src/otto-opencode-plugin.ts +22 -0
  451. package/src/parse-permission-rules.test.ts +127 -0
  452. package/src/patch-text-parser.ts +107 -0
  453. package/src/plugin-logger.ts +84 -0
  454. package/src/privacy-sanitizer.ts +142 -0
  455. package/src/queue-advanced-abort.e2e.test.ts +382 -0
  456. package/src/queue-advanced-action-buttons.e2e.test.ts +268 -0
  457. package/src/queue-advanced-e2e-setup.ts +877 -0
  458. package/src/queue-advanced-footer.e2e.test.ts +591 -0
  459. package/src/queue-advanced-model-switch.e2e.test.ts +383 -0
  460. package/src/queue-advanced-permissions-typing.e2e.test.ts +246 -0
  461. package/src/queue-advanced-question.e2e.test.ts +316 -0
  462. package/src/queue-advanced-typing-interrupt.e2e.test.ts +146 -0
  463. package/src/queue-advanced-typing.e2e.test.ts +199 -0
  464. package/src/queue-drain-after-interactive-ui.e2e.test.ts +151 -0
  465. package/src/queue-interrupt-drain.e2e.test.ts +166 -0
  466. package/src/queue-question-select-drain.e2e.test.ts +327 -0
  467. package/src/runtime-idle-sweeper.ts +76 -0
  468. package/src/runtime-lifecycle.e2e.test.ts +651 -0
  469. package/src/schema.sql +174 -0
  470. package/src/sentry.ts +26 -0
  471. package/src/session-handler/agent-utils.ts +99 -0
  472. package/src/session-handler/event-stream-fixtures/real-session-action-buttons.jsonl +45 -0
  473. package/src/session-handler/event-stream-fixtures/real-session-footer-suppressed-on-pre-idle-interrupt.jsonl +40 -0
  474. package/src/session-handler/event-stream-fixtures/real-session-permission-external-file.jsonl +23 -0
  475. package/src/session-handler/event-stream-fixtures/real-session-task-normal.jsonl +22 -0
  476. package/src/session-handler/event-stream-fixtures/real-session-task-three-parallel-sleeps.jsonl +277 -0
  477. package/src/session-handler/event-stream-fixtures/real-session-task-user-interruption.jsonl +46 -0
  478. package/src/session-handler/event-stream-fixtures/session-abort-after-idle-race.jsonl +21 -0
  479. package/src/session-handler/event-stream-fixtures/session-concurrent-messages-serialized.jsonl +56 -0
  480. package/src/session-handler/event-stream-fixtures/session-explicit-abort.jsonl +44 -0
  481. package/src/session-handler/event-stream-fixtures/session-normal-completion.jsonl +29 -0
  482. package/src/session-handler/event-stream-fixtures/session-tool-call-noisy-stream.jsonl +29 -0
  483. package/src/session-handler/event-stream-fixtures/session-two-completions-same-session.jsonl +50 -0
  484. package/src/session-handler/event-stream-fixtures/session-user-interruption.jsonl +59 -0
  485. package/src/session-handler/event-stream-fixtures/session-voice-queued-followup.jsonl +52 -0
  486. package/src/session-handler/event-stream-state.test.ts +717 -0
  487. package/src/session-handler/event-stream-state.ts +706 -0
  488. package/src/session-handler/model-utils.ts +217 -0
  489. package/src/session-handler/opencode-session-event-log.ts +130 -0
  490. package/src/session-handler/thread-runtime-state.ts +247 -0
  491. package/src/session-handler/thread-session-runtime.ts +4440 -0
  492. package/src/session-handler.ts +15 -0
  493. package/src/session-search.test.ts +50 -0
  494. package/src/session-search.ts +148 -0
  495. package/src/session-title-rename.test.ts +130 -0
  496. package/src/skill-filter.test.ts +83 -0
  497. package/src/skill-filter.ts +42 -0
  498. package/src/startup-service.ts +200 -0
  499. package/src/startup-time.e2e.test.ts +373 -0
  500. package/src/store.ts +139 -0
  501. package/src/subagent-rate-limit-plugin.ts +218 -0
  502. package/src/system-message.test.ts +710 -0
  503. package/src/system-message.ts +814 -0
  504. package/src/task-runner.ts +725 -0
  505. package/src/task-schedule.test.ts +84 -0
  506. package/src/task-schedule.ts +317 -0
  507. package/src/test-utils.ts +451 -0
  508. package/src/thinking-utils.ts +61 -0
  509. package/src/thread-message-queue.e2e.test.ts +1350 -0
  510. package/src/tools.ts +430 -0
  511. package/src/undici.d.ts +12 -0
  512. package/src/undo-redo.e2e.test.ts +209 -0
  513. package/src/unnest-code-blocks.test.ts +713 -0
  514. package/src/unnest-code-blocks.ts +185 -0
  515. package/src/upgrade.ts +185 -0
  516. package/src/utils.test.ts +155 -0
  517. package/src/utils.ts +265 -0
  518. package/src/voice-attachment.ts +51 -0
  519. package/src/voice-handler.ts +908 -0
  520. package/src/voice-message.e2e.test.ts +1255 -0
  521. package/src/voice.test.ts +281 -0
  522. package/src/voice.ts +638 -0
  523. package/src/wait-session.ts +273 -0
  524. package/src/websockify.ts +101 -0
  525. package/src/worker-types.ts +64 -0
  526. package/src/worktree-lifecycle.e2e.test.ts +396 -0
  527. package/src/worktree-utils.ts +4 -0
  528. package/src/worktrees.test.ts +489 -0
  529. package/src/worktrees.ts +1370 -0
  530. package/src/xml.test.ts +38 -0
  531. package/src/xml.ts +121 -0
  532. package/README.md +0 -142
  533. package/dist/cli.d.ts +0 -3
  534. package/dist/cli.d.ts.map +0 -1
  535. package/dist/cli.js.map +0 -1
  536. package/dist/config.d.ts +0 -39
  537. package/dist/config.d.ts.map +0 -1
  538. package/dist/config.js.map +0 -1
  539. package/dist/config.test.d.ts +0 -2
  540. package/dist/config.test.d.ts.map +0 -1
  541. package/dist/config.test.js +0 -202
  542. package/dist/config.test.js.map +0 -1
  543. package/dist/detect.d.ts +0 -9
  544. package/dist/detect.d.ts.map +0 -1
  545. package/dist/detect.js +0 -40
  546. package/dist/detect.js.map +0 -1
  547. package/dist/detect.test.d.ts +0 -2
  548. package/dist/detect.test.d.ts.map +0 -1
  549. package/dist/detect.test.js +0 -26
  550. package/dist/detect.test.js.map +0 -1
  551. package/dist/docker.d.ts +0 -7
  552. package/dist/docker.d.ts.map +0 -1
  553. package/dist/docker.js +0 -17
  554. package/dist/docker.js.map +0 -1
  555. package/dist/docker.test.d.ts +0 -2
  556. package/dist/docker.test.d.ts.map +0 -1
  557. package/dist/docker.test.js +0 -12
  558. package/dist/docker.test.js.map +0 -1
  559. package/dist/health.d.ts +0 -31
  560. package/dist/health.d.ts.map +0 -1
  561. package/dist/health.js +0 -117
  562. package/dist/health.js.map +0 -1
  563. package/dist/health.test.d.ts +0 -2
  564. package/dist/health.test.d.ts.map +0 -1
  565. package/dist/health.test.js +0 -52
  566. package/dist/health.test.js.map +0 -1
  567. package/dist/index.d.ts +0 -20
  568. package/dist/index.d.ts.map +0 -1
  569. package/dist/index.js +0 -15
  570. package/dist/index.js.map +0 -1
  571. package/dist/index.test.d.ts +0 -2
  572. package/dist/index.test.d.ts.map +0 -1
  573. package/dist/index.test.js +0 -8
  574. package/dist/index.test.js.map +0 -1
  575. package/dist/installer.d.ts +0 -10
  576. package/dist/installer.d.ts.map +0 -1
  577. package/dist/installer.js +0 -50
  578. package/dist/installer.js.map +0 -1
  579. package/dist/installer.test.d.ts +0 -2
  580. package/dist/installer.test.d.ts.map +0 -1
  581. package/dist/installer.test.js +0 -43
  582. package/dist/installer.test.js.map +0 -1
  583. package/dist/lifecycle.d.ts +0 -10
  584. package/dist/lifecycle.d.ts.map +0 -1
  585. package/dist/lifecycle.js +0 -45
  586. package/dist/lifecycle.js.map +0 -1
  587. package/dist/lifecycle.test.d.ts +0 -2
  588. package/dist/lifecycle.test.d.ts.map +0 -1
  589. package/dist/lifecycle.test.js +0 -20
  590. package/dist/lifecycle.test.js.map +0 -1
  591. package/dist/manifest.d.ts +0 -18
  592. package/dist/manifest.d.ts.map +0 -1
  593. package/dist/manifest.js +0 -30
  594. package/dist/manifest.js.map +0 -1
  595. package/dist/skills-baseline.d.ts +0 -7
  596. package/dist/skills-baseline.d.ts.map +0 -1
  597. package/dist/skills-baseline.js +0 -9
  598. package/dist/skills-baseline.js.map +0 -1
  599. package/dist/skills.d.ts +0 -110
  600. package/dist/skills.d.ts.map +0 -1
  601. package/dist/skills.js +0 -429
  602. package/dist/skills.js.map +0 -1
  603. package/dist/skills.test.d.ts +0 -2
  604. package/dist/skills.test.d.ts.map +0 -1
  605. package/dist/skills.test.js +0 -416
  606. package/dist/skills.test.js.map +0 -1
  607. package/dist/sync.d.ts +0 -10
  608. package/dist/sync.d.ts.map +0 -1
  609. package/dist/sync.js +0 -39
  610. package/dist/sync.js.map +0 -1
  611. package/dist/tenant.d.ts +0 -13
  612. package/dist/tenant.d.ts.map +0 -1
  613. package/dist/tenant.js +0 -105
  614. package/dist/tenant.js.map +0 -1
  615. package/dist/tenant.test.d.ts +0 -2
  616. package/dist/tenant.test.d.ts.map +0 -1
  617. package/dist/tenant.test.js +0 -37
  618. package/dist/tenant.test.js.map +0 -1
  619. package/src/config.test.ts +0 -237
  620. package/src/detect.test.ts +0 -29
  621. package/src/detect.ts +0 -52
  622. package/src/docker.test.ts +0 -12
  623. package/src/docker.ts +0 -23
  624. package/src/health.test.ts +0 -61
  625. package/src/health.ts +0 -158
  626. package/src/index.test.ts +0 -8
  627. package/src/index.ts +0 -62
  628. package/src/installer.test.ts +0 -52
  629. package/src/installer.ts +0 -62
  630. package/src/lifecycle.test.ts +0 -23
  631. package/src/lifecycle.ts +0 -49
  632. package/src/manifest.ts +0 -42
  633. package/src/skills-baseline.ts +0 -14
  634. package/src/skills.test.ts +0 -503
  635. package/src/skills.ts +0 -512
  636. package/src/sync.ts +0 -53
  637. package/src/tenant.test.ts +0 -49
  638. package/src/tenant.ts +0 -120
package/src/voice.ts ADDED
@@ -0,0 +1,638 @@
1
+ // Audio transcription service using AI SDK providers.
2
+ // Both providers use LanguageModelV3 (chat model) with audio file parts + tool calling,
3
+ // so we can pass full context (file tree, session info) for better word recognition.
4
+ // - OpenAI: gpt-4o-audio-preview via .chat() (Chat Completions API). MUST use .chat()
5
+ // because the default Responses API doesn't support audio file parts. The Chat
6
+ // Completions handler converts audio/mpeg file parts to input_audio format.
7
+ // - Gemini: gemini-2.5-flash natively accepts audio file parts in chat.
8
+ // Calls model.doGenerate() directly without the `ai` npm package.
9
+ // Uses errore for type-safe error handling.
10
+
11
+ import type {
12
+ LanguageModelV3,
13
+ LanguageModelV3CallOptions,
14
+ LanguageModelV3FunctionTool,
15
+ LanguageModelV3Content,
16
+ LanguageModelV3ToolCall,
17
+ } from '@ai-sdk/provider'
18
+ import { createGoogleGenerativeAI } from '@ai-sdk/google'
19
+ import { createOpenAI } from '@ai-sdk/openai'
20
+ import { Readable } from 'node:stream'
21
+ import prism from 'prism-media'
22
+ import * as errore from 'errore'
23
+ import { createLogger, LogPrefix } from './logger.js'
24
+ import {
25
+ ApiKeyMissingError,
26
+ InvalidAudioFormatError,
27
+ TranscriptionError,
28
+ EmptyTranscriptionError,
29
+ NoResponseContentError,
30
+ NoToolResponseError,
31
+ } from './errors.js'
32
+
33
// Logger used throughout this module, created with the VOICE log prefix.
const voiceLogger = createLogger(LogPrefix.VOICE)

// MIME types OpenAI's input_audio accepts without conversion (wav and mp3 only).
// Any other format (OGG Opus, M4A, ...) has to be transcoded before sending.
const OPENAI_SUPPORTED_AUDIO_TYPES = new Set(['audio/mpeg', 'audio/mp3', 'audio/wav', 'audio/x-wav'])

// MIME types routed through the OGG/Opus → WAV conversion.
const OGG_AUDIO_TYPES = new Set(['audio/ogg', 'audio/opus'])

// MIME types routed through the M4A/MP4 → WAV conversion.
const M4A_AUDIO_TYPES = new Set(['audio/mp4', 'audio/m4a', 'audio/x-m4a'])
54
+
55
/**
 * Canonicalize an audio MIME type so later lookups can rely on exact matching.
 *
 * - Parameters after `;` are dropped (`audio/ogg; codecs=opus` → `audio/ogg`);
 *   without this a parameterized type would never match the exact-match sets
 *   used to pick a conversion strategy.
 * - Surrounding whitespace is trimmed and the value is lower-cased.
 * - The M4A aliases `audio/m4a` and `audio/x-m4a` collapse to `audio/mp4`.
 *
 * @param mediaType Raw MIME type, possibly with parameters or mixed case.
 * @returns The normalized bare MIME type (empty string if the input is blank).
 */
export function normalizeAudioMediaType(mediaType: string): string {
  // Keep only the type/subtype part; everything after the first ';' is parameters.
  const [essence = ''] = mediaType.split(';')
  const normalized = essence.trim().toLowerCase()
  if (normalized === 'audio/x-m4a' || normalized === 'audio/m4a') {
    return 'audio/mp4'
  }
  return normalized
}
62
+
63
/**
 * What has to happen to an audio payload before it can be sent to OpenAI:
 * - 'none': already in an accepted format
 * - 'convert-ogg-to-wav' / 'convert-m4a-to-wav': transcode to WAV first
 * - 'unsupported': no conversion path is available
 */
type OpenAIAudioConversionStrategy = 'none' | 'convert-ogg-to-wav' | 'convert-m4a-to-wav' | 'unsupported'
68
+
69
/**
 * Decide how an audio payload must be prepared for OpenAI, based on its MIME type.
 * Matching is exact against the module's MIME-type sets, checked in the order
 * supported → OGG → M4A; anything else is reported as 'unsupported'.
 *
 * @param mediaType MIME type to classify (matched as given, no normalization here).
 */
export function getOpenAIAudioConversionStrategy(
  mediaType: string,
): OpenAIAudioConversionStrategy {
  return OPENAI_SUPPORTED_AUDIO_TYPES.has(mediaType)
    ? 'none'
    : OGG_AUDIO_TYPES.has(mediaType)
      ? 'convert-ogg-to-wav'
      : M4A_AUDIO_TYPES.has(mediaType)
        ? 'convert-m4a-to-wav'
        : 'unsupported'
}
83
+
84
/**
 * Decode an OGG Opus buffer into a mono 48 kHz, 16-bit WAV buffer with prism-media.
 * Stream chain: input buffer → OggDemuxer → Opus Decoder → collected PCM, which is
 * then prefixed with a 44-byte WAV header.
 *
 * Demuxer and decoder stream errors are reported by resolving with a
 * TranscriptionError rather than rejecting.
 *
 * @param input Raw OGG Opus file contents.
 * @returns The WAV buffer, or a TranscriptionError describing the failed stage.
 */
export function convertOggToWav(input: Buffer): Promise<TranscriptionError | Buffer> {
  const sampleRate = 48000
  const numChannels = 1

  return new Promise((resolve) => {
    const oggDemuxer = new prism.opus.OggDemuxer()
    const opusDecoder = new prism.opus.Decoder({
      rate: sampleRate,
      channels: numChannels,
      frameSize: 960,
    })
    const decodedPcm: Buffer[] = []

    oggDemuxer.on('error', (err: Error) => {
      const failure = new TranscriptionError({
        reason: `OGG demux failed: ${err.message}`,
        cause: err,
      })
      resolve(failure)
    })

    opusDecoder.on('error', (err: Error) => {
      const failure = new TranscriptionError({
        reason: `Opus decode failed: ${err.message}`,
        cause: err,
      })
      resolve(failure)
    })

    opusDecoder.on('data', (chunk: Buffer) => {
      decodedPcm.push(chunk)
    })

    // Once the decoder is drained, wrap the raw PCM in a WAV container.
    opusDecoder.on('end', () => {
      const pcm = Buffer.concat(decodedPcm)
      const header = createWavHeader({
        dataLength: pcm.length,
        sampleRate,
        numChannels,
        bitsPerSample: 16,
      })
      resolve(Buffer.concat([header, pcm]))
    })

    const source = Readable.from(input)
    source.pipe(oggDemuxer).pipe(opusDecoder)
  })
}
136
+
137
/**
 * Convert M4A/MP4 audio to a mono 48 kHz, 16-bit WAV buffer using the
 * prism-media FFmpeg wrapper. Requires an ffmpeg binary reachable by prism-media.
 *
 * Every failure is reported by resolving with a TranscriptionError, including a
 * synchronous throw from the FFmpeg constructor (e.g. when no ffmpeg binary can
 * be located), which would otherwise reject the promise and bypass the
 * "requires ffmpeg" message below.
 *
 * @param input Raw M4A/MP4 file contents.
 * @returns The WAV buffer, or a TranscriptionError (missing ffmpeg, decode
 *          failure, or empty output).
 */
export function convertM4aToWav(input: Buffer): Promise<TranscriptionError | Buffer> {
  return new Promise((resolve) => {
    // Map any ffmpeg-related failure to a TranscriptionError, singling out the
    // "binary is missing" case so the user gets an actionable message.
    const toTranscriptionError = (err: Error): TranscriptionError => {
      const lower = err.message.toLowerCase()
      const isMissingFfmpeg =
        lower.includes('ffmpeg') &&
        (lower.includes('not found') ||
          lower.includes('enoent') ||
          lower.includes('spawn'))
      if (isMissingFfmpeg) {
        return new TranscriptionError({
          reason:
            'M4A transcription with OpenAI requires ffmpeg to be installed and available in PATH',
          cause: err,
        })
      }
      return new TranscriptionError({
        reason: `M4A decode failed: ${err.message}`,
        cause: err,
      })
    }

    // The constructor spawns ffmpeg and may throw synchronously; turn that into
    // an error value instead of letting it reject the surrounding promise.
    const createTranscoder = () => {
      try {
        // NOTE(review): prism-media's FFmpeg wrapper may append its own `pipe:1`
        // output argument — confirm against the installed version that the
        // explicit `pipe:1` below does not produce a duplicate output.
        return new prism.FFmpeg({
          args: [
            '-analyzeduration',
            '0',
            '-loglevel',
            '0',
            '-f',
            'mp4',
            '-i',
            'pipe:0',
            '-f',
            's16le',
            '-acodec',
            'pcm_s16le',
            '-ac',
            '1',
            '-ar',
            '48000',
            'pipe:1',
          ],
        })
      } catch (err) {
        return toTranscriptionError(err instanceof Error ? err : new Error(String(err)))
      }
    }

    const transcoder = createTranscoder()
    if (transcoder instanceof TranscriptionError) {
      resolve(transcoder)
      return
    }

    const pcmChunks: Buffer[] = []

    transcoder.on('data', (chunk: Buffer) => {
      pcmChunks.push(chunk)
    })

    transcoder.on('end', () => {
      const pcmData = Buffer.concat(pcmChunks)
      // With `-loglevel 0` a failed conversion can end without any output.
      if (pcmData.length === 0) {
        resolve(
          new TranscriptionError({
            reason: 'FFmpeg conversion produced empty audio output',
          }),
        )
        return
      }

      const wavHeader = createWavHeader({
        dataLength: pcmData.length,
        sampleRate: 48000,
        numChannels: 1,
        bitsPerSample: 16,
      })
      resolve(Buffer.concat([wavHeader, pcmData]))
    })

    transcoder.on('error', (err: Error) => {
      resolve(toTranscriptionError(err))
    })

    Readable.from(input).pipe(transcoder)
  })
}
219
+
220
/**
 * Build the 44-byte RIFF/WAVE header for uncompressed PCM audio.
 *
 * @param dataLength Size in bytes of the PCM payload that will follow the header.
 * @param sampleRate Samples per second.
 * @param numChannels Number of interleaved channels.
 * @param bitsPerSample Bit depth of one sample.
 * @returns A new 44-byte Buffer containing only the header.
 */
function createWavHeader({
  dataLength,
  sampleRate,
  numChannels,
  bitsPerSample,
}: {
  dataLength: number
  sampleRate: number
  numChannels: number
  bitsPerSample: number
}): Buffer {
  const header = Buffer.alloc(44)
  let offset = 0

  // RIFF chunk descriptor
  offset += header.write('RIFF', offset)
  offset = header.writeUInt32LE(36 + dataLength, offset) // bytes remaining after this field
  offset += header.write('WAVE', offset)

  // "fmt " sub-chunk
  offset += header.write('fmt ', offset)
  offset = header.writeUInt32LE(16, offset) // size of the fmt sub-chunk
  offset = header.writeUInt16LE(1, offset) // audio format 1 = PCM
  offset = header.writeUInt16LE(numChannels, offset)
  offset = header.writeUInt32LE(sampleRate, offset)
  offset = header.writeUInt32LE((sampleRate * numChannels * bitsPerSample) / 8, offset) // byte rate
  offset = header.writeUInt16LE((numChannels * bitsPerSample) / 8, offset) // block align
  offset = header.writeUInt16LE(bitsPerSample, offset)

  // "data" sub-chunk header
  offset += header.write('data', offset)
  header.writeUInt32LE(dataLength, offset)

  return header
}
249
+
250
/** Error values a single transcription attempt can return. */
type TranscriptionLoopError = NoResponseContentError | TranscriptionError | EmptyTranscriptionError | NoToolResponseError
255
+
256
/**
 * Create the `transcriptionResult` function-tool definition handed to the model.
 * The schema is assembled per call so that, when agent names are supplied, the
 * optional `agent` property is an enum restricted to exactly those names; with
 * no names (or an empty list) the `agent` property is left out altogether.
 *
 * @param agentNames Names allowed as values of the `agent` field.
 */
function buildTranscriptionTool({
  agentNames,
}: {
  agentNames?: string[]
}): LanguageModelV3FunctionTool {
  const agentProperty: Record<string, Record<string, unknown>> =
    agentNames && agentNames.length > 0
      ? {
          agent: {
            type: 'string',
            enum: agentNames,
            description:
              'The agent name ONLY if the user explicitly says "use the X agent", "switch to X agent", "with the X agent", or similar phrasing. Remove the agent instruction from the transcription text. Omit if no agent is mentioned.',
          },
        }
      : {}

  const properties: Record<string, Record<string, unknown>> = {
    transcription: {
      type: 'string',
      description:
        'The final transcription of the audio. MUST be non-empty. If audio is unclear, transcribe your best interpretation. If silent, too short to understand, or completely incomprehensible, use "[inaudible audio]".',
    },
    queueMessage: {
      type: 'boolean',
      description:
        'Set to true ONLY if the user explicitly says "queue this message", "queue this", or similar phrasing indicating they want this message queued instead of sent immediately. If not mentioned, omit or set to false.',
    },
    ...agentProperty,
  }

  return {
    type: 'function',
    name: 'transcriptionResult',
    description:
      'MANDATORY: You MUST call this tool to complete the task. This is the ONLY way to return results - text responses are ignored. Call this with your transcription, even if imperfect. An imperfect transcription is better than none.',
    inputSchema: {
      type: 'object',
      properties,
      required: ['transcription'],
    },
  }
}
297
+
298
/** Structured outcome of a successful transcription. */
export type TranscriptionResult = {
  /** Transcribed text. */
  transcription: string
  /** Whether the message should be queued rather than sent immediately. */
  queueMessage: boolean
  /** Agent named in the voice message; only set when the user explicitly asked for one. */
  agent?: string
}
304
+
305
// Decode the JSON arguments of a tool call into a plain object without throwing.
// Non-string input and JSON that is not an object yield an empty argument map;
// malformed JSON yields a TranscriptionError.
function parseToolCallInput(
  input: unknown,
): TranscriptionError | Record<string, unknown> {
  if (typeof input !== 'string') {
    return {}
  }
  let parsed: unknown
  try {
    parsed = JSON.parse(input)
  } catch (err) {
    const cause = err instanceof Error ? err : new Error(String(err))
    return new TranscriptionError({
      reason: `Model returned malformed tool call arguments: ${cause.message}`,
      cause,
    })
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return {}
  }
  return parsed as Record<string, unknown>
}

/**
 * Extract the transcription result from a doGenerate content array.
 *
 * Resolution order:
 * 1. A tool call named 'transcriptionResult': its JSON arguments supply the
 *    transcription text, the queueMessage flag (true only when the argument is
 *    exactly `true`) and the optional agent name.
 * 2. Otherwise the first text part, if it has non-blank text (queueMessage false).
 *
 * Failures are returned as error values, never thrown — including malformed
 * tool-call JSON (for example arguments cut off by the output-token limit):
 * - TranscriptionError: tool-call arguments are not valid JSON, or the content
 *   holds neither a usable tool call nor usable text
 * - EmptyTranscriptionError: the tool call carried a blank transcription
 * - NoResponseContentError: the content array is empty
 *
 * @param content Content parts returned by the model.
 */
export function extractTranscription(
  content: Array<LanguageModelV3Content>,
): TranscriptionLoopError | TranscriptionResult {
  const toolCall = content.find(
    (c): c is LanguageModelV3ToolCall =>
      c.type === 'tool-call' && c.toolName === 'transcriptionResult',
  )

  if (toolCall) {
    // toolCall.input is a JSON string in LanguageModelV3
    const args = parseToolCallInput(toolCall.input)
    if (args instanceof TranscriptionError) {
      voiceLogger.log(`Transcription tool call had unparseable arguments: ${args.message}`)
      return args
    }
    const transcription = (typeof args.transcription === 'string' ? args.transcription : '').trim()
    const queueMessage = args.queueMessage === true
    const agent = typeof args.agent === 'string' ? args.agent : undefined
    voiceLogger.log(
      `Transcription result received: "${transcription.slice(0, 100)}..."${queueMessage ? ' [QUEUE]' : ''}${agent ? ` [AGENT:${agent}]` : ''}`,
    )
    if (!transcription) {
      return new EmptyTranscriptionError()
    }
    return { transcription, queueMessage, agent }
  }

  // Fall back to text content if no tool call
  const textPart = content.find((c) => c.type === 'text')
  if (textPart && textPart.type === 'text' && textPart.text.trim()) {
    voiceLogger.log(
      `No tool call but got text: "${textPart.text.trim().slice(0, 100)}..."`,
    )
    return { transcription: textPart.text.trim(), queueMessage: false }
  }

  if (content.length === 0) {
    return new NoResponseContentError()
  }

  return new TranscriptionError({
    reason: 'Model did not produce a transcription',
  })
}
355
+
356
/**
 * Perform one transcription request against the given model and interpret the reply.
 *
 * The request is a single user message (prompt text plus the audio as a file
 * part), with the `transcriptionResult` tool attached and forced via toolChoice.
 * A failing `doGenerate` call is returned as a TranscriptionError; otherwise the
 * response content is passed to `extractTranscription`.
 *
 * @param model Chat model to call.
 * @param prompt Full instruction text for the model.
 * @param audioBase64 Base64-encoded audio payload.
 * @param mediaType MIME type declared for the audio file part.
 * @param temperature Sampling temperature for the call.
 * @param agentNames Optional agent names for the tool schema's `agent` enum.
 * @param provider When 'openai', OpenAI-specific provider options are added.
 */
async function runTranscriptionOnce({
  model,
  prompt,
  audioBase64,
  mediaType,
  temperature,
  agentNames,
  provider,
}: {
  model: LanguageModelV3
  prompt: string
  audioBase64: string
  mediaType: string
  temperature: number
  agentNames?: string[]
  provider?: TranscriptionProvider
}): Promise<TranscriptionLoopError | TranscriptionResult> {
  // OpenAI-only options; the google options below are always included.
  const openaiOptions =
    provider === 'openai'
      ? {
          openai: {
            safetyIdentifier: 'otto:voice-transcription',
            user: 'otto:voice-transcription',
          },
        }
      : {}

  const callOptions: LanguageModelV3CallOptions = {
    prompt: [
      {
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          { type: 'file', data: audioBase64, mediaType },
        ],
      },
    ],
    temperature,
    maxOutputTokens: 2048,
    tools: [buildTranscriptionTool({ agentNames })],
    toolChoice: { type: 'tool', toolName: 'transcriptionResult' },
    providerOptions: {
      ...openaiOptions,
      google: {
        thinkingConfig: { thinkingBudget: 1024 },
      },
    },
  }

  // doGenerate returns a PromiseLike; Promise.resolve adapts it for errore.
  const generated = await errore.tryAsync({
    try: () => Promise.resolve(model.doGenerate(callOptions)),
    catch: (e: Error) =>
      new TranscriptionError({
        reason: `API call failed: ${String(e)}`,
        cause: e,
      }),
  })

  return generated instanceof TranscriptionError
    ? generated
    : extractTranscription(generated.content)
}
423
+
424
/** Every error value `transcribeAudio` can resolve with. */
export type TranscribeAudioErrors = ApiKeyMissingError | InvalidAudioFormatError | TranscriptionLoopError
428
+
429
/** Backends that can perform transcription. */
export type TranscriptionProvider =
  | 'openai'
  | 'gemini'
430
+
431
/**
 * Build the LanguageModelV3 used for transcription.
 *
 * Both backends are chat models that take audio file parts, which lets the
 * caller send prompt context and use tool calling alongside the audio.
 *
 * - OpenAI: `gpt-4o-audio-preview` obtained through `.chat()` (Chat Completions
 *   API); the default callable (Responses API) doesn't support audio file parts.
 * - Gemini: `gemini-2.5-flash`, which accepts audio in chat natively.
 *
 * @param apiKey Key for the chosen backend.
 * @param provider Backend to use; when omitted, keys starting with `sk-` select
 *                 OpenAI and anything else selects Gemini.
 */
export function createTranscriptionModel({
  apiKey,
  provider,
}: {
  apiKey: string
  provider?: TranscriptionProvider
}): LanguageModelV3 {
  const inferredProvider: TranscriptionProvider = apiKey.startsWith('sk-') ? 'openai' : 'gemini'
  const resolvedProvider: TranscriptionProvider = provider || inferredProvider

  if (resolvedProvider !== 'openai') {
    return createGoogleGenerativeAI({ apiKey })('gemini-2.5-flash')
  }

  return createOpenAI({ apiKey }).chat('gpt-4o-audio-preview')
}
458
+
459
/**
 * Transcribe an audio clip into text for a coding agent.
 *
 * Steps: resolve API key and provider, build (or reuse) the model, turn the
 * audio into a Buffer, convert it to WAV when OpenAI cannot take the format
 * directly, assemble the instruction prompt, and run a single transcription call.
 *
 * All failures are returned as error values:
 * - ApiKeyMissingError: no `model` and no API key (argument or environment)
 * - InvalidAudioFormatError: empty audio, or a format OpenAI cannot accept/convert
 * - the errors produced by the conversion helpers and the transcription call
 *
 * @param audio Audio payload; a string is treated as base64.
 * @param prompt Project file tree text embedded in the prompt (empty when omitted).
 * @param language Optional language hint prepended to the prompt.
 * @param temperature Sampling temperature; defaults to 0.3.
 * @param apiKey Explicit API key; falls back to OPENAI_API_KEY / GEMINI_API_KEY.
 * @param model Pre-built model; when given, no model is created from the key.
 * @param provider Explicit provider; otherwise inferred from the key prefix
 *                 (`sk-` → OpenAI, else Gemini; Gemini when there is no key).
 * @param mediaType MIME type of the audio; defaults to 'audio/mpeg'.
 * @param currentSessionContext Context text from the current session.
 * @param lastSessionContext Context text from the previous session.
 * @param agents Agents selectable by voice; names become the tool schema enum.
 */
export async function transcribeAudio({
  audio,
  prompt,
  language,
  temperature,
  apiKey: apiKeyParam,
  model,
  provider,
  mediaType: mediaTypeParam,
  currentSessionContext,
  lastSessionContext,
  agents,
}: {
  audio: Buffer | Uint8Array | ArrayBuffer | string
  prompt?: string
  language?: string
  temperature?: number
  apiKey?: string
  model?: LanguageModelV3
  provider?: TranscriptionProvider
  /** MIME type of the audio data (e.g. 'audio/ogg'). Defaults to 'audio/mpeg'. */
  mediaType?: string
  currentSessionContext?: string
  lastSessionContext?: string
  /** Available agents for agent selection via voice. Names used as enum values in the tool schema. */
  agents?: Array<{ name: string; description?: string }>
}): Promise<TranscribeAudioErrors | TranscriptionResult> {
  // When Gemini is requested explicitly, prefer its key so an OPENAI_API_KEY
  // that happens to be set is not sent to Google. In every other case the
  // lookup order is unchanged: OpenAI key first, then Gemini.
  const envApiKey =
    provider === 'gemini'
      ? process.env.GEMINI_API_KEY || process.env.OPENAI_API_KEY
      : process.env.OPENAI_API_KEY || process.env.GEMINI_API_KEY
  const apiKey = apiKeyParam || envApiKey

  if (!model && !apiKey) {
    return new ApiKeyMissingError({ service: 'OpenAI or Gemini' })
  }

  const resolvedProvider: TranscriptionProvider = (() => {
    if (provider) {
      return provider
    }
    if (apiKey) {
      return apiKey.startsWith('sk-') ? 'openai' : 'gemini'
    }
    return 'gemini'
  })()

  const languageModel: LanguageModelV3 =
    model || createTranscriptionModel({ apiKey: apiKey!, provider: resolvedProvider })

  // Convert audio to Buffer for potential format conversion
  const audioBuffer: Buffer = (() => {
    if (typeof audio === 'string') {
      return Buffer.from(audio, 'base64')
    }
    if (audio instanceof Buffer) {
      return audio
    }
    if (audio instanceof ArrayBuffer) {
      return Buffer.from(new Uint8Array(audio))
    }
    return Buffer.from(audio)
  })()

  if (audioBuffer.length === 0) {
    return new InvalidAudioFormatError()
  }

  let mediaType = normalizeAudioMediaType(mediaTypeParam || 'audio/mpeg')
  let finalAudioBase64 = audioBuffer.toString('base64')

  // OpenAI input_audio supports only a subset of audio formats.
  // Convert based on MIME so OGG conversion runs only for real OGG/Opus inputs.
  if (resolvedProvider === 'openai') {
    const conversionStrategy = getOpenAIAudioConversionStrategy(mediaType)
    if (conversionStrategy === 'unsupported') {
      return new InvalidAudioFormatError()
    }
    if (conversionStrategy !== 'none') {
      voiceLogger.log(`Converting ${mediaType} to WAV for OpenAI compatibility`)
      const converted =
        conversionStrategy === 'convert-ogg-to-wav'
          ? await convertOggToWav(audioBuffer)
          : await convertM4aToWav(audioBuffer)
      if (converted instanceof Error) {
        return converted
      }
      finalAudioBase64 = converted.toString('base64')
      mediaType = 'audio/wav'
    }
  }

  const languageHint = language ? `The audio is in ${language}.\n\n` : ''

  // build session context section
  const sessionContextParts: string[] = []
  if (lastSessionContext) {
    sessionContextParts.push(`<last_session>
${lastSessionContext}
</last_session>`)
  }
  if (currentSessionContext) {
    sessionContextParts.push(`<current_session>
${currentSessionContext}
</current_session>`)
  }
  const sessionContextSection =
    sessionContextParts.length > 0
      ? `\n<session_context>
${sessionContextParts.join('\n\n')}
</session_context>`
      : ''

  // `prompt` is optional: fall back to an empty file tree instead of letting the
  // template render the literal text "undefined".
  const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).

CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
- The transcriptionResult tool is the ONLY way to return results
- Text responses are completely ignored - only tool calls work
- You MUST call transcriptionResult even if you run out of tool calls
- Always call transcriptionResult with your best approximation of what was said
- DO NOT end without calling transcriptionResult

This is a software development environment. The speaker is giving instructions to an AI coding assistant. Expect:
- File paths, function names, CLI commands, package names, API endpoints

RULES:
- NEVER change the meaning or intent of the user's message. Your job is ONLY to transcribe, not to respond or answer.
- If the user asks a question, keep it as a question. Do NOT answer it. Do NOT rephrase it as a statement.
- Only fix grammar, punctuation, and markdown formatting. Preserve the original content faithfully.
- If audio is unclear, transcribe your best interpretation, even with strong accents. Always provide an approximation.
- If audio seems silent/empty, is too short to understand, or is completely incomprehensible, call transcriptionResult with "[inaudible audio]"
- The session context below is ONLY for understanding technical terms, file names, and function names. It may contain previous transcriptions — NEVER copy or reuse them. Always transcribe fresh from the current audio.

QUEUE DETECTION:
- If the user says "queue this message", "queue this", "add this to the queue", or similar phrasing indicating they want the message queued instead of sent immediately, set queueMessage to true.
- Remove the queue instruction from the transcription text itself — only include the actual message content.
- Example: "Queue this message. Fix the login bug in auth.ts" → transcription: "Fix the login bug in auth.ts", queueMessage: true
- If removing the queue phrase would leave empty content (user only said "queue this" with nothing else), keep the full spoken text as the transcription — never return an empty transcription.
- If no queue intent is detected, omit queueMessage or set it to false.
${agents && agents.length > 0 ? `
AGENT SELECTION:
- If the user explicitly says "use the X agent", "switch to X agent", "with the X agent", or similar phrasing naming a specific agent, set the agent field to that agent name.
- Remove the agent instruction from the transcription text itself — only include the actual message content.
- Example: "Use the plan agent. Refactor the auth module" → transcription: "Refactor the auth module", agent: "plan"
- If removing the agent phrase would leave empty content, keep the full spoken text as the transcription.
- Only set agent if the user explicitly names one. Do not infer an agent from the task content.
- If no agent is mentioned, omit the agent field entirely.

Available agents:
${agents.map((a) => { return `- ${a.name}${a.description ? `: ${a.description}` : ''}` }).join('\n')}
` : ''}

Common corrections (apply without tool calls):
- "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"

Project file structure:
<file_tree>
${prompt ?? ''}
</file_tree>
${sessionContextSection}

REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.

Note: "critique" is a CLI tool for showing diffs in the browser.`

  // Only non-empty names are usable as enum values in the tool schema.
  const agentNames = agents
    ?.map((a) => { return a.name })
    .filter((name) => { return name.length > 0 })

  return runTranscriptionOnce({
    model: languageModel,
    prompt: transcriptionPrompt,
    audioBase64: finalAudioBase64,
    mediaType,
    temperature: temperature ?? 0.3,
    agentNames: agentNames && agentNames.length > 0 ? agentNames : undefined,
    provider: resolvedProvider,
  })
}