@oh-my-pi/pi-coding-agent 15.12.3 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (457) hide show
  1. package/CHANGELOG.md +347 -7
  2. package/dist/cli.js +1615 -1231
  3. package/dist/types/async/job-manager.d.ts +15 -0
  4. package/dist/types/autolearn/controller.d.ts +25 -0
  5. package/dist/types/autolearn/managed-skills.d.ts +45 -0
  6. package/dist/types/autoresearch/state.d.ts +1 -1
  7. package/dist/types/autoresearch/tools/init-experiment.d.ts +1 -1
  8. package/dist/types/autoresearch/tools/log-experiment.d.ts +1 -1
  9. package/dist/types/autoresearch/tools/run-experiment.d.ts +1 -1
  10. package/dist/types/autoresearch/tools/update-notes.d.ts +1 -1
  11. package/dist/types/autoresearch/types.d.ts +1 -1
  12. package/dist/types/cli/args.d.ts +19 -2
  13. package/dist/types/cli/models-cli.d.ts +49 -0
  14. package/dist/types/cli/session-picker.d.ts +1 -1
  15. package/dist/types/cli/setup-cli.d.ts +1 -1
  16. package/dist/types/cli/setup-model-picker.d.ts +14 -0
  17. package/dist/types/collab/protocol.d.ts +1 -1
  18. package/dist/types/commands/launch.d.ts +0 -3
  19. package/dist/types/commands/models.d.ts +33 -0
  20. package/dist/types/commands/say.d.ts +24 -0
  21. package/dist/types/commands/token.d.ts +25 -0
  22. package/dist/types/commit/agentic/tools/analyze-file.d.ts +1 -1
  23. package/dist/types/commit/agentic/tools/git-file-diff.d.ts +1 -1
  24. package/dist/types/commit/agentic/tools/git-hunk.d.ts +1 -1
  25. package/dist/types/commit/agentic/tools/git-overview.d.ts +1 -1
  26. package/dist/types/commit/agentic/tools/propose-changelog.d.ts +1 -1
  27. package/dist/types/commit/agentic/tools/propose-commit.d.ts +1 -1
  28. package/dist/types/commit/agentic/tools/recent-commits.d.ts +1 -1
  29. package/dist/types/commit/agentic/tools/schemas.d.ts +1 -1
  30. package/dist/types/commit/agentic/tools/split-commit.d.ts +1 -1
  31. package/dist/types/commit/changelog/generate.d.ts +1 -1
  32. package/dist/types/commit/shared-llm.d.ts +1 -1
  33. package/dist/types/config/keybindings.d.ts +3 -3
  34. package/dist/types/config/model-registry.d.ts +17 -0
  35. package/dist/types/config/models-config-schema.d.ts +13 -1
  36. package/dist/types/config/models-config.d.ts +8 -2
  37. package/dist/types/config/settings-schema.d.ts +281 -58
  38. package/dist/types/edit/hashline/params.d.ts +1 -1
  39. package/dist/types/edit/modes/apply-patch.d.ts +1 -1
  40. package/dist/types/edit/modes/patch.d.ts +1 -1
  41. package/dist/types/edit/modes/replace.d.ts +1 -1
  42. package/dist/types/export/html/index.d.ts +2 -1
  43. package/dist/types/extensibility/custom-commands/types.d.ts +2 -2
  44. package/dist/types/extensibility/custom-tools/types.d.ts +2 -2
  45. package/dist/types/extensibility/extensions/model-api.d.ts +17 -0
  46. package/dist/types/extensibility/extensions/runner.d.ts +3 -1
  47. package/dist/types/extensibility/extensions/types.d.ts +49 -3
  48. package/dist/types/extensibility/hooks/index.d.ts +2 -1
  49. package/dist/types/extensibility/hooks/types.d.ts +2 -2
  50. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +9 -0
  51. package/dist/types/extensibility/plugins/loader.d.ts +11 -0
  52. package/dist/types/extensibility/shared-events.d.ts +1 -1
  53. package/dist/types/extensibility/skills.d.ts +10 -0
  54. package/dist/types/goals/guided-setup.d.ts +18 -0
  55. package/dist/types/goals/state.d.ts +1 -1
  56. package/dist/types/goals/tools/goal-tool.d.ts +1 -1
  57. package/dist/types/hindsight/transcript.d.ts +1 -1
  58. package/dist/types/index.d.ts +5 -0
  59. package/dist/types/internal-urls/local-protocol.d.ts +4 -2
  60. package/dist/types/lsp/types.d.ts +1 -1
  61. package/dist/types/main.d.ts +4 -3
  62. package/dist/types/mcp/manager.d.ts +8 -0
  63. package/dist/types/mcp/startup-events.d.ts +11 -0
  64. package/dist/types/memories/index.d.ts +7 -0
  65. package/dist/types/memory-backend/local-backend.d.ts +4 -3
  66. package/dist/types/mnemopi/config.d.ts +28 -0
  67. package/dist/types/modes/acp/acp-agent.d.ts +1 -2
  68. package/dist/types/modes/components/agent-hub.d.ts +6 -0
  69. package/dist/types/modes/components/assistant-message.d.ts +1 -2
  70. package/dist/types/modes/components/compaction-summary-message.d.ts +15 -1
  71. package/dist/types/modes/components/custom-editor.d.ts +39 -1
  72. package/dist/types/modes/components/custom-editor.test.d.ts +1 -0
  73. package/dist/types/modes/components/index.d.ts +1 -0
  74. package/dist/types/modes/components/logout-account-selector.d.ts +8 -0
  75. package/dist/types/modes/components/session-selector.d.ts +1 -1
  76. package/dist/types/modes/components/status-line/component.d.ts +9 -5
  77. package/dist/types/modes/components/status-line/types.d.ts +2 -1
  78. package/dist/types/modes/components/tool-execution.d.ts +26 -16
  79. package/dist/types/modes/components/transcript-container.d.ts +23 -2
  80. package/dist/types/modes/components/tree-selector.d.ts +1 -1
  81. package/dist/types/modes/components/usage-row.d.ts +3 -0
  82. package/dist/types/modes/controllers/command-controller.d.ts +2 -2
  83. package/dist/types/modes/controllers/event-controller.d.ts +0 -17
  84. package/dist/types/modes/controllers/input-controller.d.ts +14 -0
  85. package/dist/types/modes/controllers/selector-controller.d.ts +3 -1
  86. package/dist/types/modes/gradient-highlight.d.ts +9 -4
  87. package/dist/types/modes/image-references.d.ts +6 -0
  88. package/dist/types/modes/interactive-mode.d.ts +27 -6
  89. package/dist/types/modes/magic-keywords.d.ts +13 -1
  90. package/dist/types/modes/rpc/rpc-mode.d.ts +35 -1
  91. package/dist/types/modes/rpc/rpc-types.d.ts +9 -1
  92. package/dist/types/modes/runtime-init.d.ts +4 -0
  93. package/dist/types/modes/theme/theme.d.ts +13 -2
  94. package/dist/types/modes/types.d.ts +8 -7
  95. package/dist/types/modes/utils/ui-helpers.d.ts +1 -1
  96. package/dist/types/registry/agent-registry.d.ts +17 -0
  97. package/dist/types/secrets/obfuscator.d.ts +1 -1
  98. package/dist/types/session/agent-session.d.ts +28 -35
  99. package/dist/types/session/agent-storage.d.ts +2 -1
  100. package/dist/types/session/indexed-session-storage.d.ts +3 -3
  101. package/dist/types/session/messages.d.ts +8 -10
  102. package/dist/types/session/session-context.d.ts +39 -0
  103. package/dist/types/session/session-entries.d.ts +159 -0
  104. package/dist/types/session/session-listing.d.ts +69 -0
  105. package/dist/types/session/session-loader.d.ts +16 -0
  106. package/dist/types/session/session-manager.d.ts +85 -462
  107. package/dist/types/session/session-migrations.d.ts +12 -0
  108. package/dist/types/session/session-paths.d.ts +25 -0
  109. package/dist/types/session/session-persistence.d.ts +8 -0
  110. package/dist/types/session/session-storage.d.ts +11 -7
  111. package/dist/types/session/snapcompact-inline.d.ts +12 -1
  112. package/dist/types/session/snapcompact-savings-journal.d.ts +46 -0
  113. package/dist/types/session/tool-choice-queue.d.ts +6 -6
  114. package/dist/types/slash-commands/helpers/logout.d.ts +15 -0
  115. package/dist/types/stt/asr-client.d.ts +90 -0
  116. package/dist/types/stt/asr-protocol.d.ts +97 -0
  117. package/dist/types/stt/asr-worker.d.ts +2 -0
  118. package/dist/types/stt/downloader.d.ts +38 -0
  119. package/dist/types/stt/endpointer.d.ts +59 -0
  120. package/dist/types/stt/index.d.ts +5 -1
  121. package/dist/types/stt/models.d.ts +120 -0
  122. package/dist/types/stt/recorder.d.ts +17 -0
  123. package/dist/types/stt/stt-controller.d.ts +6 -0
  124. package/dist/types/stt/transcriber.d.ts +5 -7
  125. package/dist/types/stt/wav.d.ts +29 -0
  126. package/dist/types/system-prompt.d.ts +4 -0
  127. package/dist/types/task/executor.d.ts +2 -0
  128. package/dist/types/task/index.d.ts +9 -1
  129. package/dist/types/task/types.d.ts +37 -1
  130. package/dist/types/tools/ask.d.ts +1 -1
  131. package/dist/types/tools/ast-edit.d.ts +1 -1
  132. package/dist/types/tools/ast-grep.d.ts +1 -1
  133. package/dist/types/tools/bash.d.ts +3 -3
  134. package/dist/types/tools/browser/cmux/cmux-tab.d.ts +202 -0
  135. package/dist/types/tools/browser/cmux/rpc.d.ts +70 -0
  136. package/dist/types/tools/browser/cmux/socket-client.d.ts +19 -0
  137. package/dist/types/tools/browser/registry.d.ts +16 -3
  138. package/dist/types/tools/browser/render.d.ts +2 -0
  139. package/dist/types/tools/browser/tab-protocol.d.ts +2 -0
  140. package/dist/types/tools/browser/tab-supervisor.d.ts +16 -4
  141. package/dist/types/tools/browser.d.ts +3 -1
  142. package/dist/types/tools/checkpoint.d.ts +1 -1
  143. package/dist/types/tools/debug.d.ts +1 -1
  144. package/dist/types/tools/eval-render.d.ts +1 -1
  145. package/dist/types/tools/eval.d.ts +1 -1
  146. package/dist/types/tools/find.d.ts +1 -1
  147. package/dist/types/tools/gh.d.ts +1 -1
  148. package/dist/types/tools/image-gen.d.ts +1 -1
  149. package/dist/types/tools/index.d.ts +14 -2
  150. package/dist/types/tools/inspect-image.d.ts +1 -1
  151. package/dist/types/tools/irc.d.ts +2 -1
  152. package/dist/types/tools/job.d.ts +1 -1
  153. package/dist/types/tools/learn.d.ts +51 -0
  154. package/dist/types/tools/manage-skill.d.ts +40 -0
  155. package/dist/types/tools/memory-edit.d.ts +1 -1
  156. package/dist/types/tools/memory-recall.d.ts +1 -1
  157. package/dist/types/tools/memory-reflect.d.ts +1 -1
  158. package/dist/types/tools/memory-retain.d.ts +1 -1
  159. package/dist/types/tools/plan-mode-guard.d.ts +10 -0
  160. package/dist/types/tools/read.d.ts +1 -1
  161. package/dist/types/tools/render-mermaid.d.ts +1 -1
  162. package/dist/types/tools/renderers.d.ts +7 -11
  163. package/dist/types/tools/resolve.d.ts +1 -1
  164. package/dist/types/tools/review.d.ts +1 -1
  165. package/dist/types/tools/search-tool-bm25.d.ts +1 -1
  166. package/dist/types/tools/search.d.ts +1 -1
  167. package/dist/types/tools/ssh.d.ts +2 -2
  168. package/dist/types/tools/todo.d.ts +2 -2
  169. package/dist/types/tools/tts.d.ts +26 -1
  170. package/dist/types/tools/write.d.ts +2 -2
  171. package/dist/types/tts/downloader.d.ts +20 -0
  172. package/dist/types/tts/index.d.ts +8 -0
  173. package/dist/types/tts/models.d.ts +82 -0
  174. package/dist/types/tts/player.d.ts +32 -0
  175. package/dist/types/tts/runtime.d.ts +6 -0
  176. package/dist/types/tts/streaming-player.d.ts +41 -0
  177. package/dist/types/tts/tts-client.d.ts +93 -0
  178. package/dist/types/tts/tts-protocol.d.ts +95 -0
  179. package/dist/types/tts/tts-worker.d.ts +2 -0
  180. package/dist/types/tts/vocalizer.d.ts +41 -0
  181. package/dist/types/tts/wav.d.ts +8 -0
  182. package/dist/types/utils/clipboard.d.ts +4 -3
  183. package/dist/types/utils/image-loading.d.ts +18 -1
  184. package/dist/types/utils/thinking-display.d.ts +17 -0
  185. package/dist/types/utils/tool-choice.d.ts +8 -0
  186. package/dist/types/utils/tools-manager.d.ts +2 -1
  187. package/dist/types/utils/tools-manager.test.d.ts +1 -0
  188. package/dist/types/web/scrapers/github.d.ts +1 -1
  189. package/dist/types/web/search/index.d.ts +1 -1
  190. package/package.json +17 -16
  191. package/src/async/job-manager.ts +49 -0
  192. package/src/autolearn/controller.ts +139 -0
  193. package/src/autolearn/managed-skills.ts +257 -0
  194. package/src/autoresearch/state.ts +1 -1
  195. package/src/autoresearch/storage.ts +2 -1
  196. package/src/autoresearch/tools/init-experiment.ts +1 -1
  197. package/src/autoresearch/tools/log-experiment.ts +1 -1
  198. package/src/autoresearch/tools/run-experiment.ts +1 -1
  199. package/src/autoresearch/tools/update-notes.ts +1 -1
  200. package/src/autoresearch/types.ts +1 -1
  201. package/src/cli/args.ts +56 -10
  202. package/src/cli/auth-gateway-cli.ts +1 -1
  203. package/src/cli/bench-cli.ts +1 -1
  204. package/src/cli/dry-balance-cli.ts +1 -1
  205. package/src/cli/models-cli.ts +427 -0
  206. package/src/cli/session-picker.ts +2 -1
  207. package/src/cli/setup-cli.ts +148 -47
  208. package/src/cli/setup-model-picker.ts +43 -0
  209. package/src/cli-commands.ts +3 -0
  210. package/src/cli.ts +45 -13
  211. package/src/collab/host.ts +10 -13
  212. package/src/collab/protocol.ts +1 -1
  213. package/src/commands/launch.ts +0 -3
  214. package/src/commands/models.ts +61 -0
  215. package/src/commands/say.ts +102 -0
  216. package/src/commands/setup.ts +1 -1
  217. package/src/commands/token.ts +89 -0
  218. package/src/commit/agentic/tools/analyze-file.ts +4 -1
  219. package/src/commit/agentic/tools/git-file-diff.ts +1 -1
  220. package/src/commit/agentic/tools/git-hunk.ts +1 -1
  221. package/src/commit/agentic/tools/git-overview.ts +1 -1
  222. package/src/commit/agentic/tools/propose-changelog.ts +1 -1
  223. package/src/commit/agentic/tools/propose-commit.ts +1 -1
  224. package/src/commit/agentic/tools/recent-commits.ts +1 -1
  225. package/src/commit/agentic/tools/schemas.ts +1 -1
  226. package/src/commit/agentic/tools/split-commit.ts +1 -1
  227. package/src/commit/analysis/summary.ts +1 -1
  228. package/src/commit/changelog/generate.ts +1 -1
  229. package/src/commit/shared-llm.ts +1 -1
  230. package/src/config/keybindings.ts +2 -2
  231. package/src/config/model-discovery.ts +11 -5
  232. package/src/config/model-registry.ts +79 -21
  233. package/src/config/model-resolver.ts +2 -2
  234. package/src/config/models-config-schema.ts +5 -2
  235. package/src/config/models-config.ts +2 -1
  236. package/src/config/settings-schema.ts +266 -32
  237. package/src/config/settings.ts +10 -0
  238. package/src/discovery/builtin.ts +23 -1
  239. package/src/discovery/claude-plugins.ts +44 -5
  240. package/src/discovery/helpers.ts +41 -1
  241. package/src/edit/hashline/params.ts +1 -1
  242. package/src/edit/modes/apply-patch.ts +1 -1
  243. package/src/edit/modes/patch.ts +1 -1
  244. package/src/edit/modes/replace.ts +1 -1
  245. package/src/eval/__tests__/budget-bridge.test.ts +1 -1
  246. package/src/eval/agent-bridge.ts +1 -1
  247. package/src/eval/completion-bridge.ts +1 -1
  248. package/src/eval/js/shared/prelude.txt +69 -17
  249. package/src/export/html/index.ts +3 -6
  250. package/src/export/html/template.js +24 -2
  251. package/src/export/html/tool-views.generated.js +2 -2
  252. package/src/extensibility/custom-commands/loader.ts +1 -1
  253. package/src/extensibility/custom-commands/types.ts +2 -2
  254. package/src/extensibility/custom-tools/loader.ts +1 -1
  255. package/src/extensibility/custom-tools/types.ts +2 -2
  256. package/src/extensibility/extensions/loader.ts +2 -2
  257. package/src/extensibility/extensions/model-api.ts +41 -0
  258. package/src/extensibility/extensions/runner.ts +4 -0
  259. package/src/extensibility/extensions/types.ts +54 -3
  260. package/src/extensibility/extensions/wrapper.ts +41 -5
  261. package/src/extensibility/hooks/index.ts +2 -1
  262. package/src/extensibility/hooks/loader.ts +1 -1
  263. package/src/extensibility/hooks/types.ts +2 -2
  264. package/src/extensibility/plugins/legacy-pi-compat.ts +43 -13
  265. package/src/extensibility/plugins/loader.ts +30 -19
  266. package/src/extensibility/plugins/manager.ts +221 -90
  267. package/src/extensibility/shared-events.ts +1 -1
  268. package/src/extensibility/skills.ts +101 -5
  269. package/src/goals/guided-setup.ts +133 -0
  270. package/src/goals/state.ts +1 -1
  271. package/src/goals/tools/goal-tool.ts +1 -1
  272. package/src/hindsight/transcript.ts +1 -1
  273. package/src/index.ts +5 -0
  274. package/src/internal-urls/docs-index.generated.ts +13 -10
  275. package/src/internal-urls/history-protocol.ts +1 -1
  276. package/src/internal-urls/local-protocol.ts +29 -7
  277. package/src/lsp/types.ts +1 -1
  278. package/src/main.ts +27 -32
  279. package/src/mcp/config-writer.ts +7 -3
  280. package/src/mcp/manager.ts +11 -0
  281. package/src/mcp/startup-events.ts +21 -0
  282. package/src/mcp/transports/stdio.ts +2 -1
  283. package/src/memories/index.ts +149 -12
  284. package/src/memories/storage.ts +2 -1
  285. package/src/memory-backend/local-backend.ts +11 -5
  286. package/src/mnemopi/backend.ts +1 -0
  287. package/src/mnemopi/config.ts +112 -12
  288. package/src/modes/acp/acp-agent.ts +8 -53
  289. package/src/modes/acp/acp-event-mapper.ts +5 -1
  290. package/src/modes/components/agent-hub.ts +51 -5
  291. package/src/modes/components/assistant-message.ts +12 -44
  292. package/src/modes/components/compaction-summary-message.ts +125 -26
  293. package/src/modes/components/custom-editor.test.ts +96 -0
  294. package/src/modes/components/custom-editor.ts +164 -8
  295. package/src/modes/components/index.ts +1 -0
  296. package/src/modes/components/logout-account-selector.ts +130 -0
  297. package/src/modes/components/mcp-add-wizard.ts +1 -1
  298. package/src/modes/components/model-selector.ts +2 -2
  299. package/src/modes/components/session-selector.ts +1 -1
  300. package/src/modes/components/settings-defs.ts +7 -0
  301. package/src/modes/components/status-line/component.ts +54 -157
  302. package/src/modes/components/status-line/segments.ts +1 -1
  303. package/src/modes/components/status-line/types.ts +2 -1
  304. package/src/modes/components/tool-execution.ts +82 -43
  305. package/src/modes/components/transcript-container.ts +70 -1
  306. package/src/modes/components/tree-selector.ts +1 -1
  307. package/src/modes/components/usage-row.ts +18 -0
  308. package/src/modes/components/user-message.ts +4 -2
  309. package/src/modes/controllers/command-controller.ts +14 -16
  310. package/src/modes/controllers/event-controller.ts +101 -73
  311. package/src/modes/controllers/extension-ui-controller.ts +6 -0
  312. package/src/modes/controllers/input-controller.ts +311 -57
  313. package/src/modes/controllers/mcp-command-controller.ts +44 -3
  314. package/src/modes/controllers/selector-controller.ts +68 -12
  315. package/src/modes/controllers/streaming-reveal.ts +4 -3
  316. package/src/modes/gradient-highlight.ts +21 -9
  317. package/src/modes/image-references.ts +20 -0
  318. package/src/modes/interactive-mode.ts +288 -48
  319. package/src/modes/magic-keywords.ts +27 -5
  320. package/src/modes/rpc/rpc-mode.ts +146 -14
  321. package/src/modes/rpc/rpc-subagents.ts +2 -2
  322. package/src/modes/rpc/rpc-types.ts +8 -2
  323. package/src/modes/runtime-init.ts +28 -3
  324. package/src/modes/theme/theme.ts +99 -51
  325. package/src/modes/types.ts +6 -7
  326. package/src/modes/utils/hotkeys-markdown.ts +1 -1
  327. package/src/modes/utils/ui-helpers.ts +36 -7
  328. package/src/priority.json +5 -1
  329. package/src/prompts/agents/task.md +1 -0
  330. package/src/prompts/goals/guided-goal-interview.md +8 -0
  331. package/src/prompts/goals/guided-goal-system.md +12 -0
  332. package/src/prompts/memories/read-path.md +6 -0
  333. package/src/prompts/system/autolearn-guidance-learn.md +1 -0
  334. package/src/prompts/system/autolearn-guidance.md +7 -0
  335. package/src/prompts/system/autolearn-nudge.md +3 -0
  336. package/src/prompts/system/eager-task.md +7 -0
  337. package/src/prompts/system/eager-todo.md +11 -6
  338. package/src/prompts/system/empty-stop-retry.md +4 -6
  339. package/src/prompts/system/subagent-system-prompt.md +4 -0
  340. package/src/prompts/system/system-prompt.md +10 -5
  341. package/src/prompts/system/title-marker-instruction.md +1 -0
  342. package/src/prompts/system/title-system-marker.md +16 -0
  343. package/src/prompts/tools/job.md +1 -0
  344. package/src/prompts/tools/learn.md +7 -0
  345. package/src/prompts/tools/manage-skill.md +9 -0
  346. package/src/prompts/tools/task.md +3 -0
  347. package/src/registry/agent-registry.ts +30 -0
  348. package/src/sdk.ts +103 -43
  349. package/src/secrets/obfuscator.ts +1 -1
  350. package/src/session/agent-session.ts +331 -318
  351. package/src/session/agent-storage.ts +18 -9
  352. package/src/session/history-storage.ts +3 -2
  353. package/src/session/indexed-session-storage.ts +7 -10
  354. package/src/session/messages.ts +9 -11
  355. package/src/session/session-context.ts +352 -0
  356. package/src/session/session-dump-format.ts +4 -2
  357. package/src/session/session-entries.ts +194 -0
  358. package/src/session/session-listing.ts +588 -0
  359. package/src/session/session-loader.ts +106 -0
  360. package/src/session/session-manager.ts +968 -3064
  361. package/src/session/session-migrations.ts +78 -0
  362. package/src/session/session-paths.ts +193 -0
  363. package/src/session/session-persistence.ts +131 -0
  364. package/src/session/session-storage.ts +91 -30
  365. package/src/session/snapcompact-inline.ts +21 -1
  366. package/src/session/snapcompact-savings-journal.ts +113 -0
  367. package/src/session/tool-choice-queue.ts +23 -11
  368. package/src/slash-commands/builtin-registry.ts +40 -4
  369. package/src/slash-commands/helpers/logout.ts +88 -0
  370. package/src/stt/asr-client.ts +520 -0
  371. package/src/stt/asr-protocol.ts +65 -0
  372. package/src/stt/asr-worker.ts +790 -0
  373. package/src/stt/downloader.ts +107 -47
  374. package/src/stt/endpointer.ts +259 -0
  375. package/src/stt/index.ts +5 -1
  376. package/src/stt/models.ts +150 -0
  377. package/src/stt/recorder.ts +247 -60
  378. package/src/stt/stt-controller.ts +201 -22
  379. package/src/stt/transcriber.ts +37 -68
  380. package/src/stt/wav.ts +173 -0
  381. package/src/system-prompt.ts +8 -0
  382. package/src/task/agents.ts +1 -2
  383. package/src/task/executor.ts +49 -15
  384. package/src/task/index.ts +60 -6
  385. package/src/task/render.ts +83 -8
  386. package/src/task/types.ts +54 -1
  387. package/src/tools/ask.ts +9 -1
  388. package/src/tools/ast-edit.ts +1 -1
  389. package/src/tools/ast-grep.ts +1 -1
  390. package/src/tools/bash.ts +5 -4
  391. package/src/tools/browser/cmux/cmux-tab.ts +1264 -0
  392. package/src/tools/browser/cmux/rpc.ts +156 -0
  393. package/src/tools/browser/cmux/socket-client.ts +309 -0
  394. package/src/tools/browser/registry.ts +37 -3
  395. package/src/tools/browser/render.ts +6 -1
  396. package/src/tools/browser/tab-protocol.ts +2 -0
  397. package/src/tools/browser/tab-supervisor.ts +189 -18
  398. package/src/tools/browser/tab-worker.ts +1 -1
  399. package/src/tools/browser.ts +16 -1
  400. package/src/tools/checkpoint.ts +1 -1
  401. package/src/tools/debug.ts +1 -1
  402. package/src/tools/eval-render.ts +4 -3
  403. package/src/tools/eval.ts +11 -6
  404. package/src/tools/fetch.ts +13 -2
  405. package/src/tools/find.ts +1 -1
  406. package/src/tools/gh.ts +1 -1
  407. package/src/tools/github-cache.ts +2 -1
  408. package/src/tools/image-gen.ts +1 -1
  409. package/src/tools/index.ts +43 -5
  410. package/src/tools/inspect-image.ts +3 -1
  411. package/src/tools/irc.ts +11 -3
  412. package/src/tools/job.ts +15 -3
  413. package/src/tools/learn.ts +144 -0
  414. package/src/tools/manage-skill.ts +104 -0
  415. package/src/tools/memory-edit.ts +1 -1
  416. package/src/tools/memory-recall.ts +1 -1
  417. package/src/tools/memory-reflect.ts +1 -1
  418. package/src/tools/memory-retain.ts +1 -1
  419. package/src/tools/plan-mode-guard.ts +53 -19
  420. package/src/tools/read.ts +8 -2
  421. package/src/tools/render-mermaid.ts +1 -1
  422. package/src/tools/renderers.ts +7 -11
  423. package/src/tools/report-tool-issue.ts +3 -2
  424. package/src/tools/resolve.ts +1 -1
  425. package/src/tools/review.ts +1 -1
  426. package/src/tools/search-tool-bm25.ts +1 -1
  427. package/src/tools/search.ts +1 -1
  428. package/src/tools/ssh.ts +5 -4
  429. package/src/tools/todo.ts +2 -2
  430. package/src/tools/tts.ts +204 -93
  431. package/src/tools/write.ts +19 -3
  432. package/src/tts/downloader.ts +64 -0
  433. package/src/tts/index.ts +8 -0
  434. package/src/tts/models.ts +137 -0
  435. package/src/tts/player.ts +137 -0
  436. package/src/tts/runtime.ts +21 -0
  437. package/src/tts/streaming-player.ts +266 -0
  438. package/src/tts/tts-client.ts +647 -0
  439. package/src/tts/tts-protocol.ts +60 -0
  440. package/src/tts/tts-worker.ts +497 -0
  441. package/src/tts/vocalizer.ts +162 -0
  442. package/src/tts/wav.ts +58 -0
  443. package/src/utils/clipboard.ts +35 -18
  444. package/src/utils/image-loading.ts +35 -4
  445. package/src/utils/thinking-display.ts +37 -0
  446. package/src/utils/title-generator.ts +48 -5
  447. package/src/utils/tool-choice.ts +16 -0
  448. package/src/utils/tools-manager.test.ts +25 -0
  449. package/src/utils/tools-manager.ts +19 -1
  450. package/src/web/scrapers/github.ts +96 -0
  451. package/src/web/search/index.ts +14 -1
  452. package/src/web/search/providers/searxng.ts +13 -1
  453. package/dist/types/cli/list-models.d.ts +0 -30
  454. package/dist/types/stt/setup.d.ts +0 -18
  455. package/src/cli/list-models.ts +0 -194
  456. package/src/stt/setup.ts +0 -52
  457. package/src/stt/transcribe.py +0 -70
@@ -0,0 +1,497 @@
1
+ import { createRequire } from "node:module";
2
+ import * as path from "node:path";
3
+ import type { ProgressInfo, RawAudio } from "@huggingface/transformers";
4
+ import {
5
+ ensureRuntimeInstalled,
6
+ getTinyModelsCacheDir,
7
+ installRuntimeModuleResolver,
8
+ resolveRuntimeModule,
9
+ } from "@oh-my-pi/pi-utils";
10
+ import { resolveTinyModelDevicePreference, type TinyModelDevice, tinyModelDeviceLoadOrder } from "../tiny/device";
11
+ import { resolveTinyModelDtypeOverride, type TinyModelDtype } from "../tiny/dtype";
12
+ import { getTtsLocalModelSpec, resolveTtsVoice, type TtsLocalModelKey, type TtsLocalModelSpec } from "./models";
13
+ import {
14
+ getTtsRuntimeDir,
15
+ KOKORO_PACKAGE,
16
+ KOKORO_VERSION,
17
+ ONNXRUNTIME_NODE_PACKAGE,
18
+ ONNXRUNTIME_NODE_VERSION,
19
+ } from "./runtime";
20
+ import type { TtsProgressEvent, TtsTransport, TtsWorkerInbound } from "./tts-protocol";
21
+
22
/** Task identifier for speech synthesis. */
const TTS_TASK = "text-to-speech";
/** Package name of the transformers build that `kokoro-js` loads inside the side runtime. */
const TRANSFORMERS_PACKAGE = "@huggingface/transformers";

// Why kokoro-js lives outside the main dependency tree:
//   - it drags in transformers@3.8.1 + onnxruntime-node@1.21, and 1.21 segfaults
//     Bun on session creation, so that graph must not pollute the main tree;
//   - instead it is lazily `bun install`ed into a side runtime dir on first use,
//     with onnxruntime-node force-pinned to the Bun-safe version the rest of the
//     stack runs.
// Bump KOKORO_VERSION to roll the cached runtime + model wrapper.

/** Device preference for tiny models, resolved once at module load. */
const ttsDevicePreference = resolveTinyModelDevicePreference();
/** Optional dtype override for tiny models, resolved once at module load. */
const ttsDtypeOverride = resolveTinyModelDtypeOverride();
32
+
33
/**
 * The device identifiers `kokoro-js` accepts. Tiny-model device values are
 * translated onto this narrower set before loading a model.
 */
type KokoroDevice = "cpu" | "wasm" | "webgpu";
35
+
36
/**
 * Structural view of a loaded `kokoro-js` `KokoroTTS` synthesizer, limited to
 * the members this worker uses.
 */
interface KokoroTtsInstance {
	/** Synthesize `text` in one shot with the given voice. */
	generate(text: string, options: { voice: string }): Promise<RawAudio>;
	/**
	 * Synthesize incrementally from a plain string or a text splitter stream.
	 * Each yielded item carries the text, its phonemes, and the audio for it.
	 */
	stream(
		text: string | TextSplitterStreamInstance,
		options: { voice: string },
	): AsyncGenerator<{ text: string; phonemes: string; audio: RawAudio }, void, void>;
}
44
+
45
/**
 * Structural view of `kokoro-js`'s `TextSplitterStream`, the incremental text
 * source consumed by {@link KokoroTtsInstance.stream}. Text may be pushed at any
 * time and is split into complete sentences; closing flushes whatever is still
 * buffered and ends the stream.
 */
interface TextSplitterStreamInstance {
	/** Append one or more text fragments. */
	push(...texts: string[]): void;
	/** Flush the trailing buffer and end the stream. */
	close(): void;
}
54
+
55
/**
 * The slice of the `kokoro-js` module this worker relies on: the static
 * `KokoroTTS` loader (pulls a model from the Hugging Face Hub) and the
 * `TextSplitterStream` constructor.
 */
interface KokoroRuntime {
	KokoroTTS: {
		/** Load the model published at `repo` with the requested dtype and device. */
		from_pretrained(
			repo: string,
			options: {
				dtype: TinyModelDtype;
				device: KokoroDevice;
				progress_callback: (info: ProgressInfo) => void;
			},
		): Promise<KokoroTtsInstance>;
	};
	/** Constructor for the incremental text source used by streaming synthesis. */
	TextSplitterStream: new () => TextSplitterStreamInstance;
}
69
+
70
/**
 * Structural view of the `@huggingface/transformers` module that `kokoro-js`
 * runs on. Only `env` (cache dir, local-model toggle, log level) and `LogLevel`
 * are touched here; inference itself goes through Kokoro.
 */
interface TransformersEnv {
	/** Mutable global settings of the transformers module. */
	env: {
		cacheDir?: string;
		allowLocalModels?: boolean;
		logLevel?: unknown;
	};
	/** Log-level constants; only `ERROR` is read. */
	LogLevel: {
		ERROR: unknown;
	};
}
84
+
85
/** Model-load promises keyed by local model key. */
const models: Map<TtsLocalModelKey, Promise<KokoroTtsInstance>> = new Map();
/** Promise chain, initially resolved, used as the synthesis queue. */
let synthesizeQueue: Promise<void> = Promise.resolve();
/** Memoized `kokoro-js` runtime load; `null` until first requested or after a failed load. */
let kokoroRuntime: Promise<KokoroRuntime> | null = null;
88
+
89
/**
 * State of one in-flight streaming request. A session comes into existence on
 * `stream-start` and is torn down once its generator finishes. Until the model
 * has finished loading, pushed text is parked in `buffered`; it is flushed into
 * the splitter as soon as that exists, and later pushes go directly to the live
 * splitter.
 */
interface StreamSession {
	modelKey: TtsLocalModelKey;
	voice: string | undefined;
	/** Text received before the splitter exists. */
	buffered: string[];
	/** Live text splitter, or `null` while it has not been created yet. */
	splitter: TextSplitterStreamInstance | null;
	ended: boolean;
	cancelled: boolean;
}

/** In-flight streaming sessions, keyed by request id. */
const streamSessions: Map<string, StreamSession> = new Map();
104
+
105
/**
 * Render a thrown value as detailed text.
 *
 * @param error - Anything that was thrown or rejected.
 * @returns For `Error` instances the stack trace, or the message when `stack`
 *   is null/undefined; for every other value its `String(...)` form.
 */
function errorText(error: unknown): string {
	if (!(error instanceof Error)) {
		return String(error);
	}
	return error.stack ?? error.message;
}
108
+
109
/**
 * Render a thrown value as a short, single message.
 *
 * @param error - Anything that was thrown or rejected.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
	if (error instanceof Error) {
		return error.message;
	}
	return String(error);
}
112
+
113
/**
 * Forward a log record over the worker transport.
 *
 * @param transport - Channel the record is sent on.
 * @param level - Severity of the record.
 * @param msg - Log message.
 * @param meta - Optional structured fields attached to the record.
 */
function sendLog(
	transport: TtsTransport,
	level: "debug" | "warn" | "error",
	msg: string,
	meta?: Record<string, unknown>,
): void {
	const record = { type: "log" as const, level, msg, meta };
	transport.send(record);
}
121
+
122
/**
 * Report a runtime-install phase as a progress message for a request. The
 * event is named after the pinned Kokoro package (`<package>@<version>`).
 *
 * @param transport - Channel the progress message is sent on.
 * @param requestId - Id of the request the progress belongs to.
 * @param modelKey - Model the runtime is being installed for.
 * @param status - Install phase being reported.
 */
function sendRuntimeInstallProgress(
	transport: TtsTransport,
	requestId: string,
	modelKey: TtsLocalModelKey,
	status: "initiate" | "download" | "done",
): void {
	const name = `${KOKORO_PACKAGE}@${KOKORO_VERSION}`;
	transport.send({ type: "progress", id: requestId, event: { modelKey, status, name } });
}
134
+
135
/**
 * Translate a tiny-model device into one of the devices `kokoro-js` accepts.
 * The worker runs `kokoro-js` on Node, where `cpu` (onnxruntime-node) is the
 * only safe choice, so anything not explicitly `wasm`, `webgpu` or `gpu`
 * collapses to `cpu`.
 *
 * @param device - Tiny-model device value.
 * @returns `"wasm"` for `wasm`, `"webgpu"` for `webgpu`/`gpu`, otherwise `"cpu"`.
 */
function toKokoroDevice(device: TinyModelDevice): KokoroDevice {
	switch (device) {
		case "wasm":
			return "wasm";
		case "webgpu":
		case "gpu":
			return "webgpu";
		default:
			return "cpu";
	}
}
145
+
146
/**
 * Apply this worker's settings to a transformers module: point its cache at
 * the tiny-models cache dir, disable local models, and set the log level to
 * `ERROR`.
 *
 * @param transformers - The transformers module instance to mutate.
 */
function configureTransformers(transformers: TransformersEnv): void {
	const settings = transformers.env;
	settings.cacheDir = getTinyModelsCacheDir();
	settings.allowLocalModels = false;
	settings.logLevel = transformers.LogLevel.ERROR;
}
151
+
152
/**
 * Install (if needed) and load the `kokoro-js` runtime from its side runtime
 * directory, returning the module with the `@huggingface/transformers` instance
 * it loads already configured (cache dir + quiet logging).
 *
 * `kokoro-js` is never a dependency of the main tree: its transformers@3.8.1
 * graph pulls onnxruntime-node@1.21, which segfaults Bun on session creation,
 * so the install manifest force-pins onnxruntime-node to the Bun-safe version
 * through `overrides`. `sharp` is replaced by an empty stub module because the
 * TTS pipeline is audio-only and does not need the native image codec that
 * transformers eagerly requires.
 *
 * The load is memoized: once started, every caller shares the same promise, so
 * install progress is reported only for the call that started it. A failed
 * load clears the memo so a later call can retry.
 *
 * @param transport - Channel install-phase progress is reported on.
 * @param requestId - Request id attached to the progress messages.
 * @param modelKey - Model key attached to the progress messages.
 * @returns The loaded `kokoro-js` module.
 * @throws If installation fails or the package entry cannot be resolved.
 */
async function loadKokoroRuntime(
	transport: TtsTransport,
	requestId: string,
	modelKey: TtsLocalModelKey,
): Promise<KokoroRuntime> {
	if (kokoroRuntime) return kokoroRuntime;

	const installAndLoad = async (): Promise<KokoroRuntime> => {
		const installedDir = await ensureRuntimeInstalled({
			runtimeDir: getTtsRuntimeDir(),
			install: {
				dependencies: { [KOKORO_PACKAGE]: KOKORO_VERSION },
				overrides: { [ONNXRUNTIME_NODE_PACKAGE]: ONNXRUNTIME_NODE_VERSION },
				trustedDependencies: [ONNXRUNTIME_NODE_PACKAGE],
			},
			probePackage: KOKORO_PACKAGE,
			onPhase: phase => sendRuntimeInstallProgress(transport, requestId, modelKey, phase),
		});
		const runtimeNodeModules = path.join(installedDir, "node_modules");

		// Empty CommonJS module that stands in for `sharp`.
		const sharpStubPath = path.join(installedDir, "omp-sharp-stub.cjs");
		await Bun.write(sharpStubPath, "module.exports = {};\n");
		installRuntimeModuleResolver({ runtimeNodeModules, stubs: { sharp: sharpStubPath } });

		const kokoroEntry = resolveRuntimeModule(runtimeNodeModules, KOKORO_PACKAGE);
		if (!kokoroEntry) {
			throw new Error(`Unable to resolve ${KOKORO_PACKAGE} in runtime at ${runtimeNodeModules}`);
		}

		// Resolve relative to the kokoro entry so the transformers copy configured
		// here is the one kokoro-js itself will load.
		const requireFromKokoro = createRequire(kokoroEntry);
		const transformers = requireFromKokoro(TRANSFORMERS_PACKAGE) as TransformersEnv;
		configureTransformers(transformers);
		return requireFromKokoro(kokoroEntry) as KokoroRuntime;
	};

	// The reset lives in `.catch` (not a try/catch inside the loader) so it always
	// runs after the memo assignment below, even for a synchronous failure.
	kokoroRuntime = installAndLoad().catch(error => {
		kokoroRuntime = null;
		throw error;
	});
	return kokoroRuntime;
}
194
+
195
/**
 * Convert a loader progress notification into the worker's progress event,
 * tagging it with `modelKey` and copying only the fields that belong to the
 * given status.
 */
function toProgressEvent(modelKey: TtsLocalModelKey, info: ProgressInfo): TtsProgressEvent {
	switch (info.status) {
		case "ready":
			return { modelKey, status: info.status, task: info.task, model: info.model };
		case "progress_total":
			return {
				modelKey,
				status: info.status,
				name: info.name,
				progress: info.progress,
				loaded: info.loaded,
				total: info.total,
				files: info.files,
			};
		case "progress":
			return {
				modelKey,
				status: info.status,
				name: info.name,
				file: info.file,
				progress: info.progress,
				loaded: info.loaded,
				total: info.total,
			};
		default:
			// Every remaining status carries just a name and a file.
			return { modelKey, status: info.status, name: info.name, file: info.file };
	}
}
223
+
224
/** Emit one `progress` message for request `id`, built from a loader progress notification. */
function sendProgress(transport: TtsTransport, id: string, modelKey: TtsLocalModelKey, info: ProgressInfo): void {
	const event = toProgressEvent(modelKey, info);
	transport.send({ type: "progress", id, event });
}
227
+
228
/**
 * Load the model described by `spec` on one specific device, forwarding the
 * loader's progress notifications to the transport as `progress` messages for
 * `requestId`. A dtype override, when set, takes precedence over the spec's
 * dtype.
 */
async function loadModelOnDevice(
	runtime: KokoroRuntime,
	spec: TtsLocalModelSpec,
	modelKey: TtsLocalModelKey,
	transport: TtsTransport,
	requestId: string,
	device: KokoroDevice,
): Promise<KokoroTtsInstance> {
	const dtype = ttsDtypeOverride ?? spec.dtype;
	return runtime.KokoroTTS.from_pretrained(spec.repo, {
		device,
		dtype,
		progress_callback: info => sendProgress(transport, requestId, modelKey, info),
	});
}
242
+
243
/**
 * Load the model on the first device in the preferred load order that works.
 *
 * The load order comes from the current device preference; when its first
 * entry differs from the requested device a warning is logged. Devices are
 * mapped to kokoro device names and de-duplicated (keeping first occurrence),
 * then tried in turn: each failure is logged with the device that will be
 * tried next, and the last failure is rethrown.
 *
 * @returns The loaded model together with the device it was loaded on.
 * @throws The final device's load error, or `No TTS devices configured` when
 *   the load order is empty.
 */
async function loadModelWithDeviceFallback(
	runtime: KokoroRuntime,
	spec: TtsLocalModelSpec,
	modelKey: TtsLocalModelKey,
	transport: TtsTransport,
	requestId: string,
): Promise<{ model: KokoroTtsInstance; device: KokoroDevice }> {
	const order = tinyModelDeviceLoadOrder(ttsDevicePreference);
	const preferred = order[0];
	if (preferred !== ttsDevicePreference.device) {
		sendLog(transport, "warn", "tts: requested device is unsafe in the worker; using CPU", {
			modelKey,
			repo: spec.repo,
			requestedDevice: ttsDevicePreference.device,
			device: preferred,
		});
	}

	// A Set keeps insertion order, so this preserves the first occurrence of
	// each mapped device.
	const unique = new Set<KokoroDevice>();
	for (const requested of order) unique.add(toKokoroDevice(requested));
	const candidates = [...unique];

	for (const [position, device] of candidates.entries()) {
		try {
			const model = await loadModelOnDevice(runtime, spec, modelKey, transport, requestId, device);
			return { model, device };
		} catch (error) {
			const fallbackDevice = candidates[position + 1];
			// Nothing left to try: surface the failure of the last device.
			if (fallbackDevice === undefined) throw error;
			sendLog(transport, "warn", "tts: accelerated device failed; falling back", {
				modelKey,
				repo: spec.repo,
				device,
				fallbackDevice,
				error: errorMessage(error),
			});
		}
	}
	throw new Error("No TTS devices configured");
}
282
+
283
/**
 * Return the synthesizer for `modelKey`, loading it at most once.
 *
 * The load promise is cached in `models`; a cached hit is returned directly
 * and, once it settles successfully, a `ready` progress event is sent for this
 * request. On a miss the kokoro runtime is loaded, the model is loaded with
 * device fallback, the outcome is debug-logged with timing, and `ready` is
 * announced. A failed load is evicted from the cache so it can be retried.
 *
 * @throws When `modelKey` has no spec, or when runtime/model loading fails.
 */
async function loadModel(
	modelKey: TtsLocalModelKey,
	transport: TtsTransport,
	requestId: string,
): Promise<KokoroTtsInstance> {
	const spec = getTtsLocalModelSpec(modelKey);
	if (!spec) throw new Error(`Unknown local TTS model: ${modelKey}`);

	const announceReady = (): void => {
		transport.send({
			type: "progress",
			id: requestId,
			event: { modelKey, status: "ready", task: TTS_TASK, model: spec.repo },
		});
	};

	const inFlight = models.get(modelKey);
	if (inFlight) {
		// Fire-and-forget: the caller observes a failure through the returned
		// promise, so the notification branch swallows it.
		void inFlight.then(() => announceReady()).catch(() => undefined);
		return inFlight;
	}

	const runtime = await loadKokoroRuntime(transport, requestId, modelKey);
	const startedAt = performance.now();
	const pending = loadModelWithDeviceFallback(runtime, spec, modelKey, transport, requestId).then(
		({ model, device }) => {
			sendLog(transport, "debug", "tts: local model loaded", {
				modelKey,
				repo: spec.repo,
				device,
				requestedDevice: ttsDevicePreference.device,
				dtype: ttsDtypeOverride ?? spec.dtype,
				elapsedMs: Math.round(performance.now() - startedAt),
			});
			announceReady();
			return model;
		},
		error => {
			// Drop the failed promise so the next request retries the load.
			models.delete(modelKey);
			throw error;
		},
	);
	models.set(modelKey, pending);
	return pending;
}
331
+
332
/**
 * Synthesize `text` in one shot with the given model and voice.
 *
 * @returns The first audio buffer of the result and its sample rate; the rate
 *   falls back to the model spec's rate, then to 24 kHz, when the output does
 *   not report one.
 * @throws When the model cannot be loaded or the output has no audio.
 */
async function synthesize(
	transport: TtsTransport,
	requestId: string,
	modelKey: TtsLocalModelKey,
	text: string,
	voice: string | undefined,
): Promise<{ pcm: Float32Array; sampleRate: number }> {
	const engine = await loadModel(modelKey, transport, requestId);
	const result = await engine.generate(text, { voice: resolveTtsVoice(modelKey, voice) });
	const spec = getTtsLocalModelSpec(modelKey);

	const pcm = Array.isArray(result.audio) ? result.audio[0] : result.audio;
	if (!pcm) throw new Error("Kokoro synthesis returned no audio samples");

	const sampleRate = result.sampling_rate || spec?.sampleRate || 24_000;
	return { pcm, sampleRate };
}
346
+
347
/**
 * Append a synthesize/download request to the serialized work queue. The
 * request runs once the previous queue entry settles, whether that entry
 * succeeded or failed.
 */
function enqueueRequest(
	transport: TtsTransport,
	request: Extract<TtsWorkerInbound, { type: "synthesize" | "download" }>,
): void {
	const run = async (): Promise<void> => {
		await handleQueuedRequest(transport, request);
	};
	synthesizeQueue = synthesizeQueue.then(run, run);
}
360
+
361
/**
 * Execute one queued request and reply on the transport:
 * `download` loads the model and answers `downloaded`; `synthesize` answers
 * `audio` with the PCM and sample rate. Any failure is reported as an `error`
 * message for the same request id instead of being thrown.
 */
async function handleQueuedRequest(
	transport: TtsTransport,
	request: Extract<TtsWorkerInbound, { type: "synthesize" | "download" }>,
): Promise<void> {
	const { id } = request;
	try {
		if (request.type === "download") {
			await loadModel(request.modelKey, transport, id);
			transport.send({ type: "downloaded", id });
		} else {
			const result = await synthesize(transport, id, request.modelKey, request.text, request.voice);
			transport.send({ type: "audio", id, pcm: result.pcm, sampleRate: result.sampleRate });
		}
	} catch (error) {
		transport.send({ type: "error", id, error: errorText(error) });
	}
}
383
+
384
/**
 * Run a single streaming session from start to finish.
 *
 * Loads the runtime and model, creates the text splitter, replays any text
 * that was pushed before the model was ready, then sends one `audio-chunk`
 * per synthesized chunk and finally a single `stream-done`. Cancellation is
 * re-checked after every await and on every chunk; a cancelled session sends
 * neither `stream-done` nor `error`. The session is removed from
 * `streamSessions` on every exit path.
 *
 * Serialized through {@link synthesizeQueue} so it never interleaves model
 * access with a batch synthesize/download.
 */
async function runStreamSession(transport: TtsTransport, id: string, session: StreamSession): Promise<void> {
	try {
		if (session.cancelled) return;
		const runtime = await loadKokoroRuntime(transport, id, session.modelKey);
		if (session.cancelled) return;
		const engine = await loadModel(session.modelKey, transport, id);
		if (session.cancelled) return;

		const spec = getTtsLocalModelSpec(session.modelKey);
		const splitter = new runtime.TextSplitterStream();
		// Replay buffered text before publishing the splitter on the session,
		// so later pushes are ordered after the already-queued fragments.
		for (const pending of session.buffered) {
			if (session.cancelled) return;
			splitter.push(pending);
		}
		session.buffered = [];
		session.splitter = splitter;
		// The input may already have been closed while the model was loading.
		if (session.ended || session.cancelled) splitter.close();

		const voice = resolveTtsVoice(session.modelKey, session.voice);
		let nextIndex = 0;
		for await (const chunk of engine.stream(splitter, { voice })) {
			if (session.cancelled) break;
			const rendered = chunk.audio;
			const pcm = Array.isArray(rendered.audio) ? rendered.audio[0] : rendered.audio;
			if (!pcm) continue;
			const sampleRate = rendered.sampling_rate || spec?.sampleRate || 24_000;
			transport.send({
				type: "audio-chunk",
				id,
				index: nextIndex,
				text: chunk.text,
				pcm,
				sampleRate,
			});
			nextIndex += 1;
		}
		if (!session.cancelled) transport.send({ type: "stream-done", id });
	} catch (error) {
		if (!session.cancelled) transport.send({ type: "error", id, error: errorText(error) });
	} finally {
		streamSessions.delete(id);
	}
}
431
+
432
/**
 * Register a new streaming session under the message id and schedule it on
 * the serialized work queue. The session starts with no splitter, so text
 * pushed before it runs is buffered on the session.
 */
function startStreamSession(
	transport: TtsTransport,
	message: Extract<TtsWorkerInbound, { type: "stream-start" }>,
): void {
	const { id, modelKey, voice } = message;
	const session: StreamSession = {
		modelKey,
		voice,
		buffered: [],
		splitter: null,
		ended: false,
		cancelled: false,
	};
	streamSessions.set(id, session);

	// Run after the previous queue entry settles, regardless of its outcome.
	const run = (): Promise<void> => runStreamSession(transport, id, session);
	synthesizeQueue = synthesizeQueue.then(run, run);
}
450
+
451
/**
 * Feed more text into session `id`. Text goes straight to the splitter once
 * it exists and is buffered on the session until then. Unknown or cancelled
 * sessions are ignored.
 */
function pushToStreamSession(id: string, text: string): void {
	const session = streamSessions.get(id);
	if (!session || session.cancelled) return;

	const { splitter } = session;
	if (splitter) {
		splitter.push(text);
		return;
	}
	session.buffered.push(text);
}
457
+
458
/**
 * Mark session `id` as having received all its text and close its splitter
 * when one already exists. Unknown or cancelled sessions are ignored.
 */
function endStreamSession(id: string): void {
	const session = streamSessions.get(id);
	if (!session || session.cancelled) return;

	session.ended = true;
	const { splitter } = session;
	if (splitter) splitter.close();
}
464
+
465
/**
 * Cancel session `id`: flag it cancelled, discard buffered text, close its
 * splitter when present, and forget the session. Unknown ids are ignored.
 */
function cancelStreamSession(id: string): void {
	const session = streamSessions.get(id);
	if (!session) return;

	session.cancelled = true;
	session.buffered = [];
	const { splitter } = session;
	if (splitter) splitter.close();
	streamSessions.delete(id);
}
473
+
474
/**
 * Attach the worker's message handler to `transport`.
 *
 * `ping` is answered with `pong`; the `stream-*` messages drive streaming
 * sessions; every other message is placed on the serialized request queue.
 */
export function startTtsWorker(transport: TtsTransport): void {
	transport.onMessage(message => {
		if (message.type === "ping") {
			transport.send({ type: "pong", id: message.id });
			return;
		}
		if (message.type === "stream-start") {
			startStreamSession(transport, message);
			return;
		}
		if (message.type === "stream-push") {
			pushToStreamSession(message.id, message.text);
			return;
		}
		if (message.type === "stream-end") {
			endStreamSession(message.id);
			return;
		}
		if (message.type === "stream-cancel") {
			cancelStreamSession(message.id);
			return;
		}
		enqueueRequest(transport, message);
	});
}
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Streaming assistant speech-vocalization.
3
+ *
4
+ * The vocalizer turns the assistant's STREAMING output into spoken audio as a
5
+ * side effect of the normal turn. Text deltas are streamed *straight into the
6
+ * TTS engine* ({@link Vocalizer.pushDelta} → the worker's incremental text
7
+ * input): the engine splits the running text at sentence boundaries and emits
8
+ * one audio chunk per sentence, which a single {@link StreamingAudioPlayer}
9
+ * plays back gaplessly. So the assistant starts speaking sentence 1 while later
10
+ * sentences are still being generated — low latency, never overlapping.
11
+ *
12
+ * Overspeech control:
13
+ * - {@link clear} stops playback instantly (kills the player) and aborts
14
+ * in-flight synthesis — wired to a new turn, an Esc/Ctrl+C interrupt, and a
15
+ * sent message.
16
+ * - {@link duck}/{@link unduck} lower/restore the volume while the user is
17
+ * speaking (push-to-talk), so the assistant doesn't talk over them.
18
+ * - Sessions are chained, so sequential utterances queue and drain in order
19
+ * rather than overlapping.
20
+ *
21
+ * Errors are swallowed (debug-logged) so a synthesis or playback failure never
22
+ * throws into the turn. A process-level singleton ({@link vocalizer}) is shared
23
+ * by the event controller (streaming deltas) and the ask tool (spoken questions).
24
+ */
25
+ import { logger } from "@oh-my-pi/pi-utils";
26
+ import { settings } from "../config/settings";
27
+ import { DEFAULT_TTS_VOICE } from "./models";
28
+ import { createStreamingPlayer, DUCK_GAIN } from "./streaming-player";
29
+ import { type TtsStreamHandle, ttsClient } from "./tts-client";
30
+
31
/**
 * Audio sink the {@link Vocalizer} plays synthesized speech through. One
 * player is created per utterance session.
 */
export interface VocalizerPlayer {
	/** Called once, before the first write, with the first chunk's sample rate. */
	start(sampleRate: number): void;
	/** Called with the PCM samples of each synthesized chunk, in arrival order. */
	write(pcm: Float32Array): void;
	/** Set the playback gain; the vocalizer passes `DUCK_GAIN` while ducked and `1` otherwise. */
	setGain(gain: number): void;
	/** Called after the last chunk of an uninterrupted utterance; the vocalizer awaits the result before starting the next session. */
	end(): Promise<void>;
	/** Called when the session is interrupted, fails, or produced no audio. */
	stop(): void;
}
38
+
39
/**
 * Streams assistant text into the TTS engine and plays the resulting audio,
 * one session per utterance, with sessions played back strictly in order.
 *
 * Every session that has not finished playing (the one currently playing plus
 * any queued behind it) is tracked, so {@link Vocalizer.clear} interrupts all
 * of them and {@link Vocalizer.duck}/{@link Vocalizer.unduck} reach the player
 * that is actually audible, not just the most recently opened one.
 */
export class Vocalizer {
	/** Open stream session for the current utterance; null when none is active. */
	#handle: TtsStreamHandle | null = null;
	/** Sessions whose playback has not finished yet (playing or queued). */
	#live = new Set<{ readonly abort: AbortController; readonly player: VocalizerPlayer }>();
	/** Serialized playback chain across sessions; awaited by {@link idle}. */
	#chain: Promise<void> = Promise.resolve();
	/** Whether the user is currently speaking; new sessions open ducked. */
	#ducked = false;
	#createPlayer: () => VocalizerPlayer;

	/**
	 * @param createPlayer Factory invoked once per session to obtain its player.
	 */
	constructor(createPlayer: () => VocalizerPlayer = createStreamingPlayer) {
		this.#createPlayer = createPlayer;
	}

	/**
	 * Stream a delta of assistant text into the engine. No-op when vocalization
	 * is disabled. The engine buffers the running text and emits audio for each
	 * complete sentence; the trailing partial is flushed by {@link flush}.
	 */
	pushDelta(text: string): void {
		if (!settings.get("speech.enabled")) return;
		if (!text) return;
		this.#ensureSession().push(text);
	}

	/**
	 * Close the current input stream (call at message/turn end). The engine
	 * flushes its trailing partial as a final chunk; the player keeps draining
	 * queued audio until it completes.
	 */
	flush(): void {
		this.#handle?.end();
		this.#handle = null;
	}

	/**
	 * Speak a complete piece of text in one shot (ask questions, yield-mode final
	 * message): stream it in and immediately close the input. No-op when disabled.
	 */
	speak(text: string): void {
		if (!settings.get("speech.enabled")) return;
		if (!text) return;
		this.#ensureSession().push(text);
		this.flush();
	}

	/**
	 * Interrupt and drop every live session — the one playing and any queued
	 * behind it — aborting synthesis and stopping playback (new turn / user
	 * message / Esc interrupt). Never throws: a failing player is debug-logged.
	 */
	clear(): void {
		this.#handle = null;
		// Snapshot first: sessions remove themselves from the set as they finish.
		const interrupted = [...this.#live];
		this.#live.clear();
		for (const { abort, player } of interrupted) {
			abort.abort();
			this.#stopQuietly(player);
		}
	}

	/** Lower the volume while the user is speaking (push-to-talk), so speech doesn't drown them out. */
	duck(): void {
		this.#ducked = true;
		this.#applyGain(DUCK_GAIN);
	}

	/** Restore full volume once the user stops speaking. */
	unduck(): void {
		this.#ducked = false;
		this.#applyGain(1);
	}

	/** Resolve once the playback chain has drained (tests / shutdown). */
	idle(): Promise<void> {
		return this.#chain;
	}

	/**
	 * Open a streaming-synthesis session lazily on the first delta and chain its
	 * playback after any prior session's, so sequential utterances never overlap.
	 */
	#ensureSession(): TtsStreamHandle {
		if (this.#handle) return this.#handle;
		const modelKey = settings.get("tts.localModel");
		const voice = settings.get("speech.voice") || DEFAULT_TTS_VOICE;
		const abort = new AbortController();
		const handle = ttsClient.synthesizeStream(modelKey, { voice, signal: abort.signal });
		this.#handle = handle;
		const player = this.#createPlayer();
		player.setGain(this.#ducked ? DUCK_GAIN : 1);
		const session = { abort, player };
		this.#live.add(session);
		this.#chain = this.#chain.then(() => this.#play(handle, session));
		return handle;
	}

	/**
	 * Feed each synthesized sentence into the session's player in arrival
	 * order; abort stops it. Never rejects, so one failed session cannot
	 * prevent later sessions on the chain from playing.
	 */
	async #play(
		handle: TtsStreamHandle,
		session: { readonly abort: AbortController; readonly player: VocalizerPlayer },
	): Promise<void> {
		const { player } = session;
		const { signal } = session.abort;
		let started = false;
		let drained = false;
		try {
			for await (const chunk of handle.chunks) {
				if (signal.aborted) break;
				if (!started) {
					player.start(chunk.sampleRate);
					started = true;
				}
				player.write(chunk.pcm);
			}
			if (started && !signal.aborted) {
				await player.end();
				drained = true;
			}
		} catch (error) {
			logger.debug("vocalizer: stream failed", {
				error: error instanceof Error ? error.message : String(error),
			});
		} finally {
			this.#live.delete(session);
		}
		// Interrupted, failed, or produced no audio: make sure the player is released.
		if (!drained) this.#stopQuietly(player);
	}

	/** Apply `gain` to every live player, so the audible one is always covered. */
	#applyGain(gain: number): void {
		for (const { player } of this.#live) {
			try {
				player.setGain(gain);
			} catch (error) {
				logger.debug("vocalizer: gain change failed", {
					error: error instanceof Error ? error.message : String(error),
				});
			}
		}
	}

	/** Stop a player, debug-logging instead of propagating a failure. */
	#stopQuietly(player: VocalizerPlayer): void {
		try {
			player.stop();
		} catch (error) {
			logger.debug("vocalizer: player stop failed", {
				error: error instanceof Error ? error.message : String(error),
			});
		}
	}
}
160
+
161
/**
 * Process-level vocalizer shared by the event controller and the ask tool.
 * Constructed with the default player factory.
 */
export const vocalizer = new Vocalizer();