@oh-my-pi/pi-coding-agent 15.12.4 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. package/CHANGELOG.md +304 -6
  2. package/dist/cli.js +1015 -881
  3. package/dist/types/async/job-manager.d.ts +15 -0
  4. package/dist/types/autolearn/controller.d.ts +25 -0
  5. package/dist/types/autolearn/managed-skills.d.ts +45 -0
  6. package/dist/types/autoresearch/state.d.ts +1 -1
  7. package/dist/types/autoresearch/types.d.ts +1 -1
  8. package/dist/types/cli/args.d.ts +19 -1
  9. package/dist/types/cli/session-picker.d.ts +1 -1
  10. package/dist/types/cli/setup-cli.d.ts +1 -1
  11. package/dist/types/cli/setup-model-picker.d.ts +14 -0
  12. package/dist/types/collab/protocol.d.ts +1 -1
  13. package/dist/types/commands/say.d.ts +24 -0
  14. package/dist/types/config/keybindings.d.ts +3 -3
  15. package/dist/types/config/model-registry.d.ts +10 -0
  16. package/dist/types/config/models-config-schema.d.ts +12 -0
  17. package/dist/types/config/models-config.d.ts +8 -2
  18. package/dist/types/config/settings-schema.d.ts +261 -58
  19. package/dist/types/export/html/index.d.ts +2 -1
  20. package/dist/types/extensibility/extensions/model-api.d.ts +17 -0
  21. package/dist/types/extensibility/extensions/runner.d.ts +3 -1
  22. package/dist/types/extensibility/extensions/types.d.ts +47 -1
  23. package/dist/types/extensibility/hooks/index.d.ts +2 -1
  24. package/dist/types/extensibility/plugins/legacy-pi-compat.d.ts +9 -0
  25. package/dist/types/extensibility/plugins/loader.d.ts +11 -0
  26. package/dist/types/extensibility/shared-events.d.ts +1 -1
  27. package/dist/types/extensibility/skills.d.ts +10 -0
  28. package/dist/types/goals/guided-setup.d.ts +18 -0
  29. package/dist/types/goals/state.d.ts +1 -1
  30. package/dist/types/hindsight/transcript.d.ts +1 -1
  31. package/dist/types/index.d.ts +5 -0
  32. package/dist/types/internal-urls/local-protocol.d.ts +4 -2
  33. package/dist/types/main.d.ts +4 -3
  34. package/dist/types/mcp/startup-events.d.ts +11 -0
  35. package/dist/types/memories/index.d.ts +7 -0
  36. package/dist/types/memory-backend/local-backend.d.ts +4 -3
  37. package/dist/types/mnemopi/config.d.ts +4 -4
  38. package/dist/types/modes/components/agent-hub.d.ts +6 -0
  39. package/dist/types/modes/components/assistant-message.d.ts +1 -2
  40. package/dist/types/modes/components/compaction-summary-message.d.ts +15 -1
  41. package/dist/types/modes/components/custom-editor.d.ts +39 -1
  42. package/dist/types/modes/components/custom-editor.test.d.ts +1 -0
  43. package/dist/types/modes/components/session-selector.d.ts +1 -1
  44. package/dist/types/modes/components/tool-execution.d.ts +26 -16
  45. package/dist/types/modes/components/transcript-container.d.ts +23 -2
  46. package/dist/types/modes/components/tree-selector.d.ts +1 -1
  47. package/dist/types/modes/components/usage-row.d.ts +3 -0
  48. package/dist/types/modes/controllers/command-controller.d.ts +2 -2
  49. package/dist/types/modes/controllers/input-controller.d.ts +14 -0
  50. package/dist/types/modes/controllers/selector-controller.d.ts +3 -1
  51. package/dist/types/modes/gradient-highlight.d.ts +9 -4
  52. package/dist/types/modes/image-references.d.ts +6 -0
  53. package/dist/types/modes/interactive-mode.d.ts +27 -3
  54. package/dist/types/modes/magic-keywords.d.ts +13 -1
  55. package/dist/types/modes/rpc/rpc-mode.d.ts +35 -1
  56. package/dist/types/modes/rpc/rpc-types.d.ts +9 -1
  57. package/dist/types/modes/runtime-init.d.ts +4 -0
  58. package/dist/types/modes/theme/theme.d.ts +13 -2
  59. package/dist/types/modes/types.d.ts +8 -2
  60. package/dist/types/modes/utils/ui-helpers.d.ts +1 -1
  61. package/dist/types/registry/agent-registry.d.ts +17 -0
  62. package/dist/types/secrets/obfuscator.d.ts +1 -1
  63. package/dist/types/session/agent-session.d.ts +14 -2
  64. package/dist/types/session/indexed-session-storage.d.ts +3 -4
  65. package/dist/types/session/session-context.d.ts +39 -0
  66. package/dist/types/session/session-entries.d.ts +159 -0
  67. package/dist/types/session/session-listing.d.ts +69 -0
  68. package/dist/types/session/session-loader.d.ts +16 -0
  69. package/dist/types/session/session-manager.d.ts +82 -474
  70. package/dist/types/session/session-migrations.d.ts +12 -0
  71. package/dist/types/session/session-paths.d.ts +25 -0
  72. package/dist/types/session/session-persistence.d.ts +8 -0
  73. package/dist/types/session/session-storage.d.ts +11 -12
  74. package/dist/types/session/snapcompact-inline.d.ts +12 -1
  75. package/dist/types/session/snapcompact-savings-journal.d.ts +46 -0
  76. package/dist/types/session/tool-choice-queue.d.ts +6 -6
  77. package/dist/types/stt/asr-client.d.ts +90 -0
  78. package/dist/types/stt/asr-protocol.d.ts +97 -0
  79. package/dist/types/stt/asr-worker.d.ts +2 -0
  80. package/dist/types/stt/downloader.d.ts +38 -0
  81. package/dist/types/stt/endpointer.d.ts +59 -0
  82. package/dist/types/stt/index.d.ts +5 -1
  83. package/dist/types/stt/models.d.ts +120 -0
  84. package/dist/types/stt/recorder.d.ts +17 -0
  85. package/dist/types/stt/stt-controller.d.ts +6 -0
  86. package/dist/types/stt/transcriber.d.ts +5 -7
  87. package/dist/types/stt/wav.d.ts +29 -0
  88. package/dist/types/system-prompt.d.ts +4 -0
  89. package/dist/types/task/executor.d.ts +2 -0
  90. package/dist/types/task/index.d.ts +9 -1
  91. package/dist/types/task/types.d.ts +36 -0
  92. package/dist/types/tools/bash.d.ts +2 -2
  93. package/dist/types/tools/eval-render.d.ts +1 -1
  94. package/dist/types/tools/index.d.ts +11 -1
  95. package/dist/types/tools/irc.d.ts +1 -0
  96. package/dist/types/tools/learn.d.ts +51 -0
  97. package/dist/types/tools/manage-skill.d.ts +40 -0
  98. package/dist/types/tools/plan-mode-guard.d.ts +10 -0
  99. package/dist/types/tools/renderers.d.ts +7 -11
  100. package/dist/types/tools/ssh.d.ts +1 -1
  101. package/dist/types/tools/todo.d.ts +1 -1
  102. package/dist/types/tools/tts.d.ts +25 -0
  103. package/dist/types/tools/write.d.ts +1 -1
  104. package/dist/types/tts/downloader.d.ts +20 -0
  105. package/dist/types/tts/index.d.ts +8 -0
  106. package/dist/types/tts/models.d.ts +82 -0
  107. package/dist/types/tts/player.d.ts +32 -0
  108. package/dist/types/tts/runtime.d.ts +6 -0
  109. package/dist/types/tts/streaming-player.d.ts +41 -0
  110. package/dist/types/tts/tts-client.d.ts +93 -0
  111. package/dist/types/tts/tts-protocol.d.ts +95 -0
  112. package/dist/types/tts/tts-worker.d.ts +2 -0
  113. package/dist/types/tts/vocalizer.d.ts +41 -0
  114. package/dist/types/tts/wav.d.ts +8 -0
  115. package/dist/types/utils/tool-choice.d.ts +8 -0
  116. package/dist/types/utils/tools-manager.d.ts +2 -1
  117. package/dist/types/utils/tools-manager.test.d.ts +1 -0
  118. package/dist/types/web/scrapers/github.d.ts +1 -1
  119. package/package.json +15 -14
  120. package/src/async/job-manager.ts +49 -0
  121. package/src/autolearn/controller.ts +139 -0
  122. package/src/autolearn/managed-skills.ts +257 -0
  123. package/src/autoresearch/state.ts +1 -1
  124. package/src/autoresearch/types.ts +1 -1
  125. package/src/cli/args.ts +56 -2
  126. package/src/cli/session-picker.ts +2 -1
  127. package/src/cli/setup-cli.ts +148 -47
  128. package/src/cli/setup-model-picker.ts +43 -0
  129. package/src/cli-commands.ts +1 -0
  130. package/src/cli.ts +45 -13
  131. package/src/collab/host.ts +1 -1
  132. package/src/collab/protocol.ts +1 -1
  133. package/src/commands/say.ts +102 -0
  134. package/src/commands/setup.ts +1 -1
  135. package/src/commit/agentic/tools/analyze-file.ts +3 -0
  136. package/src/config/keybindings.ts +2 -2
  137. package/src/config/model-discovery.ts +11 -5
  138. package/src/config/model-registry.ts +64 -9
  139. package/src/config/models-config-schema.ts +4 -1
  140. package/src/config/models-config.ts +2 -1
  141. package/src/config/settings-schema.ts +248 -32
  142. package/src/config/settings.ts +10 -0
  143. package/src/discovery/builtin.ts +23 -1
  144. package/src/discovery/claude-plugins.ts +44 -5
  145. package/src/discovery/helpers.ts +41 -1
  146. package/src/eval/__tests__/budget-bridge.test.ts +1 -1
  147. package/src/eval/js/shared/prelude.txt +69 -17
  148. package/src/export/html/index.ts +3 -6
  149. package/src/extensibility/extensions/model-api.ts +41 -0
  150. package/src/extensibility/extensions/runner.ts +4 -0
  151. package/src/extensibility/extensions/types.ts +52 -1
  152. package/src/extensibility/extensions/wrapper.ts +41 -5
  153. package/src/extensibility/hooks/index.ts +2 -1
  154. package/src/extensibility/plugins/legacy-pi-compat.ts +43 -13
  155. package/src/extensibility/plugins/loader.ts +30 -19
  156. package/src/extensibility/plugins/manager.ts +221 -90
  157. package/src/extensibility/shared-events.ts +1 -1
  158. package/src/extensibility/skills.ts +96 -15
  159. package/src/goals/guided-setup.ts +133 -0
  160. package/src/goals/state.ts +1 -1
  161. package/src/hindsight/transcript.ts +1 -1
  162. package/src/index.ts +5 -0
  163. package/src/internal-urls/docs-index.generated.ts +10 -10
  164. package/src/internal-urls/history-protocol.ts +1 -1
  165. package/src/internal-urls/local-protocol.ts +29 -7
  166. package/src/main.ts +27 -7
  167. package/src/mcp/startup-events.ts +21 -0
  168. package/src/mcp/transports/stdio.ts +2 -1
  169. package/src/memories/index.ts +146 -11
  170. package/src/memory-backend/local-backend.ts +11 -5
  171. package/src/mnemopi/backend.ts +1 -0
  172. package/src/mnemopi/config.ts +26 -10
  173. package/src/modes/acp/acp-agent.ts +3 -5
  174. package/src/modes/components/agent-hub.ts +49 -4
  175. package/src/modes/components/assistant-message.ts +4 -37
  176. package/src/modes/components/compaction-summary-message.ts +125 -26
  177. package/src/modes/components/custom-editor.test.ts +96 -0
  178. package/src/modes/components/custom-editor.ts +164 -8
  179. package/src/modes/components/session-selector.ts +1 -1
  180. package/src/modes/components/settings-defs.ts +7 -0
  181. package/src/modes/components/tool-execution.ts +82 -43
  182. package/src/modes/components/transcript-container.ts +70 -1
  183. package/src/modes/components/tree-selector.ts +1 -1
  184. package/src/modes/components/usage-row.ts +18 -0
  185. package/src/modes/components/user-message.ts +4 -2
  186. package/src/modes/controllers/command-controller.ts +14 -4
  187. package/src/modes/controllers/event-controller.ts +78 -11
  188. package/src/modes/controllers/extension-ui-controller.ts +6 -0
  189. package/src/modes/controllers/input-controller.ts +258 -27
  190. package/src/modes/controllers/selector-controller.ts +12 -2
  191. package/src/modes/gradient-highlight.ts +21 -9
  192. package/src/modes/image-references.ts +20 -0
  193. package/src/modes/interactive-mode.ts +286 -40
  194. package/src/modes/magic-keywords.ts +27 -5
  195. package/src/modes/rpc/rpc-mode.ts +146 -14
  196. package/src/modes/rpc/rpc-subagents.ts +2 -2
  197. package/src/modes/rpc/rpc-types.ts +8 -2
  198. package/src/modes/runtime-init.ts +28 -3
  199. package/src/modes/theme/theme.ts +98 -50
  200. package/src/modes/types.ts +6 -2
  201. package/src/modes/utils/hotkeys-markdown.ts +1 -1
  202. package/src/modes/utils/ui-helpers.ts +34 -6
  203. package/src/priority.json +5 -1
  204. package/src/prompts/agents/task.md +1 -0
  205. package/src/prompts/goals/guided-goal-interview.md +8 -0
  206. package/src/prompts/goals/guided-goal-system.md +12 -0
  207. package/src/prompts/memories/read-path.md +6 -0
  208. package/src/prompts/system/autolearn-guidance-learn.md +1 -0
  209. package/src/prompts/system/autolearn-guidance.md +7 -0
  210. package/src/prompts/system/autolearn-nudge.md +3 -0
  211. package/src/prompts/system/eager-task.md +7 -0
  212. package/src/prompts/system/eager-todo.md +11 -6
  213. package/src/prompts/system/subagent-system-prompt.md +4 -0
  214. package/src/prompts/system/system-prompt.md +10 -5
  215. package/src/prompts/system/title-marker-instruction.md +1 -0
  216. package/src/prompts/system/title-system-marker.md +16 -0
  217. package/src/prompts/tools/job.md +1 -0
  218. package/src/prompts/tools/learn.md +7 -0
  219. package/src/prompts/tools/manage-skill.md +9 -0
  220. package/src/prompts/tools/task.md +3 -0
  221. package/src/registry/agent-registry.ts +30 -0
  222. package/src/sdk.ts +88 -24
  223. package/src/secrets/obfuscator.ts +1 -1
  224. package/src/session/agent-session.ts +209 -87
  225. package/src/session/history-storage.ts +2 -2
  226. package/src/session/indexed-session-storage.ts +7 -17
  227. package/src/session/session-context.ts +352 -0
  228. package/src/session/session-entries.ts +194 -0
  229. package/src/session/session-listing.ts +588 -0
  230. package/src/session/session-loader.ts +106 -0
  231. package/src/session/session-manager.ts +933 -3145
  232. package/src/session/session-migrations.ts +78 -0
  233. package/src/session/session-paths.ts +193 -0
  234. package/src/session/session-persistence.ts +131 -0
  235. package/src/session/session-storage.ts +91 -50
  236. package/src/session/snapcompact-inline.ts +21 -1
  237. package/src/session/snapcompact-savings-journal.ts +113 -0
  238. package/src/session/tool-choice-queue.ts +23 -11
  239. package/src/slash-commands/builtin-registry.ts +25 -3
  240. package/src/stt/asr-client.ts +520 -0
  241. package/src/stt/asr-protocol.ts +65 -0
  242. package/src/stt/asr-worker.ts +790 -0
  243. package/src/stt/downloader.ts +107 -47
  244. package/src/stt/endpointer.ts +259 -0
  245. package/src/stt/index.ts +5 -1
  246. package/src/stt/models.ts +150 -0
  247. package/src/stt/recorder.ts +247 -60
  248. package/src/stt/stt-controller.ts +201 -22
  249. package/src/stt/transcriber.ts +37 -68
  250. package/src/stt/wav.ts +173 -0
  251. package/src/system-prompt.ts +8 -0
  252. package/src/task/agents.ts +1 -2
  253. package/src/task/executor.ts +49 -15
  254. package/src/task/index.ts +60 -6
  255. package/src/task/render.ts +83 -8
  256. package/src/task/types.ts +53 -0
  257. package/src/tools/ask.ts +8 -0
  258. package/src/tools/bash.ts +4 -3
  259. package/src/tools/eval-render.ts +4 -3
  260. package/src/tools/index.ts +40 -4
  261. package/src/tools/irc.ts +10 -2
  262. package/src/tools/job.ts +14 -2
  263. package/src/tools/learn.ts +144 -0
  264. package/src/tools/manage-skill.ts +104 -0
  265. package/src/tools/plan-mode-guard.ts +53 -19
  266. package/src/tools/renderers.ts +7 -11
  267. package/src/tools/ssh.ts +4 -3
  268. package/src/tools/todo.ts +1 -1
  269. package/src/tools/tts.ts +203 -92
  270. package/src/tools/write.ts +18 -2
  271. package/src/tts/downloader.ts +64 -0
  272. package/src/tts/index.ts +8 -0
  273. package/src/tts/models.ts +137 -0
  274. package/src/tts/player.ts +137 -0
  275. package/src/tts/runtime.ts +21 -0
  276. package/src/tts/streaming-player.ts +266 -0
  277. package/src/tts/tts-client.ts +647 -0
  278. package/src/tts/tts-protocol.ts +60 -0
  279. package/src/tts/tts-worker.ts +497 -0
  280. package/src/tts/vocalizer.ts +162 -0
  281. package/src/tts/wav.ts +58 -0
  282. package/src/utils/title-generator.ts +48 -5
  283. package/src/utils/tool-choice.ts +16 -0
  284. package/src/utils/tools-manager.test.ts +25 -0
  285. package/src/utils/tools-manager.ts +19 -1
  286. package/src/web/scrapers/github.ts +96 -0
  287. package/src/web/search/index.ts +13 -0
  288. package/src/web/search/providers/searxng.ts +13 -1
  289. package/dist/types/stt/setup.d.ts +0 -18
  290. package/src/stt/setup.ts +0 -52
  291. package/src/stt/transcribe.py +0 -70
@@ -2,7 +2,9 @@ import * as fs from "node:fs/promises";
2
2
  import * as os from "node:os";
3
3
  import * as path from "node:path";
4
4
  import { $which, logger, Snowflake } from "@oh-my-pi/pi-utils";
5
- import { $ } from "bun";
5
+ import { $, type Subprocess } from "bun";
6
+ import { ensureTool, getToolPath } from "../utils/tools-manager";
7
+ import { decodePcmS16LE } from "./wav";
6
8
 
7
9
  export interface RecordingHandle {
8
10
  stop(): Promise<void>;
@@ -14,18 +16,13 @@ const isWindows = process.platform === "win32";
14
16
  * Returns available recording tools in priority order.
15
17
  */
16
18
  export function detectRecordingTools(): string[] {
17
- const tools: string[] = [];
18
- if ($which("sox")) tools.push("sox");
19
- if ($which("ffmpeg")) tools.push("ffmpeg");
20
- if (!isWindows && $which("arecord")) tools.push("arecord");
21
- if (isWindows) tools.push("powershell");
22
- return tools;
19
+ return [...new Set(detectRecorders().map(recorder => recorder.tool))];
23
20
  }
24
21
 
25
22
  // ── ffmpeg dshow device detection ──────────────────────────────────
26
23
 
27
- async function detectWindowsAudioDevice(): Promise<string> {
28
- const result = await $`ffmpeg -f dshow -list_devices true -i dummy`.quiet().nothrow();
24
+ async function detectWindowsAudioDevice(bin: string): Promise<string> {
25
+ const result = await $`${bin} -f dshow -list_devices true -i dummy`.quiet().nothrow();
29
26
  const output = result.stderr.toString();
30
27
  const audioDevices: string[] = [];
31
28
  const re = /"([^"]+)"\s*\(audio\)/gi;
@@ -41,11 +38,11 @@ async function detectWindowsAudioDevice(): Promise<string> {
41
38
 
42
39
  // ── Recording implementations ──────────────────────────────────────
43
40
 
44
- async function startSoxRecording(outputPath: string): Promise<RecordingHandle> {
41
+ async function startSoxRecording(bin: string, outputPath: string): Promise<RecordingHandle> {
45
42
  // On Windows, "-d" (default device) often fails. Use "-t waveaudio 0" for the first input.
46
43
  const inputArgs = isWindows ? ["-t", "waveaudio", "0"] : ["-d"];
47
44
 
48
- const proc = Bun.spawn(["sox", ...inputArgs, "-r", "16000", "-c", "1", "-b", "16", "-t", "wav", outputPath], {
45
+ const proc = Bun.spawn([bin, ...inputArgs, "-r", "16000", "-c", "1", "-b", "16", "-t", "wav", outputPath], {
49
46
  stdout: "pipe",
50
47
  stderr: "ignore",
51
48
  });
@@ -58,12 +55,12 @@ async function startSoxRecording(outputPath: string): Promise<RecordingHandle> {
58
55
  };
59
56
  }
60
57
 
61
- async function startFFmpegRecording(outputPath: string): Promise<RecordingHandle> {
58
+ async function startFFmpegRecording(bin: string, outputPath: string): Promise<RecordingHandle> {
62
59
  let args: string[];
63
60
  if (isWindows) {
64
- const device = await detectWindowsAudioDevice();
61
+ const device = await detectWindowsAudioDevice(bin);
65
62
  args = [
66
- "ffmpeg",
63
+ bin,
67
64
  "-f",
68
65
  "dshow",
69
66
  "-i",
@@ -79,11 +76,11 @@ async function startFFmpegRecording(outputPath: string): Promise<RecordingHandle
79
76
  ];
80
77
  } else if (process.platform === "darwin") {
81
78
  args = [
82
- "ffmpeg",
79
+ bin,
83
80
  "-f",
84
81
  "avfoundation",
85
82
  "-i",
86
- ":0",
83
+ ":default",
87
84
  "-ar",
88
85
  "16000",
89
86
  "-ac",
@@ -94,21 +91,7 @@ async function startFFmpegRecording(outputPath: string): Promise<RecordingHandle
94
91
  outputPath,
95
92
  ];
96
93
  } else {
97
- args = [
98
- "ffmpeg",
99
- "-f",
100
- "pulse",
101
- "-i",
102
- "default",
103
- "-ar",
104
- "16000",
105
- "-ac",
106
- "1",
107
- "-sample_fmt",
108
- "s16",
109
- "-y",
110
- outputPath,
111
- ];
94
+ args = [bin, "-f", "pulse", "-i", "default", "-ar", "16000", "-ac", "1", "-sample_fmt", "s16", "-y", outputPath];
112
95
  }
113
96
 
114
97
  const proc = Bun.spawn(args, {
@@ -133,8 +116,8 @@ async function startFFmpegRecording(outputPath: string): Promise<RecordingHandle
133
116
  };
134
117
  }
135
118
 
136
- async function startArecordRecording(outputPath: string): Promise<RecordingHandle> {
137
- const proc = Bun.spawn(["arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", outputPath], {
119
+ async function startArecordRecording(bin: string, outputPath: string): Promise<RecordingHandle> {
120
+ const proc = Bun.spawn([bin, "-f", "S16_LE", "-r", "16000", "-c", "1", outputPath], {
138
121
  stdout: "pipe",
139
122
  stderr: "ignore",
140
123
  });
@@ -277,7 +260,9 @@ async function startPowerShellRecording(outputPath: string): Promise<RecordingHa
277
260
 
278
261
  // ── Health check ───────────────────────────────────────────────────
279
262
 
280
- async function verifyProcessAlive(proc: ReturnType<typeof Bun.spawn>, tool: string): Promise<void> {
263
+ type RecorderProcess = Subprocess<"ignore" | "pipe", "pipe", "ignore">;
264
+
265
+ async function verifyProcessAlive(proc: RecorderProcess, tool: string): Promise<void> {
281
266
  await Bun.sleep(300);
282
267
 
283
268
  const exited = await Promise.race([proc.exited.then(code => code), Bun.sleep(0).then(() => "running" as const)]);
@@ -293,38 +278,101 @@ async function verifyProcessAlive(proc: ReturnType<typeof Bun.spawn>, tool: stri
293
278
 
294
279
  // ── Public API ─────────────────────────────────────────────────────
295
280
 
281
+ export interface ResolvedRecorder {
282
+ tool: "sox" | "ffmpeg" | "arecord" | "powershell";
283
+ bin: string;
284
+ }
285
+
286
+ /**
287
+ * Resolve a usable recorder without triggering any download. Priority:
288
+ * sox (PATH) → ffmpeg (PATH or previously-downloaded static binary) →
289
+ * arecord (PATH, non-Windows) → PowerShell mci fallback (Windows) → none.
290
+ */
291
+ function detectRecorders(): ResolvedRecorder[] {
292
+ const recorders: ResolvedRecorder[] = [];
293
+ const sox = $which("sox");
294
+ if (sox) recorders.push({ tool: "sox", bin: sox });
295
+
296
+ const pathFfmpeg = $which("ffmpeg");
297
+ if (pathFfmpeg) recorders.push({ tool: "ffmpeg", bin: pathFfmpeg });
298
+ const bundledFfmpeg = getToolPath("ffmpeg");
299
+ if (bundledFfmpeg && bundledFfmpeg !== pathFfmpeg) recorders.push({ tool: "ffmpeg", bin: bundledFfmpeg });
300
+
301
+ if (!isWindows) {
302
+ const arecord = $which("arecord");
303
+ if (arecord) recorders.push({ tool: "arecord", bin: arecord });
304
+ }
305
+
306
+ if (isWindows) recorders.push({ tool: "powershell", bin: "powershell" });
307
+ return recorders;
308
+ }
309
+
310
+ export function detectRecorder(): ResolvedRecorder | null {
311
+ return detectRecorders()[0] ?? null;
312
+ }
313
+
314
+ /**
315
+ * Ensure a recorder is available, downloading the static ffmpeg binary when
316
+ * nothing is already present. Returns the resolved recorder.
317
+ */
318
+ export async function ensureRecorder(
319
+ onProgress?: (p: { stage: string; percent?: number }) => void,
320
+ signal?: AbortSignal,
321
+ ): Promise<ResolvedRecorder> {
322
+ const existing = detectRecorder();
323
+ if (existing) return existing;
324
+
325
+ const bin = await ensureTool("ffmpeg", { signal, notify: m => onProgress?.({ stage: m }) });
326
+ if (bin) return { tool: "ffmpeg", bin };
327
+
328
+ if (isWindows) return { tool: "powershell", bin: "powershell" };
329
+
330
+ throw new Error(
331
+ "No audio recorder available and automatic ffmpeg download failed. " +
332
+ "Install SoX or FFmpeg manually and add it to PATH.",
333
+ );
334
+ }
335
+
336
+ function recorderFailure(recorder: ResolvedRecorder, error: unknown): string {
337
+ const message = error instanceof Error ? error.message : String(error);
338
+ return `${recorder.tool} (${recorder.bin}): ${message}`;
339
+ }
340
+
341
+ async function startRecordingWithRecorder(recorder: ResolvedRecorder, outputPath: string): Promise<RecordingHandle> {
342
+ logger.debug("Starting audio recording", { tool: recorder.tool, bin: recorder.bin, outputPath });
343
+ switch (recorder.tool) {
344
+ case "sox":
345
+ return startSoxRecording(recorder.bin, outputPath);
346
+ case "ffmpeg":
347
+ return startFFmpegRecording(recorder.bin, outputPath);
348
+ case "arecord":
349
+ return startArecordRecording(recorder.bin, outputPath);
350
+ case "powershell":
351
+ return startPowerShellRecording(outputPath);
352
+ }
353
+ }
354
+
296
355
  export async function startRecording(outputPath: string): Promise<RecordingHandle> {
297
- const tools = detectRecordingTools();
298
- if (tools.length === 0) {
299
- throw new Error(
300
- isWindows
301
- ? "No audio recording tool found. Install FFmpeg or SoX and add to PATH."
302
- : "No audio recording tool found. Install SoX: sudo apt install sox, or FFmpeg: sudo apt install ffmpeg",
303
- );
356
+ const recorders = detectRecorders();
357
+ if (recorders.length === 0) {
358
+ throw new Error("No audio recorder available — run `omp setup speech`");
304
359
  }
305
360
 
306
- const errors: string[] = [];
307
- for (const tool of tools) {
308
- logger.debug("Trying audio recording", { tool, outputPath });
361
+ const failures: string[] = [];
362
+ for (const recorder of recorders) {
309
363
  try {
310
- switch (tool) {
311
- case "sox":
312
- return await startSoxRecording(outputPath);
313
- case "ffmpeg":
314
- return await startFFmpegRecording(outputPath);
315
- case "arecord":
316
- return await startArecordRecording(outputPath);
317
- case "powershell":
318
- return await startPowerShellRecording(outputPath);
319
- }
320
- } catch (err) {
321
- const msg = err instanceof Error ? err.message : String(err);
322
- logger.debug(`Recording tool ${tool} failed, trying next`, { error: msg });
323
- errors.push(`${tool}: ${msg}`);
364
+ return await startRecordingWithRecorder(recorder, outputPath);
365
+ } catch (error) {
366
+ const failure = recorderFailure(recorder, error);
367
+ failures.push(failure);
368
+ logger.warn("STT recorder failed to start; trying fallback", {
369
+ recorder: recorder.tool,
370
+ bin: recorder.bin,
371
+ error: failure,
372
+ });
324
373
  }
325
374
  }
326
-
327
- throw new Error(`All recording tools failed:\n${errors.join("\n")}`);
375
+ throw new Error(`No audio recorder could start — run \`omp setup speech\`.\n${failures.join("\n")}`);
328
376
  }
329
377
 
330
378
  /**
@@ -349,3 +397,142 @@ export async function verifyRecordingFile(filePath: string): Promise<number> {
349
397
  );
350
398
  }
351
399
  }
400
+
401
+ // ── Streaming (live) capture ───────────────────────────────────────
402
+
403
+ export interface StreamingRecordingHandle {
404
+ stop(): Promise<void>;
405
+ }
406
+
407
+ /** Build the argv for a recorder that emits raw 16 kHz mono s16le PCM to stdout. */
408
+ async function streamingRecorderArgs(recorder: ResolvedRecorder): Promise<string[]> {
409
+ const { tool, bin } = recorder;
410
+ switch (tool) {
411
+ case "sox": {
412
+ const input = isWindows ? ["-t", "waveaudio", "0"] : ["-d"];
413
+ return [bin, ...input, "-r", "16000", "-c", "1", "-b", "16", "-e", "signed-integer", "-t", "raw", "-"];
414
+ }
415
+ case "arecord":
416
+ return [bin, "-f", "S16_LE", "-r", "16000", "-c", "1", "-t", "raw", "-"];
417
+ case "ffmpeg": {
418
+ const input = isWindows
419
+ ? ["-f", "dshow", "-i", `audio=${await detectWindowsAudioDevice(bin)}`]
420
+ : process.platform === "darwin"
421
+ ? ["-f", "avfoundation", "-i", ":default"]
422
+ : ["-f", "pulse", "-i", "default"];
423
+ return [bin, ...input, "-ar", "16000", "-ac", "1", "-f", "s16le", "pipe:1"];
424
+ }
425
+ case "powershell":
426
+ throw new Error("PowerShell recorder cannot stream PCM to a pipe");
427
+ }
428
+ }
429
+
430
+ /**
431
+ * Start a recorder that streams raw 16 kHz mono s16le PCM to stdout, decoding it
432
+ * to float frames delivered through `onAudio` as they arrive. Throws if the recorder
433
+ * cannot stream or fails its startup check; `startStreamingRecording` returns `null`
434
+ * when only the file-based Windows PowerShell mci recorder exists (batch fallback).
435
+ */
436
+ async function startStreamingRecordingWithRecorder(
437
+ recorder: ResolvedRecorder,
438
+ onAudio: (samples: Float32Array) => void,
439
+ ): Promise<StreamingRecordingHandle> {
440
+ const args = await streamingRecorderArgs(recorder);
441
+ logger.debug("Starting streaming audio recording", { tool: recorder.tool, bin: recorder.bin });
442
+ const proc = Bun.spawn(args, { stdin: "pipe", stdout: "pipe", stderr: "ignore" });
443
+
444
+ // Read s16le bytes off stdout, carrying any trailing odd byte across chunk
445
+ // boundaries so a sample is never split. Runs until the process closes stdout.
446
+ const reader = (proc.stdout as ReadableStream<Uint8Array>).getReader();
447
+ let leftover: Uint8Array | null = null;
448
+ const pump = async (): Promise<void> => {
449
+ try {
450
+ for (;;) {
451
+ const { done, value } = await reader.read();
452
+ if (done) break;
453
+ if (!value || value.length === 0) continue;
454
+ let bytes = value;
455
+ if (leftover) {
456
+ const merged = new Uint8Array(leftover.length + value.length);
457
+ merged.set(leftover, 0);
458
+ merged.set(value, leftover.length);
459
+ bytes = merged;
460
+ leftover = null;
461
+ }
462
+ const usable = bytes.length - (bytes.length % 2);
463
+ if (usable < bytes.length) leftover = bytes.slice(usable);
464
+ if (usable > 0) onAudio(decodePcmS16LE(bytes.subarray(0, usable)));
465
+ }
466
+ } catch (error) {
467
+ logger.debug("stt: streaming recorder read ended", {
468
+ error: error instanceof Error ? error.message : String(error),
469
+ });
470
+ }
471
+ };
472
+ void pump();
473
+
474
+ try {
475
+ await verifyProcessAlive(proc, recorder.tool);
476
+ } catch (error) {
477
+ try {
478
+ proc.kill("SIGKILL");
479
+ } catch {
480
+ // Already gone.
481
+ }
482
+ throw error;
483
+ }
484
+
485
+ let stopped = false;
486
+ return {
487
+ async stop() {
488
+ if (stopped) return;
489
+ stopped = true;
490
+ if (recorder.tool === "ffmpeg") {
491
+ try {
492
+ proc.stdin.write("q");
493
+ proc.stdin.end();
494
+ } catch {
495
+ // stdin may already be closed.
496
+ }
497
+ const killTimer = setTimeout(() => proc.kill(), 3000);
498
+ await proc.exited;
499
+ clearTimeout(killTimer);
500
+ } else {
501
+ proc.kill("SIGTERM");
502
+ await proc.exited;
503
+ }
504
+ try {
505
+ await reader.cancel();
506
+ } catch {
507
+ // Reader already released when stdout closed.
508
+ }
509
+ },
510
+ };
511
+ }
512
+
513
+ export async function startStreamingRecording(
514
+ onAudio: (samples: Float32Array) => void,
515
+ ): Promise<StreamingRecordingHandle | null> {
516
+ const recorders = detectRecorders();
517
+ if (recorders.length === 0) {
518
+ throw new Error("No audio recorder available — run `omp setup speech`");
519
+ }
520
+ const streamingRecorders = recorders.filter(recorder => recorder.tool !== "powershell");
521
+ if (streamingRecorders.length === 0) return null;
522
+
523
+ const failures: string[] = [];
524
+ for (const recorder of streamingRecorders) {
525
+ try {
526
+ return await startStreamingRecordingWithRecorder(recorder, onAudio);
527
+ } catch (error) {
528
+ const failure = recorderFailure(recorder, error);
529
+ failures.push(failure);
530
+ logger.warn("STT streaming recorder failed to start; trying fallback", {
531
+ recorder: recorder.tool,
532
+ bin: recorder.bin,
533
+ error: failure,
534
+ });
535
+ }
536
+ }
537
+ throw new Error(`No streaming audio recorder could start.\n${failures.join("\n")}`);
538
+ }
@@ -3,8 +3,17 @@ import * as os from "node:os";
3
3
  import * as path from "node:path";
4
4
  import { logger, Snowflake } from "@oh-my-pi/pi-utils";
5
5
  import { settings } from "../config/settings";
6
+ import { type SttStreamHandle, sttClient } from "./asr-client";
6
7
  import { ensureSTTDependencies } from "./downloader";
7
- import { type RecordingHandle, startRecording, verifyRecordingFile } from "./recorder";
8
+ import { resolveSttModelSpec } from "./models";
9
+ import {
10
+ detectRecorder,
11
+ type RecordingHandle,
12
+ type StreamingRecordingHandle,
13
+ startRecording,
14
+ startStreamingRecording,
15
+ verifyRecordingFile,
16
+ } from "./recorder";
8
17
  import { transcribe } from "./transcriber";
9
18
 
10
19
  export type SttState = "idle" | "recording" | "transcribing";
@@ -13,21 +22,37 @@ interface ToggleOptions {
13
22
  showWarning(msg: string): void;
14
23
  showStatus(msg: string): void;
15
24
  onStateChange(state: SttState): void;
25
+ /** Force a redraw after async edits to the composer (live segment/preview inserts). */
26
+ requestRender?(): void;
16
27
  }
17
28
 
29
+ /** The slice of the composer editor the controller drives. */
18
30
  interface Editor {
19
31
  insertText(text: string): void;
32
+ setVolatileText(text: string): void;
33
+ clearVolatileText(): void;
34
+ commitVolatileText(text: string): void;
20
35
  }
21
36
 
22
37
  export class STTController {
23
38
  #state: SttState = "idle";
24
- #recordingHandle: RecordingHandle | null = null;
25
- #tempFile: string | null = null;
26
39
  #depsResolved = false;
27
40
  #toggling = false;
41
+ #stopAfterStart = false;
28
42
  #disposed = false;
43
+
44
+ // Batch (single-shot) capture.
45
+ #recordingHandle: RecordingHandle | null = null;
46
+ #tempFile: string | null = null;
29
47
  #transcriptionAbort: AbortController | null = null;
30
48
 
49
+ // Live streaming capture.
50
+ #stream: SttStreamHandle | null = null;
51
+ #streamRecorder: StreamingRecordingHandle | null = null;
52
+ #streamEditor: Editor | null = null;
53
+ #streamCommitted = false;
54
+ #streamAbort: AbortController | null = null;
55
+
31
56
  get state(): SttState {
32
57
  return this.#state;
33
58
  }
@@ -38,45 +63,192 @@ export class STTController {
38
63
  }
39
64
 
40
65
  async toggle(editor: Editor, options: ToggleOptions): Promise<void> {
41
- if (this.#toggling) return;
66
+ if (this.#toggling) {
67
+ if (this.#state === "idle" || this.#state === "recording") this.#stopAfterStart = true;
68
+ return;
69
+ }
42
70
  this.#toggling = true;
43
71
  try {
44
72
  switch (this.#state) {
45
73
  case "idle":
46
- await this.#startRecording(options);
74
+ await this.#start(editor, options);
47
75
  break;
48
76
  case "recording":
49
- await this.#stopAndTranscribe(editor, options);
77
+ await this.#stop(editor, options);
50
78
  break;
51
79
  case "transcribing":
52
80
  options.showStatus("Transcription in progress...");
53
81
  break;
54
82
  }
83
+ if (this.#stopAfterStart && this.#state === "recording") {
84
+ this.#stopAfterStart = false;
85
+ await this.#stop(editor, options);
86
+ } else if (this.#state !== "recording") {
87
+ this.#stopAfterStart = false;
88
+ }
55
89
  } finally {
56
90
  this.#toggling = false;
57
91
  }
58
92
  }
59
93
 
60
- async #startRecording(options: ToggleOptions): Promise<void> {
61
- if (!this.#depsResolved) {
62
- try {
63
- options.showStatus("Checking STT dependencies...");
64
- await ensureSTTDependencies({
65
- modelName: settings.get("stt.modelName") as string | undefined,
66
- onProgress: p => options.showStatus(p.stage + (p.percent != null ? ` (${p.percent}%)` : "")),
67
- });
68
- options.showStatus("");
69
- this.#depsResolved = true;
70
- } catch (err) {
71
- const msg = err instanceof Error ? err.message : "Failed to setup STT dependencies";
94
+ async #ensureDeps(options: ToggleOptions): Promise<boolean> {
95
+ if (this.#depsResolved) return true;
96
+ try {
97
+ options.showStatus("Checking STT dependencies...");
98
+ await ensureSTTDependencies({
99
+ modelName: settings.get("stt.modelName") as string | undefined,
100
+ onProgress: p => options.showStatus(p.stage + (p.percent != null ? ` (${p.percent}%)` : "")),
101
+ });
102
+ options.showStatus("");
103
+ this.#depsResolved = true;
104
+ return true;
105
+ } catch (err) {
106
+ const msg = err instanceof Error ? err.message : "Failed to setup STT dependencies";
107
+ options.showWarning(msg);
108
+ logger.error("STT dependency setup failed", { error: msg });
109
+ return false;
110
+ }
111
+ }
112
+
113
+ async #start(editor: Editor, options: ToggleOptions): Promise<void> {
114
+ if (!(await this.#ensureDeps(options))) return;
115
+ // Live transcription needs a recorder that can pipe PCM; the Windows
116
+ // PowerShell mci fallback records to a file, so it stays single-shot.
117
+ if (this.#recorderCanStream()) {
118
+ await this.#startStreaming(editor, options);
119
+ return;
120
+ }
121
+ await this.#startBatchRecording(options);
122
+ }
123
+
124
+ async #stop(editor: Editor, options: ToggleOptions): Promise<void> {
125
+ if (this.#stream) {
126
+ await this.#stopStreaming(options);
127
+ return;
128
+ }
129
+ await this.#stopBatch(editor, options);
130
+ }
131
+
132
+ // ── Live streaming ──────────────────────────────────────────────
133
+
134
+ #recorderCanStream(): boolean {
135
+ const recorder = detectRecorder();
136
+ return recorder !== null && recorder.tool !== "powershell";
137
+ }
138
+
139
+ /** Segment text gets a leading space once a prior segment is committed, so
140
+ * phrases join naturally; the first phrase is inserted at the cursor as-is. */
141
+ #prefixed(text: string): string {
142
+ const normalized = text.replace(/\s+/g, " ").trim();
143
+ if (!normalized) return "";
144
+ return this.#streamCommitted ? ` ${normalized}` : normalized;
145
+ }
146
+
147
+ async #startStreaming(editor: Editor, options: ToggleOptions): Promise<void> {
148
+ const modelKey = resolveSttModelSpec(settings.get("stt.modelName") as string | undefined).key;
149
+ const language = settings.get("stt.language") as string | undefined;
150
+ this.#streamEditor = editor;
151
+ this.#streamCommitted = false;
152
+ this.#streamAbort = new AbortController();
153
+ const stream = sttClient.startStream(modelKey, {
154
+ language: language || undefined,
155
+ signal: this.#streamAbort.signal,
156
+ onPartial: text => {
157
+ if (this.#disposed || this.#state !== "recording") return;
158
+ this.#streamEditor?.setVolatileText(this.#prefixed(text));
159
+ options.requestRender?.();
160
+ },
161
+ onSegment: text => {
162
+ if (this.#disposed) return;
163
+ const prefixed = this.#prefixed(text);
164
+ if (prefixed) {
165
+ this.#streamEditor?.commitVolatileText(prefixed);
166
+ this.#streamCommitted = true;
167
+ } else {
168
+ this.#streamEditor?.clearVolatileText();
169
+ }
170
+ options.requestRender?.();
171
+ },
172
+ });
173
+ this.#stream = stream;
174
+ let recorder: StreamingRecordingHandle | null = null;
175
+ try {
176
+ recorder = await startStreamingRecording(samples => stream.pushAudio(samples));
177
+ } catch (err) {
178
+ logger.warn("STT streaming recorder failed to start; falling back to batch recording", {
179
+ error: err instanceof Error ? err.message : String(err),
180
+ });
181
+ }
182
+ if (!recorder) {
183
+ stream.cancel();
184
+ this.#cleanupStream();
185
+ await this.#startBatchRecording(options);
186
+ return;
187
+ }
188
+ this.#streamRecorder = recorder;
189
+ this.#setState("recording", options);
190
+ logger.debug("STT live recording started", { modelKey });
191
+ }
192
+
193
+ async #stopStreaming(options: ToggleOptions): Promise<void> {
194
+ const stream = this.#stream;
195
+ const recorder = this.#streamRecorder;
196
+ if (!stream) {
197
+ this.#setState("idle", options);
198
+ return;
199
+ }
200
+ this.#setState("transcribing", options);
201
+ // Stop the mic first so no further audio is fed, then flush the worker.
202
+ try {
203
+ await recorder?.stop();
204
+ } catch (err) {
205
+ logger.debug("stt: streaming recorder stop failed", {
206
+ error: err instanceof Error ? err.message : String(err),
207
+ });
208
+ }
209
+ this.#streamRecorder = null;
210
+
211
+ let failed = false;
212
+ let finalText = "";
213
+ try {
214
+ finalText = (await stream.stop()).trim();
215
+ } catch (err) {
216
+ failed = true;
217
+ if (!this.#disposed) {
218
+ const msg = err instanceof Error ? err.message : "Transcription failed";
72
219
  options.showWarning(msg);
73
- logger.error("STT dependency setup failed", { error: msg });
74
- return;
220
+ logger.error("STT live transcription failed", { error: msg });
75
221
  }
76
222
  }
223
+ if (this.#disposed) {
224
+ this.#cleanupStream();
225
+ return;
226
+ }
227
+ if (!this.#streamCommitted && finalText) {
228
+ this.#streamEditor?.commitVolatileText(this.#prefixed(finalText));
229
+ this.#streamCommitted = true;
230
+ } else {
231
+ this.#streamEditor?.clearVolatileText();
232
+ }
233
+ options.requestRender?.();
234
+ if (!failed) options.showStatus(this.#streamCommitted ? "" : "No speech detected.");
235
+ this.#cleanupStream();
236
+ this.#setState("idle", options);
237
+ }
238
+
239
+ #cleanupStream(): void {
240
+ this.#stream = null;
241
+ this.#streamRecorder = null;
242
+ this.#streamEditor = null;
243
+ this.#streamCommitted = false;
244
+ this.#streamAbort = null;
245
+ }
246
+
247
+ // ── Batch (single-shot) ─────────────────────────────────────────
248
+
249
+ async #startBatchRecording(options: ToggleOptions): Promise<void> {
77
250
  const id = Snowflake.next();
78
251
  this.#tempFile = path.join(os.tmpdir(), `omp-stt-${id}.wav`);
79
-
80
252
  try {
81
253
  this.#recordingHandle = await startRecording(this.#tempFile);
82
254
  this.#setState("recording", options);
@@ -89,7 +261,7 @@ export class STTController {
89
261
  }
90
262
  }
91
263
 
92
- async #stopAndTranscribe(editor: Editor, options: ToggleOptions): Promise<void> {
264
+ async #stopBatch(editor: Editor, options: ToggleOptions): Promise<void> {
93
265
  const handle = this.#recordingHandle;
94
266
  const tempFile = this.#tempFile;
95
267
  this.#recordingHandle = null;
@@ -146,6 +318,13 @@ export class STTController {
146
318
  this.#transcriptionAbort.abort();
147
319
  this.#transcriptionAbort = null;
148
320
  }
321
+ if (this.#streamAbort) {
322
+ this.#streamAbort.abort();
323
+ this.#streamAbort = null;
324
+ }
325
+ this.#stream?.cancel();
326
+ this.#streamRecorder?.stop().catch(() => {});
327
+ this.#cleanupStream();
149
328
  if (this.#recordingHandle) {
150
329
  this.#recordingHandle.stop().catch(() => {});
151
330
  this.#recordingHandle = null;