pi-voice-input 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,7 +27,7 @@ pi extension: extensions/voice-input.ts
27
27
  │ ├─ Linux preferred: pw-record
28
28
  │ ├─ Linux fallback: arecord
29
29
  │ └─ macOS: afrecord
30
- ├─ records 16 kHz mono 16-bit WAV
30
+ ├─ records a temporary 16 kHz mono 16-bit WAV
31
31
  ├─ parses the WAV container in TypeScript and extracts raw PCM
32
32
  ├─ sends PCM frames to the configured ASR provider via ws
33
33
  │ └─ current provider: VolcEngine /api/v3/sauc/bigmodel_nostream
@@ -134,7 +134,7 @@ Slash commands:
134
134
  /voice start # start recording
135
135
  /voice stop # stop, transcribe, insert text
136
136
  /voice toggle # start if idle, stop if recording
137
- /voice cancel # stop recording without transcribing
137
+ /voice cancel # stop recording and discard local audio without transcribing
138
138
  /voice status # show recorder state
139
139
  /voice config # show effective non-secret config and whether API key is detected
140
140
  /voice init # create or normalize ~/.pi/agent/voice-input.config.json
@@ -144,10 +144,12 @@ Slash commands:
144
144
 
145
145
  ## Notes
146
146
 
147
- - The extension uses post-recording WebSocket ASR: it records locally first, then sends the stopped recording in chunks. It is optimized for fast voice input, not live subtitles.
147
+ - The extension uses post-recording WebSocket ASR: it records locally to a per-run temporary WAV, sends the stopped recording in chunks, then deletes the temporary audio. It is optimized for fast voice input, not live subtitles.
148
148
  - The default ASR segment size is intentionally larger than realtime packet sizes because this workflow sends already-recorded audio.
149
149
  - The transcript is inserted into the editor only; it is not submitted automatically.
150
- - When `polishModel` is set, polishing uses the unsent editor draft and recent session messages as context, but outputs only the refined voice text. The final text is still pasted at the current cursor position without replacing the draft.
150
+ - Recorder stdout/stderr is not logged to disk, to avoid retaining potentially sensitive runtime data.
151
+ - On startup, legacy `~/.pi/agent/voice-input/recordings` and `~/.pi/agent/voice-input/logs` artifacts are cleaned up when they are not part of an active recording.
152
+ - When `polishModel` is set, polishing uses the unsent editor draft and recent session messages as context, but outputs only the refined voice text to insert at the current cursor. It must not reconstruct the full draft; the final text is pasted without replacing existing editor content.
151
153
  - While recording, the status line shows `● Mic on: [device name] — press Ctrl+Shift+R again to stop/transcribe` in the current theme accent color; no separate popup is shown when recording starts.
152
154
 
153
155
  ## Development
@@ -5,16 +5,17 @@ import { spawn, spawnSync } from "node:child_process";
5
5
  import { randomUUID } from "node:crypto";
6
6
  import {
7
7
  chmodSync,
8
- closeSync,
9
8
  existsSync,
10
9
  mkdirSync,
11
- openSync,
10
+ mkdtempSync,
12
11
  readFileSync,
12
+ readdirSync,
13
+ rmdirSync,
13
14
  statSync,
14
15
  unlinkSync,
15
16
  writeFileSync,
16
17
  } from "node:fs";
17
- import { homedir, platform } from "node:os";
18
+ import { homedir, platform, tmpdir } from "node:os";
18
19
  import path from "node:path";
19
20
  import { gzipSync, gunzipSync } from "node:zlib";
20
21
  import WebSocket from "ws";
@@ -23,18 +24,25 @@ const CONFIG_PATH = path.join(homedir(), ".pi", "agent", "voice-input.config.jso
23
24
  const VOLC_API_KEY_URL = "https://console.volcengine.com/speech/new/setting/apikeys?projectName=default";
24
25
  const DEFAULT_SHORTCUT = Key.ctrlShift("r");
25
26
  const DEFAULT_POSTPROCESS_MODEL = "";
26
- const POSTPROCESS_SYSTEM_PROMPT = `你是 pi 语音输入插件的语音识别后处理器。你的唯一任务是润色原始 ASR 文本,使其成为可直接提交给编码智能体的用户指令。
27
-
28
- 规则:
29
- - 只输出润色后的用户指令正文,不要输出解释、标题、前后缀、引号、代码围栏或寒暄。
30
- - 绝对不要回答、执行或解决用户语音中提出的问题;即使原始语音是问题,也只能把这个问题本身整理成清晰文本,不要给出答案、方案、代码或结论。
31
- - 以忠实保留用户信息为最高优先级。不要一味概括、压缩或简述;不要删除条件、约束、例子、数值、文件名、错误信息、多个请求、前后顺序或语气重点。
32
- - 结合上下文理解省略指代、当前任务、文件/项目名称和用户意图;上下文仅用于理解,不要重复上下文内容,除非原始语音明确要求引用或修改它。
33
- - 修正明显的语音识别错误、同音/近音错误、断句和标点错误;保留代码标识符、命令、路径、URL、模型名、包名和专有名词。
34
- - 如果用户口误后自我更正(例如“不是……是……”“不对……”“算了改成……”),只保留更正后的正确指令,删除错误说法和更正过程。
35
- - 让结果完整、符合逻辑、指令明确、有指导性;必要时拆成条目或步骤,但不得丢失原始信息。
36
- - 不要凭空添加原始语音没有表达的新需求;不确定时保留原意并用更清晰的措辞表达。
37
- - 输出语言必须跟随用户原始语音的主要语言,而不是上下文语言;不要因为上下文是中文/英文就把用户语音翻译成上下文语言。`;
27
+ const POSTPROCESS_SYSTEM_PROMPT = `You are the speech-recognition postprocessor for the pi voice input extension. Your only job is to polish the raw ASR text into text that the plugin can paste verbatim at the current cursor position in the pi editor.
28
+
29
+ Interaction contract:
30
+ - The plugin does not replace editor content with your output. It only pastes/inserts your output at the user's current cursor position.
31
+ - The current editor draft and recent conversation are context only. Use them to understand omitted references, the current task, file/project names, and intent. They are not text for you to rewrite and output as a whole.
32
+ - Do not output the draft, a context sentence, or a full sentence/paragraph that represents the draft after insertion. Doing so would duplicate existing editor content.
33
+ - You may not know the real cursor position. Do not guess the cursor location and synthesize a full surrounding sentence; the editor owns the real insertion point.
34
+ - If the raw speech is adding a few words, half a sentence, a phrase, a condition, or a modifier, output only those newly spoken words. Let the paste operation merge them with the existing draft.
35
+ - Only when the raw speech itself explicitly dictates a complete passage to insert may you output that complete passage. Even then, do not add draft text that the user did not speak.
36
+
37
+ Rules:
38
+ - Output only the polished insertion text. Do not output explanations, headings, prefixes, suffixes, quotes, code fences, or greetings.
39
+ - Never answer, execute, or solve anything asked in the user's speech. If the raw speech is a question, only clean up the question text itself; do not provide an answer, plan, code, or conclusion.
40
+ - Preserve the user's information faithfully. Do not over-summarize or compress. Do not delete constraints, examples, numbers, filenames, errors, multiple requests, ordering, or emphasis.
41
+ - Correct obvious ASR mistakes, homophones, segmentation, and punctuation. Preserve code identifiers, commands, paths, URLs, model names, package names, and proper nouns.
42
+ - If the user self-corrects, keep only the corrected intent and remove the false start, correction process, filler, and chatter. Do not lose any other substantive information.
43
+ - Make the output complete relative to the raw speech, logically clear, and actionable. Split into items or steps when helpful, but do not drop raw-speech information or repeat existing draft text.
44
+ - Do not invent requirements that the raw speech did not express. If uncertain, keep the original meaning and express it more clearly.
45
+ - The output language must match the primary language of the raw speech, not the context language and not this English prompt. Do not translate just because the instructions are in English.`;
38
46
 
39
47
  const MSG_TYPE_CLIENT_FULL_REQUEST = 0b0001;
40
48
  const MSG_TYPE_CLIENT_AUDIO_ONLY_REQUEST = 0b0010;
@@ -65,9 +73,7 @@ type VoiceConfig = {
65
73
  requestTimeoutMs: number;
66
74
  finalizeDelayMs: number;
67
75
  recorderTarget: string;
68
- recordingsDir: string;
69
76
  statePath: string;
70
- logDir: string;
71
77
  shortcut: string;
72
78
  enableItn: boolean;
73
79
  enablePunc: boolean;
@@ -83,7 +89,7 @@ type VoiceConfig = {
83
89
  type RecordingState = {
84
90
  pid: number;
85
91
  path: string;
86
- logPath: string;
92
+ logPath?: string;
87
93
  startedAt: string;
88
94
  recorderTarget?: string;
89
95
  deviceName?: string;
@@ -169,9 +175,7 @@ function getConfig(): VoiceConfig {
169
175
  requestTimeoutMs: 90000,
170
176
  finalizeDelayMs: 100,
171
177
  recorderTarget: "",
172
- recordingsDir: path.join(voiceHome, "recordings"),
173
178
  statePath: path.join(voiceHome, "recording.json"),
174
- logDir: path.join(voiceHome, "logs"),
175
179
  shortcut: DEFAULT_SHORTCUT,
176
180
  enableItn: true,
177
181
  enablePunc: true,
@@ -347,6 +351,85 @@ function clearState(config: VoiceConfig) {
347
351
  }
348
352
  }
349
353
 
354
/**
 * Allocates the output path for a single recording run.
 *
 * A fresh directory with the `pi-voice-input-` prefix is created under the OS temp
 * directory and its mode is set to 0o700. The WAV file itself is not created here;
 * only its intended location inside that directory is returned.
 *
 * @returns Path of the WAV file the recorder should write.
 */
function createRecordingPath(): string {
  const runDirPrefix = path.join(tmpdir(), "pi-voice-input-");
  const runDir = mkdtempSync(runDirPrefix);
  // Explicitly restrict the directory to the owner, regardless of how it was created.
  chmodSync(runDir, 0o700);
  const wavName = `recording-${timestampForFilename()}.wav`;
  return path.join(runDir, wavName);
}
359
+
360
/**
 * Best-effort removal of a single file.
 *
 * @param filePath File to unlink. When omitted or empty, nothing is attempted.
 * @returns `null` when the file was removed, was already absent (ENOENT), or no path
 *          was given; otherwise a human-readable warning describing the unlink failure.
 */
function deleteFileIfExists(filePath?: string): string | null {
  if (filePath) {
    try {
      unlinkSync(filePath);
    } catch (error) {
      // A file that is already gone counts as success.
      const alreadyGone = (error as NodeJS.ErrnoException).code === "ENOENT";
      if (!alreadyGone) {
        return `failed to delete ${filePath}: ${error instanceof Error ? error.message : String(error)}`;
      }
    }
  }
  return null;
}
370
+
371
/**
 * Removes the per-run temp directory that contains a recording file.
 *
 * The directory is only touched when it is a direct child of the OS temp directory
 * and its name starts with `pi-voice-input-`; any other location is left alone.
 * Removal uses `rmdirSync`, so the directory must already be empty.
 *
 * @param filePath Path of the recording file whose parent directory should be removed.
 * @returns `null` when the directory was removed, was already absent (ENOENT), or the
 *          path is not a temp recording directory; otherwise a warning message.
 */
function deleteTemporaryRecordingDir(filePath: string): string | null {
  const runDir = path.dirname(filePath);
  const isUnderTmp = path.resolve(path.dirname(runDir)) === path.resolve(tmpdir());
  const isOurs = isUnderTmp && path.basename(runDir).startsWith("pi-voice-input-");
  if (!isOurs) return null;

  try {
    rmdirSync(runDir);
  } catch (error) {
    if ((error as NodeJS.ErrnoException).code !== "ENOENT") {
      return `failed to remove temporary directory ${runDir}: ${error instanceof Error ? error.message : String(error)}`;
    }
  }
  return null;
}
387
+
388
/**
 * Deletes everything a recording run left on disk: the audio file, the optional log
 * file, and finally the temp directory that held the audio.
 *
 * @param state Paths of the recording (and, if present, its log) to remove.
 * @returns Warning messages for the steps that failed; empty when all succeeded.
 */
function cleanupRecordingArtifacts(state: Pick<RecordingState, "path" | "logPath">): string[] {
  // Files go first so the directory removal at the end can find it empty.
  const outcomes = [
    deleteFileIfExists(state.path),
    deleteFileIfExists(state.logPath),
    deleteTemporaryRecordingDir(state.path),
  ];
  const warnings: string[] = [];
  for (const outcome of outcomes) {
    if (outcome) warnings.push(outcome);
  }
  return warnings;
}
393
+
394
/**
 * Best-effort purge of one legacy artifact directory.
 *
 * Regular files directly inside `dir` whose names match `filePattern` are deleted,
 * except those whose resolved path is listed in `protectedPaths`. Afterwards the
 * directory itself is removed if it ended up empty.
 *
 * Failures are reported as warnings rather than thrown, so a damaged legacy directory
 * (unreadable, or a plain file sitting at that path) cannot abort the caller.
 *
 * @param dir Legacy directory to clean.
 * @param filePattern Pattern a file name must match to be deleted. Should not carry the
 *                    `g`/`y` flag, since `test` would then keep state between entries.
 * @param protectedPaths Resolved absolute paths that must be kept.
 * @returns Warning messages for every step that failed; empty when nothing went wrong.
 */
function cleanupLegacyDirectory(dir: string, filePattern: RegExp, protectedPaths: Set<string>): string[] {
  if (!existsSync(dir)) return [];
  const warnings: string[] = [];

  const listEntries = () => readdirSync(dir, { withFileTypes: true });
  let entries: ReturnType<typeof listEntries>;
  try {
    entries = listEntries();
  } catch (error) {
    // The directory may vanish between the existence check and the listing.
    if ((error as NodeJS.ErrnoException).code === "ENOENT") return warnings;
    warnings.push(`failed to read legacy directory ${dir}: ${error instanceof Error ? error.message : String(error)}`);
    return warnings;
  }

  for (const entry of entries) {
    if (!entry.isFile() || !filePattern.test(entry.name)) continue;
    const filePath = path.join(dir, entry.name);
    if (protectedPaths.has(path.resolve(filePath))) continue;
    const warning = deleteFileIfExists(filePath);
    if (warning) warnings.push(warning);
  }

  try {
    rmdirSync(dir);
  } catch (error) {
    const code = (error as NodeJS.ErrnoException).code;
    // A directory that still holds protected or unrelated files is expected to stay;
    // POSIX lets rmdir report that as either ENOTEMPTY or EEXIST.
    if (code !== "ENOENT" && code !== "ENOTEMPTY" && code !== "EEXIST") {
      warnings.push(`failed to remove legacy directory ${dir}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  return warnings;
}
417
+
418
/**
 * Removes recordings and recorder logs left in the legacy `recordings` and `logs`
 * directories next to the state file.
 *
 * When the persisted state refers to a process that `pidAlive` reports as running,
 * that recording's audio and log paths are protected from deletion.
 *
 * @param config Effective configuration; only `statePath` is read here directly.
 * @returns Warning messages collected from both directory cleanups, recordings first.
 */
function cleanupLegacyStoredArtifacts(config: VoiceConfig): string[] {
  const keep = new Set<string>();
  const current = readState(config);
  if (current && pidAlive(current.pid)) {
    keep.add(path.resolve(current.path));
    if (current.logPath) keep.add(path.resolve(current.logPath));
  }

  const legacyRoot = path.dirname(config.statePath);
  const legacyTargets: Array<[string, RegExp]> = [
    ["recordings", /^recording-.*\.wav$/],
    ["logs", /^recording-.*\.log$/],
  ];
  return legacyTargets.flatMap(([subdir, pattern]) =>
    cleanupLegacyDirectory(path.join(legacyRoot, subdir), pattern, keep),
  );
}
432
+
350
433
  function pidAlive(pid: number): boolean {
351
434
  try {
352
435
  process.kill(pid, 0);
@@ -820,30 +903,67 @@ function cleanPostprocessOutput(output: string): string {
820
903
  let text = output.trim();
821
904
  const fence = text.match(/^```[a-zA-Z0-9_-]*\s*\n([\s\S]*?)\n```$/);
822
905
  if (fence) text = fence[1].trim();
823
- text = text.replace(/^(?:优化后的(?:用户)?指令|整理后的(?:用户)?指令|改写后的(?:用户)?指令)\s*[::]\s*/u, "").trim();
906
+ text = text.replace(/^(?:polished(?: user)? instruction|refined(?: user)? instruction|rewritten(?: user)? instruction|final(?: insertion)? text)\s*:\s*/iu, "").trim();
824
907
  return text;
825
908
  }
826
909
 
910
/**
 * Strips an echoed copy of the editor draft from polished output.
 *
 * If `output` is exactly the draft with one contiguous run of text inserted somewhere,
 * only that inserted run is returned, so pasting it does not duplicate the draft.
 * In every other case `output` is returned unchanged.
 *
 * The insertion point is ambiguous whenever the inserted text starts with the same
 * characters as the draft text that follows it (draft `… is unclear`, echo
 * `… is useful and unclear`). All split points consistent with the echo are therefore
 * considered, and one that does not fall inside a word is preferred. Splits that would
 * cut a UTF-16 surrogate pair are never used. When no such boundary exists (for example
 * text written without spaces) the longest-common-prefix split is used.
 *
 * @param editorText Full current editor draft.
 * @param output Polished text produced by the model.
 * @returns The inserted fragment (trimmed), or `output` when no echo is detected.
 */
function removeEditorDraftEcho(editorText: string, output: string): string {
  const draft = editorText.trim();
  const text = output.trim();
  // Very short drafts match by coincidence too easily, and an echo with an insertion
  // must be strictly longer than the draft.
  if (draft.length < 12 || text.length <= draft.length) return output;

  let maxPrefix = 0;
  while (maxPrefix < draft.length && draft[maxPrefix] === text[maxPrefix]) {
    maxPrefix += 1;
  }

  let maxSuffix = 0;
  while (
    maxSuffix < draft.length &&
    draft[draft.length - 1 - maxSuffix] === text[text.length - 1 - maxSuffix]
  ) {
    maxSuffix += 1;
  }

  // Every split k in [minSplit, maxPrefix] satisfies
  // text === draft.slice(0, k) + inserted + draft.slice(k).
  const minSplit = draft.length - maxSuffix;
  if (minSplit > maxPrefix) return output;

  const insertedLength = text.length - draft.length;
  const wordUnit = /[\p{L}\p{N}_]/u;
  const isWordUnit = (unit: string | undefined): boolean => unit !== undefined && wordUnit.test(unit);
  const cutsSurrogatePair = (value: string, index: number): boolean => {
    if (index <= 0 || index >= value.length) return false;
    const before = value.charCodeAt(index - 1);
    const after = value.charCodeAt(index);
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  };

  let fallbackSplit = -1;
  let chosenSplit = -1;
  for (let split = maxPrefix; split >= minSplit; split -= 1) {
    if (
      cutsSurrogatePair(draft, split) ||
      cutsSurrogatePair(text, split) ||
      cutsSurrogatePair(text, split + insertedLength)
    ) {
      continue;
    }
    if (fallbackSplit < 0) fallbackSplit = split;
    const insideWord = isWordUnit(draft[split - 1]) && isWordUnit(draft[split]);
    if (!insideWord) {
      chosenSplit = split;
      break;
    }
  }
  if (chosenSplit < 0) chosenSplit = fallbackSplit;
  if (chosenSplit < 0) return output;

  const insertedText = text.slice(chosenSplit, chosenSplit + insertedLength).trim();
  return insertedText || output;
}
933
+
934
/**
 * Reads the complete editor draft through `ctx.ui.getEditorText()`.
 *
 * @returns The editor text, or an empty string when reading it throws.
 */
function getFullEditorText(ctx: ExtensionContext): string {
  let draft = "";
  try {
    draft = ctx.ui.getEditorText();
  } catch {
    // Editor text could not be read; keep the empty default.
  }
  return draft;
}
941
+
827
942
  function buildPostprocessPrompt(ctx: ExtensionContext, rawText: string, config: VoiceConfig): string {
828
943
  const contextBudget = config.postprocessContextChars;
829
944
  const editorContext = getEditorContext(ctx, Math.floor(contextBudget / 2));
830
945
  const sessionContext = getRecentSessionContext(ctx, Math.ceil(contextBudget / 2));
831
946
 
832
947
  return [
833
- "请根据上下文只润色下面的原始语音识别结果。",
834
- "如果上下文为空,直接依据原始文本润色。",
835
- "不要回答原始语音里的问题,也不要执行其中的请求;只输出原始语音对应的最终用户指令文本。",
836
- "输出语言必须跟随原始语音的主要语言,不要跟随上下文语言,也不要翻译成上下文语言。",
837
- "务必忠实保留原始语音中的信息和细节,不要为了简洁而概括、压缩或删减。",
838
- "当前输入框草稿只是上下文:语音文本会由插件插入到用户当前光标位置。不要重写、重复、补全、删除或替换草稿里的既有内容。",
948
+ "Polish only the raw ASR text below, using context only when it helps disambiguate the user's intent.",
949
+ "If context is empty or irrelevant, polish the raw text directly.",
950
+ "Do not answer the raw speech, and do not execute its request. Output only the final text that should be inserted into the editor.",
951
+ "The output language must match the primary language of the raw speech, not the context language and not this English prompt. Do not translate.",
952
+ "Faithfully preserve the information and details in the raw speech. Do not summarize, compress, or delete details merely for brevity.",
953
+ "IMPORTANT: your output will be pasted verbatim at the current cursor position. It is not a replacement and not a rewrite of the whole editor draft.",
954
+ "The current editor draft is context only. Do not rewrite, repeat, complete, delete, or replace existing draft text. Do not output the full sentence after insertion.",
955
+ "The true cursor position is not marked in the draft shown here; the pi editor owns the actual insertion point. Do not guess the cursor and synthesize a full surrounding sentence.",
956
+ "If the raw speech is an inline insertion, continuation, a few words, or a phrase, output only the newly spoken words or phrase.",
957
+ "Example: draft is `Please make this function async and [cursor].`, raw speech is `add error handling`, correct output is `add error handling`, not `Please make this function async and add error handling.`.",
958
+ "Example: draft is `This variable name is [cursor]unclear`, raw speech is `still`, correct output is `still`, not `This variable name is still unclear`.",
839
959
  "",
840
- "--- 上下文:当前输入框未发送草稿 ---",
841
- editorContext.trim() || "(空)",
960
+ "--- Context: current unsent editor draft (context only; do not output wholesale) ---",
961
+ editorContext.trim() || "(empty)",
842
962
  "",
843
- "--- 上下文:最近会话 ---",
844
- sessionContext || "(空)",
963
+ "--- Context: recent conversation ---",
964
+ sessionContext || "(empty)",
845
965
  "",
846
- "--- 原始语音识别结果 ---",
966
+ "--- Raw ASR text ---",
847
967
  rawText.trim(),
848
968
  ].join("\n");
849
969
  }
@@ -890,7 +1010,7 @@ async function postprocessTranscript(ctx: ExtensionContext, rawText: string, con
890
1010
  }
891
1011
 
892
1012
  const polished = cleanPostprocessOutput(extractAssistantText(response));
893
- return polished || rawText;
1013
+ return polished ? removeEditorDraftEcho(getFullEditorText(ctx), polished) : rawText;
894
1014
  }
895
1015
 
896
1016
  function insertIntoEditor(ctx: ExtensionContext, text: string) {
@@ -904,6 +1024,14 @@ async function isRecording(config: VoiceConfig): Promise<boolean> {
904
1024
  return Boolean(state && pidAlive(state.pid));
905
1025
  }
906
1026
 
1027
/**
 * Cleans up after a recording whose recorder process is no longer running.
 *
 * When persisted state exists and `pidAlive` reports its pid as not alive, the
 * recording's artifacts are removed and the state is cleared. A missing state or a
 * still-running recorder leaves everything untouched.
 *
 * @returns Warnings produced while removing the artifacts; empty when nothing was stale.
 */
function cleanupStaleRecordingState(config: VoiceConfig): string[] {
  const persisted = readState(config);
  if (persisted && !pidAlive(persisted.pid)) {
    const warnings = cleanupRecordingArtifacts(persisted);
    clearState(config);
    return warnings;
  }
  return [];
}
1034
+
907
1035
  function requireInteractiveUi(ctx: ExtensionContext, action: string): boolean {
908
1036
  if (ctx.hasUI) return true;
909
1037
  ctx.ui.notify(`Voice ${action} requires interactive pi UI. Use /voice config or /voice help for setup information.`, "error");
@@ -920,29 +1048,42 @@ async function startRecording(ctx: ExtensionContext) {
920
1048
  ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("accent", recordingStatusText(deviceName)));
921
1049
  return;
922
1050
  }
923
- if (existing) clearState(config);
1051
+ if (existing) {
1052
+ const cleanupWarnings = cleanupRecordingArtifacts(existing);
1053
+ clearState(config);
1054
+ if (cleanupWarnings.length) ctx.ui.notify(`Voice input cleanup warning:\n${cleanupWarnings.join("\n")}`, "warning");
1055
+ }
924
1056
 
925
- ensureDir(config.recordingsDir);
926
- ensureDir(config.logDir);
927
- const outputPath = path.join(config.recordingsDir, `recording-${timestampForFilename()}.wav`);
928
- const logPath = path.join(config.logDir, `recording-${timestampForFilename()}.log`);
929
- const cmd = recorderCommand(config, outputPath);
1057
+ const outputPath = createRecordingPath();
1058
+ let cmd: string[];
1059
+ try {
1060
+ cmd = recorderCommand(config, outputPath);
1061
+ } catch (error) {
1062
+ cleanupRecordingArtifacts({ path: outputPath });
1063
+ throw error;
1064
+ }
930
1065
  const deviceName = recordingDeviceName(config, cmd[0]);
931
1066
 
932
1067
  ctx.ui.setStatus("voice-input", ctx.ui.theme.fg("warning", "● starting mic"));
933
- const logFd = openSync(logPath, "a");
934
- const child = spawn(cmd[0], cmd.slice(1), {
935
- detached: true,
936
- stdio: ["ignore", logFd, logFd],
937
- });
1068
+ let child: ReturnType<typeof spawn>;
1069
+ try {
1070
+ child = spawn(cmd[0], cmd.slice(1), {
1071
+ detached: true,
1072
+ stdio: ["ignore", "ignore", "ignore"],
1073
+ });
1074
+ } catch (error) {
1075
+ cleanupRecordingArtifacts({ path: outputPath });
1076
+ throw error;
1077
+ }
938
1078
  child.unref();
939
- closeSync(logFd);
940
1079
 
941
- if (!child.pid) throw new Error("Recorder failed to start: no pid returned");
1080
+ if (!child.pid) {
1081
+ cleanupRecordingArtifacts({ path: outputPath });
1082
+ throw new Error("Recorder failed to start: no pid returned");
1083
+ }
942
1084
  writeState(config, {
943
1085
  pid: child.pid,
944
1086
  path: outputPath,
945
- logPath,
946
1087
  startedAt: new Date().toISOString(),
947
1088
  recorderTarget: config.recorderTarget || undefined,
948
1089
  deviceName,
@@ -966,21 +1107,41 @@ async function stopRecording(ctx: ExtensionContext, transcribe = true) {
966
1107
  clearState(config);
967
1108
  if (config.finalizeDelayMs > 0) await sleep(config.finalizeDelayMs);
968
1109
 
969
- if (!existsSync(state.path) || statSync(state.path).size === 0) {
970
- const log = existsSync(state.logPath) ? readFileSync(state.logPath, "utf8") : "";
971
- throw new Error(`Recording file missing/empty: ${state.path}\nRecorder log:\n${log}`);
972
- }
973
-
974
1110
  if (!transcribe) {
1111
+ const cleanupWarnings = cleanupRecordingArtifacts(state);
975
1112
  ctx.ui.setStatus("voice-input", undefined);
976
- ctx.ui.notify(`Voice recording stopped: ${state.path}`, "info");
1113
+ ctx.ui.notify(
1114
+ cleanupWarnings.length
1115
+ ? `Voice recording cancelled; local audio discard attempted, but cleanup had warnings:\n${cleanupWarnings.join("\n")}`
1116
+ : "Voice recording cancelled; local audio discarded.",
1117
+ cleanupWarnings.length ? "warning" : "info",
1118
+ );
977
1119
  return;
978
1120
  }
979
1121
 
1122
+ if (!existsSync(state.path) || statSync(state.path).size === 0) {
1123
+ const cleanupWarnings = cleanupRecordingArtifacts(state);
1124
+ throw new Error(
1125
+ `Recording file missing/empty: ${state.path}. Recorder output is not persisted for privacy.${
1126
+ cleanupWarnings.length ? `\nCleanup warnings:\n${cleanupWarnings.join("\n")}` : ""
1127
+ }`,
1128
+ );
1129
+ }
1130
+
1131
+ let decodeMs = 0;
1132
+ let durationMs = 0;
1133
+ let result: TranscriptionResult | undefined;
980
1134
  const decodeStart = Date.now();
981
- const { pcm, durationMs } = parseRecordedWav(state.path);
982
- const decodeMs = Date.now() - decodeStart;
983
- const result = await transcribePcm(pcm, durationMs, config);
1135
+ try {
1136
+ const recording = parseRecordedWav(state.path);
1137
+ durationMs = recording.durationMs;
1138
+ decodeMs = Date.now() - decodeStart;
1139
+ result = await transcribePcm(recording.pcm, recording.durationMs, config);
1140
+ } finally {
1141
+ const cleanupWarnings = cleanupRecordingArtifacts(state);
1142
+ if (cleanupWarnings.length) ctx.ui.notify(`Voice input cleanup warning:\n${cleanupWarnings.join("\n")}`, "warning");
1143
+ }
1144
+ if (!result) throw new Error("Transcription failed before a result was produced");
984
1145
 
985
1146
  if (!result.text.trim()) {
986
1147
  ctx.ui.setStatus("voice-input", undefined);
@@ -1148,7 +1309,14 @@ export default function (pi: ExtensionAPI) {
1148
1309
  });
1149
1310
 
1150
1311
  pi.on("session_start", (_event, ctx) => {
1151
- if (getConfig().apiKey) {
1312
+ const currentConfig = getConfig();
1313
+ const cleanupWarnings = [
1314
+ ...cleanupStaleRecordingState(currentConfig),
1315
+ ...cleanupLegacyStoredArtifacts(currentConfig),
1316
+ ];
1317
+ if (cleanupWarnings.length) ctx.ui.notify(`Voice input cleanup warning:\n${cleanupWarnings.join("\n")}`, "warning");
1318
+
1319
+ if (currentConfig.apiKey) {
1152
1320
  ctx.ui.notify(`Voice input loaded: ${startupConfig.shortcut} toggles recording.`, "info");
1153
1321
  return;
1154
1322
  }
@@ -1156,7 +1324,7 @@ export default function (pi: ExtensionAPI) {
1156
1324
  [
1157
1325
  `Voice input loaded: ${startupConfig.shortcut} toggles recording.`,
1158
1326
  "API key is missing. Run /voice key to set it up, or edit the JSON config file.",
1159
- `Config file: ${startupConfig.configPath}`,
1327
+ `Config file: ${currentConfig.configPath}`,
1160
1328
  `Get/create a VolcEngine Speech API key here: ${VOLC_API_KEY_URL}`,
1161
1329
  ].join("\n"),
1162
1330
  "warning",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-voice-input",
3
- "version": "0.2.7",
3
+ "version": "0.2.8",
4
4
  "description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
5
5
  "type": "module",
6
6
  "keywords": [