@juicesharp/rpiv-voice 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/LICENSE +21 -0
- package/README.md +116 -0
- package/audio/error-log.ts +37 -0
- package/audio/hallucination-filter.ts +71 -0
- package/audio/mic-source.ts +38 -0
- package/audio/model-download.ts +268 -0
- package/audio/pcm.ts +45 -0
- package/audio/sherpa-onnx-node.d.ts +55 -0
- package/audio/stt-engine.ts +117 -0
- package/command/pipeline-runner.ts +238 -0
- package/command/splash-runner.ts +72 -0
- package/command/voice-command.ts +251 -0
- package/config/voice-config.ts +80 -0
- package/docs/cover.png +0 -0
- package/docs/cover.svg +173 -0
- package/docs/equalizer.svg +86 -0
- package/docs/overlay.jpg +0 -0
- package/docs/overlay.png +0 -0
- package/docs/vertical-cover.png +0 -0
- package/docs/vertical-cover.svg +239 -0
- package/index.ts +66 -0
- package/locales/de.json +39 -0
- package/locales/en.json +42 -0
- package/locales/es.json +39 -0
- package/locales/fr.json +39 -0
- package/locales/pt-BR.json +39 -0
- package/locales/pt.json +39 -0
- package/locales/ru.json +39 -0
- package/locales/uk.json +39 -0
- package/package.json +94 -0
- package/state/i18n-bridge.ts +51 -0
- package/state/key-router.ts +46 -0
- package/state/screen-intent.ts +27 -0
- package/state/selectors/contract.ts +13 -0
- package/state/selectors/derivations.ts +9 -0
- package/state/selectors/focus.ts +6 -0
- package/state/selectors/projections.ts +112 -0
- package/state/state-reducer.ts +197 -0
- package/state/state.ts +48 -0
- package/state/status-intent.ts +23 -0
- package/state/voice-session.ts +176 -0
- package/view/component-binding.ts +24 -0
- package/view/components/equalizer-view.ts +237 -0
- package/view/components/settings-field-view.ts +77 -0
- package/view/components/settings-form-view.ts +26 -0
- package/view/components/splash-view.ts +98 -0
- package/view/components/status-bar-view.ts +112 -0
- package/view/components/transcript-view.ts +50 -0
- package/view/overlay-view.ts +82 -0
- package/view/props-adapter.ts +29 -0
- package/view/screen-content-strategy.ts +58 -0
- package/view/stateful-view.ts +7 -0
package/package.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@juicesharp/rpiv-voice",
|
|
3
|
+
"version": "1.4.2",
|
|
4
|
+
"private": false,
|
|
5
|
+
"description": "Pi extension. Voice dictation via /voice — local on-device STT with sherpa-onnx Whisper (base multilingual int8), microphone capture via decibri.",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"pi-package",
|
|
8
|
+
"pi-extension",
|
|
9
|
+
"rpiv",
|
|
10
|
+
"voice",
|
|
11
|
+
"dictation",
|
|
12
|
+
"stt",
|
|
13
|
+
"sherpa-onnx",
|
|
14
|
+
"overlay"
|
|
15
|
+
],
|
|
16
|
+
"type": "module",
|
|
17
|
+
"license": "MIT",
|
|
18
|
+
"author": "juicesharp",
|
|
19
|
+
"repository": {
|
|
20
|
+
"type": "git",
|
|
21
|
+
"url": "git+https://github.com/juicesharp/rpiv-mono.git",
|
|
22
|
+
"directory": "packages/rpiv-voice"
|
|
23
|
+
},
|
|
24
|
+
"homepage": "https://github.com/juicesharp/rpiv-mono/tree/main/packages/rpiv-voice#readme",
|
|
25
|
+
"bugs": {
|
|
26
|
+
"url": "https://github.com/juicesharp/rpiv-mono/issues"
|
|
27
|
+
},
|
|
28
|
+
"publishConfig": {
|
|
29
|
+
"access": "public"
|
|
30
|
+
},
|
|
31
|
+
"scripts": {
|
|
32
|
+
"test": "vitest run"
|
|
33
|
+
},
|
|
34
|
+
"files": [
|
|
35
|
+
"index.ts",
|
|
36
|
+
"audio/error-log.ts",
|
|
37
|
+
"audio/hallucination-filter.ts",
|
|
38
|
+
"audio/mic-source.ts",
|
|
39
|
+
"audio/model-download.ts",
|
|
40
|
+
"audio/pcm.ts",
|
|
41
|
+
"audio/sherpa-onnx-node.d.ts",
|
|
42
|
+
"audio/stt-engine.ts",
|
|
43
|
+
"command/pipeline-runner.ts",
|
|
44
|
+
"command/splash-runner.ts",
|
|
45
|
+
"command/voice-command.ts",
|
|
46
|
+
"config/voice-config.ts",
|
|
47
|
+
"state/i18n-bridge.ts",
|
|
48
|
+
"state/key-router.ts",
|
|
49
|
+
"state/screen-intent.ts",
|
|
50
|
+
"state/selectors/contract.ts",
|
|
51
|
+
"state/selectors/derivations.ts",
|
|
52
|
+
"state/selectors/focus.ts",
|
|
53
|
+
"state/selectors/projections.ts",
|
|
54
|
+
"state/state-reducer.ts",
|
|
55
|
+
"state/state.ts",
|
|
56
|
+
"state/status-intent.ts",
|
|
57
|
+
"state/voice-session.ts",
|
|
58
|
+
"view/component-binding.ts",
|
|
59
|
+
"view/components/equalizer-view.ts",
|
|
60
|
+
"view/components/settings-field-view.ts",
|
|
61
|
+
"view/components/settings-form-view.ts",
|
|
62
|
+
"view/components/splash-view.ts",
|
|
63
|
+
"view/components/status-bar-view.ts",
|
|
64
|
+
"view/components/transcript-view.ts",
|
|
65
|
+
"view/overlay-view.ts",
|
|
66
|
+
"view/props-adapter.ts",
|
|
67
|
+
"view/screen-content-strategy.ts",
|
|
68
|
+
"view/stateful-view.ts",
|
|
69
|
+
"locales/",
|
|
70
|
+
"docs/",
|
|
71
|
+
"README.md",
|
|
72
|
+
"CHANGELOG.md",
|
|
73
|
+
"LICENSE"
|
|
74
|
+
],
|
|
75
|
+
"pi": {
|
|
76
|
+
"extensions": [
|
|
77
|
+
"./index.ts"
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
"dependencies": {
|
|
81
|
+
"sherpa-onnx-node": "^1.13.0",
|
|
82
|
+
"decibri": "^3.4.0"
|
|
83
|
+
},
|
|
84
|
+
"peerDependencies": {
|
|
85
|
+
"@earendil-works/pi-coding-agent": "*",
|
|
86
|
+
"@earendil-works/pi-tui": "*",
|
|
87
|
+
"@juicesharp/rpiv-i18n": "*"
|
|
88
|
+
},
|
|
89
|
+
"peerDependenciesMeta": {
|
|
90
|
+
"@juicesharp/rpiv-i18n": {
|
|
91
|
+
"optional": true
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* i18n bridge for rpiv-voice — single thin import surface so every translation
|
|
3
|
+
* call site routes through one place. Backed by `@juicesharp/rpiv-i18n`'s SDK
|
|
4
|
+
* when available; degrades to canonical-English fallbacks when not.
|
|
5
|
+
*
|
|
6
|
+
* - `t(key, fallback)` is `scope("@juicesharp/rpiv-voice")` if the SDK is
|
|
7
|
+
* installed (live `/languages` updates propagate). If the SDK is missing
|
|
8
|
+
* (standalone install without rpiv-i18n), `t` is an identity passthrough
|
|
9
|
+
* that returns the inline English fallback at every call site, so the
|
|
10
|
+
* extension stays online with English UI.
|
|
11
|
+
* - `getActiveLocale()` exposes the current locale string ("en", "ru", …) so
|
|
12
|
+
* the STT engine can pre-set Whisper's `language` field for accuracy gains.
|
|
13
|
+
* Returns `undefined` if rpiv-i18n isn't installed or no locale is active —
|
|
14
|
+
* in which case Whisper falls back to its built-in auto-detect.
|
|
15
|
+
*
|
|
16
|
+
* Strings are registered ONCE at extension load (see ../index.ts). Call sites
|
|
17
|
+
* MUST use this module at render time — never bake the result into a top-level
|
|
18
|
+
* `const X = t(...)`.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/** Namespace passed to the i18n SDK's `scope()` for every string of this extension. */
export const I18N_NAMESPACE = "@juicesharp/rpiv-voice";

type ScopeFn = (key: string, fallback: string) => string;
type LocaleFn = () => string | undefined;
type I18nSDK = {
  scope: (namespace: string) => ScopeFn;
  getActiveLocale: LocaleFn;
};

// English-only behaviour used whenever the SDK (or one of its entry points)
// is unavailable: `t` echoes the inline fallback, and no locale is reported.
const passthroughScope: ScopeFn = (_key, fallback) => fallback;
const noActiveLocale: LocaleFn = () => undefined;

// Prefer the live SDK when it can be imported; otherwise keep the fallbacks
// above so the extension still loads with English UI.
//
// The imported module is validated at runtime instead of being trusted through
// a type assertion: an SDK build that lacks `getActiveLocale`, or whose
// `scope()` does not return a function, would otherwise leave an `undefined`
// export behind and throw on the first call. Each export falls back
// independently, so a usable `scope` is kept even if `getActiveLocale` is
// missing.
//
// Top-level await is required so a synchronous call sees the resolved scope;
// ESM module loading awaits this before evaluating any importer.
let scopeImpl: ScopeFn = passthroughScope;
let activeLocaleImpl: LocaleFn = noActiveLocale;
try {
  const sdk = (await import("@juicesharp/rpiv-i18n")) as Partial<I18nSDK>;
  if (typeof sdk.scope === "function") {
    const scoped: unknown = sdk.scope(I18N_NAMESPACE);
    if (typeof scoped === "function") scopeImpl = scoped as ScopeFn;
  }
  if (typeof sdk.getActiveLocale === "function") activeLocaleImpl = sdk.getActiveLocale;
} catch {
  // Import failed or `scope()` threw: deliberately best-effort, so drop back
  // to the English fallbacks for both exports rather than failing the load.
  scopeImpl = passthroughScope;
  activeLocaleImpl = noActiveLocale;
}

/** Translates `key`, returning `fallback` when no SDK-backed scope is available. */
export const t: ScopeFn = scopeImpl;
/** Returns the SDK's active locale, or `undefined` when no SDK-backed lookup is available. */
export const getActiveLocale: LocaleFn = activeLocaleImpl;
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { Key, matchesKey } from "@earendil-works/pi-tui";
|
|
2
|
+
import type { VoiceRuntime, VoiceState } from "./state.js";
|
|
3
|
+
|
|
4
|
+
// Keybinding names resolved through `runtime.keybindings.matches`.
const KEYBIND_CONFIRM = "tui.select.confirm";
// Cancel is looked up by keybinding name (defaults to Esc) rather than being
// compared against a hardcoded `Key.escape`, mirroring the
// rpiv-ask-user-question peer pattern, so users who remap Esc keep a working
// cancel.
const KEYBIND_CANCEL = "tui.select.cancel";

// Raw input strings compared verbatim against the incoming data.
const SPACE_KEY = " ";
const CTRL_S = "\x13";

/** Every action the voice reducer understands; `ignore` stands for "no-op input". */
export type VoiceAction =
  | { kind: "audio_chunk"; level: number }
  | { kind: "audio_transcript_appended"; text: string }
  | { kind: "audio_partial_transcript_set"; text: string }
  | { kind: "toggle_pause" }
  | { kind: "commit" }
  | { kind: "cancel" }
  | { kind: "open_settings" }
  | { kind: "close_settings" }
  | { kind: "toggle_focused_setting" }
  | { kind: "focus_settings_next" }
  | { kind: "focus_settings_prev" }
  | { kind: "settings_save" }
  | { kind: "ignore" };

// Key routing while the settings screen is shown. Checks run in a fixed
// order; the first match wins.
function routeSettingsKey(data: string, keybindings: VoiceRuntime["keybindings"]): VoiceAction {
  if (data === CTRL_S) return { kind: "settings_save" };
  // Both the cancel binding and Tab leave the settings screen.
  if (keybindings.matches(data, KEYBIND_CANCEL) || matchesKey(data, Key.tab)) {
    return { kind: "close_settings" };
  }
  if (matchesKey(data, Key.up)) return { kind: "focus_settings_prev" };
  if (matchesKey(data, Key.down)) return { kind: "focus_settings_next" };
  return keybindings.matches(data, KEYBIND_CONFIRM) ? { kind: "toggle_focused_setting" } : { kind: "ignore" };
}

// Key routing for every screen other than settings. Checks run in a fixed
// order; the first match wins.
function routeDictationKey(data: string, keybindings: VoiceRuntime["keybindings"]): VoiceAction {
  if (keybindings.matches(data, KEYBIND_CANCEL)) return { kind: "cancel" };
  if (keybindings.matches(data, KEYBIND_CONFIRM)) return { kind: "commit" };
  if (matchesKey(data, Key.tab)) return { kind: "open_settings" };
  return data === SPACE_KEY ? { kind: "toggle_pause" } : { kind: "ignore" };
}

/**
 * Maps one chunk of raw key input to the action it triggers on the current
 * screen.
 *
 * @param data - Raw input string to classify.
 * @param state - Current voice state; only `currentScreen` is consulted.
 * @param runtime - Supplies the keybinding matcher used for confirm/cancel.
 * @returns The matching action, or `{ kind: "ignore" }` when nothing matches.
 */
export function routeKey(data: string, state: VoiceState, runtime: VoiceRuntime): VoiceAction {
  const { keybindings } = runtime;
  return state.currentScreen === "settings"
    ? routeSettingsKey(data, keybindings)
    : routeDictationKey(data, keybindings);
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { ScreenKind } from "./state.js";
|
|
2
|
+
|
|
3
|
+
/** Identifiers of the footer hints a screen can display. */
export type FooterHintKey =
  | "enter_paste"
  | "space_pause"
  | "tab_settings"
  | "esc_cancel"
  | "esc_back"
  | "ctrl_s_save"
  | "enter_toggle"
  | "up_down_select";

/** Static per-screen metadata: a label plus the ordered footer hints. */
export interface ScreenIntentMeta {
  label: string;
  footerHints: readonly FooterHintKey[];
}

// Hint lists are kept in display order.
const DICTATION_FOOTER_HINTS: readonly FooterHintKey[] = ["enter_paste", "space_pause", "tab_settings", "esc_cancel"];
const SETTINGS_FOOTER_HINTS: readonly FooterHintKey[] = ["up_down_select", "enter_toggle", "ctrl_s_save", "esc_back"];

/** Metadata lookup keyed by screen kind. */
export const SCREEN_META: Record<ScreenKind, ScreenIntentMeta> = {
  dictation: { label: "Dictation", footerHints: DICTATION_FOOTER_HINTS },
  settings: { label: "Settings", footerHints: SETTINGS_FOOTER_HINTS },
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ActiveView } from "../../view/stateful-view.js";
|
|
2
|
+
import type { ScreenKind, VoiceState } from "../state.js";
|
|
3
|
+
|
|
4
|
+
/** Context handed to every selector alongside the voice state. */
export interface BindingContext {
  readonly activeView: ActiveView;
}

/** Binding context for selectors that are evaluated for one specific screen. */
export interface PerScreenBindingContext extends BindingContext {
  readonly kind: ScreenKind;
}

/** Projects the voice state (plus binding context) into view props `P`. */
export type GlobalSelector<P> = (state: VoiceState, ctx: BindingContext) => P;

/** Like `GlobalSelector`, but the context also carries the screen kind. */
export type PerScreenSelector<P> = (state: VoiceState, ctx: PerScreenBindingContext) => P;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Lower bound on the number of rows kept, however small the terminal is.
const MIN_RENDER_ROWS = 4;
// Fraction of the terminal height the rendered output may occupy.
const MAX_HEIGHT_RATIO = 0.85;

/**
 * Limits rendered output to a share of the terminal height.
 *
 * Clipping removes lines from the top, so the banner, the latest transcript
 * and the footer stay visible at the bottom.
 *
 * @param lines - Rendered lines, top to bottom. Never mutated.
 * @param terminalRows - Terminal height in rows.
 * @returns A new array holding at most
 *   `max(MIN_RENDER_ROWS, floor(terminalRows * MAX_HEIGHT_RATIO))` trailing lines.
 */
export function clipToTerminalHeight(lines: readonly string[], terminalRows: number): string[] {
  const budget = Math.max(MIN_RENDER_ROWS, Math.floor(terminalRows * MAX_HEIGHT_RATIO));
  const overflow = lines.length - budget;
  return overflow > 0 ? lines.slice(overflow) : [...lines];
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import type { EqualizerViewProps } from "../../view/components/equalizer-view.js";
|
|
2
|
+
import type { SettingsFieldViewProps } from "../../view/components/settings-field-view.js";
|
|
3
|
+
import type { StatusBarViewProps } from "../../view/components/status-bar-view.js";
|
|
4
|
+
import type { TranscriptViewProps } from "../../view/components/transcript-view.js";
|
|
5
|
+
import { getActiveLocale, t } from "../i18n-bridge.js";
|
|
6
|
+
import type { FooterHintKey } from "../screen-intent.js";
|
|
7
|
+
import { SCREEN_META } from "../screen-intent.js";
|
|
8
|
+
import type { GlobalSelector } from "./contract.js";
|
|
9
|
+
|
|
10
|
+
// Display names for the locales the bundled Whisper base multilingual model
// recognizes well. This table mirrors WHISPER_SUPPORTED_LANGUAGES in
// voice-command.ts; it is duplicated rather than imported so selectors stay
// free of command-layer dependencies.
//
// Values are endonyms (self-names), so one string reads naturally whatever
// the active UI locale is: `Русский` is shown to Russian and English users
// alike, which keeps it self-recognizable without a translation matrix.
const LANGUAGE_DISPLAY_BY_CODE: Record<string, string> = {
  de: "Deutsch",
  en: "English",
  es: "Español",
  fr: "Français",
  it: "Italiano",
  ja: "日本語",
  pt: "Português",
  ru: "Русский",
  uk: "Українська",
  zh: "中文",
};
|
|
30
|
+
|
|
31
|
+
// One label producer per footer hint. Each producer calls `t` only when it is
// invoked, so translation happens at projection time and /languages flips
// propagate without re-instantiating the views.
const HINT_LABELS: Record<FooterHintKey, (paused: boolean) => string> = {
  enter_paste: () => t("footer.enter_paste", "Enter to paste"),
  // The only hint whose wording depends on the recording status.
  space_pause: (paused) =>
    paused ? t("footer.space_resume", "Space to resume") : t("footer.space_pause", "Space to pause"),
  tab_settings: () => t("footer.tab_settings", "Tab for settings"),
  esc_cancel: () => t("footer.esc_cancel", "Esc to cancel"),
  esc_back: () => t("footer.esc_back", "Esc to go back"),
  ctrl_s_save: () => t("footer.ctrl_s_save", "Ctrl-S to save"),
  enter_toggle: () => t("footer.enter_toggle", "Enter to toggle"),
  up_down_select: () => t("footer.up_down_select", "↑↓ to select"),
};

/**
 * Resolves the translated label for one footer hint.
 *
 * @param key - Hint to label.
 * @param status - Object carrying the current status string; `"paused"`
 *   switches the space hint from "pause" to "resume".
 * @returns The label returned by `t` for that hint.
 */
function hintLabel(key: FooterHintKey, status: { status: string }): string {
  return HINT_LABELS[key](status.status === "paused");
}
|
|
55
|
+
|
|
56
|
+
/**
 * Builds the status bar props: the current recording status plus the
 * translated footer hints of the current screen, in the screen's declared
 * order.
 */
export const selectStatusBarProps: GlobalSelector<StatusBarViewProps> = (state, _ctx) => {
  const { footerHints } = SCREEN_META[state.currentScreen];
  const hints = footerHints.map((hintKey) => hintLabel(hintKey, state));
  return { status: state.status, hints };
};
|
|
62
|
+
|
|
63
|
+
/**
 * Builds the transcript view props from the committed transcript, the
 * in-progress partial, and a placeholder translated on every projection.
 */
export const selectTranscriptProps: GlobalSelector<TranscriptViewProps> = (state, _ctx) => {
  const { transcript, partialTranscript } = state;
  return {
    text: transcript,
    partial: partialTranscript,
    placeholder: t("transcript.placeholder", "Listening..."),
  };
};
|
|
68
|
+
|
|
69
|
+
/**
 * Builds the equalizer view props: the current audio level and recording
 * status, plus whether the equalizer is enabled in the settings draft.
 */
export const selectEqualizerProps: GlobalSelector<EqualizerViewProps> = (state, _ctx) => {
  const { audioLevel, status, settingsDraft } = state;
  return { level: audioLevel, status, enabled: settingsDraft.equalizerEnabled };
};
|
|
74
|
+
|
|
75
|
+
/**
 * Props for the "Filter Whisper noise" toggle on the settings screen.
 *
 * `active` is true only while this field holds the settings focus, so a
 * single toggle wears the pointer at a time. `hint` is always supplied to keep
 * the settings body height stable across focus changes; the field hides its
 * own hint when not active (per settings-field-view's render gate).
 */
export const selectHallucinationFilterFieldProps: GlobalSelector<SettingsFieldViewProps> = (state, _ctx) => {
  const label = t("settings.hallucination_filter_label", "Filter Whisper noise");
  const hint = t(
    "settings.hallucination_filter_hint",
    "Drops silence-segment artifacts. Turn off for single-word dictation.",
  );
  return {
    label,
    active: state.settingsFocus === "hallucination",
    field: { kind: "toggle", enabled: state.settingsDraft.hallucinationFilterEnabled },
    hint,
  };
};
|
|
88
|
+
|
|
89
|
+
/**
 * Props for the "Equalizer" toggle on the settings screen. `active` is true
 * only while this field holds the settings focus; the toggle value comes from
 * the settings draft.
 */
export const selectEqualizerFieldProps: GlobalSelector<SettingsFieldViewProps> = (state, _ctx) => {
  const label = t("settings.equalizer_label", "Equalizer");
  const hint = t("settings.equalizer_hint", "Show the live audio waveform under the transcript. Off by default.");
  return {
    label,
    active: state.settingsFocus === "equalizer",
    field: { kind: "toggle", enabled: state.settingsDraft.equalizerEnabled },
    hint,
  };
};
|
|
95
|
+
|
|
96
|
+
/**
 * Props for the read-only "Microphone" row. The row is never active and
 * always shows the translated "System default input" value; it does not read
 * the voice state.
 */
export const selectMicReadonlyFieldProps: GlobalSelector<SettingsFieldViewProps> = (_state, _ctx) => {
  const label = t("settings.microphone_label", "Microphone");
  const value = t("settings.microphone_value_default", "System default input");
  return { label, active: false, field: { kind: "readonly", value } };
};
|
|
101
|
+
|
|
102
|
+
/**
 * Props for the read-only "Language" row on the settings screen.
 *
 * The active locale is reduced to its base code (the part before the first
 * `-`, so `pt-BR` becomes `pt`) and looked up in `LANGUAGE_DISPLAY_BY_CODE`.
 * When no locale is active, or the base code is not in that table, the row
 * shows the translated "Auto-detect" value instead of echoing a raw locale
 * code. The lookup is restricted to the table's own keys so inherited object
 * properties can never be returned as a display name.
 *
 * @returns Row props; the row is never active and always carries the
 *   "Run /languages to change." hint.
 */
export const selectLanguageReadonlyFieldProps: GlobalSelector<SettingsFieldViewProps> = (_state, _ctx) => {
  const base = getActiveLocale()?.split("-")[0];
  const endonym =
    base !== undefined && Object.prototype.hasOwnProperty.call(LANGUAGE_DISPLAY_BY_CODE, base)
      ? LANGUAGE_DISPLAY_BY_CODE[base]
      : undefined;
  // Translate the auto-detect text only when it is actually displayed.
  const display = endonym ?? t("settings.language_value_auto", "Auto-detect");
  return {
    label: t("settings.language_label", "Language"),
    active: false,
    field: { kind: "readonly", value: display },
    hint: t("settings.language_hint", "Run /languages to change."),
  };
};
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import { isEqualizerEnabled, isHallucinationFilterEnabled, type VoiceConfig } from "../config/voice-config.js";
|
|
2
|
+
import { t } from "./i18n-bridge.js";
|
|
3
|
+
import type { VoiceAction } from "./key-router.js";
|
|
4
|
+
import { SETTINGS_FIELD_ORDER, type SettingsDraft, type SettingsFieldKey, type VoiceState } from "./state.js";
|
|
5
|
+
import { STATUS_META } from "./status-intent.js";
|
|
6
|
+
|
|
7
|
+
/** Extra input handed to every handler alongside the state and the action. */
export interface ApplyContext {
  persistedConfig: VoiceConfig;
}

/**
 * Side effects a handler asks its caller to perform. Handlers only describe
 * effects in their result; they do not execute them.
 */
export type Effect =
  | { kind: "request_render" }
  | { kind: "paste_to_editor"; text: string }
  | { kind: "notify"; level: "error" | "info"; message: string }
  | { kind: "abort_session" }
  | { kind: "stop_mic" }
  | { kind: "set_pipeline_paused"; paused: boolean }
  | { kind: "set_hallucination_filter"; enabled: boolean }
  | { kind: "save_config"; config: VoiceConfig }
  | { kind: "done"; result: VoiceResult };

/** Outcome carried by the `done` effect: how the session ended and with what text. */
export interface VoiceResult {
  intent: "commit" | "cancel";
  transcript: string;
}

/** What a handler returns: the next state plus the effects to run, in order. */
export interface ApplyResult {
  state: VoiceState;
  effects: readonly Effect[];
}

// Handler for the single `VoiceAction` variant whose `kind` is `K`.
type Handler<K extends VoiceAction["kind"]> = (
  state: VoiceState,
  action: Extract<VoiceAction, { kind: K }>,
  ctx: ApplyContext,
) => ApplyResult;
|
|
37
|
+
|
|
38
|
+
// Stores the latest audio level. An unchanged level returns the same state
// object with no effects, so no render is requested.
const audioChunk: Handler<"audio_chunk"> = (state, action, _ctx) => {
  const { level } = action;
  return state.audioLevel === level
    ? { state, effects: [] }
    : { state: { ...state, audioLevel: level }, effects: [{ kind: "request_render" }] };
};
|
|
42
|
+
|
|
43
|
+
// Final decode of an utterance. Non-empty text is appended to the committed
// transcript (space-separated when there is earlier text) and the in-progress
// partial is cleared, because the authoritative final supersedes the rolling
// re-decodes. Empty text still ends the partial; when there is no partial
// either, the state is returned untouched with no effects.
const audioTranscriptAppended: Handler<"audio_transcript_appended"> = (state, action, _ctx) => {
  const { text } = action;
  const hasText = text.length > 0;
  if (!hasText && !state.partialTranscript) return { state, effects: [] };

  let transcript = state.transcript;
  if (hasText) transcript = transcript ? `${transcript} ${text}` : text;
  return {
    state: { ...state, transcript, partialTranscript: "" },
    effects: [{ kind: "request_render" }],
  };
};
|
|
57
|
+
|
|
58
|
+
// Each rolling decode yields a fresh reading of the active utterance, so the
// partial is replaced wholesale rather than appended to. An identical partial
// returns the same state object with no effects.
const audioPartialTranscriptSet: Handler<"audio_partial_transcript_set"> = (state, action, _ctx) => {
  const { text } = action;
  return state.partialTranscript === text
    ? { state, effects: [] }
    : { state: { ...state, partialTranscript: text }, effects: [{ kind: "request_render" }] };
};
|
|
64
|
+
|
|
65
|
+
// Flips between "paused" and "recording". The pipeline pause flag is taken
// from the status metadata of the new status, then a render is requested.
const togglePause: Handler<"toggle_pause"> = (state, _action, _ctx) => {
  const status = state.status === "paused" ? "recording" : "paused";
  const { gatesSttPipeline } = STATUS_META[status];
  return {
    state: { ...state, status },
    effects: [{ kind: "set_pipeline_paused", paused: gatesSttPipeline }, { kind: "request_render" }],
  };
};
|
|
75
|
+
|
|
76
|
+
// Committing pastes what the user sees on screen: the committed transcript
// followed by the grayed in-flight partial, space-separated when both are
// present. Waiting for a "proper" final decode would add 0.5–2 s of latency
// between Enter and paste; the visible partial is already a complete
// utterance from Whisper's perspective, just one without the trailing-silence
// padding. The state itself is left unchanged.
const commit: Handler<"commit"> = (state, _action, _ctx) => {
  const visibleParts = [state.transcript, state.partialTranscript].filter(Boolean);
  const result: VoiceResult = { intent: "commit", transcript: visibleParts.join(" ") };
  return { state, effects: [{ kind: "done", result }] };
};
|
|
92
|
+
|
|
93
|
+
// Cancelling leaves the state untouched, aborts the session, and finishes
// with an empty transcript.
const cancel: Handler<"cancel"> = (state, _action, _ctx) => {
  const result: VoiceResult = { intent: "cancel", transcript: "" };
  return { state, effects: [{ kind: "abort_session" }, { kind: "done", result }] };
};
|
|
97
|
+
|
|
98
|
+
// Switches to the settings screen and requests a render.
const openSettings: Handler<"open_settings"> = (state, _action, _ctx) => {
  const next: VoiceState = { ...state, currentScreen: "settings" };
  return { state: next, effects: [{ kind: "request_render" }] };
};
|
|
102
|
+
|
|
103
|
+
// Returning to dictation silently persists the settings draft, so the common
// "toggle then Esc/Tab" flow does not lose changes. Ctrl-S remains the
// explicit save path (with a confirmation notify).
const closeSettings: Handler<"close_settings"> = (state, _action, _ctx) => {
  const config = configFromDraft(state.settingsDraft);
  return {
    state: { ...state, currentScreen: "dictation" },
    effects: [{ kind: "save_config", config }, { kind: "request_render" }],
  };
};
|
|
110
|
+
|
|
111
|
+
// Flips the boolean of whichever settings field is focused. Any focus other
// than "hallucination" is treated as the equalizer field.
const toggleFocusedSetting: Handler<"toggle_focused_setting"> = (state, _action, _ctx) => {
  const draft = state.settingsDraft;

  if (state.settingsFocus !== "hallucination") {
    // The equalizer toggle is purely view-side: no pipeline reconfig is
    // needed, the view reads the draft directly to decide whether to render
    // its row pair.
    return {
      state: { ...state, settingsDraft: { ...draft, equalizerEnabled: !draft.equalizerEnabled } },
      effects: [{ kind: "request_render" }],
    };
  }

  // The hallucination filter additionally emits an effect carrying the new value.
  const filterEnabled = !draft.hallucinationFilterEnabled;
  return {
    state: { ...state, settingsDraft: { ...draft, hallucinationFilterEnabled: filterEnabled } },
    effects: [{ kind: "set_hallucination_filter", enabled: filterEnabled }, { kind: "request_render" }],
  };
};
|
|
128
|
+
|
|
129
|
+
// Returns the field `delta` steps away from `current` in SETTINGS_FIELD_ORDER,
// wrapping around at both ends. Falls back to `current` when the computed
// slot holds no field.
function stepFocus(current: SettingsFieldKey, delta: 1 | -1): SettingsFieldKey {
  const fieldCount = SETTINGS_FIELD_ORDER.length;
  const position = SETTINGS_FIELD_ORDER.indexOf(current);
  const target = (position + delta + fieldCount) % fieldCount;
  return SETTINGS_FIELD_ORDER[target] ?? current;
}

// Shared body of the two focus handlers: moves the settings focus by `delta`
// and requests a render only when the focus actually changed.
function moveSettingsFocus(state: VoiceState, delta: 1 | -1): ApplyResult {
  const settingsFocus = stepFocus(state.settingsFocus, delta);
  return settingsFocus === state.settingsFocus
    ? { state, effects: [] }
    : { state: { ...state, settingsFocus }, effects: [{ kind: "request_render" }] };
}

// Moves the settings focus one field forward.
const focusSettingsNext: Handler<"focus_settings_next"> = (state, _action, _ctx) => moveSettingsFocus(state, +1);

// Moves the settings focus one field backward.
const focusSettingsPrev: Handler<"focus_settings_prev"> = (state, _action, _ctx) => moveSettingsFocus(state, -1);
|
|
147
|
+
|
|
148
|
+
// Explicit save: persists the config derived from the draft and emits an info
// notification. The state is returned unchanged.
const settingsSave: Handler<"settings_save"> = (state, _action, _ctx) => {
  const effects: Effect[] = [{ kind: "save_config", config: configFromDraft(state.settingsDraft) }];
  effects.push({ kind: "notify", level: "info", message: t("notify.settings_saved", "Voice settings saved") });
  return { state, effects };
};
|
|
158
|
+
|
|
159
|
+
// No-op handler: same state, no effects.
const ignore: Handler<"ignore"> = (state, _action, _ctx) => {
  return { state, effects: [] };
};

// One handler per action kind. The mapped type makes the compiler reject a
// missing kind or a handler registered under the wrong kind.
const HANDLERS: { [K in VoiceAction["kind"]]: Handler<K> } = {
  audio_chunk: audioChunk,
  audio_transcript_appended: audioTranscriptAppended,
  audio_partial_transcript_set: audioPartialTranscriptSet,
  toggle_pause: togglePause,
  commit: commit,
  cancel: cancel,
  open_settings: openSettings,
  close_settings: closeSettings,
  toggle_focused_setting: toggleFocusedSetting,
  focus_settings_next: focusSettingsNext,
  focus_settings_prev: focusSettingsPrev,
  settings_save: settingsSave,
  ignore: ignore,
};

/**
 * Applies one action to the state by dispatching to the handler registered
 * for `action.kind`.
 *
 * @param state - Current voice state.
 * @param action - Action to apply.
 * @param ctx - Context forwarded unchanged to the handler.
 * @returns The handler's result: the next state plus the effects to run.
 */
export function reduce(state: VoiceState, action: VoiceAction, ctx: ApplyContext): ApplyResult {
  // The compiler cannot correlate the looked-up handler with the action
  // variant, hence the casts; HANDLERS' mapped type keeps the pairing correct.
  const dispatch = HANDLERS[action.kind] as Handler<VoiceAction["kind"]>;
  return dispatch(state, action as never, ctx);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Converts the settings draft into the config object to persist.
 *
 * Only non-default values are written: the hallucination filter defaults ON,
 * so only its off-state lands on disk; the equalizer defaults OFF, so only its
 * on-state does. Both rules keep voice.json minimal and forward-compatible.
 *
 * @param draft - Settings draft to convert. Not mutated.
 * @returns A new config object containing only the non-default settings.
 */
export function configFromDraft(draft: SettingsDraft): VoiceConfig {
  const filterTurnedOff = draft.hallucinationFilterEnabled === false;
  const equalizerTurnedOn = draft.equalizerEnabled === true;
  return {
    ...(filterTurnedOff ? { hallucinationFilterEnabled: false } : {}),
    ...(equalizerTurnedOn ? { equalizerEnabled: true } : {}),
  };
}
|
|
191
|
+
|
|
192
|
+
/**
 * Builds the settings draft from a config by resolving each setting through
 * its `is…Enabled` helper.
 *
 * @param config - Config to read.
 * @returns A draft with both settings filled in.
 */
export function draftFromConfig(config: VoiceConfig): SettingsDraft {
  const hallucinationFilterEnabled = isHallucinationFilterEnabled(config);
  const equalizerEnabled = isEqualizerEnabled(config);
  return { hallucinationFilterEnabled, equalizerEnabled };
}
|
package/state/state.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/** Whether the session is currently recording or paused. */
export type RecordingStatus = "recording" | "paused";
/** Screens the voice overlay can show. */
export type ScreenKind = "dictation" | "settings";

/** Interactive fields on the settings screen. */
export type SettingsFieldKey = "hallucination" | "equalizer";

/** Editable settings values held in state while the session runs. */
export interface SettingsDraft {
  hallucinationFilterEnabled: boolean;
  equalizerEnabled: boolean;
}

/** Complete state of one voice session. */
export interface VoiceState {
  currentScreen: ScreenKind;
  status: RecordingStatus;
  /** Committed text — produced by the final decode at each VAD silence /
   *  cap boundary. This is what the editor receives on commit. */
  transcript: string;
  /** In-progress text — rolling re-decodes of the still-active utterance,
   *  emitted every ~1 s. Replaced wholesale by each successive partial.
   *  Cleared (and concatenated into `transcript`) on the final decode of
   *  the utterance. Rendered after `transcript` in a dim style. */
  partialTranscript: string;
  audioLevel: number;
  /** In-flight editor draft — auto-persists on `close_settings` (Esc/Tab) so
   *  toggling and exiting saves; Ctrl-S (`settings_save`) is the explicit
   *  save+notify path. */
  settingsDraft: SettingsDraft;
  /** Which interactive settings field is currently focused. Up/Down arrows
   *  cycle through `SETTINGS_FIELD_ORDER`; Enter toggles the focused one. */
  settingsFocus: SettingsFieldKey;
}

/**
 * Order in which the settings fields are cycled. Typed as a non-empty tuple so
 * the first entry is statically known to exist; the type stays assignable to
 * `readonly SettingsFieldKey[]` for existing consumers.
 */
export const SETTINGS_FIELD_ORDER: readonly [SettingsFieldKey, ...SettingsFieldKey[]] = ["hallucination", "equalizer"];

/** Runtime services handed to the key router. */
export interface VoiceRuntime {
  keybindings: { matches(data: string, name: string): boolean };
}

/**
 * Creates the state a voice session starts with: dictation screen, recording,
 * empty transcripts, zero audio level, and focus on the first settings field.
 *
 * @param draft - Settings draft to start from; stored by reference, not copied.
 * @returns The initial state.
 */
export function initialVoiceState(draft: SettingsDraft): VoiceState {
  // No non-null assertion needed: the tuple type guarantees a first entry.
  const [firstField] = SETTINGS_FIELD_ORDER;
  return {
    currentScreen: "dictation",
    status: "recording",
    transcript: "",
    partialTranscript: "",
    audioLevel: 0,
    settingsDraft: draft,
    settingsFocus: firstField,
  };
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { RecordingStatus } from "./state.js";
|
|
2
|
+
|
|
3
|
+
/** Static presentation and pipeline metadata for one recording status. */
export interface StatusIntentMeta {
  glyph: string;
  glyphColorKey: "error" | "warning";
  label: string;
  gatesSttPipeline: boolean;
}

const RECORDING_META: StatusIntentMeta = {
  glyph: "●",
  glyphColorKey: "error",
  label: "Recording",
  gatesSttPipeline: false,
};

const PAUSED_META: StatusIntentMeta = {
  glyph: "⏸",
  glyphColorKey: "warning",
  label: "Paused",
  gatesSttPipeline: true,
};

/** Metadata lookup keyed by recording status. */
export const STATUS_META: Record<RecordingStatus, StatusIntentMeta> = {
  recording: RECORDING_META,
  paused: PAUSED_META,
};
|