@kidsinai/kids-client 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://json.schemastore.org/package.json",
3
3
  "name": "@kidsinai/kids-client",
4
- "version": "0.0.16",
4
+ "version": "0.0.18",
5
5
  "type": "module",
6
6
  "description": "Own-client TUI for Kids OpenCode — talks to local `opencode serve` via @opencode-ai/sdk v2 with kid-warm rendering, mission progress, permission dialog, and stderr-tail audit pipeline.",
7
7
  "license": "MIT",
@@ -24,7 +24,8 @@
24
24
  "files": ["src", "bin", "README.md", "LICENSE"],
25
25
  "scripts": {
26
26
  "typecheck": "tsc --noEmit",
27
- "test": "bun test"
27
+ "test": "bun test",
28
+ "voice-demo": "bun src/voice-demo.tsx"
28
29
  },
29
30
  "peerDependencies": {
30
31
  "@opencode-ai/sdk": ">=1.14.0"
@@ -34,7 +35,7 @@
34
35
  "ink-spinner": "^5.0.0",
35
36
  "ink-text-input": "^6.0.0",
36
37
  "react": "^18.3.1",
37
- "@kidsinai/kids-opencode-plugin": "^0.0.16"
38
+ "@kidsinai/kids-opencode-plugin": "^0.0.18"
38
39
  },
39
40
  "devDependencies": {
40
41
  "@opencode-ai/sdk": "^1.14.51",
@@ -67,15 +67,23 @@ export class EventSubscriber {
67
67
  }
68
68
 
69
69
  private async consume(): Promise<void> {
70
- // The SDK exposes the SSE stream via client.global.event(). Shape varies
71
- // between SDK minor versions (some return async iterable, some a stream
72
- // helper). We use the duck-typed iterable path.
73
- const eventApi = (this.client as unknown as { global?: { event: () => AsyncIterable<unknown> } }).global
70
+ // The SDK exposes the SSE stream via client.global.event(). The shape
71
+ // has changed across SDK versions:
72
+ // • old: event() returns an AsyncIterable directly
73
+ // • new (>=1.14.51): event() returns Promise<{ stream: AsyncGenerator }>
74
+ // Handle both. The error "undefined is not a function (near '...raw of
75
+ // stream...')" came from `for await`-ing a Promise (the new shape) under
76
+ // the old code path.
77
+ const eventApi = (this.client as unknown as { global?: { event: (...a: unknown[]) => unknown } }).global
74
78
  if (!eventApi || typeof eventApi.event !== "function") {
75
79
  throw new Error("@opencode-ai/sdk/v2: client.global.event() not available — SDK version drift")
76
80
  }
77
- const stream = eventApi.event()
78
- for await (const raw of stream) {
81
+ const result = await Promise.resolve(eventApi.event())
82
+ const iterable = pickAsyncIterable(result)
83
+ if (!iterable) {
84
+ throw new Error(`@opencode-ai/sdk/v2: client.global.event() returned an unrecognised shape: ${describeShape(result)}`)
85
+ }
86
+ for await (const raw of iterable) {
79
87
  if (this.abort.signal.aborted) return
80
88
  if (this.retries > 0) {
81
89
  this.retries = 0
@@ -86,8 +94,19 @@ export class EventSubscriber {
86
94
  }
87
95
 
88
96
  private dispatch(raw: unknown): void {
89
- const env = raw as { payload?: { type?: string } & Record<string, unknown> }
90
- const payload = env?.payload
97
+ // Across SDK versions an event is either:
98
+ // • { payload: { type, …fields } } (older shape)
99
+ // • { type, …fields } (newer shape — yielded
100
+ // directly by the
101
+ // AsyncGenerator)
102
+ // • { data: { type, …fields } } (StreamEvent wrapper from some helpers)
103
+ // Unwrap to a single `payload` view so the switch below stays the same.
104
+ const e = raw as { payload?: Record<string, unknown>; data?: Record<string, unknown>; type?: string } & Record<string, unknown>
105
+ const payload: ({ type?: string } & Record<string, unknown>) | null =
106
+ (e?.payload && typeof e.payload === "object") ? (e.payload as { type?: string } & Record<string, unknown>)
107
+ : (e?.data && typeof e.data === "object" && typeof (e.data as { type?: unknown }).type === "string") ? (e.data as { type?: string } & Record<string, unknown>)
108
+ : (typeof e?.type === "string") ? (e as { type?: string } & Record<string, unknown>)
109
+ : null
91
110
  if (!payload || typeof payload.type !== "string") return
92
111
  const t = payload.type
93
112
  switch (t) {
@@ -166,3 +185,35 @@ function stringifyErr(err: unknown): string {
166
185
  return String(err)
167
186
  }
168
187
  }
188
+
189
/**
 * Locate the async-iterable event stream inside whatever `event()` returned.
 *
 * Three shapes are recognised, probed in this order:
 *   1. the value itself is async-iterable,
 *   2. `{ stream }` where `stream` is async-iterable,
 *   3. `{ data: { stream } }` where the nested `stream` is async-iterable.
 *
 * @param value The (already awaited) result of `event()`.
 * @returns The first async-iterable found, or `null` when no shape matches.
 */
function pickAsyncIterable(value: unknown): AsyncIterable<unknown> | null {
  if (!value) return null

  // One probe shared by all three shapes: a truthy candidate that exposes a
  // callable Symbol.asyncIterator member.
  const asIterable = (candidate: unknown): AsyncIterable<unknown> | null => {
    if (!candidate) return null
    const iteratorFn = (candidate as { [Symbol.asyncIterator]?: unknown })[Symbol.asyncIterator]
    return typeof iteratorFn === "function" ? (candidate as AsyncIterable<unknown>) : null
  }

  const direct = asIterable(value)
  if (direct) return direct

  const viaStream = asIterable((value as { stream?: unknown }).stream)
  if (viaStream) return viaStream

  const envelope = (value as { data?: unknown }).data
  if (envelope && typeof envelope === "object") {
    return asIterable((envelope as { stream?: unknown }).stream)
  }
  return null
}
214
+
215
/**
 * Produce a short, log-safe description of an arbitrary value for error
 * messages: "null"/"undefined" for nullish values, the comma-joined list of
 * own enumerable keys for objects, and the `typeof` name for everything else.
 */
function describeShape(value: unknown): string {
  if (value === null || value === undefined) return String(value)
  if (typeof value === "object") {
    const keys = Object.keys(value)
    return `object keys=[${keys.join(",")}]`
  }
  return typeof value
}
package/src/core/setup.ts CHANGED
@@ -80,7 +80,7 @@ export const PROVIDERS: ProviderChoice[] = [
80
80
  },
81
81
  {
82
82
  id: "openai",
83
- label: "OpenAI GPT (ChatGPT Plus/Pro 可直接登录)",
83
+ label: "OpenAI GPT (sign in with ChatGPT Plus/Pro)",
84
84
  hint: "Already pay for ChatGPT Plus/Pro? Sign in with that — no API key. Otherwise pay-as-you-go ~$5-10/month.",
85
85
  envVar: "OPENAI_API_KEY",
86
86
  apiKeyUrl: "https://platform.openai.com/api-keys",
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Voice controller — wires the parts into one "voice engine" the UI drives:
3
+ *
4
+ * spacebar ─▶ start() ─▶ recorder captures, feedLevel() streams energy
5
+ * │
6
+ * VAD says stop (silence/maxlen) ──▶ stop()
7
+ * │
8
+ * recorder → AudioClip → STT → text ──▶ onTranscript(text)
9
+ * (UI calls session.prompt)
10
+ *
11
+ * Deliberately owns NO timers and does NO spawning itself — the recorder
12
+ * produces level events, the UI/recorder calls feedLevel(), and this class
13
+ * only advances the state machine and decides start/stop/cancel. That keeps
14
+ * it pure enough to unit-test the whole orchestration with a mock recorder +
15
+ * MockStt, no microphone or clock required.
16
+ */
17
+
18
+ import { transition, type VoiceState } from "./state.ts"
19
+ import { shouldAutoStop, DEFAULT_VAD, type VadOptions } from "./vad.ts"
20
+ import type { AudioClip, SttAdapter } from "./stt.ts"
21
+
22
+ /** Minimal recorder surface the controller needs (Recorder implements it; tests mock it). */
23
+ export interface RecorderLike {
24
+ start(): void
25
+ stop(): Promise<AudioClip>
26
+ cancel(): Promise<void>
27
+ }
28
+
29
+ export interface VoiceControllerEvents {
30
+ /** Every state change — UI re-renders mic indicator / meter / spinner. */
31
+ onState?: (state: VoiceState) => void
32
+ /** Latest mic energy 0..1 — UI draws the meter. */
33
+ onLevel?: (level: number) => void
34
+ /** STT produced text — UI sends it via session.prompt and echoes it. */
35
+ onTranscript?: (text: string) => void
36
+ /** Recording/STT failed — UI shows a gentle retry hint. */
37
+ onError?: (err: Error) => void
38
+ }
39
+
40
/**
 * Drives one voice interaction: opens the recorder, watches the level stream
 * for an auto-stop, runs speech-to-text on the captured clip and reports the
 * result through the supplied callbacks.
 *
 * The controller owns no timers and spawns nothing itself; it only advances
 * the state machine (`transition`) and calls into the injected recorder / STT
 * adapter.
 */
export class VoiceController {
  private state: VoiceState = "idle"
  private levels: number[] = []
  /**
   * Incremented whenever cancel() actually changes the state. An in-flight
   * stop() captures the value before awaiting and drops its result if the
   * value moved, so a cancelled clip can never reach onTranscript/onError —
   * even if a new recording was started in the meantime.
   */
  private generation = 0

  /**
   * @param recorder Capture backend (real recorder or a test double).
   * @param stt      Speech-to-text adapter used once a clip is captured.
   * @param events   Optional UI callbacks; every one may be omitted.
   * @param vad      Auto-stop tuning handed to `shouldAutoStop`.
   */
  constructor(
    private recorder: RecorderLike,
    private stt: SttAdapter,
    private events: VoiceControllerEvents = {},
    private vad: VadOptions = DEFAULT_VAD,
  ) {}

  /** Current state of the voice state machine. */
  getState(): VoiceState {
    return this.state
  }

  /** Store the new state and notify onState, but only on a real change. */
  private set(next: VoiceState): void {
    if (next === this.state) return
    this.state = next
    this.events.onState?.(next)
  }

  /** Spacebar while idle. Opens the mic. No-op if not idle. */
  start(): void {
    if (this.state !== "idle") return
    this.levels = []
    this.set(transition(this.state, { type: "START" }))
    this.recorder.start()
  }

  /**
   * Feed one energy sample. Forwards it to onLevel and, once the VAD decision
   * is anything other than "continue", triggers stop(). No-op unless the
   * controller is listening.
   */
  feedLevel(level: number): void {
    if (this.state !== "listening") return
    this.levels.push(level)
    this.events.onLevel?.(level)
    if (shouldAutoStop(this.levels, this.vad) !== "continue") {
      // stop() handles its own failures via onError, so it is safe to detach.
      void this.stop()
    }
  }

  /**
   * Spacebar/Enter again, or VAD auto-stop. Ends capture, runs STT and emits
   * the text. No-op unless listening.
   *
   * If cancel() lands while the recorder or STT call is still pending, the
   * late result (success or failure) is discarded silently: nothing is sent
   * and no error is shown for a clip the kid already threw away.
   */
  async stop(): Promise<void> {
    if (this.state !== "listening") return
    const generation = this.generation
    this.set(transition(this.state, { type: "STOP" })) // → transcribing
    try {
      const clip = await this.recorder.stop()
      if (generation !== this.generation) return // cancelled while finalising the clip
      const { text } = await this.stt.transcribe(clip)
      if (generation !== this.generation) return // cancelled while transcribing
      this.set(transition(this.state, { type: "TRANSCRIBED" })) // → thinking
      this.events.onTranscript?.(text)
    } catch (err) {
      if (generation !== this.generation) return // failure belongs to a cancelled clip
      this.set(transition(this.state, { type: "ERROR" }))
      this.events.onError?.(err instanceof Error ? err : new Error(String(err)))
    }
  }

  /**
   * Esc. Throws the clip away with no send. While listening the recorder is
   * told to discard first; in states where CANCEL is not a legal transition
   * this is a no-op.
   */
  async cancel(): Promise<void> {
    if (this.state === "listening") {
      await this.recorder.cancel()
    }
    const next = transition(this.state, { type: "CANCEL" })
    // Only a cancel that really took effect invalidates pending stop() work.
    if (next !== this.state) this.generation++
    this.set(next)
  }

  /** UI signals the LLM reply landed. */
  replied(): void {
    this.set(transition(this.state, { type: "REPLIED" }))
  }

  /** UI signals that speaking the reply finished (or was skipped). */
  spoken(): void {
    this.set(transition(this.state, { type: "SPOKEN" }))
  }

  /** UI signals the error screen was acknowledged. */
  reset(): void {
    this.set(transition(this.state, { type: "RESET" }))
  }
}
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Microphone capture (side-effecting; the pure bits are extracted for tests).
3
+ *
4
+ * Strategy: shell out to a system recorder (sox `rec` or ffmpeg) writing a wav,
5
+ * same spawn pattern as core/serve-manager.ts. We also read the PCM stream to
6
+ * compute a rolling RMS energy level so the UI can draw a live mic meter and
7
+ * the VAD can auto-stop — terminals can't show a waveform any other way.
8
+ *
9
+ * No recorder on PATH must NOT crash the client: detectRecorder() returns null
10
+ * and the controller can fall back to a simulated level source (demo mode),
11
+ * so a kid on a box without sox still sees the flow, just with canned audio.
12
+ */
13
+
14
+ import { spawn, type Subprocess } from "bun"
15
+ import type { AudioClip } from "./stt.ts"
16
+
17
+ export type RecorderKind = "sox" | "ffmpeg"
18
+
19
+ export interface RecordCommand {
20
+ cmd: string[]
21
+ /** Path the recorder writes the clip to. */
22
+ outPath: string
23
+ mimeType: string
24
+ }
25
+
26
/**
 * Assemble the argv for the chosen recorder. Pure: no process is spawned.
 *
 * Both variants request mono audio at 16000 Hz written to `outPath`, and the
 * clip is always reported as "audio/wav".
 *
 * NOTE(review): the ffmpeg variant uses the `avfoundation` input with device
 * ":0", which is macOS-specific — confirm how ffmpeg-only hosts on other
 * platforms are expected to behave.
 *
 * @param kind    Which recorder front-end to build for.
 * @param outPath File the recorder should write the clip to.
 * @returns The argv plus the output path and MIME type of the resulting clip.
 */
export function buildRecordCommand(kind: RecorderKind, outPath: string): RecordCommand {
  const mimeType = "audio/wav"
  const cmd =
    kind === "sox"
      ? // `rec` is sox's recording front-end: -q quiet, -c 1 mono, -r 16000 sample rate.
        ["rec", "-q", "-c", "1", "-r", "16000", outPath]
      : // ffmpeg: silent logging, avfoundation device ":0", mono, 16 kHz, overwrite output.
        ["ffmpeg", "-loglevel", "quiet", "-f", "avfoundation", "-i", ":0", "-ac", "1", "-ar", "16000", "-y", outPath]
  return { cmd, outPath, mimeType }
}
42
+
43
/**
 * Root-mean-square energy of a chunk of signed 16-bit PCM samples.
 *
 * Each sample is scaled by 1/32768 before squaring, so the result lies in
 * 0..1. An empty chunk yields 0 rather than NaN.
 *
 * @param pcm16 Samples to measure.
 * @returns Normalised RMS energy.
 */
export function computeRms(pcm16: Int16Array): number {
  const sampleCount = pcm16.length
  if (sampleCount === 0) return 0

  let energy = 0
  for (const raw of pcm16) {
    const unit = raw / 32768 // scale to -1..1
    energy += unit * unit
  }
  return Math.sqrt(energy / sampleCount)
}
56
+
57
/**
 * Look for a usable recorder by running `which` for each candidate binary,
 * sox's `rec` first and then `ffmpeg`.
 *
 * @returns The first recorder kind whose probe exits with code 0, or `null`
 *          when none does. A probe that throws is treated as "not found" so
 *          the caller can degrade instead of crashing.
 */
export async function detectRecorder(): Promise<RecorderKind | null> {
  const candidates: ReadonlyArray<readonly [RecorderKind, string]> = [
    ["sox", "rec"],
    ["ffmpeg", "ffmpeg"],
  ]
  for (const [kind, bin] of candidates) {
    try {
      const probe = spawn({ cmd: ["which", bin], stdout: "pipe", stderr: "ignore" })
      await probe.exited
      if (probe.exitCode === 0) return kind
    } catch {
      /* probe could not run — try the next candidate */
    }
  }
  return null
}
72
+
73
+ export interface RecorderEvents {
74
+ /** Fired ~every sampleIntervalMs with the latest normalised energy 0..1. */
75
+ onLevel?: (level: number) => void
76
+ }
77
+
78
/**
 * Wraps a single recorder subprocess.
 *
 * start() spawns the command built by `buildRecordCommand`; stop() terminates
 * it and reads the output file back as an AudioClip; cancel() terminates it
 * without reading anything.
 *
 * NOTE(review): the events object is stored but nothing in this class calls
 * `onLevel`, and the child is spawned with stdout ignored — level reporting
 * appears not to be wired up yet; confirm.
 */
export class Recorder {
  private child: Subprocess | null = null
  private cmd: RecordCommand

  constructor(kind: RecorderKind, outPath: string, private _events: RecorderEvents = {}) {
    this.cmd = buildRecordCommand(kind, outPath)
  }

  /** Spawn the recorder. Does nothing if a child is already tracked. */
  start(): void {
    if (this.child !== null) return
    const { cmd } = this.cmd
    this.child = spawn({ cmd, stdout: "ignore", stderr: "ignore" })
  }

  /** End the recording, then load the written file and return it as a clip. */
  async stop(): Promise<AudioClip> {
    await this.kill()
    const { outPath, mimeType } = this.cmd
    const buffer = await Bun.file(outPath).arrayBuffer()
    return { bytes: new Uint8Array(buffer), mimeType }
  }

  /** End the recording without reading the output back. */
  async cancel(): Promise<void> {
    await this.kill()
  }

  /** Kill the child (if any and not already killed), wait for exit, forget it. */
  private async kill(): Promise<void> {
    const proc = this.child
    if (proc !== null && !proc.killed) {
      proc.kill()
      await proc.exited
    }
    this.child = null
  }
}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Voice-input state machine (pure, unit-testable).
3
+ *
4
+ * Why a machine and not booleans: a kid mashing the spacebar mid-transcription
5
+ * must never start a second recording or send a half clip. Modelling the
6
+ * legal transitions explicitly makes "you can only stop while listening,
7
+ * only cancel before we've spoken" enforceable in one place instead of
8
+ * scattered across the Ink components.
9
+ *
10
+ * Terminal constraint that shapes this: a TTY reports key-DOWN but not
11
+ * key-UP, so there is no hold-to-talk. The only press we get is a toggle.
12
+ * Hence START and STOP are both driven by the same spacebar press, and the
13
+ * machine — not the key handler — decides which one a given press means.
14
+ *
15
+ * Lifecycle:
16
+ * idle ──START──▶ listening ──STOP──▶ transcribing ──TRANSCRIBED──▶ thinking
17
+ * │ │ │
18
+ * CANCEL ERROR REPLIED
19
+ * ▼ ▼ ▼
20
+ * idle error speaking ──SPOKEN──▶ idle
21
+ */
22
+
23
+ export type VoiceState =
24
+ | "idle"
25
+ | "listening"
26
+ | "transcribing"
27
+ | "thinking"
28
+ | "speaking"
29
+ | "error"
30
+
31
+ export type VoiceEvent =
32
+ /** Spacebar while idle: open the mic. */
33
+ | { type: "START" }
34
+ /** Spacebar/Enter again, or VAD auto-stop: close the mic, begin STT. */
35
+ | { type: "STOP" }
36
+ /** Esc at any pre-reply point: throw the clip away, no send. */
37
+ | { type: "CANCEL" }
38
+ /** STT returned text; hand it to the LLM. */
39
+ | { type: "TRANSCRIBED" }
40
+ /** LLM reply arrived (optionally about to be spoken aloud). */
41
+ | { type: "REPLIED" }
42
+ /** TTS finished (or was skipped). */
43
+ | { type: "SPOKEN" }
44
+ /** Recording / STT / TTS blew up. */
45
+ | { type: "ERROR" }
46
+ /** Kid acknowledged the error screen. */
47
+ | { type: "RESET" }
48
+
49
/**
 * Pure transition function of the voice state machine.
 *
 * Written from the event's point of view: each event names the state(s) it is
 * legal in and where it leads. An event that is not legal in the current
 * state leaves the state unchanged, so callers can treat "no change" as an
 * ignored keypress.
 *
 * @param state Current state.
 * @param event Event to apply.
 * @returns The next state, or `state` itself when the event is not legal.
 */
export function transition(state: VoiceState, event: VoiceEvent): VoiceState {
  switch (event.type) {
    case "START":
      return state === "idle" ? "listening" : state
    case "STOP":
      return state === "listening" ? "transcribing" : state
    case "TRANSCRIBED":
      return state === "transcribing" ? "thinking" : state
    case "REPLIED":
      return state === "thinking" ? "speaking" : state
    case "SPOKEN":
      return state === "speaking" ? "idle" : state
    case "CANCEL":
      // Cancelling while speaking lets a long spoken reply be cut off.
      return state === "listening" || state === "transcribing" || state === "speaking" ? "idle" : state
    case "ERROR":
      // Every active state can fail; idle and error itself ignore ERROR.
      return state === "idle" || state === "error" ? state : "error"
    case "RESET":
      return state === "error" ? "idle" : state
    default:
      return state
  }
}
81
+
82
/**
 * Whether the microphone is capturing in the given state. Only "listening"
 * counts; the UI uses this for its mic indicator and audit code can use it to
 * assert the mic is never open in any other state.
 */
export function isMicOpen(state: VoiceState): boolean {
  switch (state) {
    case "listening":
      return true
    default:
      return false
  }
}
88
+
89
/**
 * Whether Esc can still abort in the given state: true for "listening",
 * "transcribing" and "speaking", false everywhere else.
 */
export function isCancellable(state: VoiceState): boolean {
  const cancellable: readonly VoiceState[] = ["listening", "transcribing", "speaking"]
  return cancellable.includes(state)
}
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Speech-to-text adapter (pluggable).
3
+ *
4
+ * HARD RULE (moat + minors compliance): STT MUST go through DeepRouter, never
5
+ * a third-party STT API directly. DeepRouter is the single gateway where we
6
+ * meter cost (Stars), enforce AU data residency, and capture the interaction
7
+ * data flywheel. Bypassing it leaks the moat — see airbotix
8
+ * docs/product/moat-strategy.md.
9
+ *
10
+ * The controller depends only on `SttAdapter`, so tests use the mock and a
11
+ * no-key dogfood run degrades to mock instead of crashing.
12
+ */
13
+
14
+ export interface AudioClip {
15
+ /** Raw encoded audio (e.g. wav/webm bytes from the recorder). */
16
+ bytes: Uint8Array
17
+ /** MIME type, e.g. "audio/wav". Drives the multipart filename/type. */
18
+ mimeType: string
19
+ }
20
+
21
+ export interface SttResult {
22
+ text: string
23
+ /** 0..1 if the backend reports it; undefined otherwise. */
24
+ confidence?: number
25
+ }
26
+
27
+ export interface SttAdapter {
28
+ transcribe(clip: AudioClip): Promise<SttResult>
29
+ }
30
+
31
+ export interface DeepRouterSttConfig {
32
+ /** DeepRouter OpenAI-compatible base, e.g. https://api.deeprouter.../v1 */
33
+ baseUrl: string
34
+ apiKey: string
35
+ /** Whisper-style model id exposed by DeepRouter. */
36
+ model: string
37
+ /** Optional BCP-47 hint ("en", "zh") to bias recognition. */
38
+ language?: string
39
+ }
40
+
41
/** File extension to advertise for each supported MIME essence (type/subtype, lower-case). */
const MIME_EXT: Record<string, string> = {
  "audio/wav": "wav",
  "audio/x-wav": "wav",
  "audio/webm": "webm",
  "audio/ogg": "ogg",
  "audio/mpeg": "mp3",
}

/**
 * Build the multipart body for the `/audio/transcriptions` request. Pure, so
 * the field shape can be unit-tested without a network call.
 *
 * Fields written: `file` (the clip, named `clip.<ext>`), `model`, and
 * `language` when configured.
 *
 * The extension is looked up from the MIME *essence*: parameters such as
 * `;codecs=opus` are stripped and case is ignored, so
 * "audio/webm;codecs=opus" is named `clip.webm` rather than falling through
 * to the `wav` default and mislabelling the upload. Unknown types still
 * default to `wav`. The Blob itself keeps the clip's original MIME string.
 *
 * @param clip Captured audio.
 * @param cfg  Transcription settings (model and optional language hint).
 * @returns A FormData ready to be used as a fetch body.
 */
export function buildTranscriptionForm(clip: AudioClip, cfg: DeepRouterSttConfig): FormData {
  const essence = (clip.mimeType.split(";", 1)[0] ?? "").trim().toLowerCase()
  const ext = MIME_EXT[essence] ?? "wav"
  const form = new FormData()
  form.append("file", new Blob([clip.bytes as BlobPart], { type: clip.mimeType }), `clip.${ext}`)
  form.append("model", cfg.model)
  if (cfg.language) form.append("language", cfg.language)
  return form
}
61
+
62
/**
 * Pull the transcript out of a decoded JSON response, accepting either
 * `{ text }` or `{ data: { text } }`.
 *
 * The payload comes straight from the network, so every field is checked at
 * runtime instead of being trusted:
 *   - a top-level `text` is used only when it is a string; otherwise the
 *     nested `data.text` is tried;
 *   - `confidence` is forwarded only when it is a finite number, so a string
 *     or NaN from the backend cannot leak into a field typed as `number`.
 *
 * @param payload Decoded response body of unknown shape.
 * @returns The transcript (with `confidence` possibly undefined), or `null`
 *          when no string transcript can be found.
 */
export function extractTranscript(payload: unknown): SttResult | null {
  if (!payload || typeof payload !== "object") return null
  const p = payload as { text?: unknown; confidence?: unknown; data?: unknown }

  const nested = p.data && typeof p.data === "object" ? (p.data as { text?: unknown }).text : undefined
  const text = typeof p.text === "string" ? p.text : nested
  if (typeof text !== "string") return null

  const confidence =
    typeof p.confidence === "number" && Number.isFinite(p.confidence) ? p.confidence : undefined
  return { text, confidence }
}
71
+
72
/**
 * SttAdapter that POSTs the clip as multipart form data to
 * `<baseUrl>/audio/transcriptions` with a bearer token.
 */
export class DeepRouterStt implements SttAdapter {
  constructor(private cfg: DeepRouterSttConfig) {}

  /**
   * Transcribe one clip.
   *
   * @param clip Captured audio to upload.
   * @returns The transcript extracted from the JSON response.
   * @throws Error when the HTTP status is not 2xx (message carries the status
   *         and up to 200 characters of the body), when the body is not valid
   *         JSON, or when the JSON has no recognisable transcript.
   */
  async transcribe(clip: AudioClip): Promise<SttResult> {
    // Tolerate a configured base URL with trailing slash(es); otherwise the
    // request would go to ".../v1//audio/transcriptions".
    const endpoint = `${this.cfg.baseUrl.replace(/\/+$/, "")}/audio/transcriptions`
    const res = await fetch(endpoint, {
      method: "POST",
      headers: { authorization: `Bearer ${this.cfg.apiKey}` },
      body: buildTranscriptionForm(clip, this.cfg),
    })
    if (!res.ok) {
      throw new Error(`DeepRouter STT ${res.status}: ${await safeText(res)}`)
    }

    // A 2xx with a non-JSON body (e.g. a proxy's HTML page) would otherwise
    // surface as a bare SyntaxError with no hint of where it came from.
    let payload: unknown
    try {
      payload = await res.json()
    } catch {
      throw new Error("DeepRouter STT: response was not valid JSON")
    }

    const result = extractTranscript(payload)
    if (!result) throw new Error("DeepRouter STT: unrecognised response shape")
    return result
  }
}
89
+
90
/**
 * SttAdapter that never touches the network: every clip "transcribes" to the
 * same canned sentence with confidence 1. Used by tests and by runs without
 * credentials.
 */
export class MockStt implements SttAdapter {
  private canned: string

  /** @param canned Text returned for every clip; defaults to a sample prompt. */
  constructor(canned = "(示例)帮我做一个会动的小猫") {
    this.canned = canned
  }

  /** Ignore the clip and resolve with the canned text. */
  async transcribe(_clip: AudioClip): Promise<SttResult> {
    const text = this.canned
    return { text, confidence: 1 }
  }
}
97
+
98
/**
 * Choose the STT adapter for the given (possibly partial) configuration.
 *
 * The real adapter is used only when `baseUrl`, `apiKey` and `model` are all
 * present and non-empty; anything less falls back to the mock. The returned
 * `mode` tells the caller which one it got, e.g. to show a demo-mode hint.
 *
 * @param cfg Partial configuration, or undefined when nothing is configured.
 * @returns The adapter plus the mode that was selected.
 */
export function resolveSttAdapter(
  cfg: Partial<DeepRouterSttConfig> | undefined,
): { adapter: SttAdapter; mode: "deeprouter" | "mock" } {
  const fullyConfigured = Boolean(cfg?.baseUrl && cfg.apiKey && cfg.model)
  if (!fullyConfigured) {
    return { adapter: new MockStt(), mode: "mock" }
  }
  return { adapter: new DeepRouterStt(cfg as DeepRouterSttConfig), mode: "deeprouter" }
}
111
+
112
/**
 * Read a response body for inclusion in an error message: at most the first
 * 200 characters, or the placeholder "<no body>" if reading the body fails.
 * Never rejects.
 */
async function safeText(res: Response): Promise<string> {
  let body: string
  try {
    body = await res.text()
  } catch {
    return "<no body>"
  }
  return body.slice(0, 200)
}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Voice-activity detection + mic-meter rendering (pure, unit-testable).
3
+ *
4
+ * This is a deliberately tiny energy-based VAD, not a neural one. The job:
5
+ * let a kid press the spacebar ONCE, talk, and have the mic close on its own
6
+ * when they stop — so they never have to remember a second keypress. A real
7
+ * silero/webrtc VAD can drop in behind the same `shouldAutoStop` shape later;
8
+ * the controller only depends on this signature.
9
+ *
10
+ * Energy levels are normalised 0..1 (0 = silence, 1 = loud). The recorder
11
+ * feeds a rolling window of recent levels; we decide stop/continue from it.
12
+ */
13
+
14
+ export interface VadOptions {
15
+ /** Below this normalised energy a frame counts as silence. */
16
+ silenceThreshold: number
17
+ /** Continuous silence this long (ms) auto-stops the recording. */
18
+ silenceMsToStop: number
19
+ /** Spacing between level samples (ms). */
20
+ sampleIntervalMs: number
21
+ /** Ignore silence until the kid has actually spoken this long (ms), so a
22
+ * slow starter who pauses before their first word isn't cut off. */
23
+ minSpeechMs: number
24
+ /** Hard cap (ms): stop no matter what, so a stuck-open mic (or a kid who
25
+ * wandered off) can't record forever. Compliance + cost guard. */
26
+ maxClipMs: number
27
+ }
28
+
29
+ export const DEFAULT_VAD: VadOptions = {
30
+ silenceThreshold: 0.06,
31
+ silenceMsToStop: 1500,
32
+ sampleIntervalMs: 100,
33
+ minSpeechMs: 400,
34
+ maxClipMs: 30_000,
35
+ }
36
+
37
+ export type VadDecision = "continue" | "stop_silence" | "stop_max_length"
38
+
39
/**
 * Decide whether recording should go on, given every level sample captured so
 * far (oldest first). Pure and clock-free: elapsed time is derived from the
 * number of samples times `sampleIntervalMs`.
 *
 * Checks, in order:
 *   1. total duration has reached `maxClipMs`           → "stop_max_length"
 *   2. fewer than `minSpeechMs` of above-threshold
 *      samples anywhere in the clip                     → "continue"
 *   3. the run of at-or-below-threshold samples at the
 *      end lasts at least `silenceMsToStop`             → "stop_silence"
 *   4. anything else                                    → "continue"
 *
 * @param levels Normalised energy samples, oldest → newest.
 * @param opts   Thresholds and timing; defaults to `DEFAULT_VAD`.
 */
export function shouldAutoStop(levels: number[], opts: VadOptions = DEFAULT_VAD): VadDecision {
  const { silenceThreshold, silenceMsToStop, sampleIntervalMs, minSpeechMs, maxClipMs } = opts
  const isSpeech = (level: number | undefined): boolean => level! > silenceThreshold

  if (levels.length * sampleIntervalMs >= maxClipMs) return "stop_max_length"

  let speechFrames = 0
  for (const level of levels) {
    if (isSpeech(level)) speechFrames++
  }
  if (speechFrames * sampleIntervalMs < minSpeechMs) return "continue"

  // Walk back from the newest sample to the most recent speech frame; the
  // distance from there to the end is the trailing silence.
  let lastSpeechIdx = levels.length - 1
  while (lastSpeechIdx >= 0 && !isSpeech(levels[lastSpeechIdx])) {
    lastSpeechIdx--
  }
  const trailingSilenceMs = (levels.length - 1 - lastSpeechIdx) * sampleIntervalMs

  return trailingSilenceMs >= silenceMsToStop ? "stop_silence" : "continue"
}
68
+
69
/** Block glyphs from shortest to tallest; the meter picks one per column. */
const METER_GLYPHS = ["▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"] as const

/**
 * Render a mic meter as a row of block glyphs for the latest energy level.
 *
 * Every column shows the same level scaled by a bias that is largest in the
 * middle of the row, which gives a waveform-like bulge without real
 * per-column data.
 *
 * `level` is clamped to 0..1. A NaN level (for example from a division by
 * zero upstream) is drawn as silence: without that guard NaN would propagate
 * into the glyph index and the meter would render the literal text
 * "undefined" once per column.
 *
 * @param level Normalised energy; values outside 0..1 are clamped.
 * @param width Number of glyphs to emit (default 12).
 * @returns A string of `width` glyphs.
 */
export function renderMeter(level: number, width = 12): string {
  const clamped = Number.isNaN(level) ? 0 : Math.max(0, Math.min(1, level))
  const tallest = METER_GLYPHS.length - 1
  let out = ""
  for (let i = 0; i < width; i++) {
    // 1 at the centre of the row, falling off linearly toward the edges.
    const centreBias = 1 - Math.abs(i - (width - 1) / 2) / (width / 2)
    const h = clamped * (0.6 + 0.4 * centreBias)
    const idx = Math.min(tallest, Math.round(h * tallest))
    out += METER_GLYPHS[idx]
  }
  return out
}
package/src/index.tsx CHANGED
@@ -74,6 +74,7 @@ interface AppHandlers {
74
74
  onPickerBack: () => void
75
75
  onMissionNext: () => void
76
76
  onMissionBack: () => void
77
+ onMissionExit: () => void
77
78
  onSetupSave: (provider: ProviderId, apiKey: string) => Promise<{ ok: true } | { ok: false; reason: string }>
78
79
  onSetupContinue: () => Promise<void>
79
80
  onSetupSkip: () => void
@@ -83,6 +84,25 @@ interface AppHandlers {
83
84
  }
84
85
 
85
86
  async function main(): Promise<void> {
87
+ // Switch to the terminal's alternate screen buffer so Ink draws on a
88
+ // canvas isolated from whatever was in the terminal before us — most
89
+ // importantly the green "Complete authorization…" lines printed by
90
+ // `opencode auth login` between two execs of kids-client. On exit, the
91
+ // kid's original terminal contents (incl. scrollback) come back.
92
+ //
93
+ // NOTE: do NOT install SIGINT/SIGTERM handlers here — the existing
94
+ // `process.on("SIGINT", () => void services.quit())` registration below
95
+ // is the cleanup owner; double-handling closed the raw-mode stdin out
96
+ // from under Ink and surfaced as "EIO on fd 8" when the kid pressed Esc.
97
+ // The "exit" listener alone is enough to restore the terminal for normal
98
+ // exits + the OAuth handoff `process.exit(OAUTH_HANDOFF_EXIT_CODE)`.
99
+ if (process.stdout.isTTY) {
100
+ process.stdout.write("\x1b[?1049h\x1b[H")
101
+ process.on("exit", () => {
102
+ try { process.stdout.write("\x1b[?1049l") } catch { /* terminal already closed */ }
103
+ })
104
+ }
105
+
86
106
  const env: KidsClientEnv = readEnv()
87
107
  const store = new Store()
88
108
  const installedPacks = listInstalledPacks()
@@ -210,6 +230,10 @@ function makeHandlers(
210
230
  onPickerBack: () => store.update({ screen: { kind: "startup" } }),
211
231
  onMissionNext: ifBooted((s) => s.handlers.onMissionNext()),
212
232
  onMissionBack: () => store.update({ screen: { kind: "mission" } }),
233
+ // Leave an in-progress mission and return to the startup menu. The serve +
234
+ // session keep running in the background; the kid just re-enters from the
235
+ // picker. Mirrors onHelpBack / onPickerBack.
236
+ onMissionExit: () => store.update({ screen: { kind: "startup" } }),
213
237
  onSetupSave: async (provider, apiKey) => {
214
238
  try {
215
239
  saveSetup({ configDir: env.configDir, provider, apiKey })
@@ -379,7 +403,17 @@ async function bootServices(env: KidsClientEnv, store: Store): Promise<ServiceSe
379
403
  })
380
404
  },
381
405
  onDisconnected: (reason) => {
382
- store.update({ screen: { kind: "error", variant: "serve_unreachable", detail: reason } })
406
+ // This fires after the engine was already reachable, so the failure is a
407
+ // dropped event stream, not a failed startup. Make the detail say so —
408
+ // the variant's title still reads "AI teacher didn't start", but the
409
+ // detail keeps it from being misleading.
410
+ store.update({
411
+ screen: {
412
+ kind: "error",
413
+ variant: "serve_unreachable",
414
+ detail: `lost connection to the AI engine after it started — ${reason}`,
415
+ },
416
+ })
383
417
  },
384
418
  onReconnected: () => {
385
419
  flashToast(store, {
@@ -10,6 +10,7 @@
10
10
  */
11
11
 
12
12
  import React, { useSyncExternalStore } from "react"
13
+ import { Box, useStdout } from "ink"
13
14
  import type { InstalledPack } from "../../core/course-pack.ts"
14
15
  import type { ErrorVariant, Store } from "../../core/store.ts"
15
16
  import { StartupScreen } from "./screens/StartupScreen.tsx"
@@ -57,6 +58,8 @@ export interface AppDeps {
57
58
  onPickerBack: () => void
58
59
  onMissionNext: () => void
59
60
  onMissionBack: () => void
61
+ /** Leave an in-progress mission and return to the startup menu. */
62
+ onMissionExit: () => void
60
63
  onSetupSave: (provider: ProviderId, apiKey: string) => Promise<{ ok: true } | { ok: false; reason: string }>
61
64
  onSetupContinue: () => Promise<void>
62
65
  onSetupSkip: () => void
@@ -77,14 +80,35 @@ export function App(deps: AppDeps): React.ReactElement {
77
80
  () => deps.store.getSnapshot(),
78
81
  () => deps.store.getSnapshot(),
79
82
  )
83
+ // Pin the App's footprint to the terminal's full dimensions. Without
84
+ // this, MissionScreen's `flexGrow={1}` middle box (chat + spinner) made
85
+ // the App's TOTAL rendered height shift by ±1 line on every keystroke /
86
+ // spinner tick / streaming chunk. Ink's diff move-cursor-up-by-N then
87
+ // used a stale N from the previous frame, so each new frame got drawn
88
+ // one row LOWER than the last — leaving the previous frame's top
89
+ // border behind. Result: a cascade of ┌──┐ stripes piling up above the
90
+ // Header. With width+height fixed to the terminal, the App's footprint
91
+ // never changes between renders and Ink's diff stays correct.
92
+ const { stdout } = useStdout()
93
+ const width = stdout?.columns && stdout.columns > 4 ? stdout.columns : 80
94
+ // -1 to leave a row for the terminal cursor / status; without it some
95
+ // terminals scroll the App by one line on the first render.
96
+ const height = stdout?.rows && stdout.rows > 4 ? stdout.rows - 1 : 23
80
97
 
98
+ const screen = renderScreen(state, deps)
99
+ return (
100
+ <Box width={width} height={height} flexDirection="column">
101
+ {screen}
102
+ </Box>
103
+ )
104
+ }
105
+
106
+ function renderScreen(state: ReturnType<Store["getSnapshot"]>, deps: AppDeps): React.ReactElement | null {
81
107
  // Dangerous-topic overlay takes absolute priority — it has to be the
82
108
  // first thing on screen the moment a pattern hits, even mid-stream.
83
109
  if (state.dangerousTopic) {
84
110
  return <DangerousTopicModal topic={state.dangerousTopic} locale={deps.locale} onAcknowledge={deps.onDangerousAcknowledge} />
85
111
  }
86
-
87
- // Permission modal is the next-highest priority.
88
112
  if (state.pendingPermission) {
89
113
  return (
90
114
  <PermissionModal
@@ -96,7 +120,6 @@ export function App(deps: AppDeps): React.ReactElement {
96
120
  />
97
121
  )
98
122
  }
99
-
100
123
  switch (state.screen.kind) {
101
124
  case "loading":
102
125
  return <LoadingScreen locale={deps.locale} message={state.screen.message} />
@@ -105,9 +128,9 @@ export function App(deps: AppDeps): React.ReactElement {
105
128
  case "tour":
106
129
  return <TourScreen locale={deps.locale} onDone={deps.onTourDone} />
107
130
  case "startup":
108
- return <StartupScreen locale={deps.locale} coursePack={state.coursePack} toast={state.toast} onStart={deps.onStart} onOpenWallet={deps.onOpenWallet} />
131
+ return <StartupScreen locale={deps.locale} coursePack={state.coursePack} toast={state.toast} onStart={deps.onStart} onOpenWallet={deps.onOpenWallet} onQuit={deps.onQuit} />
109
132
  case "mission":
110
- return <MissionScreen state={state} locale={deps.locale} onPrompt={deps.onPrompt} onAbort={deps.onAbort} />
133
+ return <MissionScreen state={state} locale={deps.locale} onPrompt={deps.onPrompt} onAbort={deps.onAbort} onExit={deps.onMissionExit} />
111
134
  case "help":
112
135
  return <HelpScreen locale={deps.locale} onBack={deps.onHelpBack} />
113
136
  case "course_picker":
@@ -147,4 +170,5 @@ export function App(deps: AppDeps): React.ReactElement {
147
170
  />
148
171
  )
149
172
  }
173
+ return null
150
174
  }
@@ -26,8 +26,17 @@ export function Header({ packTitle, missionTitle, missionIndex, missionTotal, st
26
26
  starsBudget > 0
27
27
  ? `⭐ ${starsBalance}/${starsBudget}`
28
28
  : `⭐ ${starsBalance}`
29
+ // borderStyle="round" + justifyContent="space-between" without an explicit
30
+ // width caused a cascade of stacked top-borders under Ink 5 + Bun — the
31
+ // Header re-rendered on every keystroke / spinner tick with a slightly
32
+ // different computed width, and Ink's diff failed to clear the old top
33
+ // border. Forcing width to the current terminal column count locks the
34
+ // measurement, and "single" border chars sidestep the rounded-corner
35
+ // width-counting glitch we hit in workshop dogfood (round corners stay
36
+ // available on Setup / Tour / Help screens which don't re-render rapidly).
37
+ const width = process.stdout.columns && process.stdout.columns > 4 ? process.stdout.columns : 80
29
38
  return (
30
- <Box borderStyle="round" borderColor={theme.border} paddingX={1} justifyContent="space-between">
39
+ <Box borderStyle="single" borderColor={theme.border} paddingX={1} justifyContent="space-between" width={width}>
31
40
  <Text color={theme.accent}>{left}</Text>
32
41
  <Text color={theme.stars}>{stars}</Text>
33
42
  </Box>
@@ -48,7 +48,7 @@ export function CoursePackPicker({ locale, packs, onPick, onBack }: CoursePackPi
48
48
  // Course Pack install is broken — surface it loudly, but still let the kid
49
49
  // drop into free-play via the synthetic entry.
50
50
  return (
51
- <Box flexDirection="column" borderStyle="round" borderColor={theme.warn} paddingX={2} paddingY={1}>
51
+ <Box flexDirection="column" borderStyle="single" borderColor={theme.warn} paddingX={2} paddingY={1} width={process.stdout.columns && process.stdout.columns > 4 ? process.stdout.columns : 80}>
52
52
  <Text color={theme.warn} bold>{t.empty}</Text>
53
53
  <Box marginTop={1}>
54
54
  <Text color={theme.fgDim}>{t.emptyHint}</Text>
@@ -60,7 +60,7 @@ export function CoursePackPicker({ locale, packs, onPick, onBack }: CoursePackPi
60
60
  )
61
61
  }
62
62
  return (
63
- <Box flexDirection="column" borderStyle="round" borderColor={theme.accent} paddingX={2} paddingY={1}>
63
+ <Box flexDirection="column" borderStyle="single" borderColor={theme.accent} paddingX={2} paddingY={1} width={process.stdout.columns && process.stdout.columns > 4 ? process.stdout.columns : 80}>
64
64
  <Text color={theme.accent} bold>{t.title}</Text>
65
65
  <Box marginTop={1} flexDirection="column">
66
66
  {rows.map((row, i) => {
@@ -17,6 +17,7 @@ import { Input } from "../components/Input.tsx"
17
17
  import { Thinking } from "../components/Thinking.tsx"
18
18
  import { Toast } from "../components/Toast.tsx"
19
19
  import { getTheme } from "../theme.ts"
20
+ import { useVoiceInput } from "../useVoiceInput.ts"
20
21
  import type { KidsClientState } from "../../../core/store.ts"
21
22
 
22
23
  interface MissionScreenProps {
@@ -24,20 +25,44 @@ interface MissionScreenProps {
24
25
  locale: "zh-Hans" | "en"
25
26
  onPrompt: (text: string) => void
26
27
  onAbort: () => void
28
+ /** Leave the mission and return to the startup menu. */
29
+ onExit: () => void
27
30
  }
28
31
 
29
- export function MissionScreen({ state, locale, onPrompt, onAbort }: MissionScreenProps): React.ReactElement {
32
+ export function MissionScreen({ state, locale, onPrompt, onAbort, onExit }: MissionScreenProps): React.ReactElement {
30
33
  const theme = getTheme()
31
34
  const [draft, setDraft] = useState("")
32
35
  const placeholder = locale === "zh-Hans" ? "想做什么?告诉我吧(中文/英文都行)" : "What would you like to make? (English or Chinese)"
33
36
 
34
- useInput((_, key) => {
35
- if (key.escape && state.thinking) onAbort()
37
+ const voice = useVoiceInput(onPrompt)
38
+ const voiceBusy = voice.voiceState !== "idle"
39
+ // Spacebar talks ONLY when the kid isn't mid-typing — a non-empty draft means
40
+ // they're writing, so spacebar must stay a literal space there.
41
+ const canTalk = !state.thinking && state.pendingPermission === null && draft.trim() === "" && voice.ready
42
+
43
+ // Esc is overloaded so it never eats the kid's typing: while recording it
44
+ // cancels voice; while the AI is thinking it interrupts; with text typed it
45
+ // clears the draft; when idle + empty it leaves back to the startup menu (so
46
+ // the kid isn't trapped here — dogfood feedback).
47
+ useInput((input, key) => {
48
+ if (voiceBusy) {
49
+ if (key.escape) voice.cancel()
50
+ else if (key.return || input === " ") voice.stopListening()
51
+ return
52
+ }
53
+ if (key.escape) {
54
+ if (state.thinking) onAbort()
55
+ else if (draft.length > 0) setDraft("")
56
+ else onExit()
57
+ } else if (input === " " && canTalk) {
58
+ setDraft("")
59
+ voice.startListening()
60
+ }
36
61
  })
37
62
 
38
63
  const hint = locale === "zh-Hans"
39
- ? "提示:做完一关时打 /check 或「我做完了」就能验收 · Esc 打断 AI"
40
- : "Tip: type /check or 'I'm done' to validate · Esc interrupts the AI"
64
+ ? "提示:按「空格」对小助手说话 · 打 /check 或「我做完了」验收 · Esc 打断 AI / 返回菜单"
65
+ : "Tip: press Space to talk · type /check or 'I'm done' to validate · Esc interrupts AI / returns to menu"
41
66
 
42
67
  return (
43
68
  <Box flexDirection="column">
@@ -58,18 +83,22 @@ export function MissionScreen({ state, locale, onPrompt, onAbort }: MissionScree
58
83
  )}
59
84
  </Box>
60
85
  <Box marginTop={1}>
61
- <Input
62
- value={draft}
63
- onChange={setDraft}
64
- onSubmit={(v) => {
65
- const text = v.trim()
66
- if (!text) return
67
- setDraft("")
68
- onPrompt(text)
69
- }}
70
- placeholder={placeholder}
71
- disabled={state.thinking || state.pendingPermission !== null}
72
- />
86
+ {voiceBusy ? (
87
+ <VoiceBar voiceState={voice.voiceState} meter={voice.meter} mode={voice.mode} locale={locale} theme={theme} />
88
+ ) : (
89
+ <Input
90
+ value={draft}
91
+ onChange={setDraft}
92
+ onSubmit={(v) => {
93
+ const text = v.trim()
94
+ if (!text) return
95
+ setDraft("")
96
+ onPrompt(text)
97
+ }}
98
+ placeholder={placeholder}
99
+ disabled={state.thinking || state.pendingPermission !== null}
100
+ />
101
+ )}
73
102
  </Box>
74
103
  {state.toast ? (
75
104
  <Box marginTop={1}>
@@ -83,3 +112,41 @@ export function MissionScreen({ state, locale, onPrompt, onAbort }: MissionScree
83
112
  </Box>
84
113
  )
85
114
  }
115
+
116
+ interface VoiceBarProps {
117
+ voiceState: ReturnType<typeof useVoiceInput>["voiceState"]
118
+ meter: string
119
+ mode: "deeprouter" | "mock"
120
+ locale: "zh-Hans" | "en"
121
+ theme: ReturnType<typeof getTheme>
122
+ }
123
+
124
+ /** Replaces the input box while a voice turn is in flight: shows the mic
125
+ * indicator + live meter while listening, and a status line otherwise. */
126
+ function VoiceBar({ voiceState, meter, mode, locale, theme }: VoiceBarProps): React.ReactElement {
127
+ const zh = locale === "zh-Hans"
128
+ const label =
129
+ voiceState === "listening"
130
+ ? zh ? "🎙 听你说…(再按空格 或 回车 结束,Esc 取消)" : "🎙 Listening… (Space/Enter to finish, Esc to cancel)"
131
+ : voiceState === "transcribing"
132
+ ? zh ? "✍️ 正在听懂你说的话…" : "✍️ Figuring out what you said…"
133
+ : voiceState === "error"
134
+ ? zh ? "😅 没听清,按空格再试一次" : "😅 Didn't catch that — press Space to retry"
135
+ : zh ? "小助手在想…" : "Thinking…"
136
+
137
+ return (
138
+ <Box borderStyle="single" borderColor={theme.kid} paddingX={1} flexDirection="column">
139
+ <Box>
140
+ <Text color={theme.kid}>{label}</Text>
141
+ </Box>
142
+ {voiceState === "listening" && (
143
+ <Box>
144
+ <Text color={theme.accent}>{meter}</Text>
145
+ {mode === "mock" && (
146
+ <Text color={theme.fgDim} dimColor>{zh ? " (演示模式)" : " (demo mode)"}</Text>
147
+ )}
148
+ </Box>
149
+ )}
150
+ </Box>
151
+ )
152
+ }
@@ -9,6 +9,7 @@
9
9
  * r → resume the last session
10
10
  * w → open Airbotix Portal wallet / login in the parent's browser
11
11
  * h → show kid-friendly help
12
+ * q → quit Kids OpenCode
12
13
  */
13
14
 
14
15
  import React from "react"
@@ -24,9 +25,10 @@ interface StartupScreenProps {
24
25
  toast: ToastState | null
25
26
  onStart: (mode: "free" | "course" | "resume" | "help") => void
26
27
  onOpenWallet: () => void
28
+ onQuit: () => void
27
29
  }
28
30
 
29
- export function StartupScreen({ locale, coursePack, toast, onStart, onOpenWallet }: StartupScreenProps): React.ReactElement {
31
+ export function StartupScreen({ locale, coursePack, toast, onStart, onOpenWallet, onQuit }: StartupScreenProps): React.ReactElement {
30
32
  const theme = getTheme()
31
33
  useInput((input, key) => {
32
34
  if (key.return) onStart("course")
@@ -35,6 +37,7 @@ export function StartupScreen({ locale, coursePack, toast, onStart, onOpenWallet
35
37
  else if (input === "r") onStart("resume")
36
38
  else if (input === "w" || input === "W") onOpenWallet()
37
39
  else if (input === "h") onStart("help")
40
+ else if (input === "q" || input === "Q") onQuit()
38
41
  })
39
42
  const t = STRINGS[locale]
40
43
  return (
@@ -62,6 +65,7 @@ export function StartupScreen({ locale, coursePack, toast, onStart, onOpenWallet
62
65
  { key: "r", label: t.resume },
63
66
  { key: "w", label: t.wallet },
64
67
  { key: "h", label: t.help },
68
+ { key: "q", label: t.quit },
65
69
  ]} />
66
70
  </Box>
67
71
  {toast && (
@@ -87,6 +91,7 @@ const STRINGS = {
87
91
  resume: "继续上次",
88
92
  wallet: "钱包 / 充值(开浏览器)",
89
93
  help: "帮助",
94
+ quit: "退出",
90
95
  },
91
96
  en: {
92
97
  tagline: "🤖 Your AI coding buddy 🤖",
@@ -101,5 +106,6 @@ const STRINGS = {
101
106
  resume: "Resume last session",
102
107
  wallet: "Wallet / Top up (opens browser)",
103
108
  help: "Help",
109
+ quit: "Quit",
104
110
  },
105
111
  } as const
@@ -36,9 +36,13 @@ export interface Theme {
36
36
 
37
37
  /** Default — vibrant on a dark terminal. */
38
38
  const DARK: Theme = {
39
- fg: "white",
39
+ // Primary text is bright white and secondary text is plain white (not
40
+ // "gray"/blackBright, which renders near-invisible on many dark themes) so
41
+ // body copy actually reads. The fg/fgDim pair still differ enough to mark
42
+ // hierarchy. See dogfood feedback: "can't see the text, not prominent".
43
+ fg: "whiteBright",
40
44
  bg: "black",
41
- fgDim: "gray",
45
+ fgDim: "white",
42
46
  accent: "yellow",
43
47
  warn: "yellow",
44
48
  danger: "red",
@@ -0,0 +1,146 @@
1
+ /**
2
+ * React hook that wraps the core voice engine for the Ink UI.
3
+ *
4
+ * Keeps Ink out of core/: this hook is the ONLY place that turns the pure
5
+ * VoiceController + Recorder + STT adapter into component state (voiceState +
6
+ * meter string) and a few imperative handlers the MissionScreen binds to keys.
7
+ *
8
+ * Degrade-don't-crash, by design:
9
+ * - No sox/ffmpeg on PATH → demo mode: skips real capture, still walks the
10
+ * kid through the flow with a canned transcript so the UX is visible.
11
+ * - No DeepRouter STT creds (env) → MockStt; a missing key never crashes.
12
+ * Both modes are surfaced via `mode` so the UI can show a "demo" hint.
13
+ *
14
+ * Note (v1): the meter is a "recording in progress" pulse, not true mic
15
+ * energy, and stop is manual (space/Enter) — real-energy VAD auto-stop lands
16
+ * once Recorder streams PCM levels. The state machine + STT path are the real,
17
+ * tested ones (see core/voice/controller.ts).
18
+ */
19
+
20
+ import { useCallback, useEffect, useRef, useState } from "react"
21
+ import { tmpdir } from "node:os"
22
+ import { join } from "node:path"
23
+ import { VoiceController } from "../../core/voice/controller.ts"
24
+ import { Recorder, detectRecorder, type RecorderKind } from "../../core/voice/recorder.ts"
25
+ import { resolveSttAdapter, MockStt, type SttAdapter } from "../../core/voice/stt.ts"
26
+ import { renderMeter } from "../../core/voice/vad.ts"
27
+ import type { VoiceState } from "../../core/voice/state.ts"
28
+
29
+ export interface UseVoiceInput {
30
+ voiceState: VoiceState
31
+ /** Glyph bar for the mic indicator while listening. */
32
+ meter: string
33
+ /** "deeprouter" = real STT, "mock" = canned (no key / no recorder). */
34
+ mode: "deeprouter" | "mock"
35
+ /** False until the one-time detectRecorder() probe resolves, then true. */
36
+ ready: boolean
37
+ startListening: () => void
38
+ stopListening: () => void
39
+ cancel: () => void
40
+ }
41
+
42
+ /** Read DeepRouter STT config from env (set by the wrapper / parent setup).
43
+ * Absent → resolveSttAdapter falls back to MockStt. */
44
+ function sttConfigFromEnv() {
45
+ const baseUrl = process.env.KIDS_STT_BASE_URL
46
+ const apiKey = process.env.KIDS_STT_API_KEY
47
+ const model = process.env.KIDS_STT_MODEL
48
+ if (!baseUrl || !apiKey || !model) return undefined
49
+ return { baseUrl, apiKey, model, language: process.env.KIDS_STT_LANG }
50
+ }
51
+
52
+ /**
53
+ * @param onTranscript called with recognised text; MissionScreen passes it to
54
+ * onPrompt() so it reaches the LLM exactly like a typed message.
55
+ */
56
+ export function useVoiceInput(onTranscript: (text: string) => void): UseVoiceInput {
57
+ const [voiceState, setVoiceState] = useState<VoiceState>("idle")
58
+ const [meter, setMeter] = useState("")
59
+ const [mode, setMode] = useState<"deeprouter" | "mock">("mock")
60
+ const [ready, setReady] = useState(false)
61
+
62
+ const recorderKindRef = useRef<RecorderKind | null>(null)
63
+ const sttRef = useRef<SttAdapter>(new MockStt())
64
+ const controllerRef = useRef<VoiceController | null>(null)
65
+ const pulseRef = useRef<ReturnType<typeof setInterval> | null>(null)
66
+
67
+ // One-time capability probe: which recorder (if any) + which STT adapter.
68
+ useEffect(() => {
69
+ let cancelled = false
70
+ void (async () => {
71
+ const kind = await detectRecorder()
72
+ const { adapter, mode: sttMode } = resolveSttAdapter(sttConfigFromEnv())
73
+ if (cancelled) return
74
+ recorderKindRef.current = kind
75
+ sttRef.current = adapter
76
+ // No recorder → demo transcript so the flow is still walkable.
77
+ if (!kind) sttRef.current = new MockStt()
78
+ setMode(kind ? sttMode : "mock")
79
+ setReady(true)
80
+ })()
81
+ return () => {
82
+ cancelled = true
83
+ if (pulseRef.current) clearInterval(pulseRef.current)
84
+ }
85
+ }, [])
86
+
87
+ const stopPulse = useCallback(() => {
88
+ if (pulseRef.current) {
89
+ clearInterval(pulseRef.current)
90
+ pulseRef.current = null
91
+ }
92
+ setMeter("")
93
+ }, [])
94
+
95
+ const startListening = useCallback(() => {
96
+ if (voiceState !== "idle" || !ready) return
97
+
98
+ const kind = recorderKindRef.current
99
+ const outPath = join(tmpdir(), "kids-voice-clip.wav")
100
+ // Real recorder when present; a stub one in demo mode (start/stop no-op,
101
+ // stop() returns an empty clip and MockStt supplies canned text).
102
+ const recorder = kind
103
+ ? new Recorder(kind, outPath)
104
+ : {
105
+ start() {},
106
+ async stop() {
107
+ return { bytes: new Uint8Array(0), mimeType: "audio/wav" }
108
+ },
109
+ async cancel() {},
110
+ }
111
+
112
+ const controller = new VoiceController(recorder, sttRef.current, {
113
+ onState: setVoiceState,
114
+ onTranscript: (text) => {
115
+ stopPulse()
116
+ onTranscript(text)
117
+ },
118
+ onError: () => {
119
+ stopPulse()
120
+ },
121
+ })
122
+ controllerRef.current = controller
123
+ controller.start()
124
+
125
+ // "I'm listening" pulse — a lively bar so the kid knows the mic is hot,
126
+ // even before real PCM energy drives it.
127
+ let t = 0
128
+ pulseRef.current = setInterval(() => {
129
+ t += 1
130
+ const level = 0.35 + 0.4 * Math.abs(Math.sin(t / 2))
131
+ setMeter(renderMeter(level))
132
+ }, 120)
133
+ }, [voiceState, ready, onTranscript, stopPulse])
134
+
135
+ const stopListening = useCallback(() => {
136
+ stopPulse()
137
+ void controllerRef.current?.stop()
138
+ }, [stopPulse])
139
+
140
+ const cancel = useCallback(() => {
141
+ stopPulse()
142
+ void controllerRef.current?.cancel()
143
+ }, [stopPulse])
144
+
145
+ return { voiceState, meter, mode, ready, startListening, stopListening, cancel }
146
+ }
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Standalone voice-input demo — `bun run voice-demo`.
3
+ *
4
+ * Renders the real MissionScreen with a fake in-memory store and a no-op LLM,
5
+ * so a human can try the voice flow end-to-end (press Space → talk → it echoes
6
+ * the transcript as a kid message) WITHOUT needing opencode serve, a provider
7
+ * key, or the wallet/audit backend. This is the "you test it" harness.
8
+ *
9
+ * Behaviour by environment:
10
+ * - sox or ffmpeg on PATH → real mic capture to a wav.
11
+ * - KIDS_STT_* env set → real DeepRouter transcription of that wav.
12
+ * - neither → demo mode: canned transcript, flow still works.
13
+ */
14
+
15
+ import React, { useState } from "react"
16
+ import { render, Box, Text } from "ink"
17
+ import { MissionScreen } from "./render/ink/screens/MissionScreen.tsx"
18
+ import type { ChatMessage, KidsClientState } from "./core/store.ts"
19
+
20
+ const LOCALE: "zh-Hans" | "en" = process.env.KIDS_LOCALE === "en" ? "en" : "zh-Hans"
21
+
22
+ function baseState(messages: ChatMessage[]): KidsClientState {
23
+ return {
24
+ screen: { kind: "mission" },
25
+ sessionId: "demo",
26
+ messages,
27
+ starsBalance: 100,
28
+ starsBudget: 200,
29
+ pendingPermission: null,
30
+ dangerousTopic: null,
31
+ thinking: false,
32
+ coursePack: "voice-demo",
33
+ mission: "demo",
34
+ packTitle: LOCALE === "en" ? "Voice Demo" : "语音演示",
35
+ missionTitle: LOCALE === "en" ? "Press Space and talk" : "按空格说话试试",
36
+ missionIndex: 1,
37
+ missionTotal: 1,
38
+ toast: null,
39
+ auditBuffer: [],
40
+ }
41
+ }
42
+
43
+ let counter = 0
44
+
45
+ function DemoApp(): React.ReactElement {
46
+ const [messages, setMessages] = useState<ChatMessage[]>([])
47
+
48
+ const onPrompt = (text: string) => {
49
+ // Echo the (typed or transcribed) text as the kid's message, then a canned
50
+ // "AI" acknowledgement so the loop is visibly closed.
51
+ const ts = 1_700_000_000_000 + counter
52
+ setMessages((prev) => [
53
+ ...prev,
54
+ { id: `k${counter++}`, actor: "kid", text, streaming: false, ts },
55
+ {
56
+ id: `a${counter++}`,
57
+ actor: "agent",
58
+ text: LOCALE === "en" ? `Got it — you said: "${text}"` : `收到啦——你说的是:「${text}」`,
59
+ streaming: false,
60
+ ts: ts + 1,
61
+ },
62
+ ])
63
+ }
64
+
65
+ return (
66
+ <Box flexDirection="column">
67
+ <Box marginBottom={1}>
68
+ <Text color="magenta">
69
+ {LOCALE === "en" ? "🎙 Voice demo — press Space to talk, Esc to quit" : "🎙 语音演示 — 按空格说话,Esc 退出"}
70
+ </Text>
71
+ </Box>
72
+ <MissionScreen state={baseState(messages)} locale={LOCALE} onPrompt={onPrompt} onAbort={() => {}} onExit={() => process.exit(0)} />
73
+ </Box>
74
+ )
75
+ }
76
+
77
+ const { waitUntilExit } = render(<DemoApp />)
78
+ void waitUntilExit()