zerg-ztc 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/App.d.ts.map +1 -1
  2. package/dist/App.js +63 -2
  3. package/dist/App.js.map +1 -1
  4. package/dist/agent/commands/dictation.d.ts +3 -0
  5. package/dist/agent/commands/dictation.d.ts.map +1 -0
  6. package/dist/agent/commands/dictation.js +10 -0
  7. package/dist/agent/commands/dictation.js.map +1 -0
  8. package/dist/agent/commands/index.d.ts.map +1 -1
  9. package/dist/agent/commands/index.js +2 -1
  10. package/dist/agent/commands/index.js.map +1 -1
  11. package/dist/agent/commands/types.d.ts +7 -0
  12. package/dist/agent/commands/types.d.ts.map +1 -1
  13. package/dist/components/InputArea.d.ts +1 -0
  14. package/dist/components/InputArea.d.ts.map +1 -1
  15. package/dist/components/InputArea.js +591 -43
  16. package/dist/components/InputArea.js.map +1 -1
  17. package/dist/components/SingleMessage.d.ts.map +1 -1
  18. package/dist/components/SingleMessage.js +157 -7
  19. package/dist/components/SingleMessage.js.map +1 -1
  20. package/dist/config/types.d.ts +6 -0
  21. package/dist/config/types.d.ts.map +1 -1
  22. package/dist/ui/views/status_bar.js +2 -2
  23. package/dist/ui/views/status_bar.js.map +1 -1
  24. package/dist/utils/dictation.d.ts +46 -0
  25. package/dist/utils/dictation.d.ts.map +1 -0
  26. package/dist/utils/dictation.js +409 -0
  27. package/dist/utils/dictation.js.map +1 -0
  28. package/dist/utils/dictation_native.d.ts +51 -0
  29. package/dist/utils/dictation_native.d.ts.map +1 -0
  30. package/dist/utils/dictation_native.js +216 -0
  31. package/dist/utils/dictation_native.js.map +1 -0
  32. package/dist/utils/path_format.d.ts +20 -0
  33. package/dist/utils/path_format.d.ts.map +1 -0
  34. package/dist/utils/path_format.js +90 -0
  35. package/dist/utils/path_format.js.map +1 -0
  36. package/dist/utils/table.d.ts +38 -0
  37. package/dist/utils/table.d.ts.map +1 -0
  38. package/dist/utils/table.js +133 -0
  39. package/dist/utils/table.js.map +1 -0
  40. package/dist/utils/tool_trace.d.ts +7 -2
  41. package/dist/utils/tool_trace.d.ts.map +1 -1
  42. package/dist/utils/tool_trace.js +156 -51
  43. package/dist/utils/tool_trace.js.map +1 -1
  44. package/package.json +4 -1
  45. package/packages/ztc-dictation/Cargo.toml +43 -0
  46. package/packages/ztc-dictation/README.md +65 -0
  47. package/packages/ztc-dictation/bin/.gitkeep +0 -0
  48. package/packages/ztc-dictation/index.d.ts +16 -0
  49. package/packages/ztc-dictation/index.js +74 -0
  50. package/packages/ztc-dictation/package.json +41 -0
  51. package/packages/ztc-dictation/src/main.rs +430 -0
  52. package/src/App.tsx +98 -1
  53. package/src/agent/commands/dictation.ts +11 -0
  54. package/src/agent/commands/index.ts +2 -0
  55. package/src/agent/commands/types.ts +8 -0
  56. package/src/components/InputArea.tsx +606 -42
  57. package/src/components/SingleMessage.tsx +248 -9
  58. package/src/config/types.ts +7 -0
  59. package/src/ui/views/status_bar.ts +2 -2
  60. package/src/utils/dictation.ts +467 -0
  61. package/src/utils/dictation_native.ts +258 -0
  62. package/src/utils/path_format.ts +99 -0
  63. package/src/utils/table.ts +171 -0
  64. package/src/utils/tool_trace.ts +184 -54
package/packages/ztc-dictation/src/main.rs ADDED
@@ -0,0 +1,430 @@
+ use clap::Parser;
+ use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
+ use ringbuf::{HeapRb, traits::{Consumer, Producer, Split}};
+ use serde::Serialize;
+ use std::io::{self, Read, Write};
+ use std::path::PathBuf;
+ use std::sync::atomic::{AtomicBool, Ordering};
+ use std::sync::{Arc, Mutex};
+ use std::time::{Duration, Instant};
+ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
+
+ const SAMPLE_RATE: u32 = 16000;
+ const CHANNELS: u16 = 1;
+
+ #[derive(Parser, Debug)]
+ #[command(name = "ztc-audio")]
+ #[command(about = "Real-time audio capture and transcription for ZTC")]
+ struct Args {
+     /// Whisper model to use: tiny, base, small, medium, large
+     #[arg(short, long, default_value = "base")]
+     model: String,
+
+     /// Just list audio devices and exit
+     #[arg(long)]
+     list_devices: bool,
+
+     /// Download model if not present and exit
+     #[arg(long)]
+     download_model: bool,
+
+     /// Audio input device name (uses default if not specified)
+     #[arg(short, long)]
+     device: Option<String>,
+ }
+
+ #[derive(Serialize)]
+ #[serde(tag = "type")]
+ enum OutputMessage {
+     #[serde(rename = "ready")]
+     Ready { device: String, model: String },
+
+     #[serde(rename = "level")]
+     Level { db: f32, rms: f32 },
+
+     #[serde(rename = "text")]
+     Text { content: String, partial: bool },
+
+     #[serde(rename = "error")]
+     Error { message: String },
+
+     #[serde(rename = "device")]
+     Device { name: String, is_default: bool },
+ }
+
+ fn emit(msg: &OutputMessage) {
+     if let Ok(json) = serde_json::to_string(msg) {
+         println!("{}", json);
+         let _ = io::stdout().flush();
+     }
+ }
+
+ fn get_model_path(model_name: &str) -> PathBuf {
+     let models_dir = dirs::home_dir()
+         .unwrap_or_else(|| PathBuf::from("."))
+         .join(".ztc")
+         .join("models");
+
+     std::fs::create_dir_all(&models_dir).ok();
+     models_dir.join(format!("ggml-{}.bin", model_name))
+ }
+
+ fn download_model(model_name: &str) -> Result<PathBuf, String> {
+     let path = get_model_path(model_name);
+
+     if path.exists() {
+         return Ok(path);
+     }
+
+     let url = format!(
+         "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-{}.bin",
+         model_name
+     );
+
+     eprintln!("Downloading whisper model '{}' from {}", model_name, url);
+
+     let response = ureq::get(&url)
+         .call()
+         .map_err(|e| format!("Failed to download model: {}", e))?;
+
+     let total_size = response
+         .header("Content-Length")
+         .and_then(|s| s.parse::<u64>().ok())
+         .unwrap_or(0);
+
+     let pb = indicatif::ProgressBar::new(total_size);
+     pb.set_style(
+         indicatif::ProgressStyle::default_bar()
+             .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta})")
+             .unwrap()
+             .progress_chars("#>-"),
+     );
+
+     let mut reader = response.into_reader();
+     let mut file = std::fs::File::create(&path)
+         .map_err(|e| format!("Failed to create model file: {}", e))?;
+
+     let mut buffer = [0u8; 8192];
+     let mut downloaded = 0u64;
+
+     loop {
+         let bytes_read = reader.read(&mut buffer)
+             .map_err(|e| format!("Failed to read from network: {}", e))?;
+
+         if bytes_read == 0 {
+             break;
+         }
+
+         file.write_all(&buffer[..bytes_read])
+             .map_err(|e| format!("Failed to write to file: {}", e))?;
+
+         downloaded += bytes_read as u64;
+         pb.set_position(downloaded);
+     }
+
+     pb.finish_with_message("Download complete");
+     eprintln!("Model saved to {:?}", path);
+
+     Ok(path)
+ }
+
+ fn list_devices() {
+     let host = cpal::default_host();
+
+     if let Some(device) = host.default_input_device() {
+         if let Ok(name) = device.name() {
+             emit(&OutputMessage::Device {
+                 name,
+                 is_default: true,
+             });
+         }
+     }
+
+     if let Ok(devices) = host.input_devices() {
+         for device in devices {
+             if let Ok(name) = device.name() {
+                 emit(&OutputMessage::Device {
+                     name,
+                     is_default: false,
+                 });
+             }
+         }
+     }
+ }
+
+ fn calculate_db(samples: &[f32]) -> (f32, f32) {
+     if samples.is_empty() {
+         return (-60.0, 0.0);
+     }
+
+     let sum_squares: f32 = samples.iter().map(|s| s * s).sum();
+     let rms = (sum_squares / samples.len() as f32).sqrt();
+
+     // Convert to dB (with floor at -60dB)
+     let db = if rms > 0.0 {
+         20.0 * rms.log10()
+     } else {
+         -60.0
+     };
+
+     (db.max(-60.0), rms)
+ }
+
+ fn run_audio_capture(
+     device_name: Option<&str>,
+     model_path: PathBuf,
+ ) -> Result<(), String> {
+     let host = cpal::default_host();
+
+     // Find the audio device
+     let device = if let Some(name) = device_name {
+         host.input_devices()
+             .map_err(|e| format!("Failed to enumerate devices: {}", e))?
+             .find(|d| d.name().map(|n| n.contains(name)).unwrap_or(false))
+             .ok_or_else(|| format!("Device '{}' not found", name))?
+     } else {
+         host.default_input_device()
+             .ok_or_else(|| "No default input device".to_string())?
+     };
+
+     let device_name_str = device.name().unwrap_or_else(|_| "Unknown".to_string());
+
+     // Initialize Whisper
+     let ctx = WhisperContext::new_with_params(
+         model_path.to_str().unwrap(),
+         WhisperContextParameters::default(),
+     )
+     .map_err(|e| format!("Failed to load whisper model: {}", e))?;
+
+     // Get the device's default config and use its sample rate
+     let default_config = device.default_input_config()
+         .map_err(|e| format!("Failed to get default input config: {}", e))?;
+
+     let device_sample_rate = default_config.sample_rate().0;
+     let device_channels = default_config.channels();
+
+     emit(&OutputMessage::Ready {
+         device: device_name_str.clone(),
+         model: model_path.file_stem()
+             .and_then(|s| s.to_str())
+             .unwrap_or("unknown")
+             .to_string(),
+     });
+
+     // Use device's native sample rate and resample to 16kHz for Whisper
+     let config = cpal::StreamConfig {
+         channels: device_channels,
+         sample_rate: cpal::SampleRate(device_sample_rate),
+         buffer_size: cpal::BufferSize::Default,
+     };
+
+     let resample_ratio = SAMPLE_RATE as f32 / device_sample_rate as f32;
+
+     // Shared state
+     let running = Arc::new(AtomicBool::new(true));
+     let running_clone = running.clone();
+
+     // Ring buffer for audio samples (5 seconds of audio at 16kHz)
+     let ring_buffer = HeapRb::<f32>::new(SAMPLE_RATE as usize * 5);
+     let (mut producer, mut consumer) = ring_buffer.split();
+
+     // Buffer for level calculation (100ms chunks)
+     let level_buffer = Arc::new(Mutex::new(Vec::with_capacity(SAMPLE_RATE as usize / 10)));
+     let level_buffer_clone = level_buffer.clone();
+
+     // Resampling state - accumulate fractional samples
+     let resample_accum = Arc::new(Mutex::new(0.0f32));
+     let resample_accum_clone = resample_accum.clone();
+
+     // Handle Ctrl+C
+     ctrlc::set_handler(move || {
+         running_clone.store(false, Ordering::SeqCst);
+     })
+     .map_err(|e| format!("Failed to set signal handler: {}", e))?;
+
+     // Start audio capture
+     let stream = device
+         .build_input_stream(
+             &config,
+             move |data: &[f32], _: &cpal::InputCallbackInfo| {
+                 // Convert to mono and resample to 16kHz
+                 let num_channels = device_channels as usize;
+
+                 // First convert to mono by averaging channels
+                 let mono_samples: Vec<f32> = data
+                     .chunks(num_channels)
+                     .map(|frame| frame.iter().sum::<f32>() / num_channels as f32)
+                     .collect();
+
+                 // Simple linear resampling to 16kHz
+                 // For better quality, could use a proper resampler crate
+                 let mut accum = resample_accum_clone.lock().unwrap();
+                 let mut resampled = Vec::new();
+
+                 for &sample in &mono_samples {
+                     *accum += resample_ratio;
+                     while *accum >= 1.0 {
+                         resampled.push(sample);
+                         *accum -= 1.0;
+                     }
+                 }
+
+                 // Push resampled samples to ring buffer for transcription
+                 for sample in &resampled {
+                     let _ = producer.try_push(*sample);
+                 }
+
+                 // Also collect for level metering (use original mono samples for responsiveness)
+                 if let Ok(mut buf) = level_buffer_clone.lock() {
+                     buf.extend_from_slice(&mono_samples);
+                 }
+             },
+             |err| {
+                 emit(&OutputMessage::Error {
+                     message: format!("Audio stream error: {}", err),
+                 });
+             },
+             None,
+         )
+         .map_err(|e| format!("Failed to build input stream: {}", e))?;
+
+     stream.play().map_err(|e| format!("Failed to start stream: {}", e))?;
+
+     // Main processing loop
+     let mut accumulated_audio: Vec<f32> = Vec::new();
+     let mut last_level_time = Instant::now();
+     let mut last_transcribe_time = Instant::now();
+     let level_interval = Duration::from_millis(50); // 20 Hz level updates
+     let transcribe_interval = Duration::from_millis(500); // Transcribe every 500ms
+
+     while running.load(Ordering::SeqCst) {
+         // Read stdin for commands (non-blocking would be better but this works)
+         // For now, just rely on Ctrl+C / signal handling
+
+         // Emit audio levels
+         if last_level_time.elapsed() >= level_interval {
+             if let Ok(mut buf) = level_buffer.lock() {
+                 if !buf.is_empty() {
+                     let (db, rms) = calculate_db(&buf);
+                     emit(&OutputMessage::Level { db, rms });
+                     buf.clear();
+                 }
+             }
+             last_level_time = Instant::now();
+         }
+
+         // Collect audio for transcription
+         while let Some(sample) = consumer.try_pop() {
+             accumulated_audio.push(sample);
+         }
+
+         // Transcribe periodically
+         if last_transcribe_time.elapsed() >= transcribe_interval && !accumulated_audio.is_empty() {
+             // Run whisper on accumulated audio
+             let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
+             params.set_language(Some("en"));
+             params.set_print_special(false);
+             params.set_print_progress(false);
+             params.set_print_realtime(false);
+             params.set_print_timestamps(false);
+             params.set_single_segment(true);
+             params.set_no_context(true);
+
+             let mut state = ctx.create_state()
+                 .map_err(|e| format!("Failed to create whisper state: {}", e))?;
+
+             if state.full(params, &accumulated_audio).is_ok() {
+                 let num_segments = state.full_n_segments()
+                     .unwrap_or(0);
+
+                 let mut text = String::new();
+                 for i in 0..num_segments {
+                     if let Ok(segment) = state.full_get_segment_text(i) {
+                         text.push_str(&segment);
+                     }
+                 }
+
+                 let text = text.trim().to_string();
+                 if !text.is_empty() {
+                     emit(&OutputMessage::Text {
+                         content: text,
+                         partial: true,
+                     });
+                 }
+             }
+
+             last_transcribe_time = Instant::now();
+         }
+
+         std::thread::sleep(Duration::from_millis(10));
+     }
+
+     // Final transcription
+     if !accumulated_audio.is_empty() {
+         let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
+         params.set_language(Some("en"));
+         params.set_print_special(false);
+         params.set_print_progress(false);
+         params.set_print_realtime(false);
+         params.set_print_timestamps(false);
+
+         let mut state = ctx.create_state()
+             .map_err(|e| format!("Failed to create whisper state: {}", e))?;
+
+         if state.full(params, &accumulated_audio).is_ok() {
+             let num_segments = state.full_n_segments().unwrap_or(0);
+
+             let mut text = String::new();
+             for i in 0..num_segments {
+                 if let Ok(segment) = state.full_get_segment_text(i) {
+                     text.push_str(&segment);
+                 }
+             }
+
+             let text = text.trim().to_string();
+             emit(&OutputMessage::Text {
+                 content: text,
+                 partial: false,
+             });
+         }
+     }
+
+     Ok(())
+ }
+
+ fn main() {
+     let args = Args::parse();
+
+     if args.list_devices {
+         list_devices();
+         return;
+     }
+
+     if args.download_model {
+         match download_model(&args.model) {
+             Ok(path) => {
+                 eprintln!("Model ready at {:?}", path);
+             }
+             Err(e) => {
+                 emit(&OutputMessage::Error { message: e });
+                 std::process::exit(1);
+             }
+         }
+         return;
+     }
+
+     // Ensure model is downloaded
+     let model_path = match download_model(&args.model) {
+         Ok(path) => path,
+         Err(e) => {
+             emit(&OutputMessage::Error { message: e });
+             std::process::exit(1);
+         }
+     };
+
+     // Run audio capture and transcription
+     if let Err(e) = run_audio_capture(args.device.as_deref(), model_path) {
+         emit(&OutputMessage::Error { message: e });
+         std::process::exit(1);
+     }
+ }
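
The sidecar binary communicates with the Node process over newline-delimited JSON on stdout: each OutputMessage above serializes to one line, discriminated by its "type" field. The sketch below shows how a consumer might read that stream. The helper name listen and the hard-coded --model flag are illustrative only; the shipped dictation_native.ts wires these events into Ink state and may differ in detail.

// Sketch of a consumer for the binary's line-delimited JSON protocol.
// Hypothetical: listen() is not an API of this package.
import { spawn } from 'node:child_process';
import { createInterface } from 'node:readline';

type DictationMessage =
  | { type: 'ready'; device: string; model: string }
  | { type: 'level'; db: number; rms: number }
  | { type: 'text'; content: string; partial: boolean }
  | { type: 'error'; message: string }
  | { type: 'device'; name: string; is_default: boolean };

function listen(binaryPath: string, onMessage: (msg: DictationMessage) => void): () => void {
  const child = spawn(binaryPath, ['--model', 'base']);
  const rl = createInterface({ input: child.stdout });
  rl.on('line', (line) => {
    try {
      onMessage(JSON.parse(line) as DictationMessage); // one JSON object per line
    } catch {
      // ignore non-JSON noise; download progress goes to stderr anyway
    }
  });
  // SIGINT mirrors Ctrl+C: the binary emits a final non-partial "text" message, then exits
  return () => child.kill('SIGINT');
}
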
package/src/App.tsx CHANGED
@@ -1,5 +1,5 @@
  import React, { useState, useCallback, useMemo, useRef } from 'react';
- import { Box, useApp, useInput, Static } from 'ink';
+ import { Box, Text, useApp, useInput, Static } from 'ink';
  import { Header, MessageList, SingleMessage, InputArea, StatusBar, FullScreen, ActivityLine, useScreenSize } from './components/index.js';
  import { buildAppView } from './ui/views/app.js';
  import { useMirror } from './web/mirror_hook.js';
@@ -25,11 +25,35 @@ import { checkForUpdate } from './utils/update.js';
  import { getVersion } from './utils/version.js';
  import { DEFAULT_SPINNER_VERBS } from './utils/spinner_verbs.js';
  import { SPINNER_FRAMES } from './utils/spinner_frames.js';
+ import { getRecordingDeviceName } from './utils/dictation.js';
+ import {
+   isNativeDictationAvailable,
+   isNativeRecording,
+   onDictationEvent,
+   startNativeRecording,
+   stopNativeRecording,
+   cancelNativeRecording,
+   DictationEvent
+ } from './utils/dictation_native.js';

  // --- Utilities ---

  const generateId = () => Math.random().toString(36).slice(2, 11);

+ /**
+  * Render a VU meter from dB level
+  * @param db Audio level in dB (-60 to 0)
+  * @returns ASCII VU meter string
+  */
+ function renderVUMeter(db: number): string {
+   // Map -60dB..0dB to 0..10 bars
+   const normalized = Math.max(0, Math.min(1, (db + 60) / 60));
+   const bars = Math.round(normalized * 10);
+   const filled = '█'.repeat(bars);
+   const empty = '░'.repeat(10 - bars);
+   return `[${filled}${empty}]`;
+ }
+
  // --- Initial welcome message ---

  function getWelcomeMessage(): Message {
@@ -112,6 +136,12 @@ export const App: React.FC = () => {
    const [retryAvailable, setRetryAvailable] = useState(false);
    const [toast, setToast] = useState<string | null>(null);
    const toastTimerRef = useRef<NodeJS.Timeout | null>(null);
+   const [isRecording, setIsRecording] = useState(false);
+   const [recordingDevice, setRecordingDevice] = useState<string | null>(null);
+   const [isTranscribing, setIsTranscribing] = useState(false);
+   const [audioLevel, setAudioLevel] = useState<number>(-60); // dB
+   const [partialTranscript, setPartialTranscript] = useState<string>('');
+   const [useNativeDictation] = useState(isNativeDictationAvailable());
    const [spinnerLabel, setSpinnerLabel] = useState<string | null>(null);
    const [spinnerFrame, setSpinnerFrame] = useState<string | null>(null);
    const streamingMessageId = React.useRef<string | null>(null);
@@ -715,6 +745,48 @@ export const App: React.FC = () => {
      }, 2500);
    }, []);

+   const handleDictationStateChange = useCallback((state: 'idle' | 'recording' | 'transcribing') => {
+     setIsRecording(state === 'recording');
+     setIsTranscribing(state === 'transcribing');
+     if (state === 'recording') {
+       setRecordingDevice(getRecordingDeviceName());
+     } else if (state === 'idle') {
+       setRecordingDevice(null);
+       setAudioLevel(-60);
+       setPartialTranscript('');
+     }
+   }, []);
+
+   // Native dictation event handler
+   React.useEffect(() => {
+     if (!useNativeDictation) return;
+
+     const cleanup = onDictationEvent((event: DictationEvent) => {
+       switch (event.type) {
+         case 'ready':
+           setRecordingDevice(event.device || 'Unknown');
+           break;
+         case 'level':
+           setAudioLevel(event.db ?? -60);
+           break;
+         case 'text':
+           if (event.partial) {
+             setPartialTranscript(event.content || '');
+           }
+           break;
+         case 'stopped':
+           setIsRecording(false);
+           setAudioLevel(-60);
+           break;
+         case 'error':
+           showToast(`Dictation error: ${event.message}`);
+           break;
+       }
+     });
+
+     return cleanup;
+   }, [useNativeDictation, showToast]);
+
    useMirror(layoutTree, inputBus);

    // Compute messages for Static (completed) vs live (streaming)
@@ -772,12 +844,37 @@ export const App: React.FC = () => {
        inputMode={inputMode}
      />

+     {isRecording && (
+       <Box flexDirection="column" paddingLeft={2}>
+         <Box>
+           <Text color="red">● Recording</Text>
+           {recordingDevice && (
+             <Text color="gray"> [{recordingDevice}]</Text>
+           )}
+           <Text color="gray"> </Text>
+           <Text color="cyan">{renderVUMeter(audioLevel)}</Text>
+           <Text color="gray"> — Ctrl+R to stop</Text>
+         </Box>
+         {partialTranscript && (
+           <Box>
+             <Text color="gray" italic>"{partialTranscript}"</Text>
+           </Box>
+         )}
+       </Box>
+     )}
+     {isTranscribing && !useNativeDictation && (
+       <Box paddingLeft={2}>
+         <Text color="yellow">⠋ Transcribing...</Text>
+       </Box>
+     )}
+
      <InputArea
        onSubmit={handleSubmit}
        onCommand={handleCommand}
        commands={commands}
        onStateChange={setInputSnapshot}
        onToast={showToast}
+       onDictationStateChange={handleDictationStateChange}
        cols={columns}
        inputBus={inputBus}
        disabled={false}
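
Worked example for the new VU meter: at a level of -30 dB, (db + 60) / 60 normalizes to 0.5, so renderVUMeter fills 5 of 10 cells and returns [█████░░░░░]; the -60 dB idle floor emitted by the Rust side renders as a fully empty bar.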
package/src/agent/commands/dictation.ts ADDED
@@ -0,0 +1,11 @@
+ import { Command } from './types.js';
+ import { getDictationStatus } from '../../utils/dictation.js';
+
+ export const dictationStatusCommand: Command = {
+   name: 'dictation',
+   description: 'Check voice dictation status and availability',
+   handler: async (args, ctx) => {
+     const status = getDictationStatus();
+     ctx.addMessage({ role: 'system', content: status });
+   }
+ };
package/src/agent/commands/index.ts CHANGED
@@ -15,6 +15,7 @@ import { retryCommand } from './retry.js';
  import { inputModeCommand } from './input_mode.js';
  import { keybindingsCommand } from './keybindings.js';
  import { updateCommand } from './update.js';
+ import { dictationStatusCommand } from './dictation.js';
  import { Command } from './types.js';

  const commandList: Command[] = [];
@@ -39,6 +40,7 @@ commandList.push(
    updateCommand,
    inputModeCommand,
    retryCommand,
+   dictationStatusCommand,
    exitCommand
  );

package/src/agent/commands/types.ts CHANGED
@@ -53,6 +53,12 @@ export interface SkillsController
    list: () => Promise<Skill[]>;
  }

+ export interface DictationController {
+   startRecording: () => void;
+   stopRecording: () => Promise<string>; // Returns transcribed text
+   isRecording: () => boolean;
+ }
+
  export interface CommandContext {
    addMessage: (msg: Omit<Message, 'id' | 'timestamp'>) => void;
    clearMessages: () => void;
@@ -68,8 +74,10 @@ export interface CommandContext
    clipboard: ClipboardController;
    models: ModelsController;
    skills: SkillsController;
+   dictation?: DictationController;
    getInputMode: () => 'queue' | 'interrupt';
    setInputMode: (mode: 'queue' | 'interrupt') => void;
+   setInputText?: (text: string) => void; // Set input field text
  }

  export interface Command {
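
For illustration, the new optional dictation controller and setInputText setter compose into a toggle-style command. The following sketch is hypothetical and is not part of the 0.1.11 release.

// Hypothetical sketch only: not shipped in 0.1.11.
import { Command } from './types.js';

export const transcribeCommand: Command = {
  name: 'transcribe',
  description: 'Toggle dictation and place the transcript in the input field',
  handler: async (_args, ctx) => {
    const dictation = ctx.dictation;
    if (!dictation) {
      ctx.addMessage({ role: 'system', content: 'Dictation is not available.' });
      return;
    }
    if (!dictation.isRecording()) {
      dictation.startRecording(); // run the command again to stop
    } else {
      const text = await dictation.stopRecording(); // resolves with the transcribed text
      ctx.setInputText?.(text); // optional setter introduced in this release
    }
  }
};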