zerg-ztc 0.1.7 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/App.d.ts.map +1 -1
- package/dist/App.js +75 -8
- package/dist/App.js.map +1 -1
- package/dist/agent/agent.d.ts +2 -0
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +111 -10
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/backends/anthropic.d.ts.map +1 -1
- package/dist/agent/backends/anthropic.js +15 -3
- package/dist/agent/backends/anthropic.js.map +1 -1
- package/dist/agent/backends/gemini.d.ts.map +1 -1
- package/dist/agent/backends/gemini.js +12 -0
- package/dist/agent/backends/gemini.js.map +1 -1
- package/dist/agent/backends/index.d.ts +1 -1
- package/dist/agent/backends/index.d.ts.map +1 -1
- package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
- package/dist/agent/backends/openai_compatible.js +12 -0
- package/dist/agent/backends/openai_compatible.js.map +1 -1
- package/dist/agent/backends/types.d.ts +21 -1
- package/dist/agent/backends/types.d.ts.map +1 -1
- package/dist/agent/commands/dictation.d.ts +3 -0
- package/dist/agent/commands/dictation.d.ts.map +1 -0
- package/dist/agent/commands/dictation.js +10 -0
- package/dist/agent/commands/dictation.js.map +1 -0
- package/dist/agent/commands/index.d.ts.map +1 -1
- package/dist/agent/commands/index.js +2 -1
- package/dist/agent/commands/index.js.map +1 -1
- package/dist/agent/commands/types.d.ts +7 -0
- package/dist/agent/commands/types.d.ts.map +1 -1
- package/dist/agent/runtime/capabilities.d.ts +2 -1
- package/dist/agent/runtime/capabilities.d.ts.map +1 -1
- package/dist/agent/runtime/capabilities.js +1 -0
- package/dist/agent/runtime/capabilities.js.map +1 -1
- package/dist/agent/tools/index.d.ts +1 -0
- package/dist/agent/tools/index.d.ts.map +1 -1
- package/dist/agent/tools/index.js +6 -1
- package/dist/agent/tools/index.js.map +1 -1
- package/dist/agent/tools/screenshot.d.ts +23 -0
- package/dist/agent/tools/screenshot.d.ts.map +1 -0
- package/dist/agent/tools/screenshot.js +735 -0
- package/dist/agent/tools/screenshot.js.map +1 -0
- package/dist/components/InputArea.d.ts +1 -0
- package/dist/components/InputArea.d.ts.map +1 -1
- package/dist/components/InputArea.js +591 -43
- package/dist/components/InputArea.js.map +1 -1
- package/dist/components/SingleMessage.d.ts.map +1 -1
- package/dist/components/SingleMessage.js +157 -7
- package/dist/components/SingleMessage.js.map +1 -1
- package/dist/config/types.d.ts +6 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/ui/views/status_bar.js +2 -2
- package/dist/ui/views/status_bar.js.map +1 -1
- package/dist/utils/dictation.d.ts +46 -0
- package/dist/utils/dictation.d.ts.map +1 -0
- package/dist/utils/dictation.js +409 -0
- package/dist/utils/dictation.js.map +1 -0
- package/dist/utils/dictation_native.d.ts +51 -0
- package/dist/utils/dictation_native.d.ts.map +1 -0
- package/dist/utils/dictation_native.js +216 -0
- package/dist/utils/dictation_native.js.map +1 -0
- package/dist/utils/path_complete.d.ts.map +1 -1
- package/dist/utils/path_complete.js +31 -6
- package/dist/utils/path_complete.js.map +1 -1
- package/dist/utils/path_format.d.ts +20 -0
- package/dist/utils/path_format.d.ts.map +1 -0
- package/dist/utils/path_format.js +90 -0
- package/dist/utils/path_format.js.map +1 -0
- package/dist/utils/table.d.ts +38 -0
- package/dist/utils/table.d.ts.map +1 -0
- package/dist/utils/table.js +133 -0
- package/dist/utils/table.js.map +1 -0
- package/dist/utils/tool_trace.d.ts +7 -2
- package/dist/utils/tool_trace.d.ts.map +1 -1
- package/dist/utils/tool_trace.js +156 -51
- package/dist/utils/tool_trace.js.map +1 -1
- package/package.json +4 -1
- package/packages/ztc-dictation/Cargo.toml +43 -0
- package/packages/ztc-dictation/README.md +65 -0
- package/packages/ztc-dictation/bin/.gitkeep +0 -0
- package/packages/ztc-dictation/index.d.ts +16 -0
- package/packages/ztc-dictation/index.js +74 -0
- package/packages/ztc-dictation/package.json +41 -0
- package/packages/ztc-dictation/src/main.rs +430 -0
- package/src/App.tsx +110 -7
- package/src/agent/agent.ts +116 -11
- package/src/agent/backends/anthropic.ts +15 -5
- package/src/agent/backends/gemini.ts +12 -0
- package/src/agent/backends/index.ts +1 -0
- package/src/agent/backends/openai_compatible.ts +12 -0
- package/src/agent/backends/types.ts +25 -1
- package/src/agent/commands/dictation.ts +11 -0
- package/src/agent/commands/index.ts +2 -0
- package/src/agent/commands/types.ts +8 -0
- package/src/agent/runtime/capabilities.ts +2 -1
- package/src/agent/tools/index.ts +6 -1
- package/src/agent/tools/screenshot.ts +821 -0
- package/src/components/InputArea.tsx +606 -42
- package/src/components/SingleMessage.tsx +248 -9
- package/src/config/types.ts +7 -0
- package/src/ui/views/status_bar.ts +2 -2
- package/src/utils/dictation.ts +467 -0
- package/src/utils/dictation_native.ts +258 -0
- package/src/utils/path_complete.ts +30 -4
- package/src/utils/path_format.ts +99 -0
- package/src/utils/table.ts +171 -0
- package/src/utils/tool_trace.ts +184 -54
package/src/utils/dictation.ts

```diff
@@ -0,0 +1,467 @@
+/**
+ * Audio dictation with push-to-talk support
+ * Automatic provider fallback:
+ * 1. OpenAI Whisper API (if OPENAI_API_KEY available)
+ * 2. Local whisper (if installed)
+ * 3. macOS native (SFSpeechRecognizer)
+ */
+
+import { spawn, execSync, spawnSync, ChildProcess } from 'child_process';
+import { writeFile, readFile, unlink, mkdir } from 'fs/promises';
+import { existsSync } from 'fs';
+import { join } from 'path';
+import { tmpdir, homedir, platform } from 'os';
+
+export type DictationProvider = 'openai' | 'local' | 'macos';
+
+export interface DictationResult {
+  text: string;
+  duration?: number;
+  provider: DictationProvider;
+}
+
+const RECORDING_DIR = join(tmpdir(), 'ztc-dictation');
+const RECORDING_FILE = join(RECORDING_DIR, 'recording.wav');
+
+// Global state for push-to-talk
+let activeRecording: {
+  process: ChildProcess;
+  startTime: number;
+} | null = null;
+
+let cachedRecordingTool: 'sox' | 'ffmpeg' | 'arecord' | null = null;
+let cachedProvider: DictationProvider | null = null;
+
+/**
+ * Check for available recording tool (cached)
+ */
+function getRecordingTool(): 'sox' | 'ffmpeg' | 'arecord' | null {
+  if (cachedRecordingTool !== null) return cachedRecordingTool;
+
+  const os = platform();
+
+  if (os === 'darwin' || os === 'linux') {
+    try {
+      execSync('which rec', { stdio: 'ignore' });
+      cachedRecordingTool = 'sox';
+      return 'sox';
+    } catch {}
+
+    try {
+      execSync('which ffmpeg', { stdio: 'ignore' });
+      cachedRecordingTool = 'ffmpeg';
+      return 'ffmpeg';
+    } catch {}
+  }
+
+  if (os === 'linux') {
+    try {
+      execSync('which arecord', { stdio: 'ignore' });
+      cachedRecordingTool = 'arecord';
+      return 'arecord';
+    } catch {}
+  }
+
+  return null;
+}
+
+/**
+ * Get the best available transcription provider (cached)
+ */
+export function getBestProvider(): DictationProvider | null {
+  if (cachedProvider !== null) return cachedProvider;
+
+  // Check OpenAI first (best quality)
+  if (process.env.OPENAI_API_KEY) {
+    cachedProvider = 'openai';
+    return 'openai';
+  }
+
+  // Check local whisper
+  try {
+    execSync('which whisper', { stdio: 'ignore' });
+    cachedProvider = 'local';
+    return 'local';
+  } catch {}
+
+  try {
+    execSync('which whisper-cpp', { stdio: 'ignore' });
+    cachedProvider = 'local';
+    return 'local';
+  } catch {}
+
+  // macOS native as fallback
+  if (platform() === 'darwin') {
+    cachedProvider = 'macos';
+    return 'macos';
+  }
+
+  return null;
+}
+
+/**
+ * Check if dictation is available
+ */
+export function isDictationAvailable(): boolean {
+  return getRecordingTool() !== null && getBestProvider() !== null;
+}
+
+/**
+ * Check if currently recording
+ */
+export function isRecording(): boolean {
+  return activeRecording !== null;
+}
+
+/**
+ * Get the name of the recording device/tool being used
+ */
+export function getRecordingDeviceName(): string | null {
+  const tool = getRecordingTool();
+  if (!tool) return null;
+
+  const os = platform();
+
+  if (tool === 'sox') {
+    // On macOS, sox uses coreaudio
+    if (os === 'darwin') {
+      return 'Default Input (sox)';
+    }
+    return 'Default Input (sox)';
+  }
+
+  if (tool === 'ffmpeg') {
+    if (os === 'darwin') {
+      return 'Default Input (avfoundation)';
+    }
+    return 'Default Input (alsa)';
+  }
+
+  if (tool === 'arecord') {
+    return 'Default Input (alsa)';
+  }
+
+  return tool;
+}
+
+/**
+ * Start recording (push-to-talk: press to start)
+ */
+export function startRecording(): boolean {
+  if (activeRecording) {
+    return false; // Already recording
+  }
+
+  const tool = getRecordingTool();
+  if (!tool) {
+    throw new Error('No recording tool available. Install sox: brew install sox');
+  }
+
+  // Ensure directory exists
+  if (!existsSync(RECORDING_DIR)) {
+    execSync(`mkdir -p "${RECORDING_DIR}"`);
+  }
+
+  const os = platform();
+  let proc: ChildProcess;
+
+  if (tool === 'sox') {
+    proc = spawn('rec', [
+      '-q',           // Quiet
+      '-r', '16000',  // 16kHz (Whisper optimal)
+      '-c', '1',      // Mono
+      '-b', '16',     // 16-bit
+      RECORDING_FILE
+    ], { stdio: 'ignore' });
+  } else if (tool === 'ffmpeg') {
+    if (os === 'darwin') {
+      proc = spawn('ffmpeg', [
+        '-y', '-f', 'avfoundation', '-i', ':0',
+        '-ar', '16000', '-ac', '1', '-sample_fmt', 's16',
+        RECORDING_FILE
+      ], { stdio: 'ignore' });
+    } else {
+      proc = spawn('ffmpeg', [
+        '-y', '-f', 'alsa', '-i', 'default',
+        '-ar', '16000', '-ac', '1', '-sample_fmt', 's16',
+        RECORDING_FILE
+      ], { stdio: 'ignore' });
+    }
+  } else {
+    proc = spawn('arecord', [
+      '-f', 'S16_LE', '-r', '16000', '-c', '1',
+      RECORDING_FILE
+    ], { stdio: 'ignore' });
+  }
+
+  activeRecording = {
+    process: proc,
+    startTime: Date.now()
+  };
+
+  return true;
+}
+
+/**
+ * Stop recording and transcribe (push-to-talk: release to stop)
+ */
+export async function stopRecordingAndTranscribe(): Promise<DictationResult> {
+  if (!activeRecording) {
+    throw new Error('Not currently recording');
+  }
+
+  const { process: proc, startTime } = activeRecording;
+  activeRecording = null;
+
+  // Stop the recording process
+  await new Promise<void>((resolve) => {
+    proc.on('close', () => resolve());
+    proc.on('error', () => resolve());
+    proc.kill('SIGINT');
+    // Fallback timeout
+    setTimeout(() => resolve(), 1000);
+  });
+
+  // Small delay to ensure file is written
+  await new Promise(resolve => setTimeout(resolve, 100));
+
+  if (!existsSync(RECORDING_FILE)) {
+    throw new Error('Recording file not created');
+  }
+
+  const recordingDuration = Date.now() - startTime;
+  const provider = getBestProvider();
+
+  if (!provider) {
+    throw new Error('No transcription provider available');
+  }
+
+  // Transcribe
+  const transcribeStart = Date.now();
+  let text: string;
+
+  try {
+    text = await transcribe(RECORDING_FILE, provider);
+  } finally {
+    // Clean up recording file
+    try {
+      await unlink(RECORDING_FILE);
+    } catch {}
+  }
+
+  return {
+    text: text.trim(),
+    duration: Date.now() - transcribeStart,
+    provider
+  };
+}
+
+/**
+ * Cancel active recording without transcribing
+ */
+export function cancelRecording(): void {
+  if (activeRecording) {
+    activeRecording.process.kill('SIGKILL');
+    activeRecording = null;
+    // Clean up file
+    try {
+      if (existsSync(RECORDING_FILE)) {
+        execSync(`rm -f "${RECORDING_FILE}"`);
+      }
+    } catch {}
+  }
+}
+
+/**
+ * Transcribe audio file
+ */
+async function transcribe(audioPath: string, provider: DictationProvider): Promise<string> {
+  switch (provider) {
+    case 'openai':
+      return transcribeOpenAI(audioPath);
+    case 'local':
+      return transcribeLocal(audioPath);
+    case 'macos':
+      return transcribeMacOS(audioPath);
+    default:
+      throw new Error(`Unknown provider: ${provider}`);
+  }
+}
+
+/**
+ * Transcribe using OpenAI Whisper API
+ */
+async function transcribeOpenAI(audioPath: string): Promise<string> {
+  const apiKey = process.env.OPENAI_API_KEY;
+  if (!apiKey) {
+    throw new Error('OPENAI_API_KEY not set');
+  }
+
+  const audioData = await readFile(audioPath);
+
+  const formData = new FormData();
+  formData.append('file', new Blob([audioData], { type: 'audio/wav' }), 'audio.wav');
+  formData.append('model', 'whisper-1');
+
+  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
+    method: 'POST',
+    headers: { 'Authorization': `Bearer ${apiKey}` },
+    body: formData
+  });
+
+  if (!response.ok) {
+    const error = await response.text();
+    throw new Error(`Whisper API error: ${error}`);
+  }
+
+  const result = await response.json() as { text: string };
+  return result.text;
+}
+
+/**
+ * Transcribe using local whisper
+ */
+async function transcribeLocal(audioPath: string): Promise<string> {
+  // Try Python whisper first
+  try {
+    execSync('which whisper', { stdio: 'ignore' });
+
+    const result = spawnSync('whisper', [
+      audioPath,
+      '--model', 'base',
+      '--output_format', 'txt',
+      '--output_dir', RECORDING_DIR
+    ], { encoding: 'utf-8', timeout: 60000 });
+
+    if (result.status === 0) {
+      const outputFile = audioPath.replace(/\.wav$/, '.txt');
+      if (existsSync(outputFile)) {
+        const text = await readFile(outputFile, 'utf-8');
+        await unlink(outputFile).catch(() => {});
+        return text.trim();
+      }
+    }
+  } catch {}
+
+  // Try whisper.cpp
+  try {
+    execSync('which whisper-cpp', { stdio: 'ignore' });
+
+    const modelPath = join(homedir(), '.local/share/whisper/ggml-base.bin');
+    const result = spawnSync('whisper-cpp', [
+      '-m', modelPath,
+      '-f', audioPath
+    ], { encoding: 'utf-8', timeout: 60000 });
+
+    if (result.status === 0) {
+      return result.stdout.trim();
+    }
+  } catch {}
+
+  throw new Error('Local whisper transcription failed');
+}
+
+/**
+ * Transcribe using macOS native Speech Recognition
+ */
+async function transcribeMacOS(audioPath: string): Promise<string> {
+  const swiftCode = `
+import Foundation
+import Speech
+
+let semaphore = DispatchSemaphore(value: 0)
+var transcription = ""
+var errorMsg = ""
+
+SFSpeechRecognizer.requestAuthorization { status in
+  guard status == .authorized else {
+    errorMsg = "Speech recognition not authorized"
+    semaphore.signal()
+    return
+  }
+
+  guard let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US")),
+        recognizer.isAvailable else {
+    errorMsg = "Speech recognizer not available"
+    semaphore.signal()
+    return
+  }
+
+  let url = URL(fileURLWithPath: CommandLine.arguments[1])
+  let request = SFSpeechURLRecognitionRequest(url: url)
+  request.shouldReportPartialResults = false
+
+  recognizer.recognitionTask(with: request) { result, error in
+    if let error = error {
+      errorMsg = error.localizedDescription
+      semaphore.signal()
+      return
+    }
+
+    if let result = result, result.isFinal {
+      transcription = result.bestTranscription.formattedString
+      semaphore.signal()
+    }
+  }
+}
+
+_ = semaphore.wait(timeout: .now() + 30)
+
+if !errorMsg.isEmpty {
+  fputs(errorMsg + "\\n", stderr)
+  exit(1)
+}
+print(transcription)
+`;
+
+  const swiftFile = join(RECORDING_DIR, 'transcribe.swift');
+  await writeFile(swiftFile, swiftCode);
+
+  try {
+    const result = spawnSync('swift', [swiftFile, audioPath], {
+      encoding: 'utf-8',
+      timeout: 35000
+    });
+
+    if (result.status !== 0) {
+      throw new Error(result.stderr || 'macOS transcription failed');
+    }
+
+    return result.stdout.trim();
+  } finally {
+    await unlink(swiftFile).catch(() => {});
+  }
+}
+
+/**
+ * Get status message for dictation availability
+ */
+export function getDictationStatus(): string {
+  const tool = getRecordingTool();
+  const provider = getBestProvider();
+
+  const parts: string[] = [];
+
+  if (tool) {
+    parts.push(`Recording: ${tool}`);
+  } else {
+    parts.push('Recording: not available (install sox: brew install sox)');
+  }
+
+  if (provider) {
+    const providerNames: Record<DictationProvider, string> = {
+      openai: 'OpenAI Whisper',
+      local: 'Local Whisper',
+      macos: 'macOS Native'
+    };
+    parts.push(`Transcription: ${providerNames[provider]}`);
+  } else {
+    parts.push('Transcription: not available');
+  }
+
+  if (tool && provider) {
+    parts.push('\nPress Ctrl+T to start/stop recording');
+  }
+
+  return parts.join('\n');
+}
```
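For orientation, here is a minimal sketch of how a consumer might drive the push-to-talk API added above. This is not part of the package diff; the relative import path and the fixed delay standing in for "hold key to talk" are illustrative assumptions only.

```ts
// Hypothetical caller of the new dictation module (not in the diff).
import {
  isDictationAvailable,
  startRecording,
  stopRecordingAndTranscribe,
  cancelRecording,
} from './dictation.js';

export async function pushToTalkOnce(): Promise<string | null> {
  if (!isDictationAvailable()) return null; // needs sox/ffmpeg/arecord + a provider

  startRecording(); // key down: spawns the recorder into recording.wav
  try {
    // Stand-in for the user holding the key; a real UI would await key release.
    await new Promise((r) => setTimeout(r, 3000));
    const result = await stopRecordingAndTranscribe(); // key up: stop + transcribe
    return result.text; // result.provider is 'openai' | 'local' | 'macos'
  } catch (err) {
    cancelRecording(); // best-effort cleanup if the recorder is still active
    throw err;
  }
}
```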
package/src/utils/dictation_native.ts

```diff
@@ -0,0 +1,258 @@
+/**
+ * Native real-time dictation using ztc-audio Rust binary
+ * Provides:
+ * - Real-time audio level metering (VU meter)
+ * - Real-time transcription via whisper
+ * - Cross-platform support
+ */
+
+import { spawn, ChildProcess } from 'child_process';
+import { existsSync } from 'fs';
+import { createInterface } from 'readline';
+import { join, dirname } from 'path';
+import { homedir, platform, arch } from 'os';
+import { fileURLToPath } from 'url';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// Try to import the @zergai/ztc-dictation package
+let dictationPackage: { getBinaryPath: () => string | null; isAvailable: () => boolean } | null = null;
+try {
+  // @ts-ignore - dynamic import of optional package
+  dictationPackage = await import('@zergai/ztc-dictation');
+} catch {
+  // Package not installed, will fall back to local binary
+}
+
+export interface DictationEvent {
+  type: 'ready' | 'level' | 'text' | 'error' | 'stopped';
+  device?: string;
+  model?: string;
+  db?: number;
+  rms?: number;
+  content?: string;
+  partial?: boolean;
+  message?: string;
+}
+
+export type DictationEventHandler = (event: DictationEvent) => void;
+
+let activeProcess: ChildProcess | null = null;
+let eventHandlers: DictationEventHandler[] = [];
+
+/**
+ * Get the path to the ztc-audio binary
+ */
+function getBinaryPath(): string | null {
+  // 1. Try the @zergai/ztc-dictation package first
+  if (dictationPackage) {
+    const pkgPath = dictationPackage.getBinaryPath();
+    if (pkgPath && existsSync(pkgPath)) {
+      return pkgPath;
+    }
+  }
+
+  // 2. Check local development/install paths
+  const possiblePaths = [
+    // Development (new location)
+    join(process.cwd(), 'packages', 'ztc-dictation', 'target', 'release', 'ztc-audio'),
+    // Development (old location)
+    join(process.cwd(), 'native', 'ztc-audio', 'target', 'release', 'ztc-audio'),
+    // Global install
+    join(homedir(), '.ztc', 'bin', 'ztc-audio'),
+  ];
+
+  for (const p of possiblePaths) {
+    if (existsSync(p)) {
+      return p;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Check if native dictation is available
+ */
+export function isNativeDictationAvailable(): boolean {
+  return getBinaryPath() !== null;
+}
+
+/**
+ * Check if currently recording
+ */
+export function isNativeRecording(): boolean {
+  return activeProcess !== null;
+}
+
+/**
+ * Subscribe to dictation events
+ */
+export function onDictationEvent(handler: DictationEventHandler): () => void {
+  eventHandlers.push(handler);
+  return () => {
+    eventHandlers = eventHandlers.filter(h => h !== handler);
+  };
+}
+
+function emit(event: DictationEvent) {
+  for (const handler of eventHandlers) {
+    try {
+      handler(event);
+    } catch (e) {
+      // Ignore handler errors
+    }
+  }
+}
+
+/**
+ * Start native recording with real-time transcription
+ */
+export function startNativeRecording(options: {
+  model?: string;
+  device?: string;
+} = {}): boolean {
+  if (activeProcess) {
+    return false; // Already recording
+  }
+
+  const binaryPath = getBinaryPath();
+  if (!binaryPath) {
+    emit({
+      type: 'error',
+      message: 'ztc-audio binary not found. Run: cargo build --release in native/ztc-audio'
+    });
+    return false;
+  }
+
+  const args: string[] = [];
+  if (options.model) {
+    args.push('--model', options.model);
+  } else {
+    args.push('--model', 'tiny'); // Default to tiny for speed
+  }
+  if (options.device) {
+    args.push('--device', options.device);
+  }
+
+  try {
+    activeProcess = spawn(binaryPath, args, {
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+
+    // Parse JSON lines from stdout
+    const rl = createInterface({
+      input: activeProcess.stdout!,
+      crlfDelay: Infinity,
+    });
+
+    rl.on('line', (line) => {
+      try {
+        const msg = JSON.parse(line);
+        emit(msg as DictationEvent);
+      } catch (e) {
+        // Ignore parse errors
+      }
+    });
+
+    // Capture stderr for debugging
+    activeProcess.stderr?.on('data', (data) => {
+      const text = data.toString().trim();
+      if (text && !text.includes('Downloading')) {
+        // Don't emit download progress as errors
+        // Could log this for debugging
+      }
+    });
+
+    activeProcess.on('close', (code) => {
+      activeProcess = null;
+      emit({ type: 'stopped' });
+    });
+
+    activeProcess.on('error', (err) => {
+      activeProcess = null;
+      emit({ type: 'error', message: err.message });
+    });
+
+    return true;
+  } catch (err) {
+    emit({
+      type: 'error',
+      message: `Failed to start ztc-audio: ${err instanceof Error ? err.message : 'Unknown error'}`
+    });
+    return false;
+  }
+}
+
+/**
+ * Stop native recording
+ * Returns the final transcription
+ */
+export async function stopNativeRecording(): Promise<string> {
+  if (!activeProcess) {
+    return '';
+  }
+
+  return new Promise((resolve) => {
+    let finalText = '';
+
+    // Listen for the final transcription
+    const cleanup = onDictationEvent((event) => {
+      if (event.type === 'text' && event.partial === false) {
+        finalText = event.content || '';
+      }
+      if (event.type === 'stopped') {
+        cleanup();
+        resolve(finalText);
+      }
+    });
+
+    // Send SIGINT to trigger graceful shutdown
+    activeProcess?.kill('SIGINT');
+
+    // Fallback timeout
+    setTimeout(() => {
+      if (activeProcess) {
+        activeProcess.kill('SIGKILL');
+        activeProcess = null;
+      }
+      cleanup();
+      resolve(finalText);
+    }, 3000);
+  });
+}
+
+/**
+ * Cancel native recording without getting transcription
+ */
+export function cancelNativeRecording(): void {
+  if (activeProcess) {
+    activeProcess.kill('SIGKILL');
+    activeProcess = null;
+  }
+}
+
+/**
+ * Get recording status info
+ */
+export function getNativeDictationStatus(): string {
+  const binaryPath = getBinaryPath();
+
+  const parts: string[] = [];
+
+  if (binaryPath) {
+    parts.push(`Native dictation: available`);
+    parts.push(`Binary: ${binaryPath}`);
+    parts.push(`Model: ~/.ztc/models/ggml-*.bin`);
+  } else {
+    parts.push('Native dictation: not available');
+    parts.push('Build with: cd native/ztc-audio && cargo build --release');
+  }
+
+  if (isNativeRecording()) {
+    parts.push('\nCurrently recording...');
+  }
+
+  return parts.join('\n');
+}
```
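Similarly, a hedged sketch of the event-driven flow for the native path; the import path, model choice, and fixed recording window are illustrative assumptions, not part of the published code.

```ts
// Hypothetical caller of the native dictation module (not in the diff).
import {
  isNativeDictationAvailable,
  onDictationEvent,
  startNativeRecording,
  stopNativeRecording,
} from './dictation_native.js';

export async function nativeDictationDemo(): Promise<string> {
  if (!isNativeDictationAvailable()) return ''; // ztc-audio binary not found

  // Subscribe before starting so early events (e.g. 'ready') are not missed.
  const unsubscribe = onDictationEvent((e) => {
    if (e.type === 'level') process.stderr.write(`\rlevel: ${e.db?.toFixed(1)} dB`);
    if (e.type === 'text' && e.partial) console.log(`partial: ${e.content}`);
    if (e.type === 'error') console.error(e.message);
  });

  startNativeRecording({ model: 'tiny' }); // JSON events stream from the binary
  await new Promise((r) => setTimeout(r, 5000)); // stand-in for user speech

  const finalText = await stopNativeRecording(); // SIGINT, then wait for 'stopped'
  unsubscribe();
  return finalText;
}
```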