@leverageaiapps/leverageai-agent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +175 -0
- package/dist/capture.d.ts +3 -0
- package/dist/capture.d.ts.map +1 -0
- package/dist/capture.js +134 -0
- package/dist/capture.js.map +1 -0
- package/dist/cloudflare-tunnel.d.ts +9 -0
- package/dist/cloudflare-tunnel.d.ts.map +1 -0
- package/dist/cloudflare-tunnel.js +218 -0
- package/dist/cloudflare-tunnel.js.map +1 -0
- package/dist/config.d.ts +7 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +84 -0
- package/dist/config.js.map +1 -0
- package/dist/context-extractor.d.ts +17 -0
- package/dist/context-extractor.d.ts.map +1 -0
- package/dist/context-extractor.js +118 -0
- package/dist/context-extractor.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +85 -0
- package/dist/index.js.map +1 -0
- package/dist/pty.d.ts +20 -0
- package/dist/pty.d.ts.map +1 -0
- package/dist/pty.js +148 -0
- package/dist/pty.js.map +1 -0
- package/dist/relay.d.ts +5 -0
- package/dist/relay.d.ts.map +1 -0
- package/dist/relay.js +131 -0
- package/dist/relay.js.map +1 -0
- package/dist/session.d.ts +5 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/session.js +250 -0
- package/dist/session.js.map +1 -0
- package/dist/voice-recognition-modelscope.d.ts +50 -0
- package/dist/voice-recognition-modelscope.d.ts.map +1 -0
- package/dist/voice-recognition-modelscope.js +171 -0
- package/dist/voice-recognition-modelscope.js.map +1 -0
- package/dist/web-server.d.ts +6 -0
- package/dist/web-server.d.ts.map +1 -0
- package/dist/web-server.js +1981 -0
- package/dist/web-server.js.map +1 -0
- package/package.json +66 -0
- package/public/index.html +639 -0
- package/public/js/terminal-asr.js +435 -0
- package/public/js/terminal.js +514 -0
- package/public/js/voice-input.js +422 -0
- package/scripts/postinstall.js +66 -0
- package/scripts/verify-install.js +124 -0
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
/**
 * Terminal ASR - Uses terminal WebSocket for ASR
 * Communicates with backend which proxies to voice.futuretech.social gateway
 * NO API KEY REQUIRED - the server handles the gateway connection
 */
class TerminalASR {
  constructor() {
    this.language = 'zh';
    this.isRecording = false;
    this.audioContext = null;
    this.processor = null;
    this.source = null;
    this.stream = null;
    this.terminalContext = '';
    this.maxContextLength = 2000;

    // Callbacks supplied by the UI layer via startRecording()/requestCorrection()
    this.onPartialResult = null;
    this.onFinalResult = null;
    this.onError = null;
    this.onReady = null; // Called when ASR session is ready to receive audio
    this.onCorrectionResult = null; // Called when Claude correction is received
    this.lastCorrection = null; // Most recent { original, corrected } pair

    // ASR session state
    this.asrSessionActive = false;
    this.sessionReady = false; // True when ASR backend is ready to receive audio
    this.audioBuffer = []; // Int16Array PCM chunks awaiting the next send tick
    this.pendingAudioBuffer = []; // Base64 chunks buffered before ASR is ready
    this.sendInterval = null; // Timer handle for the periodic audio flush

    // Setup message handler: the terminal WebSocket code routes ASR
    // messages to this instance through window.handleASRResponse.
    window.handleASRResponse = (data) => {
      this.handleASRResponse(data);
    };
  }

  /**
   * Check if configured (always true - no API key needed).
   * @returns {boolean} always true
   */
  isConfigured() {
    return true;
  }

  /**
   * Update the correction/recognition context from terminal output.
   * Keeps at most the last 50 lines, truncated to maxContextLength chars
   * (keeping the tail, which is the most recent output).
   * @param {string[]} terminalLines - lines of terminal scrollback
   * @returns {string} the stored context
   */
  updateContext(terminalLines) {
    const recentLines = terminalLines.slice(-50).join('\n');
    if (recentLines.length > this.maxContextLength) {
      this.terminalContext = recentLines.slice(-this.maxContextLength);
    } else {
      this.terminalContext = recentLines;
    }
    console.log('[Terminal ASR] Context updated, length:', this.terminalContext.length);
    return this.terminalContext;
  }

  /**
   * Set maximum context length, clamped to the [100, 10000] range.
   * @param {number} length - requested maximum context length in characters
   */
  setMaxContextLength(length) {
    this.maxContextLength = Math.min(Math.max(100, length), 10000);
  }

  /**
   * Start real-time recording and streaming over the terminal WebSocket.
   * Audio captured before the backend signals `asr_ready` is buffered and
   * flushed once the session is ready.
   * @param {function(string)} onPartialResult - partial transcript callback
   * @param {function(string)} onFinalResult - final transcript callback
   * @param {function(Error)} onError - error callback
   * @param {function()} onReady - called when the ASR session is ready
   * @throws {Error} if the terminal WebSocket is not connected
   */
  async startRecording(onPartialResult, onFinalResult, onError, onReady) {
    // Check if terminal WebSocket is connected
    if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
      const err = new Error('Terminal WebSocket not connected');
      onError(err);
      throw err;
    }

    this.onPartialResult = onPartialResult;
    this.onFinalResult = onFinalResult;
    this.onError = onError;
    this.onReady = onReady;

    try {
      // Get microphone access (mono 16 kHz PCM, matching the ASR backend)
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          sampleRate: 16000,
          sampleSize: 16,
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      });

      // Create audio context at the ASR sample rate
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000
      });

      this.source = this.audioContext.createMediaStreamSource(this.stream);

      // Start ASR session via terminal WebSocket.
      // Server will connect to voice.futuretech.social gateway.
      const startMessage = {
        type: 'asr_start',
        language: this.language,
        context: this.terminalContext
      };

      window.terminalWs.send(JSON.stringify(startMessage));
      console.log('[Terminal ASR] Sent ASR start message');

      this.isRecording = true;
      this.asrSessionActive = true;
      this.sessionReady = false; // Will be set to true when asr_ready is received
      this.audioBuffer = [];
      this.pendingAudioBuffer = []; // Clear pending buffer

      // Start audio processing immediately (audio will be buffered until ASR is ready)
      this.startAudioProcessing();

    } catch (error) {
      console.error('[Terminal ASR] Failed to start recording:', error);
      onError(error);
    }
  }

  /**
   * Combine every chunk currently in this.audioBuffer into a single
   * Int16Array, clear the buffer, and return the PCM as base64.
   * Shared by the periodic send tick and the final flush in stopRecording.
   * @returns {string|null} base64-encoded PCM, or null if the buffer is empty
   */
  drainAudioBufferToBase64() {
    if (this.audioBuffer.length === 0) {
      return null;
    }
    const totalLength = this.audioBuffer.reduce((acc, arr) => acc + arr.length, 0);
    const combinedBuffer = new Int16Array(totalLength);
    let offset = 0;
    for (const buffer of this.audioBuffer) {
      combinedBuffer.set(buffer, offset);
      offset += buffer.length;
    }
    this.audioBuffer = [];
    return this.arrayBufferToBase64(combinedBuffer.buffer);
  }

  /**
   * Start processing and sending audio data.
   * Captures float32 frames, converts them to int16 PCM, and flushes them
   * every 100 ms - either straight to the server (session ready) or into
   * pendingAudioBuffer (session not yet ready).
   */
  startAudioProcessing() {
    // Create ScriptProcessor for audio processing.
    // NOTE(review): ScriptProcessorNode is deprecated in favor of
    // AudioWorklet, but is kept here for broad browser compatibility.
    const bufferSize = 4096;
    this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);

    this.processor.onaudioprocess = (e) => {
      if (!this.isRecording || !window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
        return;
      }

      const inputData = e.inputBuffer.getChannelData(0);

      // Convert float32 [-1, 1] samples to int16 PCM
      const pcmData = new Int16Array(inputData.length);
      for (let i = 0; i < inputData.length; i++) {
        const s = Math.max(-1, Math.min(1, inputData[i]));
        pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
      }

      // Add to buffer (will be sent or cached based on sessionReady state)
      this.audioBuffer.push(pcmData);
    };

    // Send audio data periodically
    this.sendInterval = setInterval(() => {
      if (this.audioBuffer.length === 0 || !this.asrSessionActive) {
        return;
      }

      const base64Audio = this.drainAudioBufferToBase64();
      if (base64Audio === null) {
        return;
      }

      if (this.sessionReady && window.terminalWs && window.terminalWs.readyState === WebSocket.OPEN) {
        // ASR is ready - send audio immediately
        const audioMessage = {
          type: 'asr_audio',
          audio: base64Audio
        };
        window.terminalWs.send(JSON.stringify(audioMessage));
      } else {
        // ASR not ready yet - cache audio for later
        this.pendingAudioBuffer.push(base64Audio);
        console.log('[Terminal ASR] Buffering audio (session not ready), buffer size:', this.pendingAudioBuffer.length);
      }
    }, 100);

    // Connect audio nodes (destination connection keeps the processor alive)
    this.source.connect(this.processor);
    this.processor.connect(this.audioContext.destination);
  }

  /**
   * Send all pending buffered audio when ASR becomes ready.
   * No-op when the buffer is empty; skipped (buffer retained) when the
   * WebSocket is no longer open, to avoid throwing from send().
   */
  sendPendingAudio() {
    if (this.pendingAudioBuffer.length === 0) {
      return;
    }
    // Guard: the socket may have closed between asr_start and asr_ready.
    if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
      console.warn('[Terminal ASR] Cannot flush buffered audio: WebSocket not open');
      return;
    }

    console.log('[Terminal ASR] Sending', this.pendingAudioBuffer.length, 'buffered audio chunks');

    // Send all buffered audio in capture order
    for (const base64Audio of this.pendingAudioBuffer) {
      const audioMessage = {
        type: 'asr_audio',
        audio: base64Audio
      };
      window.terminalWs.send(JSON.stringify(audioMessage));
    }

    // Clear pending buffer
    this.pendingAudioBuffer = [];
  }

  /**
   * Handle ASR response from server.
   * Dispatches on data.error first, then on data.type; falls back to
   * treating any message with transcript/text as a transcription result.
   * @param {object} data - parsed ASR message from the server
   */
  handleASRResponse(data) {
    console.log('[Terminal ASR] Received ASR response:', data);

    if (data.error) {
      // Handle error object or string
      const errorMessage = typeof data.error === 'string' ?
        data.error :
        (data.error.message || JSON.stringify(data.error));

      // Don't report errors caused by stopping recording mid-stream
      if (errorMessage.includes('no invalid audio stream') ||
          errorMessage.includes('committing input audio buffer')) {
        console.log('[Terminal ASR] Ignoring stop recording error');
        return;
      }

      console.error('[Terminal ASR] ASR error:', errorMessage);
      if (this.onError) {
        this.onError(new Error(errorMessage));
      }
      return;
    }

    // Handle different response types
    if (data.type === 'asr_ready') {
      console.log('[Terminal ASR] ASR ready to receive audio');
      this.sessionReady = true;
      // Send any audio that was buffered while waiting for ASR to be ready
      this.sendPendingAudio();
      if (this.onReady) {
        this.onReady();
      }
    } else if (data.type === 'session.created') {
      console.log('[Terminal ASR] Session created');
    } else if (data.type === 'session.updated') {
      console.log('[Terminal ASR] Session updated');
    } else if (data.type === 'partial') {
      // Partial transcription from gateway
      const text = data.text || data.transcript;
      if (text) {
        console.log('[Terminal ASR] Partial result:', text);
        if (this.onPartialResult) {
          this.onPartialResult(text);
        }
      }
    } else if (data.type === 'conversation.item.input_audio_transcription.completed') {
      // Final transcription - from both DashScope format and gateway
      const text = data.transcript || data.text;
      if (text) {
        console.log('[Terminal ASR] Transcription completed:', text);
        if (this.onFinalResult) {
          this.onFinalResult(text);
        }
      }
    } else if (data.type === 'conversation.item.input_audio_transcription.in_progress') {
      // Partial transcription
      const text = data.transcript;
      if (text) {
        console.log('[Terminal ASR] Transcription in progress:', text);
        if (this.onPartialResult) {
          this.onPartialResult(text);
        }
      }
    } else if (data.type === 'correction_result') {
      // Claude correction result from gateway
      console.log('[Terminal ASR] Claude correction:', data.original, '->', data.corrected);
      // Store the correction for later inspection
      this.lastCorrection = {
        original: data.original,
        corrected: data.corrected
      };
      // Notify via callback if set
      if (this.onCorrectionResult) {
        this.onCorrectionResult(data.original, data.corrected);
      }
    } else if (data.transcript || data.text) {
      // This is a transcription result (fallback handling)
      const text = data.transcript || data.text;

      if (data.is_final || data.sentence_end) {
        // Final result
        console.log('[Terminal ASR] Final:', text);
        if (this.onFinalResult) {
          this.onFinalResult(text);
        }
      } else {
        // Partial result
        console.log('[Terminal ASR] Partial:', text);
        if (this.onPartialResult) {
          this.onPartialResult(text);
        }
      }
    }
  }

  /**
   * Stop recording: flush remaining audio, tell the server to stop the
   * ASR session, and release all audio resources.
   */
  async stopRecording() {
    this.isRecording = false;
    this.asrSessionActive = false;
    this.sessionReady = false;
    this.pendingAudioBuffer = []; // Clear any pending audio

    // Clear intervals
    if (this.sendInterval) {
      clearInterval(this.sendInterval);
      this.sendInterval = null;
    }

    const wsOpen = window.terminalWs && window.terminalWs.readyState === WebSocket.OPEN;

    // Send any remaining audio
    if (this.audioBuffer.length > 0 && wsOpen) {
      const base64Audio = this.drainAudioBufferToBase64();
      if (base64Audio !== null) {
        // Send final audio data
        const audioMessage = {
          type: 'asr_audio',
          audio: base64Audio
        };
        window.terminalWs.send(JSON.stringify(audioMessage));
      }
    }

    // Stop ASR session
    if (wsOpen) {
      const stopMessage = {
        type: 'asr_stop'
      };
      window.terminalWs.send(JSON.stringify(stopMessage));
      console.log('[Terminal ASR] Sent ASR stop message');
    }

    // Clean up audio resources
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }

    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }

    if (this.audioContext) {
      try {
        // Await the close so callers can safely restart recording afterwards
        // (the original fire-and-forget left an unhandled promise).
        await this.audioContext.close();
      } catch (err) {
        console.warn('[Terminal ASR] Error closing AudioContext:', err);
      }
      this.audioContext = null;
    }

    if (this.stream) {
      this.stream.getTracks().forEach(track => track.stop());
      this.stream = null;
    }

    console.log('[Terminal ASR] Recording stopped');
  }

  /**
   * Convert ArrayBuffer to Base64.
   * @param {ArrayBuffer} buffer - raw bytes to encode
   * @returns {string} base64 encoding of the buffer
   */
  arrayBufferToBase64(buffer) {
    let binary = '';
    const bytes = new Uint8Array(buffer);
    const len = bytes.byteLength;
    for (let i = 0; i < len; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return btoa(binary);
  }

  /**
   * Request Claude correction for transcribed text.
   * Uses terminal WebSocket to send request to server, which forwards to gateway.
   * Falls back to calling the callback with the uncorrected text when there
   * is nothing to correct or the socket is closed.
   * @param {string} text - the transcript to correct
   * @param {function(string, string)} callback - receives (original, corrected)
   */
  requestCorrection(text, callback) {
    if (!text || !text.trim()) {
      console.log('[Terminal ASR] No text to correct');
      if (callback) {
        callback(text, text);
      }
      return;
    }

    // Check if terminal WebSocket is connected
    if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
      console.error('[Terminal ASR] WebSocket not connected for correction');
      if (callback) {
        callback(text, text);
      }
      return;
    }

    // Set callback for correction result; correction_result messages in
    // handleASRResponse will invoke it.
    this.onCorrectionResult = (original, corrected) => {
      if (callback) {
        callback(original, corrected);
      }
    };

    // Send claude_process request via terminal WebSocket
    const correctionRequest = {
      type: 'claude_process',
      transcript: text,
      context: this.terminalContext
    };

    window.terminalWs.send(JSON.stringify(correctionRequest));
    console.log('[Terminal ASR] Sent correction request:', text);
  }
}
// Expose a single shared TerminalASR instance for the terminal UI scripts.
const sharedTerminalASR = new TerminalASR();
window.terminalASR = sharedTerminalASR;
|