@leverageaiapps/leverageai-agent 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/LICENSE +21 -0
  2. package/README.md +175 -0
  3. package/dist/capture.d.ts +3 -0
  4. package/dist/capture.d.ts.map +1 -0
  5. package/dist/capture.js +134 -0
  6. package/dist/capture.js.map +1 -0
  7. package/dist/cloudflare-tunnel.d.ts +9 -0
  8. package/dist/cloudflare-tunnel.d.ts.map +1 -0
  9. package/dist/cloudflare-tunnel.js +218 -0
  10. package/dist/cloudflare-tunnel.js.map +1 -0
  11. package/dist/config.d.ts +7 -0
  12. package/dist/config.d.ts.map +1 -0
  13. package/dist/config.js +84 -0
  14. package/dist/config.js.map +1 -0
  15. package/dist/context-extractor.d.ts +17 -0
  16. package/dist/context-extractor.d.ts.map +1 -0
  17. package/dist/context-extractor.js +118 -0
  18. package/dist/context-extractor.js.map +1 -0
  19. package/dist/index.d.ts +3 -0
  20. package/dist/index.d.ts.map +1 -0
  21. package/dist/index.js +85 -0
  22. package/dist/index.js.map +1 -0
  23. package/dist/pty.d.ts +20 -0
  24. package/dist/pty.d.ts.map +1 -0
  25. package/dist/pty.js +148 -0
  26. package/dist/pty.js.map +1 -0
  27. package/dist/relay.d.ts +5 -0
  28. package/dist/relay.d.ts.map +1 -0
  29. package/dist/relay.js +131 -0
  30. package/dist/relay.js.map +1 -0
  31. package/dist/session.d.ts +5 -0
  32. package/dist/session.d.ts.map +1 -0
  33. package/dist/session.js +250 -0
  34. package/dist/session.js.map +1 -0
  35. package/dist/voice-recognition-modelscope.d.ts +50 -0
  36. package/dist/voice-recognition-modelscope.d.ts.map +1 -0
  37. package/dist/voice-recognition-modelscope.js +171 -0
  38. package/dist/voice-recognition-modelscope.js.map +1 -0
  39. package/dist/web-server.d.ts +6 -0
  40. package/dist/web-server.d.ts.map +1 -0
  41. package/dist/web-server.js +1981 -0
  42. package/dist/web-server.js.map +1 -0
  43. package/package.json +66 -0
  44. package/public/index.html +639 -0
  45. package/public/js/terminal-asr.js +435 -0
  46. package/public/js/terminal.js +514 -0
  47. package/public/js/voice-input.js +422 -0
  48. package/scripts/postinstall.js +66 -0
  49. package/scripts/verify-install.js +124 -0
@@ -0,0 +1,435 @@
1
+ /**
2
+ * Terminal ASR - Uses terminal WebSocket for ASR
3
+ * Communicates with backend which proxies to voice.futuretech.social gateway
4
+ * NO API KEY REQUIRED - the server handles the gateway connection
5
+ */
6
+ class TerminalASR {
7
+ constructor() {
8
+ this.language = 'zh';
9
+ this.isRecording = false;
10
+ this.audioContext = null;
11
+ this.processor = null;
12
+ this.source = null;
13
+ this.stream = null;
14
+ this.terminalContext = '';
15
+ this.maxContextLength = 2000;
16
+
17
+ // Callbacks
18
+ this.onPartialResult = null;
19
+ this.onFinalResult = null;
20
+ this.onError = null;
21
+ this.onReady = null; // Called when ASR session is ready to receive audio
22
+ this.onCorrectionResult = null; // Called when Claude correction is received
23
+
24
+ // ASR session state
25
+ this.asrSessionActive = false;
26
+ this.sessionReady = false; // True when ASR backend is ready to receive audio
27
+ this.audioBuffer = [];
28
+ this.pendingAudioBuffer = []; // Buffer audio before ASR is ready
29
+
30
+ // Setup message handler
31
+ window.handleASRResponse = (data) => {
32
+ this.handleASRResponse(data);
33
+ };
34
+ }
35
+
36
+ /**
37
+ * Check if configured (always true - no API key needed)
38
+ */
39
+ isConfigured() {
40
+ return true;
41
+ }
42
+
43
+ /**
44
+ * Update context from terminal output
45
+ */
46
+ updateContext(terminalLines) {
47
+ const recentLines = terminalLines.slice(-50).join('\n');
48
+ if (recentLines.length > this.maxContextLength) {
49
+ this.terminalContext = recentLines.slice(-this.maxContextLength);
50
+ } else {
51
+ this.terminalContext = recentLines;
52
+ }
53
+ console.log('[Terminal ASR] Context updated, length:', this.terminalContext.length);
54
+ return this.terminalContext;
55
+ }
56
+
57
+ /**
58
+ * Set maximum context length
59
+ */
60
+ setMaxContextLength(length) {
61
+ this.maxContextLength = Math.min(Math.max(100, length), 10000);
62
+ }
63
+
64
+ /**
65
+ * Start real-time recording and streaming
66
+ */
67
+ async startRecording(onPartialResult, onFinalResult, onError, onReady) {
68
+ // Check if terminal WebSocket is connected
69
+ if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
70
+ const err = new Error('Terminal WebSocket not connected');
71
+ onError(err);
72
+ throw err;
73
+ }
74
+
75
+ this.onPartialResult = onPartialResult;
76
+ this.onFinalResult = onFinalResult;
77
+ this.onError = onError;
78
+ this.onReady = onReady;
79
+
80
+ try {
81
+ // Get microphone access
82
+ this.stream = await navigator.mediaDevices.getUserMedia({
83
+ audio: {
84
+ channelCount: 1,
85
+ sampleRate: 16000,
86
+ sampleSize: 16,
87
+ echoCancellation: true,
88
+ noiseSuppression: true,
89
+ autoGainControl: true
90
+ }
91
+ });
92
+
93
+ // Create audio context
94
+ this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
95
+ sampleRate: 16000
96
+ });
97
+
98
+ this.source = this.audioContext.createMediaStreamSource(this.stream);
99
+
100
+ // Start ASR session via terminal WebSocket
101
+ // Server will connect to voice.futuretech.social gateway
102
+ const startMessage = {
103
+ type: 'asr_start',
104
+ language: this.language,
105
+ context: this.terminalContext
106
+ };
107
+
108
+ window.terminalWs.send(JSON.stringify(startMessage));
109
+ console.log('[Terminal ASR] Sent ASR start message');
110
+
111
+ this.isRecording = true;
112
+ this.asrSessionActive = true;
113
+ this.sessionReady = false; // Will be set to true when asr_ready is received
114
+ this.audioBuffer = [];
115
+ this.pendingAudioBuffer = []; // Clear pending buffer
116
+
117
+ // Start audio processing immediately (audio will be buffered until ASR is ready)
118
+ this.startAudioProcessing();
119
+
120
+ } catch (error) {
121
+ console.error('[Terminal ASR] Failed to start recording:', error);
122
+ onError(error);
123
+ }
124
+ }
125
+
126
+ /**
127
+ * Start processing and sending audio data
128
+ */
129
+ startAudioProcessing() {
130
+ // Create ScriptProcessor for audio processing
131
+ const bufferSize = 4096;
132
+ this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
133
+
134
+ this.processor.onaudioprocess = (e) => {
135
+ if (!this.isRecording || !window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
136
+ return;
137
+ }
138
+
139
+ const inputData = e.inputBuffer.getChannelData(0);
140
+
141
+ // Convert float32 to int16 PCM
142
+ const pcmData = new Int16Array(inputData.length);
143
+ for (let i = 0; i < inputData.length; i++) {
144
+ const s = Math.max(-1, Math.min(1, inputData[i]));
145
+ pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
146
+ }
147
+
148
+ // Add to buffer (will be sent or cached based on sessionReady state)
149
+ this.audioBuffer.push(pcmData);
150
+ };
151
+
152
+ // Send audio data periodically
153
+ this.sendInterval = setInterval(() => {
154
+ if (this.audioBuffer.length > 0 && this.asrSessionActive) {
155
+ // Combine all buffered audio
156
+ const totalLength = this.audioBuffer.reduce((acc, arr) => acc + arr.length, 0);
157
+ const combinedBuffer = new Int16Array(totalLength);
158
+ let offset = 0;
159
+ for (const buffer of this.audioBuffer) {
160
+ combinedBuffer.set(buffer, offset);
161
+ offset += buffer.length;
162
+ }
163
+
164
+ // Convert to base64
165
+ const base64Audio = this.arrayBufferToBase64(combinedBuffer.buffer);
166
+
167
+ if (this.sessionReady) {
168
+ // ASR is ready - send audio immediately
169
+ const audioMessage = {
170
+ type: 'asr_audio',
171
+ audio: base64Audio
172
+ };
173
+ window.terminalWs.send(JSON.stringify(audioMessage));
174
+ } else {
175
+ // ASR not ready yet - cache audio for later
176
+ this.pendingAudioBuffer.push(base64Audio);
177
+ console.log('[Terminal ASR] Buffering audio (session not ready), buffer size:', this.pendingAudioBuffer.length);
178
+ }
179
+
180
+ // Clear buffer
181
+ this.audioBuffer = [];
182
+ }
183
+ }, 100);
184
+
185
+ // Connect audio nodes
186
+ this.source.connect(this.processor);
187
+ this.processor.connect(this.audioContext.destination);
188
+ }
189
+
190
+ /**
191
+ * Send all pending buffered audio when ASR becomes ready
192
+ */
193
+ sendPendingAudio() {
194
+ if (this.pendingAudioBuffer.length > 0) {
195
+ console.log('[Terminal ASR] Sending', this.pendingAudioBuffer.length, 'buffered audio chunks');
196
+
197
+ // Send all buffered audio
198
+ for (const base64Audio of this.pendingAudioBuffer) {
199
+ const audioMessage = {
200
+ type: 'asr_audio',
201
+ audio: base64Audio
202
+ };
203
+ window.terminalWs.send(JSON.stringify(audioMessage));
204
+ }
205
+
206
+ // Clear pending buffer
207
+ this.pendingAudioBuffer = [];
208
+ }
209
+ }
210
+
211
+ /**
212
+ * Handle ASR response from server
213
+ */
214
+ handleASRResponse(data) {
215
+ console.log('[Terminal ASR] Received ASR response:', data);
216
+
217
+ if (data.error) {
218
+ // Handle error object or string
219
+ const errorMessage = typeof data.error === 'string' ?
220
+ data.error :
221
+ (data.error.message || JSON.stringify(data.error));
222
+
223
+ // Don't report errors for stopping recording
224
+ if (errorMessage.includes('no invalid audio stream') ||
225
+ errorMessage.includes('committing input audio buffer')) {
226
+ console.log('[Terminal ASR] Ignoring stop recording error');
227
+ return;
228
+ }
229
+
230
+ console.error('[Terminal ASR] ASR error:', errorMessage);
231
+ if (this.onError) {
232
+ this.onError(new Error(errorMessage));
233
+ }
234
+ return;
235
+ }
236
+
237
+ // Handle different response types
238
+ if (data.type === 'asr_ready') {
239
+ console.log('[Terminal ASR] ASR ready to receive audio');
240
+ this.sessionReady = true;
241
+ // Send any audio that was buffered while waiting for ASR to be ready
242
+ this.sendPendingAudio();
243
+ if (this.onReady) {
244
+ this.onReady();
245
+ }
246
+ } else if (data.type === 'session.created') {
247
+ console.log('[Terminal ASR] Session created');
248
+ } else if (data.type === 'session.updated') {
249
+ console.log('[Terminal ASR] Session updated');
250
+ } else if (data.type === 'partial') {
251
+ // Partial transcription from gateway
252
+ const text = data.text || data.transcript;
253
+ if (text) {
254
+ console.log('[Terminal ASR] Partial result:', text);
255
+ if (this.onPartialResult) {
256
+ this.onPartialResult(text);
257
+ }
258
+ }
259
+ } else if (data.type === 'conversation.item.input_audio_transcription.completed') {
260
+ // Final transcription - from both DashScope format and gateway
261
+ const text = data.transcript || data.text;
262
+ if (text) {
263
+ console.log('[Terminal ASR] Transcription completed:', text);
264
+ if (this.onFinalResult) {
265
+ this.onFinalResult(text);
266
+ }
267
+ }
268
+ } else if (data.type === 'conversation.item.input_audio_transcription.in_progress') {
269
+ // Partial transcription
270
+ const text = data.transcript;
271
+ if (text) {
272
+ console.log('[Terminal ASR] Transcription in progress:', text);
273
+ if (this.onPartialResult) {
274
+ this.onPartialResult(text);
275
+ }
276
+ }
277
+ } else if (data.type === 'correction_result') {
278
+ // Claude correction result from gateway
279
+ console.log('[Terminal ASR] Claude correction:', data.original, '->', data.corrected);
280
+ // Store the correction for use
281
+ this.lastCorrection = {
282
+ original: data.original,
283
+ corrected: data.corrected
284
+ };
285
+ // Notify via callback if set
286
+ if (this.onCorrectionResult) {
287
+ this.onCorrectionResult(data.original, data.corrected);
288
+ }
289
+ } else if (data.transcript || data.text) {
290
+ // This is a transcription result (fallback handling)
291
+ const text = data.transcript || data.text;
292
+
293
+ if (data.is_final || data.sentence_end) {
294
+ // Final result
295
+ console.log('[Terminal ASR] Final:', text);
296
+ if (this.onFinalResult) {
297
+ this.onFinalResult(text);
298
+ }
299
+ } else {
300
+ // Partial result
301
+ console.log('[Terminal ASR] Partial:', text);
302
+ if (this.onPartialResult) {
303
+ this.onPartialResult(text);
304
+ }
305
+ }
306
+ }
307
+ }
308
+
309
+ /**
310
+ * Stop recording
311
+ */
312
+ async stopRecording() {
313
+ this.isRecording = false;
314
+ this.asrSessionActive = false;
315
+ this.sessionReady = false;
316
+ this.pendingAudioBuffer = []; // Clear any pending audio
317
+
318
+ // Clear intervals
319
+ if (this.sendInterval) {
320
+ clearInterval(this.sendInterval);
321
+ this.sendInterval = null;
322
+ }
323
+
324
+ // Send any remaining audio
325
+ if (this.audioBuffer.length > 0 && window.terminalWs && window.terminalWs.readyState === WebSocket.OPEN) {
326
+ // Combine all buffered audio
327
+ const totalLength = this.audioBuffer.reduce((acc, arr) => acc + arr.length, 0);
328
+ const combinedBuffer = new Int16Array(totalLength);
329
+ let offset = 0;
330
+ for (const buffer of this.audioBuffer) {
331
+ combinedBuffer.set(buffer, offset);
332
+ offset += buffer.length;
333
+ }
334
+
335
+ // Convert to base64
336
+ const base64Audio = this.arrayBufferToBase64(combinedBuffer.buffer);
337
+
338
+ // Send final audio data
339
+ const audioMessage = {
340
+ type: 'asr_audio',
341
+ audio: base64Audio
342
+ };
343
+ window.terminalWs.send(JSON.stringify(audioMessage));
344
+ this.audioBuffer = [];
345
+ }
346
+
347
+ // Stop ASR session
348
+ if (window.terminalWs && window.terminalWs.readyState === WebSocket.OPEN) {
349
+ const stopMessage = {
350
+ type: 'asr_stop'
351
+ };
352
+ window.terminalWs.send(JSON.stringify(stopMessage));
353
+ console.log('[Terminal ASR] Sent ASR stop message');
354
+ }
355
+
356
+ // Clean up audio resources
357
+ if (this.processor) {
358
+ this.processor.disconnect();
359
+ this.processor = null;
360
+ }
361
+
362
+ if (this.source) {
363
+ this.source.disconnect();
364
+ this.source = null;
365
+ }
366
+
367
+ if (this.audioContext) {
368
+ this.audioContext.close();
369
+ this.audioContext = null;
370
+ }
371
+
372
+ if (this.stream) {
373
+ this.stream.getTracks().forEach(track => track.stop());
374
+ this.stream = null;
375
+ }
376
+
377
+ console.log('[Terminal ASR] Recording stopped');
378
+ }
379
+
380
+ /**
381
+ * Convert ArrayBuffer to Base64
382
+ */
383
+ arrayBufferToBase64(buffer) {
384
+ let binary = '';
385
+ const bytes = new Uint8Array(buffer);
386
+ const len = bytes.byteLength;
387
+ for (let i = 0; i < len; i++) {
388
+ binary += String.fromCharCode(bytes[i]);
389
+ }
390
+ return btoa(binary);
391
+ }
392
+
393
+ /**
394
+ * Request Claude correction for transcribed text
395
+ * Uses terminal WebSocket to send request to server, which forwards to gateway
396
+ */
397
+ requestCorrection(text, callback) {
398
+ if (!text || !text.trim()) {
399
+ console.log('[Terminal ASR] No text to correct');
400
+ if (callback) {
401
+ callback(text, text);
402
+ }
403
+ return;
404
+ }
405
+
406
+ // Check if terminal WebSocket is connected
407
+ if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
408
+ console.error('[Terminal ASR] WebSocket not connected for correction');
409
+ if (callback) {
410
+ callback(text, text);
411
+ }
412
+ return;
413
+ }
414
+
415
+ // Set callback for correction result
416
+ this.onCorrectionResult = (original, corrected) => {
417
+ if (callback) {
418
+ callback(original, corrected);
419
+ }
420
+ };
421
+
422
+ // Send claude_process request via terminal WebSocket
423
+ const correctionRequest = {
424
+ type: 'claude_process',
425
+ transcript: text,
426
+ context: this.terminalContext
427
+ };
428
+
429
+ window.terminalWs.send(JSON.stringify(correctionRequest));
430
+ console.log('[Terminal ASR] Sent correction request:', text);
431
+ }
432
+ }
433
+
434
+ // Create global instance
435
+ window.terminalASR = new TerminalASR();