@leverageaiapps/theseus-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/LICENSE +21 -0
  2. package/README.md +165 -0
  3. package/dist/capture.d.ts +3 -0
  4. package/dist/capture.d.ts.map +1 -0
  5. package/dist/capture.js +134 -0
  6. package/dist/capture.js.map +1 -0
  7. package/dist/cloudflare-tunnel.d.ts +9 -0
  8. package/dist/cloudflare-tunnel.d.ts.map +1 -0
  9. package/dist/cloudflare-tunnel.js +218 -0
  10. package/dist/cloudflare-tunnel.js.map +1 -0
  11. package/dist/config.d.ts +7 -0
  12. package/dist/config.d.ts.map +1 -0
  13. package/dist/config.js +84 -0
  14. package/dist/config.js.map +1 -0
  15. package/dist/context-extractor.d.ts +17 -0
  16. package/dist/context-extractor.d.ts.map +1 -0
  17. package/dist/context-extractor.js +118 -0
  18. package/dist/context-extractor.js.map +1 -0
  19. package/dist/index.d.ts +3 -0
  20. package/dist/index.d.ts.map +1 -0
  21. package/dist/index.js +45 -0
  22. package/dist/index.js.map +1 -0
  23. package/dist/pty.d.ts +20 -0
  24. package/dist/pty.d.ts.map +1 -0
  25. package/dist/pty.js +148 -0
  26. package/dist/pty.js.map +1 -0
  27. package/dist/relay.d.ts +5 -0
  28. package/dist/relay.d.ts.map +1 -0
  29. package/dist/relay.js +131 -0
  30. package/dist/relay.js.map +1 -0
  31. package/dist/session.d.ts +5 -0
  32. package/dist/session.d.ts.map +1 -0
  33. package/dist/session.js +257 -0
  34. package/dist/session.js.map +1 -0
  35. package/dist/voice-recognition-modelscope.d.ts +50 -0
  36. package/dist/voice-recognition-modelscope.d.ts.map +1 -0
  37. package/dist/voice-recognition-modelscope.js +171 -0
  38. package/dist/voice-recognition-modelscope.js.map +1 -0
  39. package/dist/web-server.d.ts +6 -0
  40. package/dist/web-server.d.ts.map +1 -0
  41. package/dist/web-server.js +1971 -0
  42. package/dist/web-server.js.map +1 -0
  43. package/package.json +66 -0
  44. package/public/index.html +639 -0
  45. package/public/js/terminal-asr.js +508 -0
  46. package/public/js/terminal.js +514 -0
  47. package/public/js/voice-input.js +422 -0
  48. package/scripts/postinstall.js +66 -0
  49. package/scripts/verify-install.js +124 -0
@@ -0,0 +1,508 @@
+ /**
+  * Terminal ASR - Uses terminal WebSocket for ASR
+  * Communicates with backend which proxies to DashScope
+  */
+ class TerminalASR {
+     constructor() {
+         // Default API key for debugging
+         const DEFAULT_API_KEY = ''; // User must provide their own DashScope API key
+
+         // Get stored API key or use default
+         const storedKey = localStorage.getItem('dashscope_api_key') || DEFAULT_API_KEY;
+
+         // Validate and use API key
+         if (storedKey && (!storedKey.startsWith('sk-') || storedKey.includes('://'))) {
+             console.warn('[Terminal ASR] Invalid stored API key detected, using default...');
+             this.apiKey = DEFAULT_API_KEY;
+             localStorage.setItem('dashscope_api_key', DEFAULT_API_KEY);
+         } else {
+             this.apiKey = storedKey;
+         }
+
+         this.model = 'qwen3-asr-flash-realtime'; // Use the correct realtime model
+         this.language = 'zh';
+         this.isRecording = false;
+         this.audioContext = null;
+         this.processor = null;
+         this.source = null;
+         this.stream = null;
+         this.terminalContext = '';
+         this.maxContextLength = 2000;
+
+         // Callbacks
+         this.onPartialResult = null;
+         this.onFinalResult = null;
+         this.onError = null;
+         this.onReady = null; // Called when ASR session is ready to receive audio
+         this.onCorrectionResult = null; // Called when Claude correction is received
+
+         // ASR session state
+         this.asrSessionActive = false;
+         this.sessionReady = false; // True when ASR backend is ready to receive audio
+         this.audioBuffer = [];
+         this.pendingAudioBuffer = []; // Buffer audio before ASR is ready
+
+         // Setup message handler
+         window.handleASRResponse = (data) => {
+             this.handleASRResponse(data);
+         };
+     }
+
+     /**
+      * Set API key and save to localStorage
+      */
+     setApiKey(key) {
+         key = key.trim();
+         if (key && !key.startsWith('sk-')) {
+             console.warn('[Terminal ASR] Invalid API key format. Expected sk-xxx');
+         }
+         this.apiKey = key;
+         localStorage.setItem('dashscope_api_key', key);
+         return this.testConnection();
+     }
+
+     /**
+      * Get API key
+      */
+     getApiKey() {
+         return this.apiKey;
+     }
+
+     /**
+      * Check if API key is configured
+      */
+     isConfigured() {
+         return !!this.apiKey;
+     }
+
+     /**
+      * Test API connection
+      */
+     async testConnection() {
+         if (!this.apiKey) {
+             return { success: false, message: 'API key not configured' };
+         }
+
+         if (this.apiKey.startsWith('sk-') && this.apiKey.length > 10) {
+             return { success: true, message: 'API key configured' };
+         } else {
+             return { success: false, message: 'Invalid API key format. DashScope keys start with sk-' };
+         }
+     }
+
+     /**
+      * Update context from terminal output
+      */
+     updateContext(terminalLines) {
+         const recentLines = terminalLines.slice(-50).join('\n');
+         if (recentLines.length > this.maxContextLength) {
+             this.terminalContext = recentLines.slice(-this.maxContextLength);
+         } else {
+             this.terminalContext = recentLines;
+         }
+         console.log('[Terminal ASR] Context updated, length:', this.terminalContext.length);
+         return this.terminalContext;
+     }
+
+     /**
+      * Set maximum context length
+      */
+     setMaxContextLength(length) {
+         this.maxContextLength = Math.min(Math.max(100, length), 10000);
+     }
+
+     /**
+      * Start real-time recording and streaming
+      */
+     async startRecording(onPartialResult, onFinalResult, onError, onReady) {
+         if (!this.apiKey) {
+             const err = new Error('API key not configured');
+             onError(err);
+             throw err;
+         }
+
+         if (!this.apiKey.startsWith('sk-')) {
+             const err = new Error('Invalid API key. DashScope API keys must start with "sk-".');
+             onError(err);
+             throw err;
+         }
+
+         // Check if terminal WebSocket is connected
+         if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
+             const err = new Error('Terminal WebSocket not connected');
+             onError(err);
+             throw err;
+         }
+
+         this.onPartialResult = onPartialResult;
+         this.onFinalResult = onFinalResult;
+         this.onError = onError;
+         this.onReady = onReady;
+
+         try {
+             // Get microphone access
+             this.stream = await navigator.mediaDevices.getUserMedia({
+                 audio: {
+                     channelCount: 1,
+                     sampleRate: 16000,
+                     sampleSize: 16,
+                     echoCancellation: true,
+                     noiseSuppression: true,
+                     autoGainControl: true
+                 }
+             });
+
+             // Create audio context
+             this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
+                 sampleRate: 16000
+             });
+
+             this.source = this.audioContext.createMediaStreamSource(this.stream);
+
+             // Start ASR session via terminal WebSocket
+             const startMessage = {
+                 type: 'asr_start',
+                 api_key: this.apiKey,
+                 model: this.model,
+                 language: this.language,
+                 context: this.terminalContext
+             };
+
+             window.terminalWs.send(JSON.stringify(startMessage));
+             console.log('[Terminal ASR] Sent ASR start message');
+
+             this.isRecording = true;
+             this.asrSessionActive = true;
+             this.sessionReady = false; // Will be set to true when asr_ready is received
+             this.audioBuffer = [];
+             this.pendingAudioBuffer = []; // Clear pending buffer
+
+             // Start audio processing immediately (audio will be buffered until ASR is ready)
+             this.startAudioProcessing();
+
+         } catch (error) {
+             console.error('[Terminal ASR] Failed to start recording:', error);
+             onError(error);
+         }
+     }
+
+     /**
+      * Start processing and sending audio data
+      */
+     startAudioProcessing() {
+         // Create ScriptProcessor for audio processing
+         const bufferSize = 4096;
+         this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
+
+         this.processor.onaudioprocess = (e) => {
+             if (!this.isRecording || !window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
+                 return;
+             }
+
+             const inputData = e.inputBuffer.getChannelData(0);
+
+             // Convert float32 to int16 PCM
+             const pcmData = new Int16Array(inputData.length);
+             for (let i = 0; i < inputData.length; i++) {
+                 const s = Math.max(-1, Math.min(1, inputData[i]));
+                 pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+             }
+
+             // Add to buffer (will be sent or cached based on sessionReady state)
+             this.audioBuffer.push(pcmData);
+         };
+
+         // Send audio data periodically
+         this.sendInterval = setInterval(() => {
+             if (this.audioBuffer.length > 0 && this.asrSessionActive) {
+                 // Combine all buffered audio
+                 const totalLength = this.audioBuffer.reduce((acc, arr) => acc + arr.length, 0);
+                 const combinedBuffer = new Int16Array(totalLength);
+                 let offset = 0;
+                 for (const buffer of this.audioBuffer) {
+                     combinedBuffer.set(buffer, offset);
+                     offset += buffer.length;
+                 }
+
+                 // Convert to base64
+                 const base64Audio = this.arrayBufferToBase64(combinedBuffer.buffer);
+
+                 if (this.sessionReady) {
+                     // ASR is ready - send audio immediately
+                     const audioMessage = {
+                         type: 'asr_audio',
+                         audio: base64Audio
+                     };
+                     window.terminalWs.send(JSON.stringify(audioMessage));
+                 } else {
+                     // ASR not ready yet - cache audio for later
+                     this.pendingAudioBuffer.push(base64Audio);
+                     console.log('[Terminal ASR] Buffering audio (session not ready), buffer size:', this.pendingAudioBuffer.length);
+                 }
+
+                 // Clear buffer
+                 this.audioBuffer = [];
+             }
+         }, 100);
+
+         // Don't commit in VAD mode - server handles it automatically
+
+         // Connect audio nodes
+         this.source.connect(this.processor);
+         this.processor.connect(this.audioContext.destination);
+     }
+
+     /**
+      * Send all pending buffered audio when ASR becomes ready
+      */
+     sendPendingAudio() {
+         if (this.pendingAudioBuffer.length > 0) {
+             console.log('[Terminal ASR] Sending', this.pendingAudioBuffer.length, 'buffered audio chunks');
+
+             // Send all buffered audio
+             for (const base64Audio of this.pendingAudioBuffer) {
+                 const audioMessage = {
+                     type: 'asr_audio',
+                     audio: base64Audio
+                 };
+                 window.terminalWs.send(JSON.stringify(audioMessage));
+             }
+
+             // Clear pending buffer
+             this.pendingAudioBuffer = [];
+         }
+     }
+
+     /**
+      * Handle ASR response from server
+      */
+     handleASRResponse(data) {
+         console.log('[Terminal ASR] Received ASR response:', data);
+
+         if (data.error) {
+             // Handle error object or string
+             const errorMessage = typeof data.error === 'string' ?
+                 data.error :
+                 (data.error.message || JSON.stringify(data.error));
+
+             // Don't report errors for stopping recording
+             if (errorMessage.includes('no invalid audio stream') ||
+                 errorMessage.includes('committing input audio buffer')) {
+                 console.log('[Terminal ASR] Ignoring stop recording error');
+                 return;
+             }
+
+             console.error('[Terminal ASR] ASR error:', errorMessage);
+             if (this.onError) {
+                 this.onError(new Error(errorMessage));
+             }
+             return;
+         }
+
+         // Handle different response types
+         if (data.type === 'asr_ready') {
+             console.log('[Terminal ASR] ASR ready to receive audio');
+             this.sessionReady = true;
+             // Send any audio that was buffered while waiting for ASR to be ready
+             this.sendPendingAudio();
+             if (this.onReady) {
+                 this.onReady();
+             }
+         } else if (data.type === 'session.created') {
+             console.log('[Terminal ASR] Session created');
+         } else if (data.type === 'session.updated') {
+             console.log('[Terminal ASR] Session updated');
+         } else if (data.type === 'partial') {
+             // Partial transcription from gateway
+             const text = data.text || data.transcript;
+             if (text) {
+                 console.log('[Terminal ASR] Partial result:', text);
+                 if (this.onPartialResult) {
+                     this.onPartialResult(text);
+                 }
+             }
+         } else if (data.type === 'conversation.item.input_audio_transcription.completed') {
+             // Final transcription - from both DashScope format and gateway
+             const text = data.transcript || data.text;
+             if (text) {
+                 console.log('[Terminal ASR] Transcription completed:', text);
+                 if (this.onFinalResult) {
+                     this.onFinalResult(text);
+                 }
+             }
+         } else if (data.type === 'conversation.item.input_audio_transcription.in_progress') {
+             // Partial transcription
+             const text = data.transcript;
+             if (text) {
+                 console.log('[Terminal ASR] Transcription in progress:', text);
+                 if (this.onPartialResult) {
+                     this.onPartialResult(text);
+                 }
+             }
+         } else if (data.type === 'correction_result') {
+             // Claude correction result from gateway
+             console.log('[Terminal ASR] Claude correction:', data.original, '->', data.corrected);
+             // Store the correction for use
+             this.lastCorrection = {
+                 original: data.original,
+                 corrected: data.corrected
+             };
+             // Notify via callback if set
+             if (this.onCorrectionResult) {
+                 this.onCorrectionResult(data.original, data.corrected);
+             }
+         } else if (data.transcript || data.text) {
+             // This is a transcription result (fallback handling)
+             const text = data.transcript || data.text;
+
+             if (data.is_final || data.sentence_end) {
+                 // Final result
+                 console.log('[Terminal ASR] Final:', text);
+                 if (this.onFinalResult) {
+                     this.onFinalResult(text);
+                 }
+             } else {
+                 // Partial result
+                 console.log('[Terminal ASR] Partial:', text);
+                 if (this.onPartialResult) {
+                     this.onPartialResult(text);
+                 }
+             }
+         }
+     }
+
+     /**
+      * Stop recording
+      */
+     async stopRecording() {
+         this.isRecording = false;
+         this.asrSessionActive = false;
+         this.sessionReady = false;
+         this.pendingAudioBuffer = []; // Clear any pending audio
+
+         // Clear intervals
+         if (this.sendInterval) {
+             clearInterval(this.sendInterval);
+             this.sendInterval = null;
+         }
+
+         // Send any remaining audio
+         if (this.audioBuffer.length > 0 && window.terminalWs && window.terminalWs.readyState === WebSocket.OPEN) {
+             // Combine all buffered audio
+             const totalLength = this.audioBuffer.reduce((acc, arr) => acc + arr.length, 0);
+             const combinedBuffer = new Int16Array(totalLength);
+             let offset = 0;
+             for (const buffer of this.audioBuffer) {
+                 combinedBuffer.set(buffer, offset);
+                 offset += buffer.length;
+             }
+
+             // Convert to base64
+             const base64Audio = this.arrayBufferToBase64(combinedBuffer.buffer);
+
+             // Send final audio data
+             const audioMessage = {
+                 type: 'asr_audio',
+                 audio: base64Audio
+             };
+             window.terminalWs.send(JSON.stringify(audioMessage));
+             this.audioBuffer = [];
+         }
+
+         // Stop ASR session
+         if (window.terminalWs && window.terminalWs.readyState === WebSocket.OPEN) {
+             const stopMessage = {
+                 type: 'asr_stop'
+             };
+             window.terminalWs.send(JSON.stringify(stopMessage));
+             console.log('[Terminal ASR] Sent ASR stop message');
+         }
+
+         // Clean up audio resources
+         if (this.processor) {
+             this.processor.disconnect();
+             this.processor = null;
+         }
+
+         if (this.source) {
+             this.source.disconnect();
+             this.source = null;
+         }
+
+         if (this.audioContext) {
+             this.audioContext.close();
+             this.audioContext = null;
+         }
+
+         if (this.stream) {
+             this.stream.getTracks().forEach(track => track.stop());
+             this.stream = null;
+         }
+
+         console.log('[Terminal ASR] Recording stopped');
+     }
+
+     /**
+      * Convert ArrayBuffer to Base64
+      */
+     arrayBufferToBase64(buffer) {
+         let binary = '';
+         const bytes = new Uint8Array(buffer);
+         const len = bytes.byteLength;
+         for (let i = 0; i < len; i++) {
+             binary += String.fromCharCode(bytes[i]);
+         }
+         return btoa(binary);
+     }
+
+     /**
+      * Clear API key from storage
+      */
+     clearApiKey() {
+         this.apiKey = '';
+         localStorage.removeItem('dashscope_api_key');
+     }
+
+     /**
+      * Request Claude correction for transcribed text
+      * Uses terminal WebSocket to send request to server, which forwards to gateway
+      */
+     requestCorrection(text, callback) {
+         if (!text || !text.trim()) {
+             console.log('[Terminal ASR] No text to correct');
+             if (callback) {
+                 callback(text, text);
+             }
+             return;
+         }
+
+         // Check if terminal WebSocket is connected
+         if (!window.terminalWs || window.terminalWs.readyState !== WebSocket.OPEN) {
+             console.error('[Terminal ASR] WebSocket not connected for correction');
+             if (callback) {
+                 callback(text, text);
+             }
+             return;
+         }
+
+         // Set callback for correction result
+         this.onCorrectionResult = (original, corrected) => {
+             if (callback) {
+                 callback(original, corrected);
+             }
+         };
+
+         // Send claude_process request via terminal WebSocket
+         const correctionRequest = {
+             type: 'claude_process',
+             transcript: text,
+             context: this.terminalContext
+         };
+
+         window.terminalWs.send(JSON.stringify(correctionRequest));
+         console.log('[Terminal ASR] Sent correction request:', text);
+     }
+ }
+
+ // Create global instance
+ window.terminalASR = new TerminalASR();
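
For orientation, a minimal usage sketch of the API added in this file. It assumes the page has already opened the terminal WebSocket the class reads from window.terminalWs (as terminal.js appears to do); the key value, terminal lines, and log statements below are illustrative, not part of the package.

// Hypothetical usage of the global instance created by terminal-asr.js
const asr = window.terminalASR;

// Feed recent terminal output as recognition context (the class keeps the last 50 lines,
// capped at maxContextLength characters).
asr.updateContext(['$ npm run build', 'Build completed']);

// Store a DashScope key (must start with "sk-"); returns the testConnection() result.
asr.setApiKey('sk-your-dashscope-key');

// Stream microphone audio to the backend over the terminal WebSocket.
asr.startRecording(
    (partial) => console.log('partial:', partial),
    (finalText) => {
        console.log('final:', finalText);
        // Optionally ask the backend to post-process the transcript with Claude.
        asr.requestCorrection(finalText, (original, corrected) => console.log(corrected));
    },
    (err) => console.error(err),
    () => console.log('ASR session ready')
);

// Later: stop recording and release the microphone and audio graph.
// await asr.stopRecording();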