@elizaos/plugin-vision 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/.npmignore +5 -0
  2. package/README.md +270 -0
  3. package/build.config.ts +70 -0
  4. package/dist/action.d.ts +8 -0
  5. package/dist/action.js +1212 -0
  6. package/dist/action.js.map +1 -0
  7. package/dist/audio-capture-stream.d.ts +42 -0
  8. package/dist/audio-capture-stream.js +516 -0
  9. package/dist/audio-capture-stream.js.map +1 -0
  10. package/dist/audio-capture.d.ts +25 -0
  11. package/dist/audio-capture.js +412 -0
  12. package/dist/audio-capture.js.map +1 -0
  13. package/dist/basic.test.d.ts +1 -0
  14. package/dist/basic.test.js +97 -0
  15. package/dist/basic.test.js.map +1 -0
  16. package/dist/config.d.ts +73 -0
  17. package/dist/config.js +254 -0
  18. package/dist/config.js.map +1 -0
  19. package/dist/entity-tracker.d.ts +32 -0
  20. package/dist/entity-tracker.js +361 -0
  21. package/dist/entity-tracker.js.map +1 -0
  22. package/dist/errors.d.ts +67 -0
  23. package/dist/errors.js +395 -0
  24. package/dist/errors.js.map +1 -0
  25. package/dist/face-recognition.d.ts +31 -0
  26. package/dist/face-recognition.js +332 -0
  27. package/dist/face-recognition.js.map +1 -0
  28. package/dist/florence2-local.d.ts +25 -0
  29. package/dist/florence2-local.js +280 -0
  30. package/dist/florence2-local.js.map +1 -0
  31. package/dist/florence2-model.d.ts +36 -0
  32. package/dist/florence2-model.js +503 -0
  33. package/dist/florence2-model.js.map +1 -0
  34. package/dist/index.d.ts +3 -0
  35. package/dist/index.js +73 -0
  36. package/dist/index.js.map +1 -0
  37. package/dist/ocr-service-real.d.ts +32 -0
  38. package/dist/ocr-service-real.js +396 -0
  39. package/dist/ocr-service-real.js.map +1 -0
  40. package/dist/ocr-service.d.ts +28 -0
  41. package/dist/ocr-service.js +216 -0
  42. package/dist/ocr-service.js.map +1 -0
  43. package/dist/provider.d.ts +2 -0
  44. package/dist/provider.js +285 -0
  45. package/dist/provider.js.map +1 -0
  46. package/dist/screen-capture.d.ts +16 -0
  47. package/dist/screen-capture.js +302 -0
  48. package/dist/screen-capture.js.map +1 -0
  49. package/dist/service.d.ts +73 -0
  50. package/dist/service.js +1662 -0
  51. package/dist/service.js.map +1 -0
  52. package/dist/tests/e2e/index.d.ts +8 -0
  53. package/dist/tests/e2e/index.js +33 -0
  54. package/dist/tests/e2e/index.js.map +1 -0
  55. package/dist/tests/e2e/run-local.d.ts +2 -0
  56. package/dist/tests/e2e/run-local.js +166 -0
  57. package/dist/tests/e2e/run-local.js.map +1 -0
  58. package/dist/tests/e2e/screen-vision.d.ts +11 -0
  59. package/dist/tests/e2e/screen-vision.js +384 -0
  60. package/dist/tests/e2e/screen-vision.js.map +1 -0
  61. package/dist/tests/e2e/vision-autonomy.d.ts +11 -0
  62. package/dist/tests/e2e/vision-autonomy.js +375 -0
  63. package/dist/tests/e2e/vision-autonomy.js.map +1 -0
  64. package/dist/tests/e2e/vision-basic.d.ts +11 -0
  65. package/dist/tests/e2e/vision-basic.js +434 -0
  66. package/dist/tests/e2e/vision-basic.js.map +1 -0
  67. package/dist/tests/e2e/vision-capture-log.d.ts +11 -0
  68. package/dist/tests/e2e/vision-capture-log.js +302 -0
  69. package/dist/tests/e2e/vision-capture-log.js.map +1 -0
  70. package/dist/tests/e2e/vision-runtime.d.ts +11 -0
  71. package/dist/tests/e2e/vision-runtime.js +357 -0
  72. package/dist/tests/e2e/vision-runtime.js.map +1 -0
  73. package/dist/tests/e2e/vision-worker-tests.d.ts +11 -0
  74. package/dist/tests/e2e/vision-worker-tests.js +466 -0
  75. package/dist/tests/e2e/vision-worker-tests.js.map +1 -0
  76. package/dist/tests/test-pattern-generator.d.ts +40 -0
  77. package/dist/tests/test-pattern-generator.js +191 -0
  78. package/dist/tests/test-pattern-generator.js.map +1 -0
  79. package/dist/tests.d.ts +3 -0
  80. package/dist/tests.js +11 -0
  81. package/dist/tests.js.map +1 -0
  82. package/dist/types.d.ts +222 -0
  83. package/dist/types.js +16 -0
  84. package/dist/types.js.map +1 -0
  85. package/dist/vision-models.d.ts +47 -0
  86. package/dist/vision-models.js +501 -0
  87. package/dist/vision-models.js.map +1 -0
  88. package/dist/vision-worker-manager.d.ts +61 -0
  89. package/dist/vision-worker-manager.js +668 -0
  90. package/dist/vision-worker-manager.js.map +1 -0
  91. package/dist/workers/florence2-worker-simple.d.ts +13 -0
  92. package/dist/workers/florence2-worker-simple.js +121 -0
  93. package/dist/workers/florence2-worker-simple.js.map +1 -0
  94. package/dist/workers/florence2-worker.d.ts +1 -0
  95. package/dist/workers/florence2-worker.js +328 -0
  96. package/dist/workers/florence2-worker.js.map +1 -0
  97. package/dist/workers/ocr-worker.d.ts +1 -0
  98. package/dist/workers/ocr-worker.js +354 -0
  99. package/dist/workers/ocr-worker.js.map +1 -0
  100. package/dist/workers/screen-capture-worker.d.ts +1 -0
  101. package/dist/workers/screen-capture-worker.js +427 -0
  102. package/dist/workers/screen-capture-worker.js.map +1 -0
  103. package/dist/workers/worker-logger.d.ts +9 -0
  104. package/dist/workers/worker-logger.js +95 -0
  105. package/dist/workers/worker-logger.js.map +1 -0
  106. package/package.json +100 -0
@@ -0,0 +1,516 @@
1
+ "use strict";
2
// Compiler-emitted inheritance helper (reuses an existing this.__extends when present).
// extendStatics links/copies static members from base `b` onto derived `d`, preferring
// Object.setPrototypeOf, then a __proto__ assignment, then an own-property copy loop.
// The returned function throws TypeError when `b` is neither a function nor null, then
// makes d.prototype an object that inherits from b.prototype with `constructor` set to d.
+ var __extends = (this && this.__extends) || (function () {
3
+ var extendStatics = function (d, b) {
4
+ extendStatics = Object.setPrototypeOf ||
5
+ ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
6
+ function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
7
+ return extendStatics(d, b);
8
+ };
9
+ return function (d, b) {
10
+ if (typeof b !== "function" && b !== null)
11
+ throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
12
+ extendStatics(d, b);
13
+ function __() { this.constructor = d; }
14
+ d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
15
+ };
16
+ })();
17
// Compiler-emitted object-spread helper (reuses an existing this.__assign when present).
// On first call it rebinds itself to Object.assign, or to a fallback that copies the own
// enumerable properties of every argument after the first onto the first argument and
// returns that first argument. Later sources therefore win over earlier ones.
+ var __assign = (this && this.__assign) || function () {
18
+ __assign = Object.assign || function(t) {
19
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
20
+ s = arguments[i];
21
+ for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
22
+ t[p] = s[p];
23
+ }
24
+ return t;
25
+ };
26
+ return __assign.apply(this, arguments);
27
+ };
28
// Compiler-emitted async-function driver (reuses an existing this.__awaiter when present).
// Calls `generator` with `thisArg`/`_arguments`, then steps the resulting iterator inside a
// promise (P, defaulting to the global Promise): each yielded value is adopted as a promise
// and its settlement is fed back through next()/throw(); the outer promise resolves with
// the iterator's final value and rejects if stepping throws.
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
29
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
30
+ return new (P || (P = Promise))(function (resolve, reject) {
31
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
32
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
33
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
34
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
35
+ });
36
+ };
37
// Compiler-emitted generator emulation (reuses an existing this.__generator when present).
// `body` is a switch-based state machine that receives the state object `_`:
//   _.label - next case to run, _.sent() - value (or thrown error) passed in,
//   _.trys  - stack of try regions, _.ops - pending completion ops for finally blocks.
// `body` returns an opcode array [code, value]. The codes used by the methods in this file
// are annotated at their call sites: 2 return, 3 break (jump to label), 4 yield,
// 7 endfinally. Code 6 is produced below when `body` throws.
// The returned object `g` exposes next/throw/return (and Symbol.iterator when available).
+ var __generator = (this && this.__generator) || function (thisArg, body) {
38
+ var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
39
+ return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
40
+ function verb(n) { return function (v) { return step([n, v]); }; }
41
+ function step(op) {
42
// `f` is the re-entrancy flag: stepping while a step is already running is an error.
+ if (f) throw new TypeError("Generator is already executing.");
43
+ while (g && (g = 0, op[0] && (_ = 0)), _) try {
44
+ if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
45
+ if (y = 0, t) op = [op[0] & 2, t.value];
46
+ switch (op[0]) {
47
+ case 0: case 1: t = op; break;
48
+ case 4: _.label++; return { value: op[1], done: false };
49
+ case 5: _.label++; y = op[1]; op = [0]; continue;
50
+ case 7: op = _.ops.pop(); _.trys.pop(); continue;
51
+ default:
52
+ if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
53
+ if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
54
+ if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
55
+ if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
56
+ if (t[2]) _.ops.pop();
57
+ _.trys.pop(); continue;
58
+ }
59
+ op = body.call(thisArg, _);
60
// An exception thrown by `body` is converted to opcode 6 and routed through the try stack.
+ } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
61
+ if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
62
+ }
63
+ };
64
+ Object.defineProperty(exports, "__esModule", { value: true });
65
+ exports.StreamingAudioCaptureService = void 0;
66
+ var core_1 = require("@elizaos/core");
67
+ var child_process_1 = require("child_process");
68
+ var events_1 = require("events");
69
+ var StreamingAudioCaptureService = /** @class */ (function (_super) {
70
+ __extends(StreamingAudioCaptureService, _super);
71
+ function StreamingAudioCaptureService(runtime, config) {
72
+ var _this = _super.call(this) || this;
73
+ _this.captureProcess = null;
74
+ _this.isCapturing = false;
75
+ _this.audioBuffer = [];
76
+ _this.isSpeaking = false;
77
+ _this.lastSpeechTime = 0;
78
+ _this.silenceTimer = null;
79
+ _this.transcriptionInProgress = false;
80
+ _this.currentTranscription = '';
81
+ _this.responseTimer = null;
82
+ _this.runtime = runtime;
83
+ _this.config = __assign({ sampleRate: 16000, channels: 1, vadThreshold: 0.01, silenceTimeout: 1500, responseDelay: 3000, chunkSize: 4096 }, config);
84
+ return _this;
85
+ }
86
// Starts the service. Returns a promise (via __awaiter).
// - When config.enabled is falsy: logs that capture is disabled and resolves without
//   starting anything.
// - Otherwise awaits startContinuousCapture(); on failure the error is logged and rethrown,
//   so the returned promise rejects with the same error.
+ StreamingAudioCaptureService.prototype.initialize = function () {
87
+ return __awaiter(this, void 0, void 0, function () {
88
+ var error_1;
89
+ return __generator(this, function (_a) {
90
+ switch (_a.label) {
91
+ case 0:
92
+ if (!this.config.enabled) {
93
+ core_1.logger.info('[StreamingAudio] Audio capture disabled');
94
+ return [2 /*return*/];
95
+ }
96
+ _a.label = 1;
97
+ case 1:
// try region: body starts at label 1, catch handler is label 3, continuation is label 4.
98
+ _a.trys.push([1, 3, , 4]);
99
+ core_1.logger.info('[StreamingAudio] Initializing streaming audio capture...');
100
+ // Start continuous audio capture
101
+ return [4 /*yield*/, this.startContinuousCapture()];
102
+ case 2:
103
+ // Start continuous audio capture
104
+ _a.sent();
105
+ core_1.logger.info('[StreamingAudio] Streaming audio capture initialized');
106
+ return [3 /*break*/, 4];
107
+ case 3:
// catch: log and propagate the failure to the caller.
108
+ error_1 = _a.sent();
109
+ core_1.logger.error('[StreamingAudio] Failed to initialize:', error_1);
110
+ throw error_1;
111
+ case 4: return [2 /*return*/];
112
+ }
113
+ });
114
+ });
115
+ };
116
+ StreamingAudioCaptureService.prototype.startContinuousCapture = function () {
117
+ return __awaiter(this, void 0, void 0, function () {
118
+ var platform, command, args;
119
+ var _this = this;
120
+ var _a, _b;
121
+ return __generator(this, function (_c) {
122
+ platform = process.platform;
123
+ if (platform === 'darwin') {
124
+ // macOS: Use sox for continuous capture
125
+ command = 'sox';
126
+ args = [
127
+ '-d', // default input device
128
+ '-r',
129
+ this.config.sampleRate.toString(),
130
+ '-c',
131
+ this.config.channels.toString(),
132
+ '-b',
133
+ '16',
134
+ '-e',
135
+ 'signed',
136
+ '-t',
137
+ 'raw',
138
+ '-', // output to stdout
139
+ ];
140
+ }
141
+ else if (platform === 'linux') {
142
+ // Linux: Use arecord
143
+ command = 'arecord';
144
+ args = [
145
+ '-D',
146
+ this.config.device || 'default',
147
+ '-f',
148
+ 'S16_LE',
149
+ '-r',
150
+ this.config.sampleRate.toString(),
151
+ '-c',
152
+ this.config.channels.toString(),
153
+ '-t',
154
+ 'raw',
155
+ '-', // output to stdout
156
+ ];
157
+ }
158
+ else if (platform === 'win32') {
159
+ // Windows: Use ffmpeg
160
+ command = 'ffmpeg';
161
+ args = [
162
+ '-f',
163
+ 'dshow',
164
+ '-i',
165
+ "audio=\"".concat(this.config.device || 'Microphone', "\""),
166
+ '-acodec',
167
+ 'pcm_s16le',
168
+ '-ar',
169
+ this.config.sampleRate.toString(),
170
+ '-ac',
171
+ this.config.channels.toString(),
172
+ '-f',
173
+ 's16le',
174
+ 'pipe:1', // output to stdout
175
+ ];
176
+ }
177
+ else {
178
+ throw new Error("Unsupported platform: ".concat(platform));
179
+ }
180
+ this.captureProcess = (0, child_process_1.spawn)(command, args);
181
+ this.isCapturing = true;
182
+ // Handle audio data stream
183
+ (_a = this.captureProcess.stdout) === null || _a === void 0 ? void 0 : _a.on('data', function (chunk) {
184
+ _this.processAudioChunk(chunk);
185
+ });
186
+ (_b = this.captureProcess.stderr) === null || _b === void 0 ? void 0 : _b.on('data', function (data) {
187
+ core_1.logger.debug('[StreamingAudio] Capture stderr:', data.toString());
188
+ });
189
+ this.captureProcess.on('error', function (error) {
190
+ core_1.logger.error('[StreamingAudio] Capture process error:', error);
191
+ _this.isCapturing = false;
192
+ });
193
+ this.captureProcess.on('exit', function (code) {
194
+ core_1.logger.info('[StreamingAudio] Capture process exited with code:', code);
195
+ _this.isCapturing = false;
196
+ });
197
+ return [2 /*return*/];
198
+ });
199
+ });
200
+ };
201
+ StreamingAudioCaptureService.prototype.processAudioChunk = function (chunk) {
202
+ var _this = this;
203
+ // Calculate audio energy for VAD
204
+ var energy = this.calculateEnergy(chunk);
205
+ var timestamp = Date.now();
206
+ // Store chunk
207
+ var audioChunk = { data: chunk, timestamp: timestamp, energy: energy };
208
+ // Voice Activity Detection
209
+ if (energy > this.config.vadThreshold) {
210
+ if (!this.isSpeaking) {
211
+ // Speech started
212
+ this.isSpeaking = true;
213
+ this.lastSpeechTime = timestamp;
214
+ core_1.logger.debug('[StreamingAudio] Speech detected, starting recording');
215
+ this.emit('speechStart');
216
+ // Clear any pending response
217
+ if (this.responseTimer) {
218
+ clearTimeout(this.responseTimer);
219
+ this.responseTimer = null;
220
+ core_1.logger.debug('[StreamingAudio] Cancelled pending response due to new speech');
221
+ }
222
+ }
223
+ // Add to buffer
224
+ this.audioBuffer.push(audioChunk);
225
+ this.lastSpeechTime = timestamp;
226
+ // Reset silence timer
227
+ if (this.silenceTimer) {
228
+ clearTimeout(this.silenceTimer);
229
+ }
230
+ // Start streaming transcription if not already running
231
+ if (!this.transcriptionInProgress) {
232
+ this.startStreamingTranscription();
233
+ }
234
+ }
235
+ else if (this.isSpeaking) {
236
+ // Currently in speech but detected silence
237
+ this.audioBuffer.push(audioChunk);
238
+ // Set timer for end of speech
239
+ if (!this.silenceTimer) {
240
+ this.silenceTimer = setTimeout(function () {
241
+ _this.endSpeech();
242
+ }, this.config.silenceTimeout);
243
+ }
244
+ }
245
+ // Clean up old chunks (keep last 30 seconds)
246
+ var cutoffTime = timestamp - 30000;
247
+ this.audioBuffer = this.audioBuffer.filter(function (c) { return c.timestamp > cutoffTime; });
248
+ };
249
+ StreamingAudioCaptureService.prototype.calculateEnergy = function (chunk) {
250
+ // Calculate RMS energy of audio chunk
251
+ var sum = 0;
252
+ var samples = chunk.length / 2; // 16-bit samples
253
+ for (var i = 0; i < chunk.length; i += 2) {
254
+ var sample = chunk.readInt16LE(i);
255
+ sum += sample * sample;
256
+ }
257
+ var rms = Math.sqrt(sum / samples);
258
+ return rms / 32768; // Normalize to 0-1
259
+ };
260
// Produces a partial transcription of the audio buffered so far. Returns a promise.
// - No-op when a transcription is already in progress (transcriptionInProgress guard).
// - Takes a snapshot with getRecentAudioData(); if it is empty the guard flag is reset and
//   the method returns without rescheduling.
// - A non-blank result is stored in currentTranscription, logged, and emitted as a
//   'transcription' event with isFinal: false.
// - Errors are logged, not rethrown.
// - Afterwards the guard flag is reset and, while isSpeaking is still true, the method
//   schedules itself again after 500 ms.
+ StreamingAudioCaptureService.prototype.startStreamingTranscription = function () {
261
+ return __awaiter(this, void 0, void 0, function () {
262
+ var audioData, result, error_2;
263
+ var _this = this;
264
+ return __generator(this, function (_a) {
265
+ switch (_a.label) {
266
+ case 0:
267
+ if (this.transcriptionInProgress) {
268
+ return [2 /*return*/];
269
+ }
270
+ this.transcriptionInProgress = true;
271
+ core_1.logger.debug('[StreamingAudio] Starting streaming transcription');
272
+ _a.label = 1;
273
+ case 1:
// try region: body starts at label 1, catch handler is label 3, continuation is label 4.
274
+ _a.trys.push([1, 3, , 4]);
275
+ audioData = this.getRecentAudioData();
276
+ if (audioData.length === 0) {
277
+ this.transcriptionInProgress = false;
278
+ return [2 /*return*/];
279
+ }
280
+ return [4 /*yield*/, this.transcribeAudio(audioData)];
281
+ case 2:
282
+ result = _a.sent();
283
+ if (result && result.trim()) {
284
+ this.currentTranscription = result;
285
+ core_1.logger.info("[StreamingAudio] Partial transcription: \"".concat(result, "\""));
286
+ this.emit('transcription', { text: result, isFinal: false });
287
+ }
288
+ return [3 /*break*/, 4];
289
+ case 3:
290
+ error_2 = _a.sent();
291
+ core_1.logger.error('[StreamingAudio] Transcription error:', error_2);
292
+ return [3 /*break*/, 4];
293
+ case 4:
294
+ this.transcriptionInProgress = false;
295
+ // Continue transcription if still speaking
296
+ if (this.isSpeaking) {
297
+ setTimeout(function () { return _this.startStreamingTranscription(); }, 500);
298
+ }
299
+ return [2 /*return*/];
300
+ }
301
+ });
302
+ });
303
+ };
304
+ StreamingAudioCaptureService.prototype.endSpeech = function () {
305
+ if (!this.isSpeaking) {
306
+ return;
307
+ }
308
+ this.isSpeaking = false;
309
+ this.silenceTimer = null;
310
+ core_1.logger.debug('[StreamingAudio] Speech ended');
311
+ this.emit('speechEnd');
312
+ // Get final transcription
313
+ this.processFinalTranscription();
314
+ };
315
// Produces the final transcription for the utterance that just ended. Returns a promise.
// - Takes a snapshot with getRecentAudioData(); returns immediately when it is empty.
// - A non-blank result is stored in currentTranscription, logged, emitted as a
//   'transcription' event with isFinal: true, and generateResponse(text) is scheduled
//   after config.responseDelay ms (the handle is kept in responseTimer).
// - Errors are logged, not rethrown.
// - The finally step (label 4) always resets audioBuffer and currentTranscription.
// NOTE(review): the finally step runs after the awaited transcription, so any chunks
// buffered while the transcription was in flight are discarded as well - confirm that
// this is intended.
+ StreamingAudioCaptureService.prototype.processFinalTranscription = function () {
316
+ return __awaiter(this, void 0, void 0, function () {
317
+ var audioData, finalText_1, error_3;
318
+ var _this = this;
319
+ return __generator(this, function (_a) {
320
+ switch (_a.label) {
321
+ case 0:
322
+ audioData = this.getRecentAudioData();
323
+ if (audioData.length === 0) {
324
+ return [2 /*return*/];
325
+ }
326
+ _a.label = 1;
327
+ case 1:
// try region: body at label 1, catch at label 3, finally at label 4, continuation at 5.
328
+ _a.trys.push([1, 3, 4, 5]);
329
+ return [4 /*yield*/, this.transcribeAudio(audioData)];
330
+ case 2:
331
+ finalText_1 = _a.sent();
332
+ if (finalText_1 && finalText_1.trim()) {
333
+ this.currentTranscription = finalText_1;
334
+ core_1.logger.info("[StreamingAudio] Final transcription: \"".concat(finalText_1, "\""));
335
+ this.emit('transcription', { text: finalText_1, isFinal: true });
336
+ // Set timer for response generation
337
+ this.responseTimer = setTimeout(function () {
338
+ _this.generateResponse(finalText_1);
339
+ }, this.config.responseDelay);
340
+ }
341
+ return [3 /*break*/, 5];
342
+ case 3:
343
+ error_3 = _a.sent();
344
+ core_1.logger.error('[StreamingAudio] Final transcription error:', error_3);
345
+ return [3 /*break*/, 5];
346
+ case 4:
347
+ // Clear audio buffer
348
+ this.audioBuffer = [];
349
+ this.currentTranscription = '';
350
+ return [7 /*endfinally*/];
351
+ case 5: return [2 /*return*/];
352
+ }
353
+ });
354
+ });
355
+ };
356
+ StreamingAudioCaptureService.prototype.getRecentAudioData = function () {
357
+ if (this.audioBuffer.length === 0) {
358
+ return Buffer.alloc(0);
359
+ }
360
+ // Get audio from start of speech to now
361
+ var startTime = this.audioBuffer[0].timestamp;
362
+ var relevantChunks = this.audioBuffer.filter(function (c) { return c.timestamp >= startTime; });
363
+ // Combine chunks
364
+ var totalLength = relevantChunks.reduce(function (sum, c) { return sum + c.data.length; }, 0);
365
+ var combined = Buffer.alloc(totalLength);
366
+ var offset = 0;
367
+ for (var _i = 0, relevantChunks_1 = relevantChunks; _i < relevantChunks_1.length; _i++) {
368
+ var chunk = relevantChunks_1[_i];
369
+ chunk.data.copy(combined, offset);
370
+ offset += chunk.data.length;
371
+ }
372
+ return combined;
373
+ };
374
// Transcribes a buffer of raw PCM. Returns a promise.
// The data is wrapped with rawToWav() and passed to runtime.useModel with
// ModelType.TRANSCRIPTION and the options { audio, language: 'en', stream: true }.
// Resolves with whatever useModel returns; when the call fails the error is logged and
// the promise resolves with null instead of rejecting.
+ StreamingAudioCaptureService.prototype.transcribeAudio = function (audioData) {
375
+ return __awaiter(this, void 0, void 0, function () {
376
+ var wavBuffer, result, error_4;
377
+ return __generator(this, function (_a) {
378
+ switch (_a.label) {
379
+ case 0:
// try region: body starts at label 0, catch handler is label 2, continuation is label 3.
380
+ _a.trys.push([0, 2, , 3]);
381
+ wavBuffer = this.rawToWav(audioData);
382
+ return [4 /*yield*/, this.runtime.useModel(core_1.ModelType.TRANSCRIPTION, {
383
+ audio: wavBuffer,
384
+ language: 'en',
385
+ stream: true, // Request streaming if supported
386
+ })];
387
+ case 1:
388
+ result = _a.sent();
389
+ return [2 /*return*/, result];
390
+ case 2:
391
+ error_4 = _a.sent();
392
+ core_1.logger.error('[StreamingAudio] Transcription failed:', error_4);
393
+ return [2 /*return*/, null];
394
+ case 3: return [2 /*return*/];
395
+ }
396
+ });
397
+ });
398
+ };
399
+ StreamingAudioCaptureService.prototype.rawToWav = function (rawData) {
400
+ // Create WAV header
401
+ var sampleRate = this.config.sampleRate;
402
+ var channels = this.config.channels;
403
+ var bitsPerSample = 16;
404
+ var byteRate = sampleRate * channels * (bitsPerSample / 8);
405
+ var blockAlign = channels * (bitsPerSample / 8);
406
+ var dataSize = rawData.length;
407
+ var fileSize = 36 + dataSize;
408
+ var header = Buffer.alloc(44);
409
+ // RIFF chunk
410
+ header.write('RIFF', 0);
411
+ header.writeUInt32LE(fileSize, 4);
412
+ header.write('WAVE', 8);
413
+ // fmt chunk
414
+ header.write('fmt ', 12);
415
+ header.writeUInt32LE(16, 16); // fmt chunk size
416
+ header.writeUInt16LE(1, 20); // PCM format
417
+ header.writeUInt16LE(channels, 22);
418
+ header.writeUInt32LE(sampleRate, 24);
419
+ header.writeUInt32LE(byteRate, 28);
420
+ header.writeUInt16LE(blockAlign, 32);
421
+ header.writeUInt16LE(bitsPerSample, 34);
422
+ // data chunk
423
+ header.write('data', 36);
424
+ header.writeUInt32LE(dataSize, 40);
425
+ return Buffer.concat([header, rawData]);
426
+ };
427
// Runs when the response delay for a finished utterance has elapsed. Returns a promise.
// Clears the responseTimer handle, awaits createAudioMemory(transcription), then emits
// 'utteranceComplete' with the transcription. Errors are logged, not rethrown.
+ StreamingAudioCaptureService.prototype.generateResponse = function (transcription) {
428
+ return __awaiter(this, void 0, void 0, function () {
429
+ var error_5;
430
+ return __generator(this, function (_a) {
431
+ switch (_a.label) {
432
+ case 0:
433
+ this.responseTimer = null;
434
+ _a.label = 1;
435
+ case 1:
// try region: body starts at label 1, catch handler is label 3, continuation is label 4.
436
+ _a.trys.push([1, 3, , 4]);
437
+ // Create audio memory
438
+ return [4 /*yield*/, this.createAudioMemory(transcription)];
439
+ case 2:
440
+ // Create audio memory
441
+ _a.sent();
442
+ // Emit event for response generation
443
+ this.emit('utteranceComplete', transcription);
444
+ return [3 /*break*/, 4];
445
+ case 3:
446
+ error_5 = _a.sent();
447
+ core_1.logger.error('[StreamingAudio] Response generation error:', error_5);
448
+ return [3 /*break*/, 4];
449
+ case 4: return [2 /*return*/];
450
+ }
451
+ });
452
+ });
453
+ };
454
+ StreamingAudioCaptureService.prototype.createAudioMemory = function (transcription) {
455
+ return __awaiter(this, void 0, void 0, function () {
456
+ var _memory;
457
+ return __generator(this, function (_a) {
458
+ try {
459
+ _memory = {
460
+ content: {
461
+ text: "[Audio] ".concat(transcription),
462
+ type: 'audio_transcription',
463
+ source: 'microphone_streaming',
464
+ timestamp: Date.now(),
465
+ },
466
+ metadata: {
467
+ isAudioTranscription: true,
468
+ streaming: true,
469
+ },
470
+ };
471
+ core_1.logger.info('[StreamingAudio] Audio transcription stored in context');
472
+ }
473
+ catch (error) {
474
+ core_1.logger.error('[StreamingAudio] Failed to create audio memory:', error);
475
+ }
476
+ return [2 /*return*/];
477
+ });
478
+ });
479
+ };
480
+ StreamingAudioCaptureService.prototype.stop = function () {
481
+ return __awaiter(this, void 0, void 0, function () {
482
+ return __generator(this, function (_a) {
483
+ core_1.logger.info('[StreamingAudio] Stopping audio capture...');
484
+ if (this.captureProcess) {
485
+ this.captureProcess.kill();
486
+ this.captureProcess = null;
487
+ }
488
+ if (this.silenceTimer) {
489
+ clearTimeout(this.silenceTimer);
490
+ this.silenceTimer = null;
491
+ }
492
+ if (this.responseTimer) {
493
+ clearTimeout(this.responseTimer);
494
+ this.responseTimer = null;
495
+ }
496
+ this.isCapturing = false;
497
+ this.isSpeaking = false;
498
+ this.audioBuffer = [];
499
+ core_1.logger.info('[StreamingAudio] Audio capture stopped');
500
+ return [2 /*return*/];
501
+ });
502
+ });
503
+ };
504
+ StreamingAudioCaptureService.prototype.isActive = function () {
505
+ return this.isCapturing;
506
+ };
507
+ StreamingAudioCaptureService.prototype.getCurrentTranscription = function () {
508
+ return this.currentTranscription;
509
+ };
510
+ StreamingAudioCaptureService.prototype.isSpeechActive = function () {
511
+ return this.isSpeaking;
512
+ };
513
+ return StreamingAudioCaptureService;
514
+ }(events_1.EventEmitter));
515
+ exports.StreamingAudioCaptureService = StreamingAudioCaptureService;
516
+ //# sourceMappingURL=audio-capture-stream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"audio-capture-stream.js","sourceRoot":"","sources":["../src/audio-capture-stream.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,sCAAsE;AACtE,+CAAoD;AACpD,iCAAsC;AAmBtC;IAAkD,gDAAY;IAa5D,sCAAY,OAAsB,EAAE,MAA4B;QAC9D,YAAA,MAAK,WAAE,SAAC;QAXF,oBAAc,GAAwB,IAAI,CAAC;QAC3C,iBAAW,GAAG,KAAK,CAAC;QACpB,iBAAW,GAAiB,EAAE,CAAC;QAC/B,gBAAU,GAAG,KAAK,CAAC;QACnB,oBAAc,GAAG,CAAC,CAAC;QACnB,kBAAY,GAA0B,IAAI,CAAC;QAC3C,6BAAuB,GAAG,KAAK,CAAC;QAChC,0BAAoB,GAAG,EAAE,CAAC;QAC1B,mBAAa,GAA0B,IAAI,CAAC;QAIlD,KAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,KAAI,CAAC,MAAM,cACT,UAAU,EAAE,KAAK,EACjB,QAAQ,EAAE,CAAC,EACX,YAAY,EAAE,IAAI,EAClB,cAAc,EAAE,IAAI,EACpB,aAAa,EAAE,IAAI,EACnB,SAAS,EAAE,IAAI,IACZ,MAAM,CACV,CAAC;;IACJ,CAAC;IAEK,iDAAU,GAAhB;;;;;;wBACE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;4BACzB,aAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;4BACvD,sBAAO;wBACT,CAAC;;;;wBAGC,aAAM,CAAC,IAAI,CAAC,0DAA0D,CAAC,CAAC;wBAExE,iCAAiC;wBACjC,qBAAM,IAAI,CAAC,sBAAsB,EAAE,EAAA;;wBADnC,iCAAiC;wBACjC,SAAmC,CAAC;wBAEpC,aAAM,CAAC,IAAI,CAAC,sDAAsD,CAAC,CAAC;;;;wBAEpE,aAAM,CAAC,KAAK,CAAC,wCAAwC,EAAE,OAAK,CAAC,CAAC;wBAC9D,MAAM,OAAK,CAAC;;;;;KAEf;IAEa,6DAAsB,GAApC;;;;;;gBACQ,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;gBAIlC,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;oBAC1B,wCAAwC;oBACxC,OAAO,GAAG,KAAK,CAAC;oBAChB,IAAI,GAAG;wBACL,IAAI,EAAE,uBAAuB;wBAC7B,IAAI;wBACJ,IAAI,CAAC,MAAM,CAAC,UAAW,CAAC,QAAQ,EAAE;wBAClC,IAAI;wBACJ,IAAI,CAAC,MAAM,CAAC,QAAS,CAAC,QAAQ,EAAE;wBAChC,IAAI;wBACJ,IAAI;wBACJ,IAAI;wBACJ,QAAQ;wBACR,IAAI;wBACJ,KAAK;wBACL,GAAG,EAAE,mBAAmB;qBACzB,CAAC;gBACJ,CAAC;qBAAM,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;oBAChC,qBAAqB;oBACrB,OAAO,GAAG,SAAS,CAAC;oBACpB,IAAI,GAAG;wBACL,IAAI;wBACJ,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,SAAS;wBAC/B,IAAI;wBACJ,QAAQ;wBACR,IAAI;wBACJ,IAAI,CAAC,MAAM,CAAC,UAAW,CAAC,QAAQ,EAAE;wBAClC,IAAI;wBACJ,IAAI,CAAC,MAAM,CAAC,QAAS,CAAC,QAAQ,EAAE;wBAChC,IAAI;wBACJ,KAAK;wBACL,GAAG,EAAE,mBAAmB;qBACzB,CAAC;gBACJ,CAAC;qBAAM,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;oBAChC,sBAAsB;oBACtB,OAAO,GAAG
,QAAQ,CAAC;oBACnB,IAAI,GAAG;wBACL,IAAI;wBACJ,OAAO;wBACP,IAAI;wBACJ,kBAAU,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,YAAY,OAAG;wBAC/C,SAAS;wBACT,WAAW;wBACX,KAAK;wBACL,IAAI,CAAC,MAAM,CAAC,UAAW,CAAC,QAAQ,EAAE;wBAClC,KAAK;wBACL,IAAI,CAAC,MAAM,CAAC,QAAS,CAAC,QAAQ,EAAE;wBAChC,IAAI;wBACJ,OAAO;wBACP,QAAQ,EAAE,mBAAmB;qBAC9B,CAAC;gBACJ,CAAC;qBAAM,CAAC;oBACN,MAAM,IAAI,KAAK,CAAC,gCAAyB,QAAQ,CAAE,CAAC,CAAC;gBACvD,CAAC;gBAED,IAAI,CAAC,cAAc,GAAG,IAAA,qBAAK,EAAC,OAAO,EAAE,IAAI,CAAC,CAAC;gBAC3C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;gBAExB,2BAA2B;gBAC3B,MAAA,IAAI,CAAC,cAAc,CAAC,MAAM,0CAAE,EAAE,CAAC,MAAM,EAAE,UAAC,KAAa;oBACnD,KAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;gBAChC,CAAC,CAAC,CAAC;gBAEH,MAAA,IAAI,CAAC,cAAc,CAAC,MAAM,0CAAE,EAAE,CAAC,MAAM,EAAE,UAAC,IAAI;oBAC1C,aAAM,CAAC,KAAK,CAAC,kCAAkC,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;gBACpE,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC,OAAO,EAAE,UAAC,KAAK;oBACpC,aAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,KAAK,CAAC,CAAC;oBAC/D,KAAI,CAAC,WAAW,GAAG,KAAK,CAAC;gBAC3B,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,UAAC,IAAI;oBAClC,aAAM,CAAC,IAAI,CAAC,oDAAoD,EAAE,IAAI,CAAC,CAAC;oBACxE,KAAI,CAAC,WAAW,GAAG,KAAK,CAAC;gBAC3B,CAAC,CAAC,CAAC;;;;KACJ;IAEO,wDAAiB,GAAzB,UAA0B,KAAa;QAAvC,iBAqDC;QApDC,iCAAiC;QACjC,IAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QAC3C,IAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,cAAc;QACd,IAAM,UAAU,GAAe,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,WAAA,EAAE,MAAM,QAAA,EAAE,CAAC;QAElE,2BAA2B;QAC3B,IAAI,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,YAAa,EAAE,CAAC;YACvC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;gBACrB,iBAAiB;gBACjB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;gBACvB,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC;gBAChC,aAAM,CAAC,KAAK,CAAC,sDAAsD,CAAC,CAAC;gBACrE,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;gBAEzB,6BAA6B;gBAC7B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;oBACvB,YAAY,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBACjC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;oBAC1B,aAAM,CAAC,KAAK,CAAC,+DAA+D,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAClC,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC;YAEhC,sBAAs
B;YACtB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gBACtB,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAClC,CAAC;YAED,uDAAuD;YACvD,IAAI,CAAC,IAAI,CAAC,uBAAuB,EAAE,CAAC;gBAClC,IAAI,CAAC,2BAA2B,EAAE,CAAC;YACrC,CAAC;QACH,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YAC3B,2CAA2C;YAC3C,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAElC,8BAA8B;YAC9B,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;gBACvB,IAAI,CAAC,YAAY,GAAG,UAAU,CAAC;oBAC7B,KAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,cAAe,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAM,UAAU,GAAG,SAAS,GAAG,KAAK,CAAC;QACrC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,UAAC,CAAC,IAAK,OAAA,CAAC,CAAC,SAAS,GAAG,UAAU,EAAxB,CAAwB,CAAC,CAAC;IAC9E,CAAC;IAEO,sDAAe,GAAvB,UAAwB,KAAa;QACnC,sCAAsC;QACtC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,IAAM,OAAO,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,iBAAiB;QAEnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,IAAM,MAAM,GAAG,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YACpC,GAAG,IAAI,MAAM,GAAG,MAAM,CAAC;QACzB,CAAC;QAED,IAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,CAAC;QACrC,OAAO,GAAG,GAAG,KAAK,CAAC,CAAC,mBAAmB;IACzC,CAAC;IAEa,kEAA2B,GAAzC;;;;;;;wBACE,IAAI,IAAI,CAAC,uBAAuB,EAAE,CAAC;4BACjC,sBAAO;wBACT,CAAC;wBAED,IAAI,CAAC,uBAAuB,GAAG,IAAI,CAAC;wBACpC,aAAM,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;;;;wBAI1D,SAAS,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;wBAE5C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;4BAC3B,IAAI,CAAC,uBAAuB,GAAG,KAAK,CAAC;4BACrC,sBAAO;wBACT,CAAC;wBAGc,qBAAM,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,EAAA;;wBAA9C,MAAM,GAAG,SAAqC;wBAEpD,IAAI,MAAM,IAAI,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;4BAC5B,IAAI,CAAC,oBAAoB,GAAG,MAAM,CAAC;4BACnC,aAAM,CAAC,IAAI,CAAC,oDAA4C,MAAM,OAAG,CAAC,CAAC;4BACnE,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;wBAC/D,CAAC;;;;wBAED,aAAM,CAAC,KAAK,CAAC,uCAAuC,EAAE,OAAK,CAAC,CAAC;;;wBAG/D,IAAI,CAAC,uBAAuB,GAAG,KAAK,CAAC;wBAErC,2CAA2C;wBAC3C,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;4BACpB,UAAU,CAAC,cAAM,OAAA,KAAI,CAAC,2BAA2B,EAAE,EAAlC,CAAkC,EAAE,GAAG,CAAC,C
AAC;wBAC5D,CAAC;;;;;KACF;IAEO,gDAAS,GAAjB;QACE,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACrB,OAAO;QACT,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;QACxB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QACzB,aAAM,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAEvB,0BAA0B;QAC1B,IAAI,CAAC,yBAAyB,EAAE,CAAC;IACnC,CAAC;IAEa,gEAAyB,GAAvC;;;;;;;wBACQ,SAAS,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;wBAE5C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;4BAC3B,sBAAO;wBACT,CAAC;;;;wBAImB,qBAAM,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,EAAA;;wBAAjD,cAAY,SAAqC;wBAEvD,IAAI,WAAS,IAAI,WAAS,CAAC,IAAI,EAAE,EAAE,CAAC;4BAClC,IAAI,CAAC,oBAAoB,GAAG,WAAS,CAAC;4BACtC,aAAM,CAAC,IAAI,CAAC,kDAA0C,WAAS,OAAG,CAAC,CAAC;4BACpE,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,EAAE,IAAI,EAAE,WAAS,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;4BAE/D,oCAAoC;4BACpC,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC;gCAC9B,KAAI,CAAC,gBAAgB,CAAC,WAAS,CAAC,CAAC;4BACnC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,aAAc,CAAC,CAAC;wBACjC,CAAC;;;;wBAED,aAAM,CAAC,KAAK,CAAC,6CAA6C,EAAE,OAAK,CAAC,CAAC;;;wBAEnE,qBAAqB;wBACrB,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC;wBACtB,IAAI,CAAC,oBAAoB,GAAG,EAAE,CAAC;;;;;;KAElC;IAEO,yDAAkB,GAA1B;QACE,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzB,CAAC;QAED,wCAAwC;QACxC,IAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAChD,IAAM,cAAc,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,UAAC,CAAC,IAAK,OAAA,CAAC,CAAC,SAAS,IAAI,SAAS,EAAxB,CAAwB,CAAC,CAAC;QAEhF,iBAAiB;QACjB,IAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,UAAC,GAAG,EAAE,CAAC,IAAK,OAAA,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAnB,CAAmB,EAAE,CAAC,CAAC,CAAC;QAC9E,IAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,KAAoB,UAAc,EAAd,iCAAc,EAAd,4BAAc,EAAd,IAAc,EAAE,CAAC;YAAhC,IAAM,KAAK,uBAAA;YACd,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;QAC9B,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEa,sDAAe,GAA7B,UAA8B,SAAiB;;;;;;;wBAGrC,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;wBAG5B,qBAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,
gBAAS,CAAC,aAAa,EAAE;gCAClE,KAAK,EAAE,SAAS;gCAChB,QAAQ,EAAE,IAAI;gCACd,MAAM,EAAE,IAAI,EAAE,iCAAiC;6BAChD,CAAC,EAAA;;wBAJI,MAAM,GAAG,SAIb;wBAEF,sBAAO,MAAgB,EAAC;;;wBAExB,aAAM,CAAC,KAAK,CAAC,wCAAwC,EAAE,OAAK,CAAC,CAAC;wBAC9D,sBAAO,IAAI,EAAC;;;;;KAEf;IAEO,+CAAQ,GAAhB,UAAiB,OAAe;QAC9B,oBAAoB;QACpB,IAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAW,CAAC;QAC3C,IAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAS,CAAC;QACvC,IAAM,aAAa,GAAG,EAAE,CAAC;QACzB,IAAM,QAAQ,GAAG,UAAU,GAAG,QAAQ,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;QAC7D,IAAM,UAAU,GAAG,QAAQ,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;QAClD,IAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;QAChC,IAAM,QAAQ,GAAG,EAAE,GAAG,QAAQ,CAAC;QAE/B,IAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAEhC,aAAa;QACb,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACxB,MAAM,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QAClC,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAExB,YAAY;QACZ,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzB,MAAM,CAAC,aAAa,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB;QAC/C,MAAM,CAAC,aAAa,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa;QAC1C,MAAM,CAAC,aAAa,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACnC,MAAM,CAAC,aAAa,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QACrC,MAAM,CAAC,aAAa,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACnC,MAAM,CAAC,aAAa,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QACrC,MAAM,CAAC,aAAa,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACzB,MAAM,CAAC,aAAa,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEnC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC1C,CAAC;IAEa,uDAAgB,GAA9B,UAA+B,aAAqB;;;;;;wBAClD,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;;;;wBAGxB,sBAAsB;wBACtB,qBAAM,IAAI,CAAC,iBAAiB,CAAC,aAAa,CAAC,EAAA;;wBAD3C,sBAAsB;wBACtB,SAA2C,CAAC;wBAE5C,qCAAqC;wBACrC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,aAAa,CAAC,CAAC;;;;wBAE9C,aAAM,CAAC,KAAK,CAAC,6CAA6C,EAAE,OAAK,CAAC,CAAC;;;;;;KAEtE;IAEa,wDAAiB,GAA/B,UAAgC,aAAqB;;;;gBACnD,IAAI,CAAC;oBACG,OAAO,GAAG;wBACd,OAAO,EAAE;4BACP,IAAI,EAAE,kBAAW,aAAa,CAAE;4BAChC,IAAI,EAAE,qBAAqB;4BAC3B,MAAM,EAAE,sBAAsB;4BAC9B,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;yBACtB;wB
ACD,QAAQ,EAAE;4BACR,oBAAoB,EAAE,IAAI;4BAC1B,SAAS,EAAE,IAAI;yBAChB;qBACF,CAAC;oBAEF,aAAM,CAAC,IAAI,CAAC,wDAAwD,CAAC,CAAC;gBACxE,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,aAAM,CAAC,KAAK,CAAC,iDAAiD,EAAE,KAAK,CAAC,CAAC;gBACzE,CAAC;;;;KACF;IAEK,2CAAI,GAAV;;;gBACE,aAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;gBAE1D,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;oBACxB,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;oBAC3B,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;gBAC7B,CAAC;gBAED,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;oBACtB,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;oBAChC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;gBAC3B,CAAC;gBAED,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;oBACvB,YAAY,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBACjC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;gBAC5B,CAAC;gBAED,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;gBACzB,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;gBACxB,IAAI,CAAC,WAAW,GAAG,EAAE,CAAC;gBAEtB,aAAM,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;;;;KACvD;IAED,+CAAQ,GAAR;QACE,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,8DAAuB,GAAvB;QACE,OAAO,IAAI,CAAC,oBAAoB,CAAC;IACnC,CAAC;IAED,qDAAc,GAAd;QACE,OAAO,IAAI,CAAC,UAAU,CAAC;IACzB,CAAC;IACH,mCAAC;AAAD,CAAC,AAzaD,CAAkD,qBAAY,GAya7D;AAzaY,oEAA4B","sourcesContent":["import { logger, ModelType, type IAgentRuntime } from '@elizaos/core';\nimport { spawn, ChildProcess } from 'child_process';\nimport { EventEmitter } from 'events';\n\nexport interface StreamingAudioConfig {\n enabled: boolean;\n device?: string;\n sampleRate?: number;\n channels?: number;\n vadThreshold?: number; // 0-1, energy threshold for speech detection\n silenceTimeout?: number; // ms to wait before considering speech ended\n responseDelay?: number; // ms to wait before processing (for interruption detection)\n chunkSize?: number; // bytes per chunk for streaming\n}\n\ninterface AudioChunk {\n data: Buffer;\n timestamp: number;\n energy: number;\n}\n\nexport class StreamingAudioCaptureService extends EventEmitter {\n private runtime: IAgentRuntime;\n private config: StreamingAudioConfig;\n private captureProcess: ChildProcess | null = null;\n private isCapturing = false;\n private 
audioBuffer: AudioChunk[] = [];\n private isSpeaking = false;\n private lastSpeechTime = 0;\n private silenceTimer: NodeJS.Timeout | null = null;\n private transcriptionInProgress = false;\n private currentTranscription = '';\n private responseTimer: NodeJS.Timeout | null = null;\n\n constructor(runtime: IAgentRuntime, config: StreamingAudioConfig) {\n super();\n this.runtime = runtime;\n this.config = {\n sampleRate: 16000,\n channels: 1,\n vadThreshold: 0.01,\n silenceTimeout: 1500, // 1.5 seconds of silence to end speech\n responseDelay: 3000, // 3 seconds before response (allows for interruption)\n chunkSize: 4096,\n ...config,\n };\n }\n\n async initialize(): Promise<void> {\n if (!this.config.enabled) {\n logger.info('[StreamingAudio] Audio capture disabled');\n return;\n }\n\n try {\n logger.info('[StreamingAudio] Initializing streaming audio capture...');\n\n // Start continuous audio capture\n await this.startContinuousCapture();\n\n logger.info('[StreamingAudio] Streaming audio capture initialized');\n } catch (error) {\n logger.error('[StreamingAudio] Failed to initialize:', error);\n throw error;\n }\n }\n\n private async startContinuousCapture(): Promise<void> {\n const platform = process.platform;\n let command: string;\n let args: string[];\n\n if (platform === 'darwin') {\n // macOS: Use sox for continuous capture\n command = 'sox';\n args = [\n '-d', // default input device\n '-r',\n this.config.sampleRate!.toString(),\n '-c',\n this.config.channels!.toString(),\n '-b',\n '16',\n '-e',\n 'signed',\n '-t',\n 'raw',\n '-', // output to stdout\n ];\n } else if (platform === 'linux') {\n // Linux: Use arecord\n command = 'arecord';\n args = [\n '-D',\n this.config.device || 'default',\n '-f',\n 'S16_LE',\n '-r',\n this.config.sampleRate!.toString(),\n '-c',\n this.config.channels!.toString(),\n '-t',\n 'raw',\n '-', // output to stdout\n ];\n } else if (platform === 'win32') {\n // Windows: Use ffmpeg\n command = 'ffmpeg';\n args = [\n '-f',\n 
'dshow',\n '-i',\n `audio=\"${this.config.device || 'Microphone'}\"`,\n '-acodec',\n 'pcm_s16le',\n '-ar',\n this.config.sampleRate!.toString(),\n '-ac',\n this.config.channels!.toString(),\n '-f',\n 's16le',\n 'pipe:1', // output to stdout\n ];\n } else {\n throw new Error(`Unsupported platform: ${platform}`);\n }\n\n this.captureProcess = spawn(command, args);\n this.isCapturing = true;\n\n // Handle audio data stream\n this.captureProcess.stdout?.on('data', (chunk: Buffer) => {\n this.processAudioChunk(chunk);\n });\n\n this.captureProcess.stderr?.on('data', (data) => {\n logger.debug('[StreamingAudio] Capture stderr:', data.toString());\n });\n\n this.captureProcess.on('error', (error) => {\n logger.error('[StreamingAudio] Capture process error:', error);\n this.isCapturing = false;\n });\n\n this.captureProcess.on('exit', (code) => {\n logger.info('[StreamingAudio] Capture process exited with code:', code);\n this.isCapturing = false;\n });\n }\n\n private processAudioChunk(chunk: Buffer): void {\n // Calculate audio energy for VAD\n const energy = this.calculateEnergy(chunk);\n const timestamp = Date.now();\n\n // Store chunk\n const audioChunk: AudioChunk = { data: chunk, timestamp, energy };\n\n // Voice Activity Detection\n if (energy > this.config.vadThreshold!) 
{\n if (!this.isSpeaking) {\n // Speech started\n this.isSpeaking = true;\n this.lastSpeechTime = timestamp;\n logger.debug('[StreamingAudio] Speech detected, starting recording');\n this.emit('speechStart');\n\n // Clear any pending response\n if (this.responseTimer) {\n clearTimeout(this.responseTimer);\n this.responseTimer = null;\n logger.debug('[StreamingAudio] Cancelled pending response due to new speech');\n }\n }\n\n // Add to buffer\n this.audioBuffer.push(audioChunk);\n this.lastSpeechTime = timestamp;\n\n // Reset silence timer\n if (this.silenceTimer) {\n clearTimeout(this.silenceTimer);\n }\n\n // Start streaming transcription if not already running\n if (!this.transcriptionInProgress) {\n this.startStreamingTranscription();\n }\n } else if (this.isSpeaking) {\n // Currently in speech but detected silence\n this.audioBuffer.push(audioChunk);\n\n // Set timer for end of speech\n if (!this.silenceTimer) {\n this.silenceTimer = setTimeout(() => {\n this.endSpeech();\n }, this.config.silenceTimeout!);\n }\n }\n\n // Clean up old chunks (keep last 30 seconds)\n const cutoffTime = timestamp - 30000;\n this.audioBuffer = this.audioBuffer.filter((c) => c.timestamp > cutoffTime);\n }\n\n private calculateEnergy(chunk: Buffer): number {\n // Calculate RMS energy of audio chunk\n let sum = 0;\n const samples = chunk.length / 2; // 16-bit samples\n\n for (let i = 0; i < chunk.length; i += 2) {\n const sample = chunk.readInt16LE(i);\n sum += sample * sample;\n }\n\n const rms = Math.sqrt(sum / samples);\n return rms / 32768; // Normalize to 0-1\n }\n\n private async startStreamingTranscription(): Promise<void> {\n if (this.transcriptionInProgress) {\n return;\n }\n\n this.transcriptionInProgress = true;\n logger.debug('[StreamingAudio] Starting streaming transcription');\n\n try {\n // Get audio data from buffer\n const audioData = this.getRecentAudioData();\n\n if (audioData.length === 0) {\n this.transcriptionInProgress = false;\n return;\n }\n\n // Use streaming 
transcription if available, otherwise batch\n const result = await this.transcribeAudio(audioData);\n\n if (result && result.trim()) {\n this.currentTranscription = result;\n logger.info(`[StreamingAudio] Partial transcription: \"${result}\"`);\n this.emit('transcription', { text: result, isFinal: false });\n }\n } catch (error) {\n logger.error('[StreamingAudio] Transcription error:', error);\n }\n\n this.transcriptionInProgress = false;\n\n // Continue transcription if still speaking\n if (this.isSpeaking) {\n setTimeout(() => this.startStreamingTranscription(), 500);\n }\n }\n\n private endSpeech(): void {\n if (!this.isSpeaking) {\n return;\n }\n\n this.isSpeaking = false;\n this.silenceTimer = null;\n logger.debug('[StreamingAudio] Speech ended');\n this.emit('speechEnd');\n\n // Get final transcription\n this.processFinalTranscription();\n }\n\n private async processFinalTranscription(): Promise<void> {\n const audioData = this.getRecentAudioData();\n\n if (audioData.length === 0) {\n return;\n }\n\n try {\n // Get final transcription\n const finalText = await this.transcribeAudio(audioData);\n\n if (finalText && finalText.trim()) {\n this.currentTranscription = finalText;\n logger.info(`[StreamingAudio] Final transcription: \"${finalText}\"`);\n this.emit('transcription', { text: finalText, isFinal: true });\n\n // Set timer for response generation\n this.responseTimer = setTimeout(() => {\n this.generateResponse(finalText);\n }, this.config.responseDelay!);\n }\n } catch (error) {\n logger.error('[StreamingAudio] Final transcription error:', error);\n } finally {\n // Clear audio buffer\n this.audioBuffer = [];\n this.currentTranscription = '';\n }\n }\n\n private getRecentAudioData(): Buffer {\n if (this.audioBuffer.length === 0) {\n return Buffer.alloc(0);\n }\n\n // Get audio from start of speech to now\n const startTime = this.audioBuffer[0].timestamp;\n const relevantChunks = this.audioBuffer.filter((c) => c.timestamp >= startTime);\n\n // Combine 
chunks\n const totalLength = relevantChunks.reduce((sum, c) => sum + c.data.length, 0);\n const combined = Buffer.alloc(totalLength);\n let offset = 0;\n\n for (const chunk of relevantChunks) {\n chunk.data.copy(combined, offset);\n offset += chunk.data.length;\n }\n\n return combined;\n }\n\n private async transcribeAudio(audioData: Buffer): Promise<string | null> {\n try {\n // Convert raw audio to WAV format\n const wavBuffer = this.rawToWav(audioData);\n\n // Use runtime transcription model\n const result = await this.runtime.useModel(ModelType.TRANSCRIPTION, {\n audio: wavBuffer,\n language: 'en',\n stream: true, // Request streaming if supported\n });\n\n return result as string;\n } catch (error) {\n logger.error('[StreamingAudio] Transcription failed:', error);\n return null;\n }\n }\n\n private rawToWav(rawData: Buffer): Buffer {\n // Create WAV header\n const sampleRate = this.config.sampleRate!;\n const channels = this.config.channels!;\n const bitsPerSample = 16;\n const byteRate = sampleRate * channels * (bitsPerSample / 8);\n const blockAlign = channels * (bitsPerSample / 8);\n const dataSize = rawData.length;\n const fileSize = 36 + dataSize;\n\n const header = Buffer.alloc(44);\n\n // RIFF chunk\n header.write('RIFF', 0);\n header.writeUInt32LE(fileSize, 4);\n header.write('WAVE', 8);\n\n // fmt chunk\n header.write('fmt ', 12);\n header.writeUInt32LE(16, 16); // fmt chunk size\n header.writeUInt16LE(1, 20); // PCM format\n header.writeUInt16LE(channels, 22);\n header.writeUInt32LE(sampleRate, 24);\n header.writeUInt32LE(byteRate, 28);\n header.writeUInt16LE(blockAlign, 32);\n header.writeUInt16LE(bitsPerSample, 34);\n\n // data chunk\n header.write('data', 36);\n header.writeUInt32LE(dataSize, 40);\n\n return Buffer.concat([header, rawData]);\n }\n\n private async generateResponse(transcription: string): Promise<void> {\n this.responseTimer = null;\n\n try {\n // Create audio memory\n await this.createAudioMemory(transcription);\n\n // Emit event 
for response generation\n this.emit('utteranceComplete', transcription);\n } catch (error) {\n logger.error('[StreamingAudio] Response generation error:', error);\n }\n }\n\n private async createAudioMemory(transcription: string): Promise<void> {\n try {\n const _memory = {\n content: {\n text: `[Audio] ${transcription}`,\n type: 'audio_transcription',\n source: 'microphone_streaming',\n timestamp: Date.now(),\n },\n metadata: {\n isAudioTranscription: true,\n streaming: true,\n },\n };\n\n logger.info('[StreamingAudio] Audio transcription stored in context');\n } catch (error) {\n logger.error('[StreamingAudio] Failed to create audio memory:', error);\n }\n }\n\n async stop(): Promise<void> {\n logger.info('[StreamingAudio] Stopping audio capture...');\n\n if (this.captureProcess) {\n this.captureProcess.kill();\n this.captureProcess = null;\n }\n\n if (this.silenceTimer) {\n clearTimeout(this.silenceTimer);\n this.silenceTimer = null;\n }\n\n if (this.responseTimer) {\n clearTimeout(this.responseTimer);\n this.responseTimer = null;\n }\n\n this.isCapturing = false;\n this.isSpeaking = false;\n this.audioBuffer = [];\n\n logger.info('[StreamingAudio] Audio capture stopped');\n }\n\n isActive(): boolean {\n return this.isCapturing;\n }\n\n getCurrentTranscription(): string {\n return this.currentTranscription;\n }\n\n isSpeechActive(): boolean {\n return this.isSpeaking;\n }\n}\n"]}
@@ -0,0 +1,25 @@
1
+ import { type IAgentRuntime } from '@elizaos/core';
2
+ export interface AudioConfig { // Options object accepted by the AudioCaptureService constructor.
3
+ enabled: boolean; // required on/off flag; how it is consumed is not visible in this declaration file
4
+ transcriptionInterval: number; // required; presumably the period between transcription runs - units not shown here, TODO confirm
5
+ device?: string; // optional; presumably an input-device identifier - confirm against the implementation
6
+ sampleRate?: number; // optional; presumably in Hz - confirm against the implementation
7
+ channels?: number; // optional; presumably the channel count - default not visible here
8
+ }
9
+ export declare class AudioCaptureService { // Type declaration only; the implementation lives in dist/audio-capture.js.
10
+ private runtime; // presumably the IAgentRuntime passed to the constructor - confirm
11
+ private config; // presumably the AudioConfig passed to the constructor - confirm
12
+ private isRecording;
13
+ private recordingInterval;
14
+ private currentRecordingPath;
15
+ constructor(runtime: IAgentRuntime, config: AudioConfig);
16
+ initialize(): Promise<void>; // async setup; resolves with no value
17
+ private checkAudioTools;
18
+ private startTranscriptionLoop;
19
+ recordAndTranscribe(): Promise<string | null>; // resolves to a string or null; NOTE(review): null presumably means no transcription was produced - confirm
20
+ private recordAudio;
21
+ private createAudioMemory;
22
+ listAudioDevices(): Promise<string[]>; // resolves to an array of strings; their format is not visible here
23
+ isActive(): boolean;
24
+ stop(): Promise<void>; // async; resolves with no value
25
+ }