@layercode/js-sdk 2.8.1 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/layercode-js-sdk.esm.js +116 -14
- package/dist/layercode-js-sdk.esm.js.map +1 -1
- package/dist/layercode-js-sdk.min.js +116 -14
- package/dist/layercode-js-sdk.min.js.map +1 -1
- package/dist/types/index.d.ts +14 -0
- package/dist/types/interfaces.d.ts +6 -2
- package/dist/types/wavtools/lib/analysis/audio_analysis.d.ts +1 -1
- package/package.json +1 -1
@@ -5318,13 +5318,15 @@ registerProcessor('audio_processor', AudioProcessor);
      * @returns {Promise<true>}
      */
     async requestPermission() {
+        console.log('ensureUserMediaAccess');
         try {
-
-            await navigator.mediaDevices.getUserMedia({
+            const stream = await navigator.mediaDevices.getUserMedia({
                 audio: true,
             });
+            // Stop the tracks immediately after getting permission
+            stream.getTracks().forEach(track => track.stop());
         } catch (fallbackError) {
-
+            console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
             throw fallbackError;
         }
         return true;
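The updated `requestPermission()` now keeps a reference to the probe stream and stops its tracks straight away, so asking for microphone permission no longer leaves the browser's recording indicator lit. A minimal standalone sketch of the same pattern (the function name is illustrative, not an SDK export):

```js
// Illustrative sketch only; `probeMicrophonePermission` is not part of the SDK.
async function probeMicrophonePermission() {
  // Prompt for (or silently reuse) microphone permission.
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  // Release the capture immediately: the permission grant persists, but the
  // mic indicator turns off until recording actually starts.
  stream.getTracks().forEach((track) => track.stop());
  return true;
}
```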
@@ -5968,9 +5970,11 @@ registerProcessor('audio_processor', AudioProcessor);
         this.canInterrupt = false;
         this.userIsSpeaking = false;
         this.agentIsSpeaking = false;
+        this.agentIsPlayingAudio = false;
         this.recorderStarted = false;
         this.readySent = false;
         this.currentTurnId = null;
+        this.sentReplayFinishedForDisabledOutput = false;
         this.audioBuffer = [];
         this.vadConfig = null;
         this.activeDeviceId = null;
@@ -6120,6 +6124,8 @@ registerProcessor('audio_processor', AudioProcessor);
         await this.audioOutputReady;
     }
     _setAgentSpeaking(isSpeaking) {
+        // Track the actual audio playback state regardless of audioOutput setting
+        this.agentIsPlayingAudio = isSpeaking;
         const shouldReportSpeaking = this.audioOutput && isSpeaking;
         if (this.agentIsSpeaking === shouldReportSpeaking) {
             return;
@@ -6128,11 +6134,14 @@ registerProcessor('audio_processor', AudioProcessor);
         this.options.onAgentSpeakingChange(shouldReportSpeaking);
     }
     _setUserSpeaking(isSpeaking) {
-        const
+        const shouldCapture = this._shouldCaptureUserAudio();
+        const shouldReportSpeaking = shouldCapture && isSpeaking;
+        console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
         if (this.userIsSpeaking === shouldReportSpeaking) {
             return;
         }
         this.userIsSpeaking = shouldReportSpeaking;
+        console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
         this.options.onUserIsSpeakingChange(shouldReportSpeaking);
     }
     /**
@@ -6182,6 +6191,7 @@ registerProcessor('audio_processor', AudioProcessor);
      * @param {MessageEvent} event - The WebSocket message event
      */
     async _handleWebSocketMessage(event) {
+        var _a, _b;
         try {
             const message = JSON.parse(event.data);
             if (message.type !== 'response.audio') {
@@ -6194,6 +6204,20 @@ registerProcessor('audio_processor', AudioProcessor);
                 // Start tracking new agent turn
                 console.debug('Agent turn started, will track new turn ID from audio/text');
                 this._setUserSpeaking(false);
+                // Reset the flag for the new assistant turn
+                this.sentReplayFinishedForDisabledOutput = false;
+                // When assistant's turn starts but we're not playing audio,
+                // we need to tell the server we're "done" with playback so it can
+                // transition the turn back to user. Use a small delay to let any
+                // response.audio/response.end messages arrive first.
+                if (!this.audioOutput) {
+                    setTimeout(() => {
+                        if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+                            this.sentReplayFinishedForDisabledOutput = true;
+                            this._clientResponseAudioReplayFinished();
+                        }
+                    }, 1000);
+                }
             }
             else if (message.role === 'user' && !this.pushToTalkEnabled) {
                 // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6213,11 +6237,42 @@ registerProcessor('audio_processor', AudioProcessor);
                 });
                 break;
             }
-            case 'response.
+            case 'response.end': {
+                // When audioOutput is disabled, notify server that "playback" is complete
+                if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+                    this.sentReplayFinishedForDisabledOutput = true;
+                    this._clientResponseAudioReplayFinished();
+                }
+                (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
+                break;
+            }
+            case 'response.audio': {
+                // Skip audio playback if audioOutput is disabled
+                if (!this.audioOutput) {
+                    // Send replay_finished so server knows we're "done" with playback (only once per turn)
+                    if (!this.sentReplayFinishedForDisabledOutput) {
+                        this.sentReplayFinishedForDisabledOutput = true;
+                        this._clientResponseAudioReplayFinished();
+                    }
+                    break;
+                }
                 await this._waitForAudioOutputReady();
-                this._setAgentSpeaking(true);
                 const audioBuffer = base64ToArrayBuffer(message.content);
-
+                const hasAudioSamples = audioBuffer.byteLength > 0;
+                let audioEnqueued = false;
+                if (hasAudioSamples) {
+                    try {
+                        const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+                        audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
+                    }
+                    catch (error) {
+                        this._setAgentSpeaking(false);
+                        throw error;
+                    }
+                }
+                else {
+                    console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
+                }
                 // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
                 // Set current turn ID from first audio message, or update if different turn
                 if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
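The two hunks above implement the disabled-audio-output path: the server still expects a replay-finished acknowledgement before it hands the turn back to the user, so the client sends one from whichever message arrives first (the delayed `turn.start` timer, `response.end`, or the first `response.audio`), and the new `sentReplayFinishedForDisabledOutput` flag keeps it to a single acknowledgement per turn. A reduced sketch of that once-per-turn guard, with hypothetical names:

```js
// Hypothetical illustration of the guard pattern used above, not SDK code.
class ReplayAckGuard {
  constructor(sendAck) {
    this.sendAck = sendAck; // e.g. () => client._clientResponseAudioReplayFinished()
    this.sentForThisTurn = false;
  }
  beginTurn() {
    // Reset when a new assistant turn starts (the `turn.start` case above).
    this.sentForThisTurn = false;
  }
  ackOnce() {
    // Called from any of the triggering events; only the first one sends.
    if (this.sentForThisTurn) return;
    this.sentForThisTurn = true;
    this.sendAck();
  }
}
```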
@@ -6226,7 +6281,11 @@ registerProcessor('audio_processor', AudioProcessor);
                     // Clean up interrupted tracks, keeping only the current turn
                     this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
                 }
+                if (audioEnqueued) {
+                    this._setAgentSpeaking(true);
+                }
                 break;
+            }
             case 'response.text':
                 // Set turn ID from first text message if not set
                 if (!this.currentTurnId) {
@@ -6331,6 +6390,9 @@ registerProcessor('audio_processor', AudioProcessor);
     }
     _sendReadyIfNeeded() {
         var _a;
+        // Send client.ready when either:
+        // 1. Recorder is started (audio mode active)
+        // 2. audioInput is false (text-only mode, but server should still be ready)
         const audioReady = this.recorderStarted || !this.audioInput;
         if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
             this._wsSend({ type: 'client.ready' });
@@ -6396,12 +6458,16 @@ registerProcessor('audio_processor', AudioProcessor);
     }
     async audioInputConnect() {
         // Turn mic ON
+        console.log('audioInputConnect: requesting permission');
         await this.wavRecorder.requestPermission();
+        console.log('audioInputConnect: setting up device change listener');
         await this._setupDeviceChangeListener();
         // If the recorder hasn't spun up yet, proactively select a device.
         if (!this.recorderStarted && this.deviceChangeListener) {
+            console.log('audioInputConnect: initializing recorder with default device');
             await this._initializeRecorderWithDefaultDevice();
         }
+        console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
     }
     async audioInputDisconnect() {
         try {
@@ -6433,11 +6499,27 @@ registerProcessor('audio_processor', AudioProcessor);
         }
     }
     async setAudioOutput(state) {
+        console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
         if (this.audioOutput !== state) {
             this.audioOutput = state;
             this._emitAudioOutput();
             if (state) {
-
+                // Initialize audio output if not already connected
+                // This happens when audioOutput was initially false and is now being enabled
+                if (!this.wavPlayer.context) {
+                    console.log('setAudioOutput: initializing audio output (no context yet)');
+                    // Store the promise so _waitForAudioOutputReady() can await it
+                    // This prevents response.audio from running before AudioContext is ready
+                    const setupPromise = this.setupAudioOutput();
+                    this.audioOutputReady = setupPromise;
+                    await setupPromise;
+                }
+                else {
+                    console.log('setAudioOutput: unmuting existing player');
+                    this.wavPlayer.unmute();
+                }
+                // Sync agentSpeaking state with actual playback state when enabling audio output
+                this._syncAgentSpeakingState();
             }
             else {
                 this.wavPlayer.mute();
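`setAudioOutput(true)` can now be called after the session is already running: if the player has no `AudioContext` yet, it is created on demand and the setup promise is stored so queued `response.audio` handling waits for it. Because browsers only allow audio playback to start from a user gesture, a consumer would typically flip the flag inside a click handler. A sketch under that assumption (the `client` variable and button elements are hypothetical; only `setAudioOutput` itself comes from the diff):

```js
// Assumes `client` is a connected Layercode client exposing setAudioOutput(),
// and that unmuteButton/muteButton are existing DOM elements.
unmuteButton.addEventListener('click', async () => {
  // Inside a user gesture, so lazy AudioContext creation is permitted.
  await client.setAudioOutput(true);
});
muteButton.addEventListener('click', async () => {
  // Mutes the player; the SDK keeps acknowledging playback so turns advance.
  await client.setAudioOutput(false);
});
```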
@@ -6445,6 +6527,17 @@ registerProcessor('audio_processor', AudioProcessor);
             }
         }
     }
+    /**
+     * Syncs the reported agentSpeaking state with the actual audio playback state.
+     * Called when audioOutput is enabled to ensure proper state synchronization.
+     */
+    _syncAgentSpeakingState() {
+        const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
+        if (this.agentIsSpeaking !== shouldReportSpeaking) {
+            this.agentIsSpeaking = shouldReportSpeaking;
+            this.options.onAgentSpeakingChange(shouldReportSpeaking);
+        }
+    }
     /** Emitters for audio flags */
     _emitAudioInput() {
         this.options.audioInputChanged(this.audioInput);
@@ -6581,6 +6674,11 @@ registerProcessor('audio_processor', AudioProcessor);
         return authorizeSessionResponseBody;
     }
     async setupAudioOutput() {
+        // Only initialize audio player if audioOutput is enabled
+        // This prevents AudioContext creation before user gesture when audio is disabled
+        if (!this.audioOutput) {
+            return;
+        }
         // Initialize audio player
         // wavRecorder will be started from the onDeviceSwitched callback,
         // which is called when the device is first initialized and also when the device is switched
@@ -6591,12 +6689,7 @@ registerProcessor('audio_processor', AudioProcessor);
         if (!this.options.enableAmplitudeMonitoring) {
             this.agentAudioAmplitude = 0;
         }
-
-            this.wavPlayer.unmute();
-        }
-        else {
-            this.wavPlayer.mute();
-        }
+        this.wavPlayer.unmute();
     }
     async connectToAudioInput() {
         if (!this.audioInput) {
@@ -6645,6 +6738,7 @@ registerProcessor('audio_processor', AudioProcessor);
      */
     async setInputDevice(deviceId) {
         var _a, _b, _c;
+        console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
         const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
         this.useSystemDefaultDevice = normalizedDeviceId === null;
         this.deviceId = normalizedDeviceId;
@@ -6653,6 +6747,7 @@ registerProcessor('audio_processor', AudioProcessor);
             return;
         }
         try {
+            console.log('setInputDevice: calling _queueRecorderRestart');
             // Restart recording with the new device
             await this._queueRecorderRestart();
             // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6736,12 +6831,15 @@ registerProcessor('audio_processor', AudioProcessor);
         return run;
     }
     async _initializeRecorderWithDefaultDevice() {
+        console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
         if (!this.deviceChangeListener) {
             return;
         }
         try {
            const devices = await this.wavRecorder.listDevices();
+            console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
             if (devices.length) {
+                console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
                 await this.deviceChangeListener(devices);
                 return;
             }
@@ -6751,6 +6849,7 @@ registerProcessor('audio_processor', AudioProcessor);
             console.warn('Unable to prime audio devices from listDevices()', error);
         }
         try {
+            console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
             await this.setInputDevice('default');
         }
         catch (error) {
@@ -6799,6 +6898,7 @@ registerProcessor('audio_processor', AudioProcessor);
         });
         this.deviceChangeListener = async (devices) => {
             var _a;
+            console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
             try {
                 // Notify user that devices have changed
                 this.options.onDevicesChanged(devices);
@@ -6807,6 +6907,7 @@ registerProcessor('audio_processor', AudioProcessor);
                 const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
                 const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
                 let shouldSwitch = !this.recorderStarted;
+                console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
                 if (!shouldSwitch) {
                     if (usingDefaultDevice) {
                         if (!defaultDevice) {
@@ -6826,6 +6927,7 @@ registerProcessor('audio_processor', AudioProcessor);
                     }
                 }
                 this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+                console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
                 if (shouldSwitch) {
                     console.debug('Selecting audio input device after change');
                     let targetDeviceId = null;