@layercode/js-sdk 2.8.1 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/layercode-js-sdk.esm.js +116 -14
- package/dist/layercode-js-sdk.esm.js.map +1 -1
- package/dist/layercode-js-sdk.min.js +116 -14
- package/dist/layercode-js-sdk.min.js.map +1 -1
- package/dist/types/index.d.ts +14 -0
- package/dist/types/interfaces.d.ts +6 -2
- package/dist/types/wavtools/lib/analysis/audio_analysis.d.ts +1 -1
- package/package.json +1 -1
|
@@ -5312,13 +5312,15 @@ class WavRecorder {
|
|
|
5312
5312
|
* @returns {Promise<true>}
|
|
5313
5313
|
*/
|
|
5314
5314
|
async requestPermission() {
|
|
5315
|
+
console.log('ensureUserMediaAccess');
|
|
5315
5316
|
try {
|
|
5316
|
-
|
|
5317
|
-
await navigator.mediaDevices.getUserMedia({
|
|
5317
|
+
const stream = await navigator.mediaDevices.getUserMedia({
|
|
5318
5318
|
audio: true,
|
|
5319
5319
|
});
|
|
5320
|
+
// Stop the tracks immediately after getting permission
|
|
5321
|
+
stream.getTracks().forEach(track => track.stop());
|
|
5320
5322
|
} catch (fallbackError) {
|
|
5321
|
-
|
|
5323
|
+
console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
|
|
5322
5324
|
throw fallbackError;
|
|
5323
5325
|
}
|
|
5324
5326
|
return true;
|
|
@@ -5962,9 +5964,11 @@ class LayercodeClient {
|
|
|
5962
5964
|
this.canInterrupt = false;
|
|
5963
5965
|
this.userIsSpeaking = false;
|
|
5964
5966
|
this.agentIsSpeaking = false;
|
|
5967
|
+
this.agentIsPlayingAudio = false;
|
|
5965
5968
|
this.recorderStarted = false;
|
|
5966
5969
|
this.readySent = false;
|
|
5967
5970
|
this.currentTurnId = null;
|
|
5971
|
+
this.sentReplayFinishedForDisabledOutput = false;
|
|
5968
5972
|
this.audioBuffer = [];
|
|
5969
5973
|
this.vadConfig = null;
|
|
5970
5974
|
this.activeDeviceId = null;
|
|
@@ -6114,6 +6118,8 @@ class LayercodeClient {
|
|
|
6114
6118
|
await this.audioOutputReady;
|
|
6115
6119
|
}
|
|
6116
6120
|
_setAgentSpeaking(isSpeaking) {
|
|
6121
|
+
// Track the actual audio playback state regardless of audioOutput setting
|
|
6122
|
+
this.agentIsPlayingAudio = isSpeaking;
|
|
6117
6123
|
const shouldReportSpeaking = this.audioOutput && isSpeaking;
|
|
6118
6124
|
if (this.agentIsSpeaking === shouldReportSpeaking) {
|
|
6119
6125
|
return;
|
|
@@ -6122,11 +6128,14 @@ class LayercodeClient {
|
|
|
6122
6128
|
this.options.onAgentSpeakingChange(shouldReportSpeaking);
|
|
6123
6129
|
}
|
|
6124
6130
|
_setUserSpeaking(isSpeaking) {
|
|
6125
|
-
const
|
|
6131
|
+
const shouldCapture = this._shouldCaptureUserAudio();
|
|
6132
|
+
const shouldReportSpeaking = shouldCapture && isSpeaking;
|
|
6133
|
+
console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
|
|
6126
6134
|
if (this.userIsSpeaking === shouldReportSpeaking) {
|
|
6127
6135
|
return;
|
|
6128
6136
|
}
|
|
6129
6137
|
this.userIsSpeaking = shouldReportSpeaking;
|
|
6138
|
+
console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
|
|
6130
6139
|
this.options.onUserIsSpeakingChange(shouldReportSpeaking);
|
|
6131
6140
|
}
|
|
6132
6141
|
/**
|
|
@@ -6176,6 +6185,7 @@ class LayercodeClient {
|
|
|
6176
6185
|
* @param {MessageEvent} event - The WebSocket message event
|
|
6177
6186
|
*/
|
|
6178
6187
|
async _handleWebSocketMessage(event) {
|
|
6188
|
+
var _a, _b;
|
|
6179
6189
|
try {
|
|
6180
6190
|
const message = JSON.parse(event.data);
|
|
6181
6191
|
if (message.type !== 'response.audio') {
|
|
@@ -6188,6 +6198,20 @@ class LayercodeClient {
|
|
|
6188
6198
|
// Start tracking new agent turn
|
|
6189
6199
|
console.debug('Agent turn started, will track new turn ID from audio/text');
|
|
6190
6200
|
this._setUserSpeaking(false);
|
|
6201
|
+
// Reset the flag for the new assistant turn
|
|
6202
|
+
this.sentReplayFinishedForDisabledOutput = false;
|
|
6203
|
+
// When assistant's turn starts but we're not playing audio,
|
|
6204
|
+
// we need to tell the server we're "done" with playback so it can
|
|
6205
|
+
// transition the turn back to user. Use a small delay to let any
|
|
6206
|
+
// response.audio/response.end messages arrive first.
|
|
6207
|
+
if (!this.audioOutput) {
|
|
6208
|
+
setTimeout(() => {
|
|
6209
|
+
if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
|
|
6210
|
+
this.sentReplayFinishedForDisabledOutput = true;
|
|
6211
|
+
this._clientResponseAudioReplayFinished();
|
|
6212
|
+
}
|
|
6213
|
+
}, 1000);
|
|
6214
|
+
}
|
|
6191
6215
|
}
|
|
6192
6216
|
else if (message.role === 'user' && !this.pushToTalkEnabled) {
|
|
6193
6217
|
// Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
|
|
@@ -6207,11 +6231,42 @@ class LayercodeClient {
|
|
|
6207
6231
|
});
|
|
6208
6232
|
break;
|
|
6209
6233
|
}
|
|
6210
|
-
case 'response.audio':
|
|
6234
|
+
case 'response.end': {
|
|
6235
|
+
// When audioOutput is disabled, notify server that "playback" is complete
|
|
6236
|
+
if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
|
|
6237
|
+
this.sentReplayFinishedForDisabledOutput = true;
|
|
6238
|
+
this._clientResponseAudioReplayFinished();
|
|
6239
|
+
}
|
|
6240
|
+
(_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
|
|
6241
|
+
break;
|
|
6242
|
+
}
|
|
6243
|
+
case 'response.audio': {
|
|
6244
|
+
// Skip audio playback if audioOutput is disabled
|
|
6245
|
+
if (!this.audioOutput) {
|
|
6246
|
+
// Send replay_finished so server knows we're "done" with playback (only once per turn)
|
|
6247
|
+
if (!this.sentReplayFinishedForDisabledOutput) {
|
|
6248
|
+
this.sentReplayFinishedForDisabledOutput = true;
|
|
6249
|
+
this._clientResponseAudioReplayFinished();
|
|
6250
|
+
}
|
|
6251
|
+
break;
|
|
6252
|
+
}
|
|
6211
6253
|
await this._waitForAudioOutputReady();
|
|
6212
|
-
this._setAgentSpeaking(true);
|
|
6213
6254
|
const audioBuffer = base64ToArrayBuffer(message.content);
|
|
6214
|
-
|
|
6255
|
+
const hasAudioSamples = audioBuffer.byteLength > 0;
|
|
6256
|
+
let audioEnqueued = false;
|
|
6257
|
+
if (hasAudioSamples) {
|
|
6258
|
+
try {
|
|
6259
|
+
const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
|
|
6260
|
+
audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
|
|
6261
|
+
}
|
|
6262
|
+
catch (error) {
|
|
6263
|
+
this._setAgentSpeaking(false);
|
|
6264
|
+
throw error;
|
|
6265
|
+
}
|
|
6266
|
+
}
|
|
6267
|
+
else {
|
|
6268
|
+
console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
|
|
6269
|
+
}
|
|
6215
6270
|
// TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
|
|
6216
6271
|
// Set current turn ID from first audio message, or update if different turn
|
|
6217
6272
|
if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
|
|
@@ -6220,7 +6275,11 @@ class LayercodeClient {
|
|
|
6220
6275
|
// Clean up interrupted tracks, keeping only the current turn
|
|
6221
6276
|
this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
|
|
6222
6277
|
}
|
|
6278
|
+
if (audioEnqueued) {
|
|
6279
|
+
this._setAgentSpeaking(true);
|
|
6280
|
+
}
|
|
6223
6281
|
break;
|
|
6282
|
+
}
|
|
6224
6283
|
case 'response.text':
|
|
6225
6284
|
// Set turn ID from first text message if not set
|
|
6226
6285
|
if (!this.currentTurnId) {
|
|
@@ -6325,6 +6384,9 @@ class LayercodeClient {
|
|
|
6325
6384
|
}
|
|
6326
6385
|
_sendReadyIfNeeded() {
|
|
6327
6386
|
var _a;
|
|
6387
|
+
// Send client.ready when either:
|
|
6388
|
+
// 1. Recorder is started (audio mode active)
|
|
6389
|
+
// 2. audioInput is false (text-only mode, but server should still be ready)
|
|
6328
6390
|
const audioReady = this.recorderStarted || !this.audioInput;
|
|
6329
6391
|
if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
|
|
6330
6392
|
this._wsSend({ type: 'client.ready' });
|
|
@@ -6390,12 +6452,16 @@ class LayercodeClient {
|
|
|
6390
6452
|
}
|
|
6391
6453
|
async audioInputConnect() {
|
|
6392
6454
|
// Turn mic ON
|
|
6455
|
+
console.log('audioInputConnect: requesting permission');
|
|
6393
6456
|
await this.wavRecorder.requestPermission();
|
|
6457
|
+
console.log('audioInputConnect: setting up device change listener');
|
|
6394
6458
|
await this._setupDeviceChangeListener();
|
|
6395
6459
|
// If the recorder hasn't spun up yet, proactively select a device.
|
|
6396
6460
|
if (!this.recorderStarted && this.deviceChangeListener) {
|
|
6461
|
+
console.log('audioInputConnect: initializing recorder with default device');
|
|
6397
6462
|
await this._initializeRecorderWithDefaultDevice();
|
|
6398
6463
|
}
|
|
6464
|
+
console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
|
|
6399
6465
|
}
|
|
6400
6466
|
async audioInputDisconnect() {
|
|
6401
6467
|
try {
|
|
@@ -6427,11 +6493,27 @@ class LayercodeClient {
|
|
|
6427
6493
|
}
|
|
6428
6494
|
}
|
|
6429
6495
|
async setAudioOutput(state) {
|
|
6496
|
+
console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
|
|
6430
6497
|
if (this.audioOutput !== state) {
|
|
6431
6498
|
this.audioOutput = state;
|
|
6432
6499
|
this._emitAudioOutput();
|
|
6433
6500
|
if (state) {
|
|
6434
|
-
|
|
6501
|
+
// Initialize audio output if not already connected
|
|
6502
|
+
// This happens when audioOutput was initially false and is now being enabled
|
|
6503
|
+
if (!this.wavPlayer.context) {
|
|
6504
|
+
console.log('setAudioOutput: initializing audio output (no context yet)');
|
|
6505
|
+
// Store the promise so _waitForAudioOutputReady() can await it
|
|
6506
|
+
// This prevents response.audio from running before AudioContext is ready
|
|
6507
|
+
const setupPromise = this.setupAudioOutput();
|
|
6508
|
+
this.audioOutputReady = setupPromise;
|
|
6509
|
+
await setupPromise;
|
|
6510
|
+
}
|
|
6511
|
+
else {
|
|
6512
|
+
console.log('setAudioOutput: unmuting existing player');
|
|
6513
|
+
this.wavPlayer.unmute();
|
|
6514
|
+
}
|
|
6515
|
+
// Sync agentSpeaking state with actual playback state when enabling audio output
|
|
6516
|
+
this._syncAgentSpeakingState();
|
|
6435
6517
|
}
|
|
6436
6518
|
else {
|
|
6437
6519
|
this.wavPlayer.mute();
|
|
@@ -6439,6 +6521,17 @@ class LayercodeClient {
|
|
|
6439
6521
|
}
|
|
6440
6522
|
}
|
|
6441
6523
|
}
|
|
6524
|
+
/**
|
|
6525
|
+
* Syncs the reported agentSpeaking state with the actual audio playback state.
|
|
6526
|
+
* Called when audioOutput is enabled to ensure proper state synchronization.
|
|
6527
|
+
*/
|
|
6528
|
+
_syncAgentSpeakingState() {
|
|
6529
|
+
const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
|
|
6530
|
+
if (this.agentIsSpeaking !== shouldReportSpeaking) {
|
|
6531
|
+
this.agentIsSpeaking = shouldReportSpeaking;
|
|
6532
|
+
this.options.onAgentSpeakingChange(shouldReportSpeaking);
|
|
6533
|
+
}
|
|
6534
|
+
}
|
|
6442
6535
|
/** Emitters for audio flags */
|
|
6443
6536
|
_emitAudioInput() {
|
|
6444
6537
|
this.options.audioInputChanged(this.audioInput);
|
|
@@ -6575,6 +6668,11 @@ class LayercodeClient {
|
|
|
6575
6668
|
return authorizeSessionResponseBody;
|
|
6576
6669
|
}
|
|
6577
6670
|
async setupAudioOutput() {
|
|
6671
|
+
// Only initialize audio player if audioOutput is enabled
|
|
6672
|
+
// This prevents AudioContext creation before user gesture when audio is disabled
|
|
6673
|
+
if (!this.audioOutput) {
|
|
6674
|
+
return;
|
|
6675
|
+
}
|
|
6578
6676
|
// Initialize audio player
|
|
6579
6677
|
// wavRecorder will be started from the onDeviceSwitched callback,
|
|
6580
6678
|
// which is called when the device is first initialized and also when the device is switched
|
|
@@ -6585,12 +6683,7 @@ class LayercodeClient {
|
|
|
6585
6683
|
if (!this.options.enableAmplitudeMonitoring) {
|
|
6586
6684
|
this.agentAudioAmplitude = 0;
|
|
6587
6685
|
}
|
|
6588
|
-
|
|
6589
|
-
this.wavPlayer.unmute();
|
|
6590
|
-
}
|
|
6591
|
-
else {
|
|
6592
|
-
this.wavPlayer.mute();
|
|
6593
|
-
}
|
|
6686
|
+
this.wavPlayer.unmute();
|
|
6594
6687
|
}
|
|
6595
6688
|
async connectToAudioInput() {
|
|
6596
6689
|
if (!this.audioInput) {
|
|
@@ -6639,6 +6732,7 @@ class LayercodeClient {
|
|
|
6639
6732
|
*/
|
|
6640
6733
|
async setInputDevice(deviceId) {
|
|
6641
6734
|
var _a, _b, _c;
|
|
6735
|
+
console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
|
|
6642
6736
|
const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
|
|
6643
6737
|
this.useSystemDefaultDevice = normalizedDeviceId === null;
|
|
6644
6738
|
this.deviceId = normalizedDeviceId;
|
|
@@ -6647,6 +6741,7 @@ class LayercodeClient {
|
|
|
6647
6741
|
return;
|
|
6648
6742
|
}
|
|
6649
6743
|
try {
|
|
6744
|
+
console.log('setInputDevice: calling _queueRecorderRestart');
|
|
6650
6745
|
// Restart recording with the new device
|
|
6651
6746
|
await this._queueRecorderRestart();
|
|
6652
6747
|
// Reinitialize VAD with the new audio stream if VAD is enabled
|
|
@@ -6730,12 +6825,15 @@ class LayercodeClient {
|
|
|
6730
6825
|
return run;
|
|
6731
6826
|
}
|
|
6732
6827
|
async _initializeRecorderWithDefaultDevice() {
|
|
6828
|
+
console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
|
|
6733
6829
|
if (!this.deviceChangeListener) {
|
|
6734
6830
|
return;
|
|
6735
6831
|
}
|
|
6736
6832
|
try {
|
|
6737
6833
|
const devices = await this.wavRecorder.listDevices();
|
|
6834
|
+
console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
|
|
6738
6835
|
if (devices.length) {
|
|
6836
|
+
console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
|
|
6739
6837
|
await this.deviceChangeListener(devices);
|
|
6740
6838
|
return;
|
|
6741
6839
|
}
|
|
@@ -6745,6 +6843,7 @@ class LayercodeClient {
|
|
|
6745
6843
|
console.warn('Unable to prime audio devices from listDevices()', error);
|
|
6746
6844
|
}
|
|
6747
6845
|
try {
|
|
6846
|
+
console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
|
|
6748
6847
|
await this.setInputDevice('default');
|
|
6749
6848
|
}
|
|
6750
6849
|
catch (error) {
|
|
@@ -6793,6 +6892,7 @@ class LayercodeClient {
|
|
|
6793
6892
|
});
|
|
6794
6893
|
this.deviceChangeListener = async (devices) => {
|
|
6795
6894
|
var _a;
|
|
6895
|
+
console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
|
|
6796
6896
|
try {
|
|
6797
6897
|
// Notify user that devices have changed
|
|
6798
6898
|
this.options.onDevicesChanged(devices);
|
|
@@ -6801,6 +6901,7 @@ class LayercodeClient {
|
|
|
6801
6901
|
const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
|
|
6802
6902
|
const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
|
|
6803
6903
|
let shouldSwitch = !this.recorderStarted;
|
|
6904
|
+
console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
|
|
6804
6905
|
if (!shouldSwitch) {
|
|
6805
6906
|
if (usingDefaultDevice) {
|
|
6806
6907
|
if (!defaultDevice) {
|
|
@@ -6820,6 +6921,7 @@ class LayercodeClient {
|
|
|
6820
6921
|
}
|
|
6821
6922
|
}
|
|
6822
6923
|
this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
|
|
6924
|
+
console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
|
|
6823
6925
|
if (shouldSwitch) {
|
|
6824
6926
|
console.debug('Selecting audio input device after change');
|
|
6825
6927
|
let targetDeviceId = null;
|