@unith-ai/core-client 2.0.4-beta.5 → 2.0.5-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lib.web.js CHANGED
@@ -503,6 +503,7 @@ var EventType = /* @__PURE__ */ ((EventType2) => {
503
503
  EventType2["PING"] = "ping";
504
504
  EventType2["PONG"] = "pong";
505
505
  EventType2["BINARY"] = "binary";
506
+ EventType2["STOP"] = "stop";
506
507
  return EventType2;
507
508
  })(EventType || {});
508
509
  var StreamingEventType = /* @__PURE__ */ ((StreamingEventType2) => {
@@ -511,6 +512,7 @@ var StreamingEventType = /* @__PURE__ */ ((StreamingEventType2) => {
511
512
  StreamingEventType2["METADATA"] = "metadata";
512
513
  StreamingEventType2["ERROR"] = "error";
513
514
  StreamingEventType2["CACHE"] = "cache";
515
+ StreamingEventType2["STOP"] = "stop";
514
516
  return StreamingEventType2;
515
517
  })(StreamingEventType || {});
516
518
  function isJoinEvent(event) {
@@ -25406,6 +25408,45 @@ var N = class _N {
25406
25408
  };
25407
25409
  N.DEFAULT_BASE_URI = "wss://api.elevenlabs.io";
25408
25410
 
25411
+ // src/utils/storage.ts
25412
+ var storage = window.localStorage;
25413
+ var location = window.location.origin + window.location.pathname;
25414
+ var getFromStorage = (key, orgId, headId) => {
25415
+ if (typeof storage !== "undefined") {
25416
+ const prefix = `chat:${location}:${orgId}:${headId}:`;
25417
+ return storage.getItem(`${prefix}${key}`);
25418
+ }
25419
+ };
25420
+ var setToStorage = (key, val, orgId, headId) => {
25421
+ if (typeof storage !== "undefined") {
25422
+ const prefix = `chat:${location}:${orgId}:${headId}:`;
25423
+ storage.setItem(`${prefix}${key}`, val);
25424
+ return;
25425
+ }
25426
+ };
25427
+
25428
+ // src/utils/microphone.ts
25429
+ var EXPIRATION_OFFSET = 5 * 60 * 1e3;
25430
+ var defaultElevenLabsOptions = {
25431
+ vadSilenceThresholdSecs: 1.5,
25432
+ noiseSuppression: true,
25433
+ vadThreshold: 0.4,
25434
+ minSpeechDurationMs: 100,
25435
+ minSilenceDurationMs: 100,
25436
+ disableDynamicSpeechRecognition: false
25437
+ };
25438
+ function getLanguageCode(languageTag) {
25439
+ if (!languageTag || typeof languageTag !== "string") {
25440
+ throw new Error("Invalid language tag");
25441
+ }
25442
+ const parts = languageTag.trim().split("-");
25443
+ const languageCode = parts[0].toLowerCase();
25444
+ if (!/^[a-z]{2,3}$/.test(languageCode)) {
25445
+ throw new Error("Invalid ISO 639 language code format");
25446
+ }
25447
+ return languageCode;
25448
+ }
25449
+
25409
25450
  // src/modules/microphone.ts
25410
25451
  var Microphone = class _Microphone {
25411
25452
  constructor(provider, options, elevenLabsOptions, user, headInfo, microphoneAccess, token, sendMessage) {
@@ -25499,13 +25540,33 @@ var Microphone = class _Microphone {
25499
25540
  const sdk = speechSdk;
25500
25541
  let SpeechConfig, AudioConfig, SpeechRecognizer, ResultReason, PhraseListGrammar;
25501
25542
  if (sdk.SpeechConfig) {
25502
- ({ SpeechConfig, AudioConfig, SpeechRecognizer, ResultReason, PhraseListGrammar } = sdk);
25543
+ ({
25544
+ SpeechConfig,
25545
+ AudioConfig,
25546
+ SpeechRecognizer,
25547
+ ResultReason,
25548
+ PhraseListGrammar
25549
+ } = sdk);
25503
25550
  } else if ((_a = sdk.default) == null ? void 0 : _a.SpeechConfig) {
25504
- ({ SpeechConfig, AudioConfig, SpeechRecognizer, ResultReason, PhraseListGrammar } = sdk.default);
25551
+ ({
25552
+ SpeechConfig,
25553
+ AudioConfig,
25554
+ SpeechRecognizer,
25555
+ ResultReason,
25556
+ PhraseListGrammar
25557
+ } = sdk.default);
25505
25558
  } else if ((_b = sdk.SpeechSDK) == null ? void 0 : _b.SpeechConfig) {
25506
- ({ SpeechConfig, AudioConfig, SpeechRecognizer, ResultReason, PhraseListGrammar } = sdk.SpeechSDK);
25559
+ ({
25560
+ SpeechConfig,
25561
+ AudioConfig,
25562
+ SpeechRecognizer,
25563
+ ResultReason,
25564
+ PhraseListGrammar
25565
+ } = sdk.SpeechSDK);
25507
25566
  } else {
25508
- throw new Error("Azure Speech SDK does not expose expected symbols. Ensure microsoft-cognitiveservices-speech-sdk is installed.");
25567
+ throw new Error(
25568
+ "Azure Speech SDK does not expose expected symbols. Ensure microsoft-cognitiveservices-speech-sdk is installed."
25569
+ );
25509
25570
  }
25510
25571
  const speechConfig = SpeechConfig.fromAuthorizationToken(
25511
25572
  this.tokenObj.token,
@@ -25528,6 +25589,12 @@ var Microphone = class _Microphone {
25528
25589
  });
25529
25590
  }
25530
25591
  };
25592
+ this.recognizer.recognizing = (_3, event) => {
25593
+ if (event.result.text.length < 2) return;
25594
+ this.options.onMicrophonePartialSpeechRecognitionResult({
25595
+ transcript: event.result.text
25596
+ });
25597
+ };
25531
25598
  this.recognizer.startContinuousRecognitionAsync(
25532
25599
  () => {
25533
25600
  this.micBeep.play();
@@ -25565,11 +25632,10 @@ var Microphone = class _Microphone {
25565
25632
  generatedAt: Date.now()
25566
25633
  };
25567
25634
  }
25568
- this.connection = N.connect({
25635
+ const params = {
25569
25636
  token: this.tokenObj.token,
25570
25637
  modelId: "scribe_v2_realtime",
25571
25638
  includeTimestamps: false,
25572
- // languageCode: getLanguageCode(this.headInfo.lang_speech_recognition),
25573
25639
  microphone: {
25574
25640
  echoCancellation: true,
25575
25641
  noiseSuppression: this.elevenLabsOptions.noiseSuppression
@@ -25579,7 +25645,13 @@ var Microphone = class _Microphone {
25579
25645
  vadThreshold: this.elevenLabsOptions.vadThreshold,
25580
25646
  minSpeechDurationMs: this.elevenLabsOptions.minSpeechDurationMs,
25581
25647
  minSilenceDurationMs: this.elevenLabsOptions.minSilenceDurationMs
25582
- });
25648
+ };
25649
+ if (this.elevenLabsOptions.disableDynamicSpeechRecognition) {
25650
+ params.languageCode = getLanguageCode(
25651
+ this.headInfo.lang_speech_recognition || "en-US"
25652
+ );
25653
+ }
25654
+ this.connection = N.connect(params);
25583
25655
  this.connection.on(P.SESSION_STARTED, () => {
25584
25656
  this.micBeep.play();
25585
25657
  this.updateMicrophoneStatus("ON");
@@ -25588,6 +25660,12 @@ var Microphone = class _Microphone {
25588
25660
  if (data.text.length < 2 || data.text[0] === "(") return;
25589
25661
  this.handleRecognitionResult(data.text);
25590
25662
  });
25663
+ this.connection.on(P.PARTIAL_TRANSCRIPT, (data) => {
25664
+ if (data.text.length < 2 || data.text[0] === "(") return;
25665
+ this.options.onMicrophonePartialSpeechRecognitionResult({
25666
+ transcript: data.text
25667
+ });
25668
+ });
25591
25669
  this.connection.on(P.ERROR, (error) => {
25592
25670
  console.error("Error:", error);
25593
25671
  this.options.onMicrophoneError({
@@ -25769,23 +25847,6 @@ var SyncController = class {
25769
25847
  }
25770
25848
  };
25771
25849
 
25772
- // src/utils/storage.ts
25773
- var storage = window.localStorage;
25774
- var location = window.location.origin + window.location.pathname;
25775
- var getFromStorage = (key, orgId, headId) => {
25776
- if (typeof storage !== "undefined") {
25777
- const prefix = `chat:${location}:${orgId}:${headId}:`;
25778
- return storage.getItem(`${prefix}${key}`);
25779
- }
25780
- };
25781
- var setToStorage = (key, val, orgId, headId) => {
25782
- if (typeof storage !== "undefined") {
25783
- const prefix = `chat:${location}:${orgId}:${headId}:`;
25784
- storage.setItem(`${prefix}${key}`, val);
25785
- return;
25786
- }
25787
- };
25788
-
25789
25850
  // src/modules/user.ts
25790
25851
  var User = class _User {
25791
25852
  constructor(accessToken, tokenType, id, username, password, orgId, headId, apiBase) {
@@ -26173,7 +26234,7 @@ var Vp8VideoOutput = class _Vp8VideoOutput {
26173
26234
  cancelAnimationFrame(this.animationFrameId);
26174
26235
  this.animationFrameId = null;
26175
26236
  }
26176
- this.frameBuffer = [];
26237
+ this.frameBuffer.splice(0);
26177
26238
  this.isProcessingFrame = false;
26178
26239
  this.currentSequenceId = 0;
26179
26240
  this.startTime = 0;
@@ -26181,35 +26242,8 @@ var Vp8VideoOutput = class _Vp8VideoOutput {
26181
26242
  count: 0,
26182
26243
  lastTime: 0
26183
26244
  };
26184
- if (this.decoder && this.decoder.state === "configured") {
26185
- try {
26186
- this.decoder.flush();
26187
- } catch (error) {
26188
- console.warn("Error flushing decoder:", error);
26189
- }
26190
- }
26191
- this.clearCanvas();
26192
26245
  this.state = "ready" /* READY */;
26193
- }
26194
- interrupt(fadeOut = false) {
26195
- if (this.state === "destroyed" /* DESTROYED */) return;
26196
- this.state = "interrupted" /* INTERRUPTED */;
26197
- this.frameBuffer = [];
26198
- this.isProcessingFrame = false;
26199
- if (this.decoder && this.decoder.state === "configured") {
26200
- try {
26201
- this.decoder.flush();
26202
- } catch (error) {
26203
- console.warn("Error flushing decoder:", error);
26204
- }
26205
- }
26206
- if (fadeOut) {
26207
- this.fadeOutCanvas().then(() => {
26208
- this.clearCanvas();
26209
- });
26210
- } else {
26211
- this.clearCanvas();
26212
- }
26246
+ return;
26213
26247
  }
26214
26248
  destroy() {
26215
26249
  if (this.state === "destroyed" /* DESTROYED */) return;
@@ -26315,22 +26349,6 @@ var Vp8VideoOutput = class _Vp8VideoOutput {
26315
26349
  clearCanvas() {
26316
26350
  this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
26317
26351
  }
26318
- async fadeOutCanvas() {
26319
- return new Promise((resolve) => {
26320
- let opacity = 1;
26321
- const fadeStep = () => {
26322
- opacity -= 0.05;
26323
- this.canvas.style.opacity = opacity.toString();
26324
- if (opacity <= 0) {
26325
- this.canvas.style.opacity = "1";
26326
- resolve();
26327
- } else {
26328
- requestAnimationFrame(fadeStep);
26329
- }
26330
- };
26331
- fadeStep();
26332
- });
26333
- }
26334
26352
  setupContextLossHandling() {
26335
26353
  this.handleContextLoss = (event) => {
26336
26354
  event.preventDefault();
@@ -26365,6 +26383,7 @@ var VideoOutput = class _VideoOutput {
26365
26383
  this.bufferCheckAnimationId = null;
26366
26384
  this.lastBufferCheckTime = 0;
26367
26385
  this.sessionStarted = false;
26386
+ this.isRestoringIdleVideo = false;
26368
26387
  //cached video
26369
26388
  this.isShowingCachedVideo = false;
26370
26389
  this.onSpeakingStartCallback = null;
@@ -26468,7 +26487,7 @@ var VideoOutput = class _VideoOutput {
26468
26487
  this.lastBufferCheckTime = 0;
26469
26488
  const checkBuffer = (timestamp) => {
26470
26489
  if (timestamp - this.lastBufferCheckTime >= 100) {
26471
- if (this.sessionStarted && this.videoOutput.getBufferLength() > 0) {
26490
+ if (this.sessionStarted && this.videoOutput.getBufferLength() > 0 && !this.isRestoringIdleVideo) {
26472
26491
  this.hideIdleVideoBeforeStream();
26473
26492
  } else if (!this.videoOutput.getStreamingStatus() && this.videoOutput.getBufferLength() === 0) {
26474
26493
  this.showIdleVideoAfterStream();
@@ -26512,7 +26531,20 @@ var VideoOutput = class _VideoOutput {
26512
26531
  { once: true }
26513
26532
  );
26514
26533
  } catch (error) {
26515
- console.error("Failed to play cached video:", error);
26534
+ this.cachedVideo.style.opacity = "0";
26535
+ this.isShowingCachedVideo = false;
26536
+ await this.showIdleVideo();
26537
+ }
26538
+ }
26539
+ async stopCachedVideo() {
26540
+ if (!this.isShowingCachedVideo || !this.cachedVideo || this.isTransitioning)
26541
+ return;
26542
+ this.isShowingCachedVideo = false;
26543
+ try {
26544
+ await this.crossfadeFromCachedToIdle();
26545
+ await this.cleanupCachedVideo();
26546
+ await this.showIdleVideo();
26547
+ } catch (error) {
26516
26548
  this.cachedVideo.style.opacity = "0";
26517
26549
  this.isShowingCachedVideo = false;
26518
26550
  await this.showIdleVideo();
@@ -26587,6 +26619,9 @@ var VideoOutput = class _VideoOutput {
26587
26619
  getStreamingStatus() {
26588
26620
  return this.videoOutput.getStreamingStatus();
26589
26621
  }
26622
+ isPlayingCachedVideo() {
26623
+ return this.isShowingCachedVideo;
26624
+ }
26590
26625
  async showIdleVideo() {
26591
26626
  var _a, _b;
26592
26627
  if (!this.idleVideo || this.isShowingIdleVideo) return;
@@ -26623,15 +26658,16 @@ var VideoOutput = class _VideoOutput {
26623
26658
  return this.videoOutput.addFrame(uint8Array, timeStamp, isKeyframe);
26624
26659
  }
26625
26660
  clearFrame() {
26626
- this.showIdleVideo();
26627
- return this.videoOutput.clearFrame();
26661
+ this.isRestoringIdleVideo = true;
26662
+ this.showIdleVideoAfterStream();
26663
+ this.videoOutput.clearFrame();
26628
26664
  }
26629
26665
  async toggleStream(status) {
26666
+ if (status) {
26667
+ this.isRestoringIdleVideo = false;
26668
+ }
26630
26669
  return this.videoOutput.toggleStream(status);
26631
26670
  }
26632
- interrupt(fadeOut) {
26633
- this.videoOutput.interrupt(fadeOut);
26634
- }
26635
26671
  destroy() {
26636
26672
  if (this.bufferCheckAnimationId) {
26637
26673
  cancelAnimationFrame(this.bufferCheckAnimationId);
@@ -26655,16 +26691,6 @@ var VideoOutput = class _VideoOutput {
26655
26691
  }
26656
26692
  };
26657
26693
 
26658
- // src/utils/microphone.ts
26659
- var EXPIRATION_OFFSET = 5 * 60 * 1e3;
26660
- var defaultElevenLabsOptions = {
26661
- vadSilenceThresholdSecs: 1.5,
26662
- noiseSuppression: true,
26663
- vadThreshold: 0.4,
26664
- minSpeechDurationMs: 100,
26665
- minSilenceDurationMs: 100
26666
- };
26667
-
26668
26694
  // src/utils/sync.ts
26669
26695
  var DEFAULT_SYNC_CONFIG = {
26670
26696
  tolerance: 40,
@@ -26705,6 +26731,7 @@ var Conversation = class _Conversation {
26705
26731
  this.status = "connecting";
26706
26732
  this.volume = 1;
26707
26733
  this.sessionStarted = false;
26734
+ this.isStoppingLastResponse = false;
26708
26735
  this.messageCounter = 0;
26709
26736
  this.monitor = null;
26710
26737
  this.microphone = null;
@@ -26723,7 +26750,7 @@ var Conversation = class _Conversation {
26723
26750
  });
26724
26751
  if ("suggestions" in event) {
26725
26752
  const suggestions = event.suggestions || [];
26726
- if (suggestions.length > 1) {
26753
+ if (suggestions.length > 0) {
26727
26754
  this.suggestionsQueue = suggestions;
26728
26755
  }
26729
26756
  }
@@ -26733,7 +26760,8 @@ var Conversation = class _Conversation {
26733
26760
  if (!errorType || ![
26734
26761
  "resource_exhausted",
26735
26762
  "deadline_exceeded",
26736
- "inactivity_timeout"
26763
+ "inactivity_timeout",
26764
+ "canceled"
26737
26765
  ].includes(errorType)) {
26738
26766
  this.options.onError({
26739
26767
  message: "A connection error occurred. Please try again.",
@@ -26742,6 +26770,9 @@ var Conversation = class _Conversation {
26742
26770
  });
26743
26771
  return;
26744
26772
  }
26773
+ if (errorType === "canceled") {
26774
+ return;
26775
+ }
26745
26776
  if (errorType === "resource_exhausted") {
26746
26777
  if (this.avController.isPlaying) {
26747
26778
  this.options.onError({
@@ -26817,6 +26848,15 @@ var Conversation = class _Conversation {
26817
26848
  if (this.avController.isStoppingAV) return;
26818
26849
  this.handleVideoFrame(event);
26819
26850
  }
26851
+ if (event.type === "stop") {
26852
+ this.videoOutput.clearFrame();
26853
+ this.audioOutput.worklet.port.postMessage({
26854
+ type: "stopPlayback"
26855
+ });
26856
+ this.options.onStoppingEnd();
26857
+ this.isStoppingLastResponse = false;
26858
+ return;
26859
+ }
26820
26860
  this.avController.playAudioVideo();
26821
26861
  };
26822
26862
  this.handleVideoFrame = async (event) => {
@@ -26942,7 +26982,7 @@ var Conversation = class _Conversation {
26942
26982
  this.options.onConnect({
26943
26983
  userId: connection.userId,
26944
26984
  headInfo: {
26945
- name: this.headInfo.name,
26985
+ name: this.headInfo.alias,
26946
26986
  phrases: this.headInfo.phrases,
26947
26987
  language: this.headInfo.language,
26948
26988
  avatar: this.headInfo.avatarSrc
@@ -26971,6 +27011,8 @@ var Conversation = class _Conversation {
26971
27011
  onMicrophoneStatusChange: () => {
26972
27012
  },
26973
27013
  onMicrophoneSpeechRecognitionResult: () => {
27014
+ },
27015
+ onMicrophonePartialSpeechRecognitionResult: () => {
26974
27016
  }
26975
27017
  },
26976
27018
  elevenLabsOptions: defaultElevenLabsOptions,
@@ -26992,6 +27034,8 @@ var Conversation = class _Conversation {
26992
27034
  },
26993
27035
  onStoppingEnd: () => {
26994
27036
  },
27037
+ onStoppingStart: () => {
27038
+ },
26995
27039
  onTimeout: () => {
26996
27040
  },
26997
27041
  onTimeoutWarning: () => {
@@ -27163,23 +27207,47 @@ var Conversation = class _Conversation {
27163
27207
  }
27164
27208
  }
27165
27209
  /**
27166
- * To stop current response, we'll do the following:
27167
- * 1. set a flag that'll prevent adding new audio & video events to their respective queues
27168
- * 2. clear video queue & switch state to idle
27169
- * 3. clear audio queue
27170
- * 4. send an event if all expected response from BE has been received. If not, FE will keep status in 'stopping' mode
27210
+ * To stop streaming response, we'll send a stop message to the BE. The BE will then stop sending audio and video frames, which will naturally end the response. This is more efficient and leads to a better user experience as it allows for a smoother transition when stopping the response.
27211
+ *
27212
+ * To stop cached response, we just stop the video and fade to idle.
27171
27213
  */
27172
- // public stopCurrentResponse() {
27173
- // this.avController.toggleStoppingVideo(true);
27174
- // this.videoOutput.clearFrame();
27175
- // this.audioOutput.worklet.port.postMessage({
27176
- // type: "stopPlayback",
27177
- // });
27178
- // if (!this.videoOutput.getStreamingStatus()) {
27179
- // this.options.onStoppingEnd();
27180
- // this.avController.toggleStoppingVideo(false);
27181
- // }
27182
- // }
27214
+ async stopResponse() {
27215
+ if (this.isStoppingLastResponse) {
27216
+ return;
27217
+ }
27218
+ this.isStoppingLastResponse = true;
27219
+ this.options.onStoppingStart();
27220
+ if (!this.connection) {
27221
+ this.isStoppingLastResponse = false;
27222
+ this.options.onStoppingEnd();
27223
+ throw new Error("Connection not established");
27224
+ }
27225
+ if (this.videoOutput.isPlayingCachedVideo()) {
27226
+ await this.videoOutput.stopCachedVideo();
27227
+ this.isStoppingLastResponse = false;
27228
+ this.options.onStoppingEnd();
27229
+ return;
27230
+ }
27231
+ if (!this.avController.isPlaying) {
27232
+ this.isStoppingLastResponse = false;
27233
+ this.options.onStoppingEnd();
27234
+ return;
27235
+ }
27236
+ const sessionId = `${this.user.id}::${this.user.orgId}::${this.user.headId}::${this.user.sessionId.toString().padStart(5, "0")}`;
27237
+ const message = {
27238
+ id: this.messageCounter,
27239
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
27240
+ speaker: "user",
27241
+ text: "",
27242
+ isSent: false,
27243
+ user_id: this.user.id,
27244
+ username: this.user.username,
27245
+ event: "stop" /* STOP */,
27246
+ visible: true,
27247
+ session_id: sessionId
27248
+ };
27249
+ this.connection.sendMessage(message);
27250
+ }
27183
27251
  async toggleMute() {
27184
27252
  this.volume = this.volume === 0 ? 1 : 0;
27185
27253
  this.audioOutput.toggleMute();