@drawdream/livespeech 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -7,7 +7,7 @@ var Region = {
7
7
  };
8
8
  var REGION_ENDPOINTS = {
9
9
  "ap-northeast-2": "wss://talk.drawdream.co.kr",
10
- "us-west-2": "wss://talk..drawdream.ca"
10
+ "us-west-2": "wss://talk.drawdream.ca"
11
11
  // Coming soon
12
12
  };
13
13
  function getEndpointForRegion(region) {
@@ -293,22 +293,13 @@ var WebSocketConnection = class {
293
293
  /**
294
294
  * Handle incoming message
295
295
  */
296
- handleMessage(data, onFirstConnect) {
296
+ handleMessage(data) {
297
297
  const message = parseServerMessage(data);
298
298
  if (!message) {
299
299
  this.logger.warn("Invalid message received:", data);
300
300
  return;
301
301
  }
302
302
  this.logger.debug("Received message:", message.type);
303
- if (message.type === "connected") {
304
- this.connectionId = message.connectionId;
305
- this.state = "connected";
306
- this.retryController.reset();
307
- this.startPingInterval();
308
- this.events.onOpen?.(message.connectionId);
309
- onFirstConnect?.();
310
- return;
311
- }
312
303
  if (message.type === "pong") {
313
304
  this.logger.debug("Pong received");
314
305
  return;
@@ -574,24 +565,17 @@ var CONFIG_DEFAULTS = {
574
565
  reconnectDelay: 1e3,
575
566
  debug: false
576
567
  };
577
- var SESSION_DEFAULTS = {
578
- voiceId: "en-US-Standard-A",
579
- languageCode: "en-US",
580
- inputFormat: "pcm16",
581
- outputFormat: "pcm16",
582
- sampleRate: 16e3
583
- };
584
568
  var LiveSpeechClient = class {
585
569
  config;
586
570
  connection;
587
571
  audioEncoder;
588
572
  logger;
589
573
  sessionId = null;
590
- sessionConfig = null;
574
+ isStreaming = false;
591
575
  // Event listeners using a simple map
592
576
  eventListeners = /* @__PURE__ */ new Map();
593
577
  // Simplified handlers
594
- transcriptHandler = null;
578
+ userTranscriptHandler = null;
595
579
  responseHandler = null;
596
580
  audioHandler = null;
597
581
  errorHandler = null;
@@ -653,6 +637,12 @@ var LiveSpeechClient = class {
653
637
  get hasActiveSession() {
654
638
  return this.sessionId !== null;
655
639
  }
640
+ /**
641
+ * Check if audio streaming is active
642
+ */
643
+ get isAudioStreaming() {
644
+ return this.isStreaming;
645
+ }
656
646
  /**
657
647
  * Connect to the server
658
648
  */
@@ -666,7 +656,7 @@ var LiveSpeechClient = class {
666
656
  disconnect() {
667
657
  this.logger.info("Disconnecting...");
668
658
  this.sessionId = null;
669
- this.sessionConfig = null;
659
+ this.isStreaming = false;
670
660
  this.connection.disconnect();
671
661
  }
672
662
  /**
@@ -679,16 +669,6 @@ var LiveSpeechClient = class {
679
669
  if (this.sessionId) {
680
670
  throw new Error("Session already active. Call endSession() first.");
681
671
  }
682
- const resolvedConfig = {
683
- prePrompt: config.prePrompt,
684
- voiceId: config.voiceId ?? SESSION_DEFAULTS.voiceId,
685
- languageCode: config.languageCode ?? SESSION_DEFAULTS.languageCode,
686
- inputFormat: config.inputFormat ?? SESSION_DEFAULTS.inputFormat,
687
- outputFormat: config.outputFormat ?? SESSION_DEFAULTS.outputFormat,
688
- sampleRate: config.sampleRate ?? SESSION_DEFAULTS.sampleRate,
689
- metadata: config.metadata ?? {}
690
- };
691
- this.sessionConfig = resolvedConfig;
692
672
  this.logger.info("Starting session...");
693
673
  return new Promise((resolve, reject) => {
694
674
  const onSessionStarted = (event) => {
@@ -705,16 +685,17 @@ var LiveSpeechClient = class {
705
685
  };
706
686
  this.on("sessionStarted", onSessionStarted);
707
687
  this.on("error", onError);
708
- this.connection.send({
709
- action: "startSession",
710
- prePrompt: resolvedConfig.prePrompt,
711
- voiceId: resolvedConfig.voiceId,
712
- languageCode: resolvedConfig.languageCode,
713
- inputFormat: resolvedConfig.inputFormat,
714
- outputFormat: resolvedConfig.outputFormat,
715
- sampleRate: resolvedConfig.sampleRate,
716
- metadata: resolvedConfig.metadata
717
- });
688
+ const startMessage = {
689
+ action: "startSession"
690
+ };
691
+ if (config?.prePrompt) {
692
+ startMessage.prePrompt = config.prePrompt;
693
+ }
694
+ if (config?.language) {
695
+ startMessage.language = config.language;
696
+ }
697
+ startMessage.pipelineMode = config?.pipelineMode ?? "live";
698
+ this.connection.send(startMessage);
718
699
  });
719
700
  }
720
701
  /**
@@ -726,6 +707,9 @@ var LiveSpeechClient = class {
726
707
  return;
727
708
  }
728
709
  this.logger.info("Ending session...");
710
+ if (this.isStreaming) {
711
+ this.audioEnd();
712
+ }
729
713
  return new Promise((resolve) => {
730
714
  const onSessionEnded = () => {
731
715
  this.off("sessionEnded", onSessionEnded);
@@ -736,28 +720,49 @@ var LiveSpeechClient = class {
736
720
  });
737
721
  }
738
722
  /**
739
- * Send audio data
723
+ * Start audio streaming session
740
724
  */
741
- sendAudio(data, options) {
725
+ audioStart() {
742
726
  if (!this.isConnected) {
743
727
  throw new Error("Not connected");
744
728
  }
745
729
  if (!this.sessionId) {
746
730
  throw new Error("No active session. Call startSession() first.");
747
731
  }
732
+ if (this.isStreaming) {
733
+ throw new Error("Already streaming. Call audioEnd() first.");
734
+ }
735
+ this.logger.info("Starting audio stream...");
736
+ this.connection.send({ action: "audioStart" });
737
+ this.isStreaming = true;
738
+ }
739
+ /**
740
+ * Send audio chunk (PCM16 base64 encoded)
741
+ */
742
+ sendAudioChunk(data) {
743
+ if (!this.isConnected) {
744
+ throw new Error("Not connected");
745
+ }
746
+ if (!this.isStreaming) {
747
+ throw new Error("Not streaming. Call audioStart() first.");
748
+ }
748
749
  const base64Data = this.audioEncoder.encode(data);
749
- const format = options?.format ?? this.sessionConfig?.inputFormat ?? SESSION_DEFAULTS.inputFormat;
750
- const sampleRate = this.sessionConfig?.sampleRate ?? SESSION_DEFAULTS.sampleRate;
751
- const audioMessage = {
752
- action: "audio",
753
- data: base64Data,
754
- format,
755
- sampleRate
756
- };
757
- if (options?.isFinal !== void 0) {
758
- audioMessage.isFinal = options.isFinal;
750
+ this.connection.send({
751
+ action: "audioChunk",
752
+ data: base64Data
753
+ });
754
+ }
755
+ /**
756
+ * End audio streaming session
757
+ */
758
+ audioEnd() {
759
+ if (!this.isStreaming) {
760
+ this.logger.warn("Not streaming");
761
+ return;
759
762
  }
760
- this.connection.send(audioMessage);
763
+ this.logger.info("Ending audio stream...");
764
+ this.connection.send({ action: "audioEnd" });
765
+ this.isStreaming = false;
761
766
  }
762
767
  // ==================== Event System ====================
763
768
  /**
@@ -779,13 +784,7 @@ var LiveSpeechClient = class {
779
784
  }
780
785
  }
781
786
  /**
782
- * Set transcript handler (simplified)
783
- */
784
- setTranscriptHandler(handler) {
785
- this.transcriptHandler = handler;
786
- }
787
- /**
788
- * Set response handler (simplified)
787
+ * Set response handler
789
788
  */
790
789
  setResponseHandler(handler) {
791
790
  this.responseHandler = handler;
@@ -796,6 +795,12 @@ var LiveSpeechClient = class {
796
795
  setAudioHandler(handler) {
797
796
  this.audioHandler = handler;
798
797
  }
798
+ /**
799
+ * Set user transcript handler
800
+ */
801
+ setUserTranscriptHandler(handler) {
802
+ this.userTranscriptHandler = handler;
803
+ }
799
804
  /**
800
805
  * Set error handler (simplified)
801
806
  */
@@ -825,7 +830,7 @@ var LiveSpeechClient = class {
825
830
  }
826
831
  handleDisconnected(code, _reason) {
827
832
  this.sessionId = null;
828
- this.sessionConfig = null;
833
+ this.isStreaming = false;
829
834
  const event = {
830
835
  type: "disconnected",
831
836
  reason: code === 1e3 ? "normal" : "error",
@@ -867,25 +872,19 @@ var LiveSpeechClient = class {
867
872
  break;
868
873
  case "sessionEnded":
869
874
  this.sessionId = null;
870
- this.sessionConfig = null;
875
+ this.isStreaming = false;
871
876
  this.emit("sessionEnded", {
872
877
  type: "sessionEnded",
873
878
  sessionId: message.sessionId,
874
879
  timestamp: message.timestamp
875
880
  });
876
881
  break;
877
- case "transcript": {
878
- const transcriptEvent = {
879
- type: "transcript",
880
- text: message.text,
881
- isFinal: message.isFinal,
882
+ case "ready": {
883
+ const readyEvent = {
884
+ type: "ready",
882
885
  timestamp: message.timestamp
883
886
  };
884
- if (message.confidence !== void 0) {
885
- transcriptEvent.confidence = message.confidence;
886
- }
887
- this.emit("transcript", transcriptEvent);
888
- this.transcriptHandler?.(message.text, message.isFinal);
887
+ this.emit("ready", readyEvent);
889
888
  break;
890
889
  }
891
890
  case "response": {
@@ -912,8 +911,26 @@ var LiveSpeechClient = class {
912
911
  this.audioHandler?.(audioData);
913
912
  break;
914
913
  }
914
+ case "userTranscript": {
915
+ const userTranscriptEvent = {
916
+ type: "userTranscript",
917
+ text: message.text,
918
+ timestamp: message.timestamp
919
+ };
920
+ this.emit("userTranscript", userTranscriptEvent);
921
+ this.userTranscriptHandler?.(message.text);
922
+ break;
923
+ }
924
+ case "turnComplete": {
925
+ const turnCompleteEvent = {
926
+ type: "turnComplete",
927
+ timestamp: message.timestamp
928
+ };
929
+ this.emit("turnComplete", turnCompleteEvent);
930
+ break;
931
+ }
915
932
  case "error":
916
- this.handleError(message.code, message.message, message.details);
933
+ this.handleError(message.code, message.message);
917
934
  break;
918
935
  default:
919
936
  this.logger.warn("Unknown message type:", message.type);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@drawdream/livespeech",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Real-time speech-to-speech AI conversation SDK",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",