@volley/recognition-client-sdk 0.1.782 → 0.1.799

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -159,9 +159,11 @@ declare enum AmazonNovaSonicModel {
159
159
  }
160
160
  /**
161
161
  * Self-serve vLLM batch transcription models
162
- * Backed by recognition-inference / RunPod `/transcribe`
162
+ * Backed by recognition-inference / RunPod `/ws/transcribe`
163
163
  */
164
164
  declare enum SelfServeVllmModel {
165
+ QWEN3_ASR_0_6B = "qwen3-asr-0.6b",
166
+ QWEN3_ASR_0_6B_WOF_LETTER = "qwen3-asr-0.6b-wof-letter",
165
167
  QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
166
168
  }
167
169
  /**
@@ -651,6 +653,20 @@ declare namespace AudioEncoding {
651
653
  * @returns true if valid encoding name
652
654
  */
653
655
  function isNameValid(nameStr: string): boolean;
656
+ /**
657
+ * Coerce a possibly-stringly-typed encoding value into the AudioEncoding enum.
658
+ *
659
+ * - enum / number → returned as-is (already AudioEncoding-shaped)
660
+ * - string (case-insensitive, e.g. 'linear16', 'LINEAR16') → converted via {@link fromName}.
661
+ * Invokes `onStringInput` with a warning message so callers can route it
662
+ * to their preferred logger.
663
+ * - invalid string → throws (preferred over silent fallback so typos surface)
664
+ * - undefined → defaults to {@link AudioEncoding.LINEAR16}
665
+ *
666
+ * Always normalize at the SDK / server boundary so downstream code can rely
667
+ * on a numeric AudioEncoding (the wire-level binary frame header is uint32).
668
+ */
669
+ function coerce(value: AudioEncoding | string | number | undefined, onStringInput?: (warning: string) => void): AudioEncoding;
654
670
  }
655
671
  /**
656
672
  * Common sample rates (in Hz)
@@ -906,10 +922,10 @@ interface ASRRequestConfig {
906
922
  * doesn't respond with is_final=true after stopRecording().
907
923
  *
908
924
  * - aggressive: 100ms - fast response, may cut off slow providers
909
- * - balanced: 500ms - current default, good for most cases
910
- * - conservative: 1000ms - wait longer for complex utterances
925
+ * - balanced: 500ms - good for most cases
926
+ * - conservative: 1000ms - current default, wait longer for complex utterances
911
927
  *
912
- * @default 'balanced'
928
+ * @default 'conservative'
913
929
  * @see FinalTranscriptStability enum for detailed descriptions
914
930
  */
915
931
  finalTranscriptStability?: FinalTranscriptStability | string;
@@ -1404,6 +1420,23 @@ interface IRecognitionClient {
1404
1420
  * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
1405
1421
  */
1406
1422
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
1423
+ /**
1424
+ * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
1425
+ * downsamples to the session's target rate (currently 16 kHz, set by the
1426
+ * server validator) before transmitting.
1427
+ *
1428
+ * Use this when your capture pipeline produces audio at the system's
1429
+ * native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
1430
+ * If your audio is already at the target rate, prefer `sendAudio()` to
1431
+ * skip the resample step.
1432
+ *
1433
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
1434
+ * mixed to mono by the caller.
1435
+ *
1436
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
1437
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
1438
+ */
1439
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
1407
1440
  /**
1408
1441
  * Stop recording and wait for final transcript
1409
1442
  * The server will close the connection after sending the final transcript.
@@ -1603,6 +1636,29 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
1603
1636
  */
1604
1637
  private connectWithRetry;
1605
1638
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
1639
+ /**
1640
+ * Send PCM16 mono audio captured at any sample rate. The SDK downsamples
1641
+ * to the session's target rate (currently 16 kHz per server validator)
1642
+ * before sending.
1643
+ *
1644
+ * Use this when your capture pipeline produces audio at the system's
1645
+ * native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
1646
+ * desktop/mobile hardware — and you don't want to bring your own
1647
+ * resampler. If your audio is already at the target rate, prefer
1648
+ * `sendAudio()` to skip the resample step.
1649
+ *
1650
+ * Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
1651
+ * dependencies, has a built-in low-pass effect so aliasing stays out of
1652
+ * the speech band. Suitable for ASR; not a substitute for a high-quality
1653
+ * resampler if you're doing music or full-fidelity processing.
1654
+ *
1655
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
1656
+ * mixed to mono by the caller.
1657
+ *
1658
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
1659
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
1660
+ */
1661
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
1606
1662
  private sendAudioInternal;
1607
1663
  /**
1608
1664
  * Only active when the client is in READY state; otherwise it will return immediately.
@@ -159,9 +159,11 @@ declare enum AmazonNovaSonicModel {
159
159
  }
160
160
  /**
161
161
  * Self-serve vLLM batch transcription models
162
- * Backed by recognition-inference / RunPod `/transcribe`
162
+ * Backed by recognition-inference / RunPod `/ws/transcribe`
163
163
  */
164
164
  declare enum SelfServeVllmModel {
165
+ QWEN3_ASR_0_6B = "qwen3-asr-0.6b",
166
+ QWEN3_ASR_0_6B_WOF_LETTER = "qwen3-asr-0.6b-wof-letter",
165
167
  QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
166
168
  }
167
169
  /**
@@ -660,6 +662,20 @@ declare namespace AudioEncoding {
660
662
  * @returns true if valid encoding name
661
663
  */
662
664
  function isNameValid(nameStr: string): boolean;
665
+ /**
666
+ * Coerce a possibly-stringly-typed encoding value into the AudioEncoding enum.
667
+ *
668
+ * - enum / number → returned as-is (already AudioEncoding-shaped)
669
+ * - string (case-insensitive, e.g. 'linear16', 'LINEAR16') → converted via {@link fromName}.
670
+ * Invokes `onStringInput` with a warning message so callers can route it
671
+ * to their preferred logger.
672
+ * - invalid string → throws (preferred over silent fallback so typos surface)
673
+ * - undefined → defaults to {@link AudioEncoding.LINEAR16}
674
+ *
675
+ * Always normalize at the SDK / server boundary so downstream code can rely
676
+ * on a numeric AudioEncoding (the wire-level binary frame header is uint32).
677
+ */
678
+ function coerce(value: AudioEncoding | string | number | undefined, onStringInput?: (warning: string) => void): AudioEncoding;
663
679
  }
664
680
  /**
665
681
  * Common sample rates (in Hz)
@@ -1636,10 +1652,10 @@ interface ASRRequestConfig {
1636
1652
  * doesn't respond with is_final=true after stopRecording().
1637
1653
  *
1638
1654
  * - aggressive: 100ms - fast response, may cut off slow providers
1639
- * - balanced: 500ms - current default, good for most cases
1640
- * - conservative: 1000ms - wait longer for complex utterances
1655
+ * - balanced: 500ms - good for most cases
1656
+ * - conservative: 1000ms - current default, wait longer for complex utterances
1641
1657
  *
1642
- * @default 'balanced'
1658
+ * @default 'conservative'
1643
1659
  * @see FinalTranscriptStability enum for detailed descriptions
1644
1660
  */
1645
1661
  finalTranscriptStability?: FinalTranscriptStability | string;
@@ -2173,6 +2189,23 @@ interface IRecognitionClient {
2173
2189
  * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
2174
2190
  */
2175
2191
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2192
+ /**
2193
+ * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
2194
+ * downsamples to the session's target rate (currently 16 kHz, set by the
2195
+ * server validator) before transmitting.
2196
+ *
2197
+ * Use this when your capture pipeline produces audio at the system's
2198
+ * native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
2199
+ * If your audio is already at the target rate, prefer `sendAudio()` to
2200
+ * skip the resample step.
2201
+ *
2202
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
2203
+ * mixed to mono by the caller.
2204
+ *
2205
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
2206
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
2207
+ */
2208
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2176
2209
  /**
2177
2210
  * Stop recording and wait for final transcript
2178
2211
  * The server will close the connection after sending the final transcript.
@@ -2378,6 +2411,29 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
2378
2411
  */
2379
2412
  private connectWithRetry;
2380
2413
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2414
+ /**
2415
+ * Send PCM16 mono audio captured at any sample rate. The SDK downsamples
2416
+ * to the session's target rate (currently 16 kHz per server validator)
2417
+ * before sending.
2418
+ *
2419
+ * Use this when your capture pipeline produces audio at the system's
2420
+ * native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
2421
+ * desktop/mobile hardware — and you don't want to bring your own
2422
+ * resampler. If your audio is already at the target rate, prefer
2423
+ * `sendAudio()` to skip the resample step.
2424
+ *
2425
+ * Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
2426
+ * dependencies, has a built-in low-pass effect so aliasing stays out of
2427
+ * the speech band. Suitable for ASR; not a substitute for a high-quality
2428
+ * resampler if you're doing music or full-fidelity processing.
2429
+ *
2430
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
2431
+ * mixed to mono by the caller.
2432
+ *
2433
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
2434
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
2435
+ */
2436
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2381
2437
  private sendAudioInternal;
2382
2438
  /**
2383
2439
  * Only active when the client is in READY state; otherwise it will return immediately.
@@ -2801,6 +2857,14 @@ interface ISimplifiedVGFRecognitionClient {
2801
2857
  * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
2802
2858
  */
2803
2859
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2860
+ /**
2861
+ * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
2862
+ * downsamples to the session's target rate before transmitting. Use
2863
+ * when capture is at the system's native rate (browser AudioContext is
2864
+ * typically 44.1 kHz or 48 kHz). Audio must be signed 16-bit
2865
+ * little-endian PCM, mono.
2866
+ */
2867
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2804
2868
  /**
2805
2869
  * Stop recording and wait for final transcription
2806
2870
  * @returns Promise that resolves when transcription is complete
@@ -2891,6 +2955,13 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
2891
2955
  constructor(config: SimplifiedVGFClientConfig);
2892
2956
  connect(): Promise<void>;
2893
2957
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2958
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2959
+ /**
2960
+ * Set VGF recording status to RECORDING on the first audio chunk.
2961
+ * Idempotent — subsequent calls are no-ops until disconnect/stop resets
2962
+ * `isRecordingAudio`.
2963
+ */
2964
+ private markRecordingStarted;
2894
2965
  stopRecording(): Promise<void>;
2895
2966
  stopAbnormally(): void;
2896
2967
  getAudioUtteranceId(): string;
package/dist/index.js CHANGED
@@ -3838,6 +3838,8 @@ var AmazonNovaSonicModel;
3838
3838
  })(AmazonNovaSonicModel || (AmazonNovaSonicModel = {}));
3839
3839
  var SelfServeVllmModel;
3840
3840
  (function(SelfServeVllmModel2) {
3841
+ SelfServeVllmModel2["QWEN3_ASR_0_6B"] = "qwen3-asr-0.6b";
3842
+ SelfServeVllmModel2["QWEN3_ASR_0_6B_WOF_LETTER"] = "qwen3-asr-0.6b-wof-letter";
3841
3843
  SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
3842
3844
  })(SelfServeVllmModel || (SelfServeVllmModel = {}));
3843
3845
 
@@ -4642,6 +4644,21 @@ var AudioEncoding;
4642
4644
  return NAME_TO_ENUM.has(nameStr.toUpperCase());
4643
4645
  }
4644
4646
  AudioEncoding2.isNameValid = isNameValid;
4647
+ function coerce2(value, onStringInput) {
4648
+ if (value === void 0) {
4649
+ return AudioEncoding2.LINEAR16;
4650
+ }
4651
+ if (typeof value === "number") {
4652
+ return value;
4653
+ }
4654
+ const result = fromName(value);
4655
+ if (result === void 0) {
4656
+ throw new Error(`Invalid encoding string: '${value}'. Use AudioEncoding enum or one of: LINEAR16, OGG_OPUS, FLAC, MULAW, ALAW (case insensitive)`);
4657
+ }
4658
+ onStringInput?.(`encoding passed as string '${value}'; prefer AudioEncoding.${toName(result)} enum for type safety`);
4659
+ return result;
4660
+ }
4661
+ AudioEncoding2.coerce = coerce2;
4645
4662
  })(AudioEncoding || (AudioEncoding = {}));
4646
4663
  var PREFIX_AUDIO_ENCODING_OFFSET = 128;
4647
4664
  var SampleRate;
@@ -5412,6 +5429,37 @@ var MessageHandler = class {
5412
5429
  }
5413
5430
  };
5414
5431
 
5432
+ // src/utils/audio-resampler.ts
5433
+ function downsamplePcm16(input, srcRate, targetRate) {
5434
+ if (targetRate > srcRate) {
5435
+ throw new Error(
5436
+ `downsamplePcm16: cannot upsample from ${srcRate}Hz to ${targetRate}Hz; capture audio at \u2265 ${targetRate}Hz instead.`
5437
+ );
5438
+ }
5439
+ const buffer = ArrayBuffer.isView(input) ? input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength) : input;
5440
+ const src = new Int16Array(buffer);
5441
+ if (srcRate === targetRate || src.length === 0) {
5442
+ return src.slice().buffer;
5443
+ }
5444
+ const ratio = srcRate / targetRate;
5445
+ const dstLen = Math.floor(src.length / ratio);
5446
+ const dst = new Int16Array(dstLen);
5447
+ for (let i = 0; i < dstLen; i++) {
5448
+ const startPos = i * ratio;
5449
+ const endPos = (i + 1) * ratio;
5450
+ const startIdx = Math.floor(startPos);
5451
+ const endIdx = Math.min(Math.ceil(endPos), src.length);
5452
+ let sum = 0;
5453
+ let count = 0;
5454
+ for (let j = startIdx; j < endIdx; j++) {
5455
+ sum += src[j] ?? 0;
5456
+ count++;
5457
+ }
5458
+ dst[i] = count > 0 ? Math.round(sum / count) : 0;
5459
+ }
5460
+ return dst.buffer;
5461
+ }
5462
+
5415
5463
  // src/errors.ts
5416
5464
  var RecognitionError = class extends Error {
5417
5465
  constructor(errorType, message) {
@@ -5512,10 +5560,17 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5512
5560
  const retryConfig = config.connectionRetry || {};
5513
5561
  const maxAttempts = Math.max(1, Math.min(5, retryConfig.maxAttempts ?? 4));
5514
5562
  const delayMs = retryConfig.delayMs ?? 200;
5563
+ const normalizedASRConfig = config.asrRequestConfig ? {
5564
+ ...config.asrRequestConfig,
5565
+ encoding: AudioEncoding.coerce(
5566
+ config.asrRequestConfig.encoding,
5567
+ (warning) => config.logger?.("warn", warning)
5568
+ )
5569
+ } : void 0;
5515
5570
  this.config = {
5516
5571
  url,
5517
5572
  audioUtteranceId,
5518
- ...config.asrRequestConfig && { asrRequestConfig: config.asrRequestConfig },
5573
+ ...normalizedASRConfig && { asrRequestConfig: normalizedASRConfig },
5519
5574
  ...config.gameContext && { gameContext: config.gameContext },
5520
5575
  ...config.callbackUrls && { callbackUrls: config.callbackUrls },
5521
5576
  onTranscript: config.onTranscript || (() => {
@@ -5713,6 +5768,42 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5713
5768
  }
5714
5769
  this.sendAudioInternal(audioData);
5715
5770
  }
5771
+ /**
5772
+ * Send PCM16 mono audio captured at any sample rate. The SDK downsamples
5773
+ * to the session's target rate (currently 16 kHz per server validator)
5774
+ * before sending.
5775
+ *
5776
+ * Use this when your capture pipeline produces audio at the system's
5777
+ * native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
5778
+ * desktop/mobile hardware — and you don't want to bring your own
5779
+ * resampler. If your audio is already at the target rate, prefer
5780
+ * `sendAudio()` to skip the resample step.
5781
+ *
5782
+ * Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
5783
+ * dependencies, has a built-in low-pass effect so aliasing stays out of
5784
+ * the speech band. Suitable for ASR; not a substitute for a high-quality
5785
+ * resampler if you're doing music or full-fidelity processing.
5786
+ *
5787
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
5788
+ * mixed to mono by the caller.
5789
+ *
5790
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
5791
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
5792
+ */
5793
+ sendAudioWithSampleRate(audioData, sourceSampleRate) {
5794
+ const targetRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
5795
+ if (audioData instanceof Blob) {
5796
+ blobToArrayBuffer(audioData).then((arrayBuffer) => {
5797
+ this.sendAudioInternal(
5798
+ downsamplePcm16(arrayBuffer, sourceSampleRate, targetRate)
5799
+ );
5800
+ }).catch((error) => {
5801
+ this.log("warn", "Failed to convert Blob to ArrayBuffer", error);
5802
+ });
5803
+ return;
5804
+ }
5805
+ this.sendAudioInternal(downsamplePcm16(audioData, sourceSampleRate, targetRate));
5806
+ }
5716
5807
  sendAudioInternal(audioData) {
5717
5808
  const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
5718
5809
  if (bytes === 0) return;
@@ -5859,7 +5950,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5859
5950
  model: this.config.asrRequestConfig.model,
5860
5951
  language: this.config.asrRequestConfig.language?.toString() || "en",
5861
5952
  sampleRate: typeof this.config.asrRequestConfig.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000,
5862
- encoding: typeof this.config.asrRequestConfig.encoding === "number" ? this.config.asrRequestConfig.encoding : AudioEncoding.LINEAR16,
5953
+ encoding: this.config.asrRequestConfig.encoding,
5863
5954
  interimResults: this.config.asrRequestConfig.interimResults ?? false,
5864
5955
  // Auto-enable useContext if gameContext is provided, or use explicit value if set
5865
5956
  useContext: this.config.asrRequestConfig.useContext ?? !!this.config.gameContext,
@@ -6023,7 +6114,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
6023
6114
  */
6024
6115
  sendAudioNow(audioData) {
6025
6116
  const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
6026
- const encodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
6117
+ const encodingId = this.config.asrRequestConfig?.encoding ?? AudioEncoding.LINEAR16;
6027
6118
  const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
6028
6119
  super.sendAudio(
6029
6120
  audioData,
@@ -6091,7 +6182,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
6091
6182
  sendPrefixAudioNow(audioData) {
6092
6183
  const byteLength = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
6093
6184
  if (byteLength === 0) return;
6094
- const baseEncodingId = this.config.asrRequestConfig?.encoding || AudioEncoding.LINEAR16;
6185
+ const baseEncodingId = this.config.asrRequestConfig?.encoding ?? AudioEncoding.LINEAR16;
6095
6186
  const prefixEncodingId = baseEncodingId + PREFIX_AUDIO_ENCODING_OFFSET;
6096
6187
  const sampleRate = typeof this.config.asrRequestConfig?.sampleRate === "number" ? this.config.asrRequestConfig.sampleRate : SampleRate.RATE_16000;
6097
6188
  this.log("debug", "Sending prefix audio", { bytes: byteLength, encoding: prefixEncodingId });
@@ -6619,17 +6710,28 @@ var SimplifiedVGFRecognitionClient = class {
6619
6710
  await this.client.connect();
6620
6711
  }
6621
6712
  sendAudio(audioData) {
6622
- if (!this.isRecordingAudio) {
6623
- this.isRecordingAudio = true;
6624
- this.state = {
6625
- ...this.state,
6626
- startRecordingStatus: "RECORDING",
6627
- startRecordingTimestamp: (/* @__PURE__ */ new Date()).toISOString()
6628
- };
6629
- this.notifyStateChange();
6630
- }
6713
+ this.markRecordingStarted();
6631
6714
  this.client.sendAudio(audioData);
6632
6715
  }
6716
+ sendAudioWithSampleRate(audioData, sourceSampleRate) {
6717
+ this.markRecordingStarted();
6718
+ this.client.sendAudioWithSampleRate(audioData, sourceSampleRate);
6719
+ }
6720
+ /**
6721
+ * Set VGF recording status to RECORDING on the first audio chunk.
6722
+ * Idempotent — subsequent calls are no-ops until disconnect/stop resets
6723
+ * `isRecordingAudio`.
6724
+ */
6725
+ markRecordingStarted() {
6726
+ if (this.isRecordingAudio) return;
6727
+ this.isRecordingAudio = true;
6728
+ this.state = {
6729
+ ...this.state,
6730
+ startRecordingStatus: "RECORDING",
6731
+ startRecordingTimestamp: (/* @__PURE__ */ new Date()).toISOString()
6732
+ };
6733
+ this.notifyStateChange();
6734
+ }
6633
6735
  async stopRecording() {
6634
6736
  this.isRecordingAudio = false;
6635
6737
  this.state = updateStateOnStop(this.state);