@volley/recognition-client-sdk 0.1.800 → 0.1.806

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3769,6 +3769,13 @@ var DeepgramModel;
3769
3769
  DeepgramModel2["NOVA_3"] = "nova-3";
3770
3770
  DeepgramModel2["FLUX_GENERAL_EN"] = "flux-general-en";
3771
3771
  })(DeepgramModel || (DeepgramModel = {}));
3772
+ var AssemblyAIModel;
3773
+ (function(AssemblyAIModel2) {
3774
+ AssemblyAIModel2["DEFAULT"] = "default";
3775
+ AssemblyAIModel2["UNIVERSAL_STREAMING_ENGLISH"] = "universal-streaming-english";
3776
+ AssemblyAIModel2["UNIVERSAL_STREAMING_MULTILINGUAL"] = "universal-streaming-multilingual";
3777
+ AssemblyAIModel2["U3_RT_PRO"] = "u3-rt-pro";
3778
+ })(AssemblyAIModel || (AssemblyAIModel = {}));
3772
3779
  var GoogleModel;
3773
3780
  (function(GoogleModel2) {
3774
3781
  GoogleModel2["CHIRP_3"] = "chirp_3";
@@ -4499,12 +4506,26 @@ var RecognitionContextSchemaV1 = z.discriminatedUnion("type", [
4499
4506
  ASRRequestSchemaV1
4500
4507
  ]);
4501
4508
 
4509
+ // ../../libs/types/dist/microphone-source.types.js
4510
+ var MicrophoneSourceType;
4511
+ (function(MicrophoneSourceType2) {
4512
+ MicrophoneSourceType2["WEB"] = "web";
4513
+ MicrophoneSourceType2["NATIVE"] = "native";
4514
+ })(MicrophoneSourceType || (MicrophoneSourceType = {}));
4515
+ var DEFAULT_MICROPHONE_SOURCE_TYPE = MicrophoneSourceType.WEB;
4516
+ var NAME_TO_ENUM = {
4517
+ web: MicrophoneSourceType.WEB,
4518
+ native: MicrophoneSourceType.NATIVE
4519
+ };
4520
+
4502
4521
  // ../../libs/types/dist/recognition-query.types.js
4503
4522
  var RecognitionGameInfoSchema = z.object({
4504
4523
  userId: z.string().optional(),
4505
4524
  gameSessionId: z.string().optional(),
4506
4525
  deviceId: z.string().optional(),
4507
4526
  accountId: z.string().optional(),
4527
+ clientId: z.string().optional(),
4528
+ microphoneSourceType: z.nativeEnum(MicrophoneSourceType).optional(),
4508
4529
  gameId: z.string().optional(),
4509
4530
  gamePhase: z.string().optional(),
4510
4531
  questionAskedId: z.string().optional(),
@@ -4545,7 +4566,7 @@ var AudioFormat;
4545
4566
  [5, AudioFormat2.OPUS],
4546
4567
  [6, AudioFormat2.PCM]
4547
4568
  ]);
4548
- const NAME_TO_ENUM = /* @__PURE__ */ new Map([
4569
+ const NAME_TO_ENUM2 = /* @__PURE__ */ new Map([
4549
4570
  ["WAV", AudioFormat2.WAV],
4550
4571
  ["MP3", AudioFormat2.MP3],
4551
4572
  ["FLAC", AudioFormat2.FLAC],
@@ -4566,7 +4587,7 @@ var AudioFormat;
4566
4587
  }
4567
4588
  AudioFormat2.fromId = fromId;
4568
4589
  function fromName(nameStr) {
4569
- return NAME_TO_ENUM.get(nameStr.toUpperCase());
4590
+ return NAME_TO_ENUM2.get(nameStr.toUpperCase());
4570
4591
  }
4571
4592
  AudioFormat2.fromName = fromName;
4572
4593
  function toId(format) {
@@ -4582,7 +4603,7 @@ var AudioFormat;
4582
4603
  }
4583
4604
  AudioFormat2.isIdValid = isIdValid;
4584
4605
  function isNameValid(nameStr) {
4585
- return NAME_TO_ENUM.has(nameStr.toUpperCase());
4606
+ return NAME_TO_ENUM2.has(nameStr.toUpperCase());
4586
4607
  }
4587
4608
  AudioFormat2.isNameValid = isNameValid;
4588
4609
  })(AudioFormat || (AudioFormat = {}));
@@ -4604,7 +4625,7 @@ var AudioEncoding;
4604
4625
  [4, AudioEncoding2.MULAW],
4605
4626
  [5, AudioEncoding2.ALAW]
4606
4627
  ]);
4607
- const NAME_TO_ENUM = /* @__PURE__ */ new Map([
4628
+ const NAME_TO_ENUM2 = /* @__PURE__ */ new Map([
4608
4629
  ["ENCODING_UNSPECIFIED", AudioEncoding2.ENCODING_UNSPECIFIED],
4609
4630
  ["LINEAR16", AudioEncoding2.LINEAR16],
4610
4631
  ["OGG_OPUS", AudioEncoding2.OGG_OPUS],
@@ -4625,7 +4646,7 @@ var AudioEncoding;
4625
4646
  }
4626
4647
  AudioEncoding2.fromId = fromId;
4627
4648
  function fromName(nameStr) {
4628
- return NAME_TO_ENUM.get(nameStr.toUpperCase());
4649
+ return NAME_TO_ENUM2.get(nameStr.toUpperCase());
4629
4650
  }
4630
4651
  AudioEncoding2.fromName = fromName;
4631
4652
  function toId(encoding) {
@@ -4641,7 +4662,7 @@ var AudioEncoding;
4641
4662
  }
4642
4663
  AudioEncoding2.isIdValid = isIdValid;
4643
4664
  function isNameValid(nameStr) {
4644
- return NAME_TO_ENUM.has(nameStr.toUpperCase());
4665
+ return NAME_TO_ENUM2.has(nameStr.toUpperCase());
4645
4666
  }
4646
4667
  AudioEncoding2.isNameValid = isNameValid;
4647
4668
  function coerce2(value, onStringInput) {
@@ -4681,7 +4702,7 @@ var SampleRate;
4681
4702
  [44100, SampleRate2.RATE_44100],
4682
4703
  [48e3, SampleRate2.RATE_48000]
4683
4704
  ]);
4684
- const NAME_TO_ENUM = /* @__PURE__ */ new Map([
4705
+ const NAME_TO_ENUM2 = /* @__PURE__ */ new Map([
4685
4706
  ["RATE_8000", SampleRate2.RATE_8000],
4686
4707
  ["RATE_16000", SampleRate2.RATE_16000],
4687
4708
  ["RATE_22050", SampleRate2.RATE_22050],
@@ -4704,7 +4725,7 @@ var SampleRate;
4704
4725
  }
4705
4726
  SampleRate2.fromHz = fromHz;
4706
4727
  function fromName(nameStr) {
4707
- return NAME_TO_ENUM.get(nameStr.toUpperCase());
4728
+ return NAME_TO_ENUM2.get(nameStr.toUpperCase());
4708
4729
  }
4709
4730
  SampleRate2.fromName = fromName;
4710
4731
  function toHz(rate) {
@@ -4720,7 +4741,7 @@ var SampleRate;
4720
4741
  }
4721
4742
  SampleRate2.isHzValid = isHzValid;
4722
4743
  function isNameValid(nameStr) {
4723
- return NAME_TO_ENUM.has(nameStr.toUpperCase());
4744
+ return NAME_TO_ENUM2.has(nameStr.toUpperCase());
4724
4745
  }
4725
4746
  SampleRate2.isNameValid = isNameValid;
4726
4747
  })(SampleRate || (SampleRate = {}));
@@ -5190,6 +5211,12 @@ function buildWebSocketUrl(config) {
5190
5211
  if (config.questionAnswerId) {
5191
5212
  url.searchParams.set("questionAnswerId", config.questionAnswerId);
5192
5213
  }
5214
+ if (config.clientId) {
5215
+ url.searchParams.set("clientId", config.clientId);
5216
+ }
5217
+ if (config.microphoneSourceType) {
5218
+ url.searchParams.set("microphoneSourceType", config.microphoneSourceType);
5219
+ }
5193
5220
  if (config.platform) {
5194
5221
  url.searchParams.set("platform", config.platform);
5195
5222
  }
@@ -5535,6 +5562,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5535
5562
  ...config.deviceId && { deviceId: config.deviceId },
5536
5563
  ...config.accountId && { accountId: config.accountId },
5537
5564
  ...config.questionAnswerId && { questionAnswerId: config.questionAnswerId },
5565
+ ...config.clientId && { clientId: config.clientId },
5566
+ ...config.microphoneSourceType && { microphoneSourceType: config.microphoneSourceType },
5538
5567
  ...config.platform && { platform: config.platform },
5539
5568
  ...config.gameContext && { gameContext: config.gameContext },
5540
5569
  ...config.gameId && { gameId: config.gameId },
@@ -6292,6 +6321,21 @@ var ConfigBuilder = class {
6292
6321
  this.config.questionAnswerId = id;
6293
6322
  return this;
6294
6323
  }
6324
+ /**
6325
+ * Set client ID (for downstream tracking/audio-labeling)
6326
+ */
6327
+ clientId(id) {
6328
+ this.config.clientId = id;
6329
+ return this;
6330
+ }
6331
+ /**
6332
+ * Set the client audio capture path (MicrophoneSourceType enum or 'web' | 'native' string).
6333
+ * Defaults to 'web' on the server when omitted.
6334
+ */
6335
+ microphoneSourceType(source) {
6336
+ this.config.microphoneSourceType = source;
6337
+ return this;
6338
+ }
6295
6339
  /**
6296
6340
  * Set platform
6297
6341
  */
@@ -6461,6 +6505,14 @@ var RecognitionVGFStateSchema = z.object({
6461
6505
  // Confidence score for the function call.
6462
6506
  finalFunctionCallTimestamp: z.string().optional(),
6463
6507
  // When the final action after interpreting the transcript was taken. Immutable.
6508
+ // Session identity — when set, the VGF client backfills these into
6509
+ // GameContextV1 if the caller didn't pass a `gameContext` in config.
6510
+ // Lets RecognitionState be the single source of truth: server seeds
6511
+ // `gameId` + `gamePhase` + `promptSlotMap` per player, controller passes
6512
+ // the whole state as `initialState`, no separate `gameContext` needed.
6513
+ // Backward-compatible: if `gameContext` is also passed in config, it wins.
6514
+ gameId: z.string().optional(),
6515
+ gamePhase: z.string().optional(),
6464
6516
  // Support for prompt slot mapping - passed to recognition context when present
6465
6517
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional(),
6466
6518
  // Optional map of slot names to prompt values for recognition context
@@ -6507,6 +6559,9 @@ function createInitialRecognitionState(audioUtteranceId) {
6507
6559
  recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED
6508
6560
  };
6509
6561
  }
6562
+ function isTerminal(state) {
6563
+ return state.transcriptionStatus === TranscriptionStatus.FINALIZED || state.transcriptionStatus === TranscriptionStatus.ABORTED || state.transcriptionStatus === TranscriptionStatus.ERROR;
6564
+ }
6510
6565
  function isValidRecordingStatusTransition(from, to) {
6511
6566
  const statusOrder = [
6512
6567
  RecordingStatus.NOT_READY,
@@ -6659,6 +6714,13 @@ var SimplifiedVGFRecognitionClient = class {
6659
6714
  }
6660
6715
  this.state = { ...this.state, startRecordingStatus: "READY" };
6661
6716
  this.expectedUuid = this.state.audioUtteranceId;
6717
+ if (!clientConfig.gameContext && this.state.gameId && this.state.gamePhase) {
6718
+ clientConfig.gameContext = {
6719
+ type: RecognitionContextTypeV1.GAME_CONTEXT,
6720
+ gameId: this.state.gameId,
6721
+ gamePhase: this.state.gamePhase
6722
+ };
6723
+ }
6662
6724
  const hasPromptInputs = this.state.promptSlotMap !== void 0 || this.state.promptSTT !== void 0 || this.state.promptSTF !== void 0 || this.state.promptTTF !== void 0;
6663
6725
  if (hasPromptInputs) {
6664
6726
  if (clientConfig.asrRequestConfig) {
@@ -6666,7 +6728,7 @@ var SimplifiedVGFRecognitionClient = class {
6666
6728
  }
6667
6729
  if (!clientConfig.gameContext) {
6668
6730
  if (clientConfig.logger) {
6669
- clientConfig.logger("warn", "[VGF] prompt inputs found but no gameContext provided. They will not be sent.");
6731
+ clientConfig.logger("warn", "[VGF] prompt inputs found but no gameContext provided and state has no gameId/gamePhase. They will not be sent.");
6670
6732
  }
6671
6733
  } else {
6672
6734
  if (this.state.promptSlotMap !== void 0) {
@@ -6878,11 +6940,8 @@ var SimplifiedVGFRecognitionClient = class {
6878
6940
  getVGFState() {
6879
6941
  return { ...this.state };
6880
6942
  }
6881
- isTerminalStatus(status) {
6882
- return status === TranscriptionStatus.FINALIZED || status === TranscriptionStatus.ABORTED || status === TranscriptionStatus.ERROR;
6883
- }
6884
6943
  notifyStateChange() {
6885
- if (this.isTerminalStatus(this.state.transcriptionStatus)) {
6944
+ if (isTerminal(this.state)) {
6886
6945
  if (this.lastSentTerminalUuid === this.expectedUuid) {
6887
6946
  if (this.logger) {
6888
6947
  this.logger(
@@ -6908,11 +6967,44 @@ var SimplifiedVGFRecognitionClient = class {
6908
6967
  this.stateChangeCallback({ ...this.state });
6909
6968
  }
6910
6969
  };
6970
+ var SimplifiedVGFConfigBuilder = class extends ConfigBuilder {
6971
+ constructor() {
6972
+ super(...arguments);
6973
+ this.vgfConfig = {};
6974
+ }
6975
+ /**
6976
+ * Set the VGF state-change callback, fired whenever the VGF state updates.
6977
+ */
6978
+ onStateChange(callback) {
6979
+ this.vgfConfig.onStateChange = callback;
6980
+ return this;
6981
+ }
6982
+ /**
6983
+ * Set the initial VGF state to restore from a previous session.
6984
+ * The audioUtteranceId is extracted from it when valid.
6985
+ */
6986
+ initialState(state) {
6987
+ this.vgfConfig.initialState = state;
6988
+ return this;
6989
+ }
6990
+ /**
6991
+ * Build the SimplifiedVGFClientConfig (base fields + VGF fields).
6992
+ */
6993
+ build() {
6994
+ return { ...super.build(), ...this.vgfConfig };
6995
+ }
6996
+ };
6911
6997
  function createSimplifiedVGFClient(config) {
6912
6998
  return new SimplifiedVGFRecognitionClient(config);
6913
6999
  }
7000
+ function createSimplifiedVGFClientWithBuilder(configure) {
7001
+ const builder = new SimplifiedVGFConfigBuilder();
7002
+ const config = configure(builder).build();
7003
+ return new SimplifiedVGFRecognitionClient(config);
7004
+ }
6914
7005
  export {
6915
7006
  AmazonNovaSonicModel,
7007
+ AssemblyAIModel,
6916
7008
  AudioEncoding,
6917
7009
  AwsTranscribeModel,
6918
7010
  BedrockModel,
@@ -6933,6 +7025,7 @@ export {
6933
7025
  GladiaModel,
6934
7026
  GoogleModel,
6935
7027
  Language,
7028
+ MicrophoneSourceType,
6936
7029
  MistralVoxtralModel,
6937
7030
  OpenAIModel,
6938
7031
  OpenAIRealtimeModel,
@@ -6948,6 +7041,7 @@ export {
6948
7041
  STAGES,
6949
7042
  SampleRate,
6950
7043
  SelfServeVllmModel,
7044
+ SimplifiedVGFConfigBuilder,
6951
7045
  SimplifiedVGFRecognitionClient,
6952
7046
  TimeoutError,
6953
7047
  TranscriptionStatus,
@@ -6957,6 +7051,7 @@ export {
6957
7051
  createDefaultASRConfig,
6958
7052
  createInitialRecognitionState,
6959
7053
  createSimplifiedVGFClient,
7054
+ createSimplifiedVGFClientWithBuilder,
6960
7055
  getRecognitionConductorBase,
6961
7056
  getRecognitionConductorHost,
6962
7057
  getRecognitionConductorHttpBase,
@@ -6968,6 +7063,7 @@ export {
6968
7063
  getUserFriendlyMessage,
6969
7064
  isExceptionImmediatelyAvailable,
6970
7065
  isNormalDisconnection,
7066
+ isTerminal,
6971
7067
  isValidRecordingStatusTransition,
6972
7068
  normalizeStage,
6973
7069
  resetRecognitionVGFState