@volley/recognition-client-sdk 0.1.707 → 0.1.782

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
11
11
  export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
12
12
  export { AudioEncoding } from '@recog/websocket';
13
13
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
14
- type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
14
+ type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, AmazonNovaSonicModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
15
15
  export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
16
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -3747,6 +3747,9 @@ var RecognitionProvider;
3747
3747
  RecognitionProvider2["CARTESIA"] = "cartesia";
3748
3748
  RecognitionProvider2["DASHSCOPE"] = "dashscope";
3749
3749
  RecognitionProvider2["BEDROCK"] = "bedrock";
3750
+ RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
3751
+ RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
3752
+ RecognitionProvider2["AMAZON_NOVA_SONIC"] = "amazon-nova-sonic";
3750
3753
  RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
3751
3754
  RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
3752
3755
  })(RecognitionProvider || (RecognitionProvider = {}));
@@ -3797,6 +3800,7 @@ var ElevenLabsModel;
3797
3800
  })(ElevenLabsModel || (ElevenLabsModel = {}));
3798
3801
  var OpenAIRealtimeModel;
3799
3802
  (function(OpenAIRealtimeModel2) {
3803
+ OpenAIRealtimeModel2["GPT_REALTIME_WHISPER"] = "gpt-realtime-whisper";
3800
3804
  OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
3801
3805
  OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
3802
3806
  })(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
@@ -3819,6 +3823,19 @@ var BedrockModel;
3819
3823
  BedrockModel2["VOXTRAL_MINI_3B_2507"] = "mistral.voxtral-mini-3b-2507";
3820
3824
  BedrockModel2["VOXTRAL_SMALL_24B_2507"] = "mistral.voxtral-small-24b-2507";
3821
3825
  })(BedrockModel || (BedrockModel = {}));
3826
+ var InworldSttModel;
3827
+ (function(InworldSttModel2) {
3828
+ InworldSttModel2["INWORLD_STT_1"] = "inworld/inworld-stt-1";
3829
+ })(InworldSttModel || (InworldSttModel = {}));
3830
+ var AwsTranscribeModel;
3831
+ (function(AwsTranscribeModel2) {
3832
+ AwsTranscribeModel2["DEFAULT"] = "default";
3833
+ })(AwsTranscribeModel || (AwsTranscribeModel = {}));
3834
+ var AmazonNovaSonicModel;
3835
+ (function(AmazonNovaSonicModel2) {
3836
+ AmazonNovaSonicModel2["AMAZON_NOVA_SONIC_V1"] = "amazon.nova-sonic-v1:0";
3837
+ AmazonNovaSonicModel2["AMAZON_NOVA_2_SONIC"] = "amazon.nova-2-sonic-v1:0";
3838
+ })(AmazonNovaSonicModel || (AmazonNovaSonicModel = {}));
3822
3839
  var SelfServeVllmModel;
3823
3840
  (function(SelfServeVllmModel2) {
3824
3841
  SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
@@ -3835,6 +3852,18 @@ var RecognitionResultTypeV1;
3835
3852
  RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
3836
3853
  RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
3837
3854
  })(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
3855
+ var DetectionTypeV1;
3856
+ (function(DetectionTypeV12) {
3857
+ DetectionTypeV12["SEARCH"] = "search";
3858
+ })(DetectionTypeV1 || (DetectionTypeV1 = {}));
3859
+ var DetectionV1Schema = z.object({
3860
+ type: z.nativeEnum(DetectionTypeV1),
3861
+ query: z.string(),
3862
+ score: z.number().min(0).max(1),
3863
+ startMs: z.number().optional(),
3864
+ endMs: z.number().optional()
3865
+ // Audio time (ms from stream start) where the hit ends
3866
+ });
3838
3867
  var TranscriptionResultSchemaV1 = z.object({
3839
3868
  type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
3840
3869
  audioUtteranceId: z.string(),
@@ -3853,8 +3882,9 @@ var TranscriptionResultSchemaV1 = z.object({
3853
3882
  endTimestamp: z.number().optional(),
3854
3883
  receivedAtMs: z.number().optional(),
3855
3884
  accumulatedAudioTimeMs: z.number().optional(),
3856
- rawAudioTimeMs: z.number().optional()
3857
- // Total audio duration sent to provider (includes prefix)
3885
+ rawAudioTimeMs: z.number().optional(),
3886
+ detections: z.array(DetectionV1Schema).optional()
3887
+ // Provider-reported phrase detections (query + score, optionally startMs/endMs). Always populated when the provider returns hits, regardless of `appendSearch`. Other providers leave this undefined.
3858
3888
  });
3859
3889
  var FunctionCallResultSchemaV1 = z.object({
3860
3890
  type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
@@ -3944,9 +3974,9 @@ var ErrorResultSchemaV1 = z.object({
3944
3974
  // Detailed description
3945
3975
  });
3946
3976
  var ClientControlActionV1;
3947
- (function(ClientControlActionV13) {
3948
- ClientControlActionV13["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
3949
- ClientControlActionV13["STOP_RECORDING"] = "stop_recording";
3977
+ (function(ClientControlActionV12) {
3978
+ ClientControlActionV12["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
3979
+ ClientControlActionV12["STOP_RECORDING"] = "stop_recording";
3950
3980
  })(ClientControlActionV1 || (ClientControlActionV1 = {}));
3951
3981
  var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
3952
3982
  var ClientControlMessageSchemaV1 = z.object({
@@ -3979,6 +4009,8 @@ var AudioMetricsResultSchemaV1 = z.object({
3979
4009
  maxVolume: z.number(),
3980
4010
  minVolume: z.number(),
3981
4011
  avgVolume: z.number(),
4012
+ peakVolumeDb: z.number().nullable(),
4013
+ avgVolumeDb: z.number().nullable(),
3982
4014
  silenceRatio: z.number(),
3983
4015
  clippingRatio: z.number(),
3984
4016
  snrEstimate: z.number().nullable(),
@@ -3995,7 +4027,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
3995
4027
  // P1 - P2
3996
4028
  FunctionCallResultSchemaV1,
3997
4029
  ClientControlMessageSchemaV1,
3998
- SessionConfiguredSchemaV1
4030
+ SessionConfiguredSchemaV1,
4031
+ AudioMetricsResultSchemaV1
3999
4032
  ]);
4000
4033
 
4001
4034
  // ../../libs/types/dist/provider-transcription.types.js
@@ -4104,7 +4137,15 @@ var TranscriptMessageSchema = z.object({
4104
4137
  * @example true
4105
4138
  * @default false
4106
4139
  */
4107
- is_fallback: z.boolean().optional()
4140
+ is_fallback: z.boolean().optional(),
4141
+ /**
4142
+ * Provider-reported phrase detections (query + score, optionally
4143
+ * startMs/endMs). Always populated when the provider returns hits,
4144
+ * regardless of `appendSearch` or scene gating. Other providers leave
4145
+ * this undefined.
4146
+ * @example [{ query: 'justin bieber one time', score: 0.78, startMs: 1200, endMs: 2800 }]
4147
+ */
4148
+ detections: z.array(DetectionV1Schema).optional()
4108
4149
  });
4109
4150
  var VADEndSignalSchema = z.object({
4110
4151
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
@@ -4441,6 +4482,12 @@ var ASRRequestSchemaV1 = z.object({
4441
4482
  prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
4442
4483
  prefixId: z.string().optional(),
4443
4484
  prefixTextToRemove: z.array(z.string()).optional(),
4485
+ // Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
4486
+ // Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
4487
+ audioMetricsIntervalMs: z.number().optional(),
4488
+ // Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
4489
+ // Active only when (model = deepgram nova-2) AND (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch in asr-config.types.ts for full semantics.
4490
+ appendSearch: z.boolean().optional(),
4444
4491
  // Debug options (FOR DEBUG/TESTING ONLY - not for production use)
4445
4492
  debugCommand: RequestDebugCommandSchema
4446
4493
  });
@@ -5289,6 +5336,7 @@ var MessageHandler = class {
5289
5336
  /**
5290
5337
  * Handle incoming WebSocket message
5291
5338
  */
5339
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
5292
5340
  handleMessage(msg) {
5293
5341
  if (this.callbacks.logger) {
5294
5342
  this.callbacks.logger("debug", "[RecogSDK] Received WebSocket message", {
@@ -5327,6 +5375,9 @@ var MessageHandler = class {
5327
5375
  case RecognitionResultTypeV1.SESSION_CONFIGURED:
5328
5376
  this.callbacks.onSessionConfigured?.(msgData);
5329
5377
  break;
5378
+ case RecognitionResultTypeV1.AUDIO_METRICS:
5379
+ this.callbacks.onAudioMetrics?.(msgData);
5380
+ break;
5330
5381
  default:
5331
5382
  if (this.callbacks.logger) {
5332
5383
  this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
@@ -5474,6 +5525,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5474
5525
  onMetadata: config.onMetadata || (() => {
5475
5526
  }),
5476
5527
  onSessionConfigured: config.onSessionConfigured,
5528
+ onAudioMetrics: config.onAudioMetrics,
5477
5529
  onError: config.onError || (() => {
5478
5530
  }),
5479
5531
  onConnected: config.onConnected || (() => {
@@ -5502,6 +5554,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5502
5554
  onError: this.config.onError,
5503
5555
  onControlMessage: this.handleControlMessage.bind(this),
5504
5556
  onSessionConfigured: this.config.onSessionConfigured,
5557
+ onAudioMetrics: this.config.onAudioMetrics,
5505
5558
  ...this.config.logger && { logger: this.config.logger }
5506
5559
  });
5507
5560
  }
@@ -5827,6 +5880,16 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
5827
5880
  // Include prefix text to remove if provided (for server-side prefix text removal)
5828
5881
  ...this.config.asrRequestConfig.prefixTextToRemove && {
5829
5882
  prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
5883
+ },
5884
+ // Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
5885
+ ...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
5886
+ audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
5887
+ },
5888
+ // Opt-in: round-trip Deepgram nova-2 search-phrase hits into the
5889
+ // transcript. Only fires server-side when (model = nova-2) AND
5890
+ // (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch.
5891
+ ...this.config.asrRequestConfig.appendSearch !== void 0 && {
5892
+ appendSearch: this.config.asrRequestConfig.appendSearch
5830
5893
  }
5831
5894
  };
5832
5895
  super.sendMessage(
@@ -6181,6 +6244,14 @@ var ConfigBuilder = class {
6181
6244
  this.config.onSessionConfigured = callback;
6182
6245
  return this;
6183
6246
  }
6247
+ /**
6248
+ * Set streaming audio metrics callback (optional).
6249
+ * Only fires when `asrRequestConfig.audioMetricsIntervalMs > 0`.
6250
+ */
6251
+ onAudioMetrics(callback) {
6252
+ this.config.onAudioMetrics = callback;
6253
+ return this;
6254
+ }
6184
6255
  /**
6185
6256
  * Set error callback
6186
6257
  */
@@ -6233,6 +6304,7 @@ var ConfigBuilder = class {
6233
6304
  /**
6234
6305
  * Set logger function
6235
6306
  */
6307
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
6236
6308
  logger(logger) {
6237
6309
  this.config.logger = logger;
6238
6310
  return this;
@@ -6669,7 +6741,9 @@ function createSimplifiedVGFClient(config) {
6669
6741
  return new SimplifiedVGFRecognitionClient(config);
6670
6742
  }
6671
6743
  export {
6744
+ AmazonNovaSonicModel,
6672
6745
  AudioEncoding,
6746
+ AwsTranscribeModel,
6673
6747
  BedrockModel,
6674
6748
  CartesiaModel,
6675
6749
  ClientControlActionV1,