@volley/recognition-client-sdk 0.1.707 → 0.1.782
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/browser.bundled.d.ts +282 -116
- package/dist/config-builder.d.ts +6 -1
- package/dist/config-builder.d.ts.map +1 -1
- package/dist/index.bundled.d.ts +344 -164
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +81 -7
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +70 -7
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +7 -1
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/utils/message-handler.d.ts +2 -1
- package/dist/utils/message-handler.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/config-builder.ts +11 -0
- package/src/index.ts +3 -0
- package/src/recognition-client.ts +15 -0
- package/src/recognition-client.types.ts +9 -0
- package/src/utils/message-handler.ts +14 -3
- package/src/utils/url-builder.spec.ts +43 -0
|
@@ -3747,6 +3747,9 @@ var RecognitionProvider;
|
|
|
3747
3747
|
RecognitionProvider2["CARTESIA"] = "cartesia";
|
|
3748
3748
|
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3749
3749
|
RecognitionProvider2["BEDROCK"] = "bedrock";
|
|
3750
|
+
RecognitionProvider2["INWORLD_STT"] = "inworld-stt";
|
|
3751
|
+
RecognitionProvider2["AWS_TRANSCRIBE"] = "aws-transcribe";
|
|
3752
|
+
RecognitionProvider2["AMAZON_NOVA_SONIC"] = "amazon-nova-sonic";
|
|
3750
3753
|
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3751
3754
|
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
3752
3755
|
})(RecognitionProvider || (RecognitionProvider = {}));
|
|
@@ -3797,6 +3800,7 @@ var ElevenLabsModel;
|
|
|
3797
3800
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3798
3801
|
var OpenAIRealtimeModel;
|
|
3799
3802
|
(function(OpenAIRealtimeModel2) {
|
|
3803
|
+
OpenAIRealtimeModel2["GPT_REALTIME_WHISPER"] = "gpt-realtime-whisper";
|
|
3800
3804
|
OpenAIRealtimeModel2["GPT_4O_TRANSCRIBE"] = "gpt-4o-transcribe";
|
|
3801
3805
|
OpenAIRealtimeModel2["GPT_4O_MINI_TRANSCRIBE"] = "gpt-4o-mini-transcribe";
|
|
3802
3806
|
})(OpenAIRealtimeModel || (OpenAIRealtimeModel = {}));
|
|
@@ -3819,6 +3823,19 @@ var BedrockModel;
|
|
|
3819
3823
|
BedrockModel2["VOXTRAL_MINI_3B_2507"] = "mistral.voxtral-mini-3b-2507";
|
|
3820
3824
|
BedrockModel2["VOXTRAL_SMALL_24B_2507"] = "mistral.voxtral-small-24b-2507";
|
|
3821
3825
|
})(BedrockModel || (BedrockModel = {}));
|
|
3826
|
+
var InworldSttModel;
|
|
3827
|
+
(function(InworldSttModel2) {
|
|
3828
|
+
InworldSttModel2["INWORLD_STT_1"] = "inworld/inworld-stt-1";
|
|
3829
|
+
})(InworldSttModel || (InworldSttModel = {}));
|
|
3830
|
+
var AwsTranscribeModel;
|
|
3831
|
+
(function(AwsTranscribeModel2) {
|
|
3832
|
+
AwsTranscribeModel2["DEFAULT"] = "default";
|
|
3833
|
+
})(AwsTranscribeModel || (AwsTranscribeModel = {}));
|
|
3834
|
+
var AmazonNovaSonicModel;
|
|
3835
|
+
(function(AmazonNovaSonicModel2) {
|
|
3836
|
+
AmazonNovaSonicModel2["AMAZON_NOVA_SONIC_V1"] = "amazon.nova-sonic-v1:0";
|
|
3837
|
+
AmazonNovaSonicModel2["AMAZON_NOVA_2_SONIC"] = "amazon.nova-2-sonic-v1:0";
|
|
3838
|
+
})(AmazonNovaSonicModel || (AmazonNovaSonicModel = {}));
|
|
3822
3839
|
var SelfServeVllmModel;
|
|
3823
3840
|
(function(SelfServeVllmModel2) {
|
|
3824
3841
|
SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
|
|
@@ -3835,6 +3852,18 @@ var RecognitionResultTypeV1;
|
|
|
3835
3852
|
RecognitionResultTypeV12["AUDIO_METRICS"] = "AudioMetrics";
|
|
3836
3853
|
RecognitionResultTypeV12["SESSION_CONFIGURED"] = "SessionConfigured";
|
|
3837
3854
|
})(RecognitionResultTypeV1 || (RecognitionResultTypeV1 = {}));
|
|
3855
|
+
var DetectionTypeV1;
|
|
3856
|
+
(function(DetectionTypeV12) {
|
|
3857
|
+
DetectionTypeV12["SEARCH"] = "search";
|
|
3858
|
+
})(DetectionTypeV1 || (DetectionTypeV1 = {}));
|
|
3859
|
+
var DetectionV1Schema = z.object({
|
|
3860
|
+
type: z.nativeEnum(DetectionTypeV1),
|
|
3861
|
+
query: z.string(),
|
|
3862
|
+
score: z.number().min(0).max(1),
|
|
3863
|
+
startMs: z.number().optional(),
|
|
3864
|
+
endMs: z.number().optional()
|
|
3865
|
+
// Audio time (ms from stream start) where the hit ends
|
|
3866
|
+
});
|
|
3838
3867
|
var TranscriptionResultSchemaV1 = z.object({
|
|
3839
3868
|
type: z.literal(RecognitionResultTypeV1.TRANSCRIPTION),
|
|
3840
3869
|
audioUtteranceId: z.string(),
|
|
@@ -3853,8 +3882,9 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3853
3882
|
endTimestamp: z.number().optional(),
|
|
3854
3883
|
receivedAtMs: z.number().optional(),
|
|
3855
3884
|
accumulatedAudioTimeMs: z.number().optional(),
|
|
3856
|
-
rawAudioTimeMs: z.number().optional()
|
|
3857
|
-
|
|
3885
|
+
rawAudioTimeMs: z.number().optional(),
|
|
3886
|
+
detections: z.array(DetectionV1Schema).optional()
|
|
3887
|
+
// Provider-reported phrase detections (query + score, optionally startMs/endMs). Always populated when the provider returns hits, regardless of `appendSearch`. Other providers leave this undefined.
|
|
3858
3888
|
});
|
|
3859
3889
|
var FunctionCallResultSchemaV1 = z.object({
|
|
3860
3890
|
type: z.literal(RecognitionResultTypeV1.FUNCTION_CALL),
|
|
@@ -3944,9 +3974,9 @@ var ErrorResultSchemaV1 = z.object({
|
|
|
3944
3974
|
// Detailed description
|
|
3945
3975
|
});
|
|
3946
3976
|
var ClientControlActionV1;
|
|
3947
|
-
(function(
|
|
3948
|
-
|
|
3949
|
-
|
|
3977
|
+
(function(ClientControlActionV12) {
|
|
3978
|
+
ClientControlActionV12["READY_FOR_UPLOADING_RECORDING"] = "ready_for_uploading_recording";
|
|
3979
|
+
ClientControlActionV12["STOP_RECORDING"] = "stop_recording";
|
|
3950
3980
|
})(ClientControlActionV1 || (ClientControlActionV1 = {}));
|
|
3951
3981
|
var ClientControlActionsV1 = z.nativeEnum(ClientControlActionV1);
|
|
3952
3982
|
var ClientControlMessageSchemaV1 = z.object({
|
|
@@ -3979,6 +4009,8 @@ var AudioMetricsResultSchemaV1 = z.object({
|
|
|
3979
4009
|
maxVolume: z.number(),
|
|
3980
4010
|
minVolume: z.number(),
|
|
3981
4011
|
avgVolume: z.number(),
|
|
4012
|
+
peakVolumeDb: z.number().nullable(),
|
|
4013
|
+
avgVolumeDb: z.number().nullable(),
|
|
3982
4014
|
silenceRatio: z.number(),
|
|
3983
4015
|
clippingRatio: z.number(),
|
|
3984
4016
|
snrEstimate: z.number().nullable(),
|
|
@@ -3995,7 +4027,8 @@ var RecognitionResultSchemaV1 = z.discriminatedUnion("type", [
|
|
|
3995
4027
|
// P1 - P2
|
|
3996
4028
|
FunctionCallResultSchemaV1,
|
|
3997
4029
|
ClientControlMessageSchemaV1,
|
|
3998
|
-
SessionConfiguredSchemaV1
|
|
4030
|
+
SessionConfiguredSchemaV1,
|
|
4031
|
+
AudioMetricsResultSchemaV1
|
|
3999
4032
|
]);
|
|
4000
4033
|
|
|
4001
4034
|
// ../../libs/types/dist/provider-transcription.types.js
|
|
@@ -4104,7 +4137,15 @@ var TranscriptMessageSchema = z.object({
|
|
|
4104
4137
|
* @example true
|
|
4105
4138
|
* @default false
|
|
4106
4139
|
*/
|
|
4107
|
-
is_fallback: z.boolean().optional()
|
|
4140
|
+
is_fallback: z.boolean().optional(),
|
|
4141
|
+
/**
|
|
4142
|
+
* Provider-reported phrase detections (query + score, optionally
|
|
4143
|
+
* startMs/endMs). Always populated when the provider returns hits,
|
|
4144
|
+
* regardless of `appendSearch` or scene gating. Other providers leave
|
|
4145
|
+
* this undefined.
|
|
4146
|
+
* @example [{ query: 'justin bieber one time', score: 0.78, startMs: 1200, endMs: 2800 }]
|
|
4147
|
+
*/
|
|
4148
|
+
detections: z.array(DetectionV1Schema).optional()
|
|
4108
4149
|
});
|
|
4109
4150
|
var VADEndSignalSchema = z.object({
|
|
4110
4151
|
type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
|
|
@@ -4418,6 +4459,12 @@ var ASRRequestSchemaV1 = z.object({
|
|
|
4418
4459
|
prefixMode: z.nativeEnum(PrefixMode).optional().default(PrefixMode.NONE),
|
|
4419
4460
|
prefixId: z.string().optional(),
|
|
4420
4461
|
prefixTextToRemove: z.array(z.string()).optional(),
|
|
4462
|
+
// Streaming audio metrics opt-in: when > 0, server emits AudioMetrics results throttled to this interval (ms).
|
|
4463
|
+
// Undefined / 0 disables streaming audio metrics (final metrics still embedded in Metadata).
|
|
4464
|
+
audioMetricsIntervalMs: z.number().optional(),
|
|
4465
|
+
// Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
|
|
4466
|
+
// Active only when (model = deepgram nova-2) AND (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch in asr-config.types.ts for full semantics.
|
|
4467
|
+
appendSearch: z.boolean().optional(),
|
|
4421
4468
|
// Debug options (FOR DEBUG/TESTING ONLY - not for production use)
|
|
4422
4469
|
debugCommand: RequestDebugCommandSchema
|
|
4423
4470
|
});
|
|
@@ -5221,6 +5268,7 @@ var MessageHandler = class {
|
|
|
5221
5268
|
/**
|
|
5222
5269
|
* Handle incoming WebSocket message
|
|
5223
5270
|
*/
|
|
5271
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
5224
5272
|
handleMessage(msg) {
|
|
5225
5273
|
if (this.callbacks.logger) {
|
|
5226
5274
|
this.callbacks.logger("debug", "[RecogSDK] Received WebSocket message", {
|
|
@@ -5259,6 +5307,9 @@ var MessageHandler = class {
|
|
|
5259
5307
|
case RecognitionResultTypeV1.SESSION_CONFIGURED:
|
|
5260
5308
|
this.callbacks.onSessionConfigured?.(msgData);
|
|
5261
5309
|
break;
|
|
5310
|
+
case RecognitionResultTypeV1.AUDIO_METRICS:
|
|
5311
|
+
this.callbacks.onAudioMetrics?.(msgData);
|
|
5312
|
+
break;
|
|
5262
5313
|
default:
|
|
5263
5314
|
if (this.callbacks.logger) {
|
|
5264
5315
|
this.callbacks.logger("debug", "[RecogSDK] Unknown message type", { type: msgType });
|
|
@@ -5380,6 +5431,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5380
5431
|
onMetadata: config.onMetadata || (() => {
|
|
5381
5432
|
}),
|
|
5382
5433
|
onSessionConfigured: config.onSessionConfigured,
|
|
5434
|
+
onAudioMetrics: config.onAudioMetrics,
|
|
5383
5435
|
onError: config.onError || (() => {
|
|
5384
5436
|
}),
|
|
5385
5437
|
onConnected: config.onConnected || (() => {
|
|
@@ -5408,6 +5460,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5408
5460
|
onError: this.config.onError,
|
|
5409
5461
|
onControlMessage: this.handleControlMessage.bind(this),
|
|
5410
5462
|
onSessionConfigured: this.config.onSessionConfigured,
|
|
5463
|
+
onAudioMetrics: this.config.onAudioMetrics,
|
|
5411
5464
|
...this.config.logger && { logger: this.config.logger }
|
|
5412
5465
|
});
|
|
5413
5466
|
}
|
|
@@ -5733,6 +5786,16 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5733
5786
|
// Include prefix text to remove if provided (for server-side prefix text removal)
|
|
5734
5787
|
...this.config.asrRequestConfig.prefixTextToRemove && {
|
|
5735
5788
|
prefixTextToRemove: this.config.asrRequestConfig.prefixTextToRemove
|
|
5789
|
+
},
|
|
5790
|
+
// Streaming audio metrics opt-in (ms interval). Server only forwards metrics if > 0.
|
|
5791
|
+
...this.config.asrRequestConfig.audioMetricsIntervalMs !== void 0 && {
|
|
5792
|
+
audioMetricsIntervalMs: this.config.asrRequestConfig.audioMetricsIntervalMs
|
|
5793
|
+
},
|
|
5794
|
+
// Opt-in: round-trip Deepgram nova-2 search-phrase hits into the
|
|
5795
|
+
// transcript. Only fires server-side when (model = nova-2) AND
|
|
5796
|
+
// (GameContext.gamePhase = 'Solve Puzzle'). See ASRRequestConfig.appendSearch.
|
|
5797
|
+
...this.config.asrRequestConfig.appendSearch !== void 0 && {
|
|
5798
|
+
appendSearch: this.config.asrRequestConfig.appendSearch
|
|
5736
5799
|
}
|
|
5737
5800
|
};
|
|
5738
5801
|
super.sendMessage(
|