@volley/recognition-client-sdk 0.1.621 → 0.1.670
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +36 -3
- package/dist/index.bundled.d.ts +88 -49
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +53 -11
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +29 -10
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +6 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/index.spec.ts +11 -0
- package/src/index.ts +4 -0
- package/src/recognition-client.ts +8 -8
- package/src/utils/message-handler.ts +1 -1
- package/src/vgf-recognition-mapper.ts +19 -1
- package/src/vgf-recognition-state.ts +4 -0
package/dist/index.js
CHANGED
|
@@ -3737,11 +3737,14 @@ var RecognitionProvider;
|
|
|
3737
3737
|
RecognitionProvider2["DEEPGRAM"] = "deepgram";
|
|
3738
3738
|
RecognitionProvider2["ELEVENLABS"] = "elevenlabs";
|
|
3739
3739
|
RecognitionProvider2["FIREWORKS"] = "fireworks";
|
|
3740
|
+
RecognitionProvider2["GLADIA"] = "gladia";
|
|
3740
3741
|
RecognitionProvider2["GOOGLE"] = "google";
|
|
3741
3742
|
RecognitionProvider2["GEMINI_BATCH"] = "gemini-batch";
|
|
3742
3743
|
RecognitionProvider2["OPENAI_BATCH"] = "openai-batch";
|
|
3744
|
+
RecognitionProvider2["SELF_SERVE_VLLM"] = "self-serve-vllm";
|
|
3743
3745
|
RecognitionProvider2["OPENAI_REALTIME"] = "openai-realtime";
|
|
3744
3746
|
RecognitionProvider2["MISTRAL_VOXTRAL"] = "mistral-voxtral";
|
|
3747
|
+
RecognitionProvider2["CARTESIA"] = "cartesia";
|
|
3745
3748
|
RecognitionProvider2["DASHSCOPE"] = "dashscope";
|
|
3746
3749
|
RecognitionProvider2["TEST_ASR_PROVIDER_QUOTA"] = "test-asr-provider-quota";
|
|
3747
3750
|
RecognitionProvider2["TEST_ASR_STREAMING"] = "test-asr-streaming";
|
|
@@ -3783,10 +3786,13 @@ var FireworksModel;
|
|
|
3783
3786
|
FireworksModel2["WHISPER_V3"] = "whisper-v3";
|
|
3784
3787
|
FireworksModel2["WHISPER_V3_TURBO"] = "whisper-v3-turbo";
|
|
3785
3788
|
})(FireworksModel || (FireworksModel = {}));
|
|
3789
|
+
var GladiaModel;
|
|
3790
|
+
(function(GladiaModel2) {
|
|
3791
|
+
GladiaModel2["SOLARIA_1"] = "solaria-1";
|
|
3792
|
+
})(GladiaModel || (GladiaModel = {}));
|
|
3786
3793
|
var ElevenLabsModel;
|
|
3787
3794
|
(function(ElevenLabsModel2) {
|
|
3788
3795
|
ElevenLabsModel2["SCRIBE_V2_REALTIME"] = "scribe_v2_realtime";
|
|
3789
|
-
ElevenLabsModel2["SCRIBE_V1"] = "scribe_v1";
|
|
3790
3796
|
})(ElevenLabsModel || (ElevenLabsModel = {}));
|
|
3791
3797
|
var OpenAIRealtimeModel;
|
|
3792
3798
|
(function(OpenAIRealtimeModel2) {
|
|
@@ -3797,11 +3803,20 @@ var MistralVoxtralModel;
|
|
|
3797
3803
|
(function(MistralVoxtralModel2) {
|
|
3798
3804
|
MistralVoxtralModel2["VOXTRAL_MINI_REALTIME_2602"] = "voxtral-mini-transcribe-realtime-2602";
|
|
3799
3805
|
})(MistralVoxtralModel || (MistralVoxtralModel = {}));
|
|
3806
|
+
var CartesiaModel;
|
|
3807
|
+
(function(CartesiaModel2) {
|
|
3808
|
+
CartesiaModel2["INK_WHISPER"] = "ink-whisper";
|
|
3809
|
+
CartesiaModel2["INK_WHISPER_20250604"] = "ink-whisper-2025-06-04";
|
|
3810
|
+
})(CartesiaModel || (CartesiaModel = {}));
|
|
3800
3811
|
var DashScopeModel;
|
|
3801
3812
|
(function(DashScopeModel2) {
|
|
3802
3813
|
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME_2602"] = "qwen3-asr-flash-realtime-2026-02-10";
|
|
3803
3814
|
DashScopeModel2["QWEN3_ASR_FLASH_REALTIME"] = "qwen3-asr-flash-realtime";
|
|
3804
3815
|
})(DashScopeModel || (DashScopeModel = {}));
|
|
3816
|
+
var SelfServeVllmModel;
|
|
3817
|
+
(function(SelfServeVllmModel2) {
|
|
3818
|
+
SelfServeVllmModel2["QWEN3_ASR_1_7B"] = "qwen3-asr-1.7b";
|
|
3819
|
+
})(SelfServeVllmModel || (SelfServeVllmModel = {}));
|
|
3805
3820
|
|
|
3806
3821
|
// ../../libs/types/dist/recognition-result-v1.types.js
|
|
3807
3822
|
var RecognitionResultTypeV1;
|
|
@@ -3826,6 +3841,7 @@ var TranscriptionResultSchemaV1 = z.object({
|
|
|
3826
3841
|
voiceStart: z.number().optional(),
|
|
3827
3842
|
voiceDuration: z.number().optional(),
|
|
3828
3843
|
voiceEnd: z.number().optional(),
|
|
3844
|
+
lastNonSilence: z.number().optional(),
|
|
3829
3845
|
startTimestamp: z.number().optional(),
|
|
3830
3846
|
endTimestamp: z.number().optional(),
|
|
3831
3847
|
receivedAtMs: z.number().optional(),
|
|
@@ -3873,6 +3889,9 @@ var MetadataResultSchemaV1 = z.object({
|
|
|
3873
3889
|
costInUSD: z.number().default(0).optional(),
|
|
3874
3890
|
// ASR API Type
|
|
3875
3891
|
apiType: z.nativeEnum(ASRApiType).optional(),
|
|
3892
|
+
// Provider identification
|
|
3893
|
+
provider: z.string().optional(),
|
|
3894
|
+
model: z.string().optional(),
|
|
3876
3895
|
// ASR configuration as JSON string (no type validation)
|
|
3877
3896
|
asrConfig: z.string().optional(),
|
|
3878
3897
|
// Raw ASR metadata payload as provided by the provider (stringified if needed)
|
|
@@ -5248,7 +5267,7 @@ var MessageHandler = class {
|
|
|
5248
5267
|
}
|
|
5249
5268
|
if (msg.data && typeof msg.data !== "object") {
|
|
5250
5269
|
if (this.callbacks.logger) {
|
|
5251
|
-
this.callbacks.logger("
|
|
5270
|
+
this.callbacks.logger("warn", "[RecogSDK] Received primitive msg.data from server", {
|
|
5252
5271
|
dataType: typeof msg.data,
|
|
5253
5272
|
data: msg.data,
|
|
5254
5273
|
fullMessage: msg
|
|
@@ -5529,7 +5548,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5529
5548
|
const timeout = setTimeout(() => {
|
|
5530
5549
|
if (settled) return;
|
|
5531
5550
|
settled = true;
|
|
5532
|
-
this.log("warn",
|
|
5551
|
+
this.log("warn", `Connection timeout url=${this.config.url}`, { timeout: connectionTimeout, attempt });
|
|
5533
5552
|
this.state = "failed" /* FAILED */;
|
|
5534
5553
|
reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
|
|
5535
5554
|
}, connectionTimeout);
|
|
@@ -5551,7 +5570,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5551
5570
|
if (settled) return;
|
|
5552
5571
|
settled = true;
|
|
5553
5572
|
clearTimeout(timeout);
|
|
5554
|
-
this.log("warn",
|
|
5573
|
+
this.log("warn", `Connection error url=${this.config.url}`, { error, attempt });
|
|
5555
5574
|
this.state = "failed" /* FAILED */;
|
|
5556
5575
|
reject(error);
|
|
5557
5576
|
};
|
|
@@ -5566,14 +5585,14 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5566
5585
|
lastError = error;
|
|
5567
5586
|
if (attempt < maxAttempts) {
|
|
5568
5587
|
const logLevel = attempt < 3 ? "info" : "warn";
|
|
5569
|
-
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
|
|
5588
|
+
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms url=${this.config.url}`, {
|
|
5570
5589
|
error: lastError.message,
|
|
5571
5590
|
nextAttempt: attempt + 1
|
|
5572
5591
|
});
|
|
5573
5592
|
this.state = "initial" /* INITIAL */;
|
|
5574
5593
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
5575
5594
|
} else {
|
|
5576
|
-
this.log("warn", `All ${maxAttempts} connection attempts failed`, {
|
|
5595
|
+
this.log("warn", `All ${maxAttempts} connection attempts failed url=${this.config.url}`, {
|
|
5577
5596
|
error: lastError.message
|
|
5578
5597
|
});
|
|
5579
5598
|
}
|
|
@@ -5596,7 +5615,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5596
5615
|
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5597
5616
|
this.sendAudioInternal(arrayBuffer);
|
|
5598
5617
|
}).catch((error) => {
|
|
5599
|
-
this.log("
|
|
5618
|
+
this.log("warn", "Failed to convert Blob to ArrayBuffer", error);
|
|
5600
5619
|
});
|
|
5601
5620
|
return;
|
|
5602
5621
|
}
|
|
@@ -5636,7 +5655,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5636
5655
|
*/
|
|
5637
5656
|
async stopRecording() {
|
|
5638
5657
|
if (this.state !== "ready" /* READY */) {
|
|
5639
|
-
this.log("
|
|
5658
|
+
this.log("info", "stopRecording called but not in READY state", { state: this.state });
|
|
5640
5659
|
return;
|
|
5641
5660
|
}
|
|
5642
5661
|
this.log("debug", "Stopping recording");
|
|
@@ -5806,7 +5825,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5806
5825
|
if (this.state === "stopping" /* STOPPING */) {
|
|
5807
5826
|
this.state = "stopped" /* STOPPED */;
|
|
5808
5827
|
} else if (this.state === "connected" /* CONNECTED */ || this.state === "ready" /* READY */ || this.state === "connecting" /* CONNECTING */) {
|
|
5809
|
-
this.log("
|
|
5828
|
+
this.log("warn", "[DIAGNOSTIC] Unexpected disconnection", {
|
|
5810
5829
|
code,
|
|
5811
5830
|
codeDescription: closeCodeDescription,
|
|
5812
5831
|
reason: reason || "(empty)",
|
|
@@ -5928,7 +5947,7 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
|
|
|
5928
5947
|
blobToArrayBuffer(audioData).then((arrayBuffer) => {
|
|
5929
5948
|
this.sendPrefixAudioInternal(arrayBuffer);
|
|
5930
5949
|
}).catch((error) => {
|
|
5931
|
-
this.log("
|
|
5950
|
+
this.log("warn", "Failed to convert Blob to ArrayBuffer for prefix audio", error);
|
|
5932
5951
|
});
|
|
5933
5952
|
return;
|
|
5934
5953
|
}
|
|
@@ -6187,6 +6206,11 @@ var RecognitionVGFStateSchema = z.object({
|
|
|
6187
6206
|
finalTranscript: z.string().optional(),
|
|
6188
6207
|
// Full finalized transcript for the utterance. Will not change.
|
|
6189
6208
|
finalConfidence: z.number().optional(),
|
|
6209
|
+
// Voice timing (ms from stream start, prefix-adjusted)
|
|
6210
|
+
voiceEnd: z.number().optional(),
|
|
6211
|
+
// voice end time identified by ASR
|
|
6212
|
+
lastNonSilence: z.number().optional(),
|
|
6213
|
+
// last non-silence sample time from PCM analysis
|
|
6190
6214
|
// Tracking-only metadata
|
|
6191
6215
|
asrConfig: z.string().optional(),
|
|
6192
6216
|
// Json format of the ASR config
|
|
@@ -6275,6 +6299,12 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
|
|
|
6275
6299
|
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
6276
6300
|
}
|
|
6277
6301
|
}
|
|
6302
|
+
if (result.voiceEnd !== void 0) {
|
|
6303
|
+
newState.voiceEnd = result.voiceEnd;
|
|
6304
|
+
}
|
|
6305
|
+
if (result.lastNonSilence !== void 0) {
|
|
6306
|
+
newState.lastNonSilence = result.lastNonSilence;
|
|
6307
|
+
}
|
|
6278
6308
|
} else {
|
|
6279
6309
|
newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
|
|
6280
6310
|
newState.finalTranscript = result.finalTranscript || "";
|
|
@@ -6282,6 +6312,12 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
|
|
|
6282
6312
|
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
6283
6313
|
}
|
|
6284
6314
|
newState.finalTranscriptionTimestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
6315
|
+
if (result.voiceEnd !== void 0) {
|
|
6316
|
+
newState.voiceEnd = result.voiceEnd;
|
|
6317
|
+
}
|
|
6318
|
+
if (result.lastNonSilence !== void 0) {
|
|
6319
|
+
newState.lastNonSilence = result.lastNonSilence;
|
|
6320
|
+
}
|
|
6285
6321
|
newState.pendingTranscript = "";
|
|
6286
6322
|
newState.pendingConfidence = void 0;
|
|
6287
6323
|
}
|
|
@@ -6317,7 +6353,9 @@ function resetRecognitionVGFState(currentState) {
|
|
|
6317
6353
|
transcriptionStatus: TranscriptionStatus.NOT_STARTED,
|
|
6318
6354
|
startRecordingStatus: RecordingStatus.READY,
|
|
6319
6355
|
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
|
|
6320
|
-
finalTranscript: void 0
|
|
6356
|
+
finalTranscript: void 0,
|
|
6357
|
+
voiceEnd: void 0,
|
|
6358
|
+
lastNonSilence: void 0
|
|
6321
6359
|
};
|
|
6322
6360
|
}
|
|
6323
6361
|
function generateUUID() {
|
|
@@ -6571,6 +6609,7 @@ function createSimplifiedVGFClient(config) {
|
|
|
6571
6609
|
}
|
|
6572
6610
|
export {
|
|
6573
6611
|
AudioEncoding,
|
|
6612
|
+
CartesiaModel,
|
|
6574
6613
|
ClientControlActionV1,
|
|
6575
6614
|
ClientState,
|
|
6576
6615
|
ConfigBuilder,
|
|
@@ -6584,10 +6623,12 @@ export {
|
|
|
6584
6623
|
FinalTranscriptStability,
|
|
6585
6624
|
FireworksModel,
|
|
6586
6625
|
GeminiModel,
|
|
6626
|
+
GladiaModel,
|
|
6587
6627
|
GoogleModel,
|
|
6588
6628
|
Language,
|
|
6589
6629
|
MistralVoxtralModel,
|
|
6590
6630
|
OpenAIModel,
|
|
6631
|
+
OpenAIRealtimeModel,
|
|
6591
6632
|
RECOGNITION_CONDUCTOR_BASES,
|
|
6592
6633
|
RECOGNITION_SERVICE_BASES,
|
|
6593
6634
|
RealTimeTwoWayWebSocketRecognitionClient,
|
|
@@ -6599,6 +6640,7 @@ export {
|
|
|
6599
6640
|
RecordingStatus,
|
|
6600
6641
|
STAGES,
|
|
6601
6642
|
SampleRate,
|
|
6643
|
+
SelfServeVllmModel,
|
|
6602
6644
|
SimplifiedVGFRecognitionClient,
|
|
6603
6645
|
TimeoutError,
|
|
6604
6646
|
TranscriptionStatus,
|