@volley/recognition-client-sdk 0.1.621 → 0.1.670
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +36 -3
- package/dist/index.bundled.d.ts +88 -49
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +53 -11
- package/dist/index.js.map +3 -3
- package/dist/recog-client-sdk.browser.js +29 -10
- package/dist/recog-client-sdk.browser.js.map +3 -3
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +6 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/index.spec.ts +11 -0
- package/src/index.ts +4 -0
- package/src/recognition-client.ts +8 -8
- package/src/utils/message-handler.ts +1 -1
- package/src/vgf-recognition-mapper.ts +19 -1
- package/src/vgf-recognition-state.ts +4 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAKnB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,aAAa,EAChB,MAAM,qBAAqB,CAAC;AAE7B;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAmBhF;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CACzC,YAAY,EAAE,gBAAgB,EAC9B,MAAM,EAAE,qBAAqB,EAC7B,WAAW,EAAE,OAAO,GACrB,gBAAgB,
|
|
1
|
+
{"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAKnB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,aAAa,EAChB,MAAM,qBAAqB,CAAC;AAE7B;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAmBhF;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CACzC,YAAY,EAAE,gBAAgB,EAC9B,MAAM,EAAE,qBAAqB,EAC7B,WAAW,EAAE,OAAO,GACrB,gBAAgB,CAgElB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC3B,YAAY,EAAE,gBAAgB,EAC9B,KAAK,EAAE,aAAa,GACrB,gBAAgB,CAOlB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,gBAAgB,CAU3F;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAMlF;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,wBAAwB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAWzF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAKnF;AAED;;;GAGG;AACH,wBAAgB,iCAAiC,CAC7C,UAAU,EAAE,MAAM,EAClB,WAAW,CAAC,EAAE,GAAG,GAClB;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiBnD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CACvC,YAAY,EAAE,gBAAgB,EAC9B,YAAY,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GACzD,gBAAgB,CAOlB"}
|
|
@@ -18,6 +18,8 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
18
18
|
transcriptionStatus: z.ZodOptional<z.ZodString>;
|
|
19
19
|
finalTranscript: z.ZodOptional<z.ZodString>;
|
|
20
20
|
finalConfidence: z.ZodOptional<z.ZodNumber>;
|
|
21
|
+
voiceEnd: z.ZodOptional<z.ZodNumber>;
|
|
22
|
+
lastNonSilence: z.ZodOptional<z.ZodNumber>;
|
|
21
23
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
22
24
|
startRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
23
25
|
finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
@@ -36,6 +38,8 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
36
38
|
transcriptionStatus?: string | undefined;
|
|
37
39
|
finalTranscript?: string | undefined;
|
|
38
40
|
finalConfidence?: number | undefined;
|
|
41
|
+
voiceEnd?: number | undefined;
|
|
42
|
+
lastNonSilence?: number | undefined;
|
|
39
43
|
asrConfig?: string | undefined;
|
|
40
44
|
startRecordingTimestamp?: string | undefined;
|
|
41
45
|
finalRecordingTimestamp?: string | undefined;
|
|
@@ -52,6 +56,8 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
52
56
|
transcriptionStatus?: string | undefined;
|
|
53
57
|
finalTranscript?: string | undefined;
|
|
54
58
|
finalConfidence?: number | undefined;
|
|
59
|
+
voiceEnd?: number | undefined;
|
|
60
|
+
lastNonSilence?: number | undefined;
|
|
55
61
|
asrConfig?: string | undefined;
|
|
56
62
|
startRecordingTimestamp?: string | undefined;
|
|
57
63
|
finalRecordingTimestamp?: string | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB
|
|
1
|
+
{"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiCpC,CAAA;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAA;AAGxE,eAAO,MAAM,eAAe;;;;;CAKlB,CAAA;AAEV,MAAM,MAAM,mBAAmB,GAAG,OAAO,eAAe,CAAC,MAAM,OAAO,eAAe,CAAC,CAAA;AAEtF,eAAO,MAAM,mBAAmB;;;;;;CAMtB,CAAA;AAEV,MAAM,MAAM,uBAAuB,GAAG,OAAO,mBAAmB,CAAC,MAAM,OAAO,mBAAmB,CAAC,CAAA;AAElG,eAAO,MAAM,gCAAgC;;;;CAInC,CAAA;AAEV,MAAM,MAAM,oCAAoC,GAAG,OAAO,gCAAgC,CAAC,MAAM,OAAO,gCAAgC,CAAC,CAAA;AAGzI,wBAAgB,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,GAAG,gBAAgB,CAQxF;AAGD,wBAAgB,gCAAgC,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAa9F"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@volley/recognition-client-sdk",
|
|
3
|
-
"version": "0.1.621",
|
|
3
|
+
"version": "0.1.670",
|
|
4
4
|
"description": "Recognition Service TypeScript/Node.js Client SDK",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -55,9 +55,9 @@
|
|
|
55
55
|
"ts-jest": "29.2.5",
|
|
56
56
|
"typescript": "5.1.6",
|
|
57
57
|
"@recog/shared-config": "1.0.0",
|
|
58
|
+
"@recog/shared-types": "1.0.0",
|
|
58
59
|
"@recog/shared-utils": "1.0.0",
|
|
59
|
-
"@recog/websocket": "1.0.0"
|
|
60
|
-
"@recog/shared-types": "1.0.0"
|
|
60
|
+
"@recog/websocket": "1.0.0"
|
|
61
61
|
},
|
|
62
62
|
"keywords": [
|
|
63
63
|
"recognition",
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { CartesiaModel, RecognitionProvider, SelfServeVllmModel } from './index.js';
|
|
2
|
+
|
|
3
|
+
describe('SDK top-level exports', () => {
|
|
4
|
+
it('should export Cartesia and self-serve-vllm enums for downstream imports', () => {
|
|
5
|
+
expect(RecognitionProvider.CARTESIA).toBe('cartesia');
|
|
6
|
+
expect(RecognitionProvider.SELF_SERVE_VLLM).toBe('self-serve-vllm');
|
|
7
|
+
expect(CartesiaModel.INK_WHISPER).toBe('ink-whisper');
|
|
8
|
+
expect(CartesiaModel.INK_WHISPER_20250604).toBe('ink-whisper-2025-06-04');
|
|
9
|
+
expect(SelfServeVllmModel.QWEN3_ASR_1_7B).toBe('qwen3-asr-1.7b');
|
|
10
|
+
});
|
|
11
|
+
});
|
package/src/index.ts
CHANGED
|
@@ -100,10 +100,14 @@ export {
|
|
|
100
100
|
DeepgramModel,
|
|
101
101
|
ElevenLabsModel,
|
|
102
102
|
FireworksModel,
|
|
103
|
+
GladiaModel,
|
|
103
104
|
GoogleModel,
|
|
104
105
|
GeminiModel,
|
|
105
106
|
OpenAIModel,
|
|
107
|
+
SelfServeVllmModel,
|
|
108
|
+
OpenAIRealtimeModel,
|
|
106
109
|
MistralVoxtralModel,
|
|
110
|
+
CartesiaModel,
|
|
107
111
|
DashScopeModel,
|
|
108
112
|
Language,
|
|
109
113
|
SampleRate,
|
|
@@ -369,7 +369,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
369
369
|
const timeout = setTimeout(() => {
|
|
370
370
|
if (settled) return;
|
|
371
371
|
settled = true;
|
|
372
|
-
this.log('warn',
|
|
372
|
+
this.log('warn', `Connection timeout url=${this.config.url}`, { timeout: connectionTimeout, attempt });
|
|
373
373
|
this.state = ClientState.FAILED;
|
|
374
374
|
reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
|
|
375
375
|
}, connectionTimeout);
|
|
@@ -398,7 +398,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
398
398
|
settled = true;
|
|
399
399
|
clearTimeout(timeout);
|
|
400
400
|
|
|
401
|
-
this.log('warn',
|
|
401
|
+
this.log('warn', `Connection error url=${this.config.url}`, { error, attempt });
|
|
402
402
|
this.state = ClientState.FAILED;
|
|
403
403
|
|
|
404
404
|
// Don't call originalOnError - it expects ErrorResultV1, not WebSocket Event
|
|
@@ -424,7 +424,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
424
424
|
// Not the last attempt - wait before retry
|
|
425
425
|
// Use info for first 2 retries (attempts 2-3), warn for 3rd retry (attempt 4)
|
|
426
426
|
const logLevel = attempt < 3 ? 'info' : 'warn';
|
|
427
|
-
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
|
|
427
|
+
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms url=${this.config.url}`, {
|
|
428
428
|
error: lastError.message,
|
|
429
429
|
nextAttempt: attempt + 1
|
|
430
430
|
});
|
|
@@ -436,7 +436,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
436
436
|
await new Promise(resolve => setTimeout(resolve, delayMs));
|
|
437
437
|
} else {
|
|
438
438
|
// Last attempt failed - all retries exhausted
|
|
439
|
-
this.log('warn', `All ${maxAttempts} connection attempts failed`, {
|
|
439
|
+
this.log('warn', `All ${maxAttempts} connection attempts failed url=${this.config.url}`, {
|
|
440
440
|
error: lastError.message
|
|
441
441
|
});
|
|
442
442
|
}
|
|
@@ -468,7 +468,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
468
468
|
this.sendAudioInternal(arrayBuffer);
|
|
469
469
|
})
|
|
470
470
|
.catch((error) => {
|
|
471
|
-
this.log('
|
|
471
|
+
this.log('warn', 'Failed to convert Blob to ArrayBuffer', error);
|
|
472
472
|
});
|
|
473
473
|
return;
|
|
474
474
|
}
|
|
@@ -522,7 +522,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
522
522
|
|
|
523
523
|
async stopRecording(): Promise<void> {
|
|
524
524
|
if (this.state !== ClientState.READY) {
|
|
525
|
-
this.log('
|
|
525
|
+
this.log('info', 'stopRecording called but not in READY state', { state: this.state });
|
|
526
526
|
return;
|
|
527
527
|
}
|
|
528
528
|
|
|
@@ -756,7 +756,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
756
756
|
this.state === ClientState.READY ||
|
|
757
757
|
this.state === ClientState.CONNECTING
|
|
758
758
|
) {
|
|
759
|
-
this.log('
|
|
759
|
+
this.log('warn', '[DIAGNOSTIC] Unexpected disconnection', {
|
|
760
760
|
code,
|
|
761
761
|
codeDescription: closeCodeDescription,
|
|
762
762
|
reason: reason || '(empty)',
|
|
@@ -908,7 +908,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
908
908
|
this.sendPrefixAudioInternal(arrayBuffer);
|
|
909
909
|
})
|
|
910
910
|
.catch((error) => {
|
|
911
|
-
this.log('
|
|
911
|
+
this.log('warn', 'Failed to convert Blob to ArrayBuffer for prefix audio', error);
|
|
912
912
|
});
|
|
913
913
|
return;
|
|
914
914
|
}
|
|
@@ -55,7 +55,7 @@ export class MessageHandler {
|
|
|
55
55
|
// Log error if we receive primitive data (indicates server issue)
|
|
56
56
|
if (msg.data && typeof msg.data !== 'object') {
|
|
57
57
|
if (this.callbacks.logger) {
|
|
58
|
-
this.callbacks.logger('
|
|
58
|
+
this.callbacks.logger('warn', '[RecogSDK] Received primitive msg.data from server', {
|
|
59
59
|
dataType: typeof msg.data,
|
|
60
60
|
data: msg.data,
|
|
61
61
|
fullMessage: msg
|
|
@@ -86,6 +86,14 @@ export function mapTranscriptionResultToState(
|
|
|
86
86
|
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
87
87
|
}
|
|
88
88
|
}
|
|
89
|
+
|
|
90
|
+
// Update voice timing on every transcript message
|
|
91
|
+
if (result.voiceEnd !== undefined) {
|
|
92
|
+
newState.voiceEnd = result.voiceEnd;
|
|
93
|
+
}
|
|
94
|
+
if (result.lastNonSilence !== undefined) {
|
|
95
|
+
newState.lastNonSilence = result.lastNonSilence;
|
|
96
|
+
}
|
|
89
97
|
} else {
|
|
90
98
|
// Transcription is finished
|
|
91
99
|
newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
|
|
@@ -95,6 +103,14 @@ export function mapTranscriptionResultToState(
|
|
|
95
103
|
}
|
|
96
104
|
newState.finalTranscriptionTimestamp = new Date().toISOString();
|
|
97
105
|
|
|
106
|
+
// Update voice timing on final transcript
|
|
107
|
+
if (result.voiceEnd !== undefined) {
|
|
108
|
+
newState.voiceEnd = result.voiceEnd;
|
|
109
|
+
}
|
|
110
|
+
if (result.lastNonSilence !== undefined) {
|
|
111
|
+
newState.lastNonSilence = result.lastNonSilence;
|
|
112
|
+
}
|
|
113
|
+
|
|
98
114
|
// Clear pending when we have final
|
|
99
115
|
newState.pendingTranscript = "";
|
|
100
116
|
newState.pendingConfidence = undefined;
|
|
@@ -167,7 +183,9 @@ export function resetRecognitionVGFState(currentState: RecognitionState): Recogn
|
|
|
167
183
|
transcriptionStatus: TranscriptionStatus.NOT_STARTED,
|
|
168
184
|
startRecordingStatus: RecordingStatus.READY,
|
|
169
185
|
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
|
|
170
|
-
finalTranscript: undefined
|
|
186
|
+
finalTranscript: undefined,
|
|
187
|
+
voiceEnd: undefined,
|
|
188
|
+
lastNonSilence: undefined
|
|
171
189
|
};
|
|
172
190
|
}
|
|
173
191
|
|
|
@@ -22,6 +22,10 @@ export const RecognitionVGFStateSchema = z.object({
|
|
|
22
22
|
finalTranscript: z.string().optional(), // Full finalized transcript for the utterance. Will not change.
|
|
23
23
|
finalConfidence: z.number().optional(),
|
|
24
24
|
|
|
25
|
+
// Voice timing (ms from stream start, prefix-adjusted)
|
|
26
|
+
voiceEnd: z.number().optional(), // voice end time identified by ASR
|
|
27
|
+
lastNonSilence: z.number().optional(), // last non-silence sample time from PCM analysis
|
|
28
|
+
|
|
25
29
|
// Tracking-only metadata
|
|
26
30
|
asrConfig: z.string().optional(), // Json format of the ASR config
|
|
27
31
|
startRecordingTimestamp: z.string().optional(), // Start of recording. Immutable after set.
|