@volley/recognition-client-sdk 0.1.621 → 0.1.670

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAKnB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,aAAa,EAChB,MAAM,qBAAqB,CAAC;AAE7B;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAmBhF;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CACzC,YAAY,EAAE,gBAAgB,EAC9B,MAAM,EAAE,qBAAqB,EAC7B,WAAW,EAAE,OAAO,GACrB,gBAAgB,CAgDlB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC3B,YAAY,EAAE,gBAAgB,EAC9B,KAAK,EAAE,aAAa,GACrB,gBAAgB,CAOlB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,gBAAgB,CAU3F;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAMlF;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,wBAAwB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CASzF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAKnF;AAED;;;GAGG;AACH,wBAAgB,iCAAiC,CAC7C,UAAU,EAAE,MAAM,EAClB,WAAW,CAAC,EAAE,GAAG,GAClB;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiBnD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CACvC,YAAY,EAAE,gBAAgB,EAC9B,YAAY,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GACzD,gBAAgB,CAOlB"}
1
+ {"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAKnB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,aAAa,EAChB,MAAM,qBAAqB,CAAC;AAE7B;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAmBhF;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CACzC,YAAY,EAAE,gBAAgB,EAC9B,MAAM,EAAE,qBAAqB,EAC7B,WAAW,EAAE,OAAO,GACrB,gBAAgB,CAgElB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC3B,YAAY,EAAE,gBAAgB,EAC9B,KAAK,EAAE,aAAa,GACrB,gBAAgB,CAOlB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,gBAAgB,CAU3F;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAMlF;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,wBAAwB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAWzF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAKnF;AAED;;;GAGG;AACH,wBAAgB,iCAAiC,CAC7C,UAAU,EAAE,MAAM,EAClB,WAAW,CAAC,EAAE,GAAG,GAClB;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiBnD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CACvC,YAAY,EAAE,gBAAgB,EAC9B,YAAY,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GACzD,gBAAgB,CAOlB"}
@@ -18,6 +18,8 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
18
18
  transcriptionStatus: z.ZodOptional<z.ZodString>;
19
19
  finalTranscript: z.ZodOptional<z.ZodString>;
20
20
  finalConfidence: z.ZodOptional<z.ZodNumber>;
21
+ voiceEnd: z.ZodOptional<z.ZodNumber>;
22
+ lastNonSilence: z.ZodOptional<z.ZodNumber>;
21
23
  asrConfig: z.ZodOptional<z.ZodString>;
22
24
  startRecordingTimestamp: z.ZodOptional<z.ZodString>;
23
25
  finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
@@ -36,6 +38,8 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
36
38
  transcriptionStatus?: string | undefined;
37
39
  finalTranscript?: string | undefined;
38
40
  finalConfidence?: number | undefined;
41
+ voiceEnd?: number | undefined;
42
+ lastNonSilence?: number | undefined;
39
43
  asrConfig?: string | undefined;
40
44
  startRecordingTimestamp?: string | undefined;
41
45
  finalRecordingTimestamp?: string | undefined;
@@ -52,6 +56,8 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
52
56
  transcriptionStatus?: string | undefined;
53
57
  finalTranscript?: string | undefined;
54
58
  finalConfidence?: number | undefined;
59
+ voiceEnd?: number | undefined;
60
+ lastNonSilence?: number | undefined;
55
61
  asrConfig?: string | undefined;
56
62
  startRecordingTimestamp?: string | undefined;
57
63
  finalRecordingTimestamp?: string | undefined;
@@ -1 +1 @@
1
- {"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA6BpC,CAAA;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAA;AAGxE,eAAO,MAAM,eAAe;;;;;CAKlB,CAAA;AAEV,MAAM,MAAM,mBAAmB,GAAG,OAAO,eAAe,CAAC,MAAM,OAAO,eAAe,CAAC,CAAA;AAEtF,eAAO,MAAM,mBAAmB;;;;;;CAMtB,CAAA;AAEV,MAAM,MAAM,uBAAuB,GAAG,OAAO,mBAAmB,CAAC,MAAM,OAAO,mBAAmB,CAAC,CAAA;AAElG,eAAO,MAAM,gCAAgC;;;;CAInC,CAAA;AAEV,MAAM,MAAM,oCAAoC,GAAG,OAAO,gCAAgC,CAAC,MAAM,OAAO,gCAAgC,CAAC,CAAA;AAGzI,wBAAgB,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,GAAG,gBAAgB,CAQxF;AAGD,wBAAgB,gCAAgC,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAa9F"}
1
+ {"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiCpC,CAAA;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAA;AAGxE,eAAO,MAAM,eAAe;;;;;CAKlB,CAAA;AAEV,MAAM,MAAM,mBAAmB,GAAG,OAAO,eAAe,CAAC,MAAM,OAAO,eAAe,CAAC,CAAA;AAEtF,eAAO,MAAM,mBAAmB;;;;;;CAMtB,CAAA;AAEV,MAAM,MAAM,uBAAuB,GAAG,OAAO,mBAAmB,CAAC,MAAM,OAAO,mBAAmB,CAAC,CAAA;AAElG,eAAO,MAAM,gCAAgC;;;;CAInC,CAAA;AAEV,MAAM,MAAM,oCAAoC,GAAG,OAAO,gCAAgC,CAAC,MAAM,OAAO,gCAAgC,CAAC,CAAA;AAGzI,wBAAgB,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,GAAG,gBAAgB,CAQxF;AAGD,wBAAgB,gCAAgC,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAa9F"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@volley/recognition-client-sdk",
3
- "version": "0.1.621",
3
+ "version": "0.1.670",
4
4
  "description": "Recognition Service TypeScript/Node.js Client SDK",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -55,9 +55,9 @@
55
55
  "ts-jest": "29.2.5",
56
56
  "typescript": "5.1.6",
57
57
  "@recog/shared-config": "1.0.0",
58
+ "@recog/shared-types": "1.0.0",
58
59
  "@recog/shared-utils": "1.0.0",
59
- "@recog/websocket": "1.0.0",
60
- "@recog/shared-types": "1.0.0"
60
+ "@recog/websocket": "1.0.0"
61
61
  },
62
62
  "keywords": [
63
63
  "recognition",
@@ -0,0 +1,11 @@
1
+ import { CartesiaModel, RecognitionProvider, SelfServeVllmModel } from './index.js';
2
+
3
+ describe('SDK top-level exports', () => {
4
+ it('should export Cartesia and self-serve-vllm enums for downstream imports', () => {
5
+ expect(RecognitionProvider.CARTESIA).toBe('cartesia');
6
+ expect(RecognitionProvider.SELF_SERVE_VLLM).toBe('self-serve-vllm');
7
+ expect(CartesiaModel.INK_WHISPER).toBe('ink-whisper');
8
+ expect(CartesiaModel.INK_WHISPER_20250604).toBe('ink-whisper-2025-06-04');
9
+ expect(SelfServeVllmModel.QWEN3_ASR_1_7B).toBe('qwen3-asr-1.7b');
10
+ });
11
+ });
package/src/index.ts CHANGED
@@ -100,10 +100,14 @@ export {
100
100
  DeepgramModel,
101
101
  ElevenLabsModel,
102
102
  FireworksModel,
103
+ GladiaModel,
103
104
  GoogleModel,
104
105
  GeminiModel,
105
106
  OpenAIModel,
107
+ SelfServeVllmModel,
108
+ OpenAIRealtimeModel,
106
109
  MistralVoxtralModel,
110
+ CartesiaModel,
107
111
  DashScopeModel,
108
112
  Language,
109
113
  SampleRate,
@@ -369,7 +369,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
369
369
  const timeout = setTimeout(() => {
370
370
  if (settled) return;
371
371
  settled = true;
372
- this.log('warn', 'Connection timeout', { timeout: connectionTimeout, attempt });
372
+ this.log('warn', `Connection timeout url=${this.config.url}`, { timeout: connectionTimeout, attempt });
373
373
  this.state = ClientState.FAILED;
374
374
  reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
375
375
  }, connectionTimeout);
@@ -398,7 +398,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
398
398
  settled = true;
399
399
  clearTimeout(timeout);
400
400
 
401
- this.log('warn', 'Connection error', { error, attempt });
401
+ this.log('warn', `Connection error url=${this.config.url}`, { error, attempt });
402
402
  this.state = ClientState.FAILED;
403
403
 
404
404
  // Don't call originalOnError - it expects ErrorResultV1, not WebSocket Event
@@ -424,7 +424,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
424
424
  // Not the last attempt - wait before retry
425
425
  // Use info for first 2 retries (attempts 2-3), warn for 3rd retry (attempt 4)
426
426
  const logLevel = attempt < 3 ? 'info' : 'warn';
427
- this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
427
+ this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms url=${this.config.url}`, {
428
428
  error: lastError.message,
429
429
  nextAttempt: attempt + 1
430
430
  });
@@ -436,7 +436,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
436
436
  await new Promise(resolve => setTimeout(resolve, delayMs));
437
437
  } else {
438
438
  // Last attempt failed - all retries exhausted
439
- this.log('warn', `All ${maxAttempts} connection attempts failed`, {
439
+ this.log('warn', `All ${maxAttempts} connection attempts failed url=${this.config.url}`, {
440
440
  error: lastError.message
441
441
  });
442
442
  }
@@ -468,7 +468,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
468
468
  this.sendAudioInternal(arrayBuffer);
469
469
  })
470
470
  .catch((error) => {
471
- this.log('error', 'Failed to convert Blob to ArrayBuffer', error);
471
+ this.log('warn', 'Failed to convert Blob to ArrayBuffer', error);
472
472
  });
473
473
  return;
474
474
  }
@@ -522,7 +522,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
522
522
 
523
523
  async stopRecording(): Promise<void> {
524
524
  if (this.state !== ClientState.READY) {
525
- this.log('warn', 'stopRecording called but not in READY state', { state: this.state });
525
+ this.log('info', 'stopRecording called but not in READY state', { state: this.state });
526
526
  return;
527
527
  }
528
528
 
@@ -756,7 +756,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
756
756
  this.state === ClientState.READY ||
757
757
  this.state === ClientState.CONNECTING
758
758
  ) {
759
- this.log('error', '[DIAGNOSTIC] Unexpected disconnection', {
759
+ this.log('warn', '[DIAGNOSTIC] Unexpected disconnection', {
760
760
  code,
761
761
  codeDescription: closeCodeDescription,
762
762
  reason: reason || '(empty)',
@@ -908,7 +908,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
908
908
  this.sendPrefixAudioInternal(arrayBuffer);
909
909
  })
910
910
  .catch((error) => {
911
- this.log('error', 'Failed to convert Blob to ArrayBuffer for prefix audio', error);
911
+ this.log('warn', 'Failed to convert Blob to ArrayBuffer for prefix audio', error);
912
912
  });
913
913
  return;
914
914
  }
@@ -55,7 +55,7 @@ export class MessageHandler {
55
55
  // Log error if we receive primitive data (indicates server issue)
56
56
  if (msg.data && typeof msg.data !== 'object') {
57
57
  if (this.callbacks.logger) {
58
- this.callbacks.logger('error', '[RecogSDK] Received primitive msg.data from server', {
58
+ this.callbacks.logger('warn', '[RecogSDK] Received primitive msg.data from server', {
59
59
  dataType: typeof msg.data,
60
60
  data: msg.data,
61
61
  fullMessage: msg
@@ -86,6 +86,14 @@ export function mapTranscriptionResultToState(
86
86
  newState.finalConfidence = result.finalTranscriptConfidence;
87
87
  }
88
88
  }
89
+
90
+ // Update voice timing on every transcript message
91
+ if (result.voiceEnd !== undefined) {
92
+ newState.voiceEnd = result.voiceEnd;
93
+ }
94
+ if (result.lastNonSilence !== undefined) {
95
+ newState.lastNonSilence = result.lastNonSilence;
96
+ }
89
97
  } else {
90
98
  // Transcription is finished
91
99
  newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
@@ -95,6 +103,14 @@ export function mapTranscriptionResultToState(
95
103
  }
96
104
  newState.finalTranscriptionTimestamp = new Date().toISOString();
97
105
 
106
+ // Update voice timing on final transcript
107
+ if (result.voiceEnd !== undefined) {
108
+ newState.voiceEnd = result.voiceEnd;
109
+ }
110
+ if (result.lastNonSilence !== undefined) {
111
+ newState.lastNonSilence = result.lastNonSilence;
112
+ }
113
+
98
114
  // Clear pending when we have final
99
115
  newState.pendingTranscript = "";
100
116
  newState.pendingConfidence = undefined;
@@ -167,7 +183,9 @@ export function resetRecognitionVGFState(currentState: RecognitionState): Recogn
167
183
  transcriptionStatus: TranscriptionStatus.NOT_STARTED,
168
184
  startRecordingStatus: RecordingStatus.READY,
169
185
  recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
170
- finalTranscript: undefined
186
+ finalTranscript: undefined,
187
+ voiceEnd: undefined,
188
+ lastNonSilence: undefined
171
189
  };
172
190
  }
173
191
 
@@ -22,6 +22,10 @@ export const RecognitionVGFStateSchema = z.object({
22
22
  finalTranscript: z.string().optional(), // Full finalized transcript for the utterance. Will not change.
23
23
  finalConfidence: z.number().optional(),
24
24
 
25
+ // Voice timing (ms from stream start, prefix-adjusted)
26
+ voiceEnd: z.number().optional(), // voice end time identified by ASR
27
+ lastNonSilence: z.number().optional(), // last non-silence sample time from PCM analysis
28
+
25
29
  // Tracking-only metadata
26
30
  asrConfig: z.string().optional(), // Json format of the ASR config
27
31
  startRecordingTimestamp: z.string().optional(), // Start of recording. Immutable after set.