@volley/recognition-client-sdk 0.1.255 → 0.1.294
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.d.ts +10 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/config-builder.d.ts +129 -0
- package/dist/config-builder.d.ts.map +1 -0
- package/dist/errors.d.ts +41 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/factory.d.ts +36 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/index.d.ts +15 -1079
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4293 -645
- package/dist/index.js.map +7 -1
- package/dist/recog-client-sdk.browser.d.ts +10 -2
- package/dist/recog-client-sdk.browser.d.ts.map +1 -0
- package/dist/recog-client-sdk.browser.js +4127 -525
- package/dist/recog-client-sdk.browser.js.map +7 -1
- package/dist/recognition-client.d.ts +120 -0
- package/dist/recognition-client.d.ts.map +1 -0
- package/dist/recognition-client.types.d.ts +265 -0
- package/dist/recognition-client.types.d.ts.map +1 -0
- package/dist/simplified-vgf-recognition-client.d.ts +174 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
- package/dist/utils/audio-ring-buffer.d.ts +69 -0
- package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
- package/dist/utils/message-handler.d.ts +45 -0
- package/dist/utils/message-handler.d.ts.map +1 -0
- package/dist/utils/url-builder.d.ts +26 -0
- package/dist/utils/url-builder.d.ts.map +1 -0
- package/dist/vgf-recognition-mapper.d.ts +53 -0
- package/dist/vgf-recognition-mapper.d.ts.map +1 -0
- package/dist/vgf-recognition-state.d.ts +82 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -0
- package/package.json +7 -8
- package/src/index.ts +4 -0
- package/src/recognition-client.spec.ts +147 -14
- package/src/recognition-client.ts +27 -0
- package/src/recognition-client.types.ts +19 -0
- package/src/simplified-vgf-recognition-client.spec.ts +246 -0
- package/src/simplified-vgf-recognition-client.ts +58 -1
- package/src/utils/url-builder.spec.ts +5 -3
- package/src/vgf-recognition-state.ts +2 -1
- package/dist/browser-BZs4BL_w.d.ts +0 -1118
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Message Handler for Recognition Client
|
|
3
|
+
* Routes incoming WebSocket messages to appropriate callbacks
|
|
4
|
+
*/
|
|
5
|
+
import { type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type ErrorResultV1, type ClientControlMessageV1 } from '@recog/shared-types';
|
|
6
|
+
export interface MessageHandlerCallbacks {
|
|
7
|
+
onTranscript: (result: TranscriptionResultV1) => void;
|
|
8
|
+
onFunctionCall: (result: FunctionCallResultV1) => void;
|
|
9
|
+
onMetadata: (metadata: MetadataResultV1) => void;
|
|
10
|
+
onError: (error: ErrorResultV1) => void;
|
|
11
|
+
onControlMessage: (msg: ClientControlMessageV1) => void;
|
|
12
|
+
logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
|
|
13
|
+
}
|
|
14
|
+
export declare class MessageHandler {
|
|
15
|
+
private firstTranscriptTime;
|
|
16
|
+
private sessionStartTime;
|
|
17
|
+
private callbacks;
|
|
18
|
+
constructor(callbacks: MessageHandlerCallbacks);
|
|
19
|
+
/**
|
|
20
|
+
* Set session start time for performance tracking
|
|
21
|
+
*/
|
|
22
|
+
setSessionStartTime(time: number): void;
|
|
23
|
+
/**
|
|
24
|
+
* Handle incoming WebSocket message
|
|
25
|
+
*/
|
|
26
|
+
handleMessage(msg: {
|
|
27
|
+
v: number;
|
|
28
|
+
type: string;
|
|
29
|
+
data: any;
|
|
30
|
+
}): void;
|
|
31
|
+
/**
|
|
32
|
+
* Handle transcript message and track performance metrics
|
|
33
|
+
* @param result - The transcription result from the server
|
|
34
|
+
*/
|
|
35
|
+
private handleTranscription;
|
|
36
|
+
/**
|
|
37
|
+
* Get performance metrics
|
|
38
|
+
*/
|
|
39
|
+
getMetrics(): {
|
|
40
|
+
sessionStartTime: number | null;
|
|
41
|
+
firstTranscriptTime: number | null;
|
|
42
|
+
timeToFirstTranscript: number | null;
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
//# sourceMappingURL=message-handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"message-handler.d.ts","sourceRoot":"","sources":["../../src/utils/message-handler.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAGL,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,KAAK,sBAAsB,EAC5B,MAAM,qBAAqB,CAAC;AAE7B,MAAM,WAAW,uBAAuB;IACtC,YAAY,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,CAAC;IACtD,cAAc,EAAE,CAAC,MAAM,EAAE,oBAAoB,KAAK,IAAI,CAAC;IACvD,UAAU,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,CAAC;IACjD,OAAO,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,CAAC;IACxC,gBAAgB,EAAE,CAAC,GAAG,EAAE,sBAAsB,KAAK,IAAI,CAAC;IACxD,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,CAAC;CAC5F;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,mBAAmB,CAAuB;IAClD,OAAO,CAAC,gBAAgB,CAAuB;IAC/C,OAAO,CAAC,SAAS,CAA0B;gBAE/B,SAAS,EAAE,uBAAuB;IAI9C;;OAEG;IACH,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAIvC;;OAEG;IACH,aAAa,CAAC,GAAG,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,GAAG,CAAA;KAAE,GAAG,IAAI;IAsDhE;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAgB3B;;OAEG;IACH,UAAU;;;;;CAUX"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Builder for Recognition Client
|
|
3
|
+
* Handles WebSocket URL construction with query parameters
|
|
4
|
+
*/
|
|
5
|
+
import type { GameContextV1, Stage } from '@recog/shared-types';
|
|
6
|
+
import type { RecognitionCallbackUrl } from '../recognition-client.types.js';
|
|
7
|
+
export interface UrlBuilderConfig {
|
|
8
|
+
url?: string;
|
|
9
|
+
stage?: Stage | string;
|
|
10
|
+
audioUtteranceId: string;
|
|
11
|
+
callbackUrls?: RecognitionCallbackUrl[];
|
|
12
|
+
userId?: string;
|
|
13
|
+
gameSessionId?: string;
|
|
14
|
+
deviceId?: string;
|
|
15
|
+
accountId?: string;
|
|
16
|
+
questionAnswerId?: string;
|
|
17
|
+
platform?: string;
|
|
18
|
+
gameContext?: GameContextV1;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Build WebSocket URL with all query parameters
|
|
22
|
+
* Either `url` or `stage` must be provided (or defaults to production if neither provided)
|
|
23
|
+
* If both are provided, `url` takes precedence over `stage`
|
|
24
|
+
*/
|
|
25
|
+
export declare function buildWebSocketUrl(config: UrlBuilderConfig): string;
|
|
26
|
+
//# sourceMappingURL=url-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-builder.d.ts","sourceRoot":"","sources":["../../src/utils/url-builder.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAC;AAE7E,MAAM,WAAW,gBAAgB;IAC/B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,CAAC,EAAE,sBAAsB,EAAE,CAAC;IACxC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,aAAa,CAAC;CAC7B;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,MAAM,CAwDlE"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VGF Recognition Mapper
|
|
3
|
+
*
|
|
4
|
+
* Maps between the existing recognition client types and the simplified VGF state.
|
|
5
|
+
* This provides a clean abstraction layer for game developers.
|
|
6
|
+
*/
|
|
7
|
+
import { RecognitionState } from './vgf-recognition-state.js';
|
|
8
|
+
import { ClientState, IRecognitionClientConfig } from './recognition-client.types.js';
|
|
9
|
+
import { TranscriptionResultV1, MetadataResultV1, ErrorResultV1 } from '@recog/shared-types';
|
|
10
|
+
/**
|
|
11
|
+
* Maps ClientState to RecordingStatus for VGF state
|
|
12
|
+
*/
|
|
13
|
+
export declare function mapClientStateToRecordingStatus(clientState: ClientState): string;
|
|
14
|
+
/**
|
|
15
|
+
* Creates a VGF state from transcription result
|
|
16
|
+
*/
|
|
17
|
+
export declare function mapTranscriptionResultToState(currentState: RecognitionState, result: TranscriptionResultV1, isRecording: boolean): RecognitionState;
|
|
18
|
+
/**
|
|
19
|
+
* Maps metadata result to update state timestamps
|
|
20
|
+
*/
|
|
21
|
+
export declare function mapMetadataToState(currentState: RecognitionState, metadata: MetadataResultV1): RecognitionState;
|
|
22
|
+
/**
|
|
23
|
+
* Maps error to state
|
|
24
|
+
*/
|
|
25
|
+
export declare function mapErrorToState(currentState: RecognitionState, error: ErrorResultV1): RecognitionState;
|
|
26
|
+
/**
|
|
27
|
+
* Creates initial VGF state from client config
|
|
28
|
+
*/
|
|
29
|
+
export declare function createVGFStateFromConfig(config: IRecognitionClientConfig): RecognitionState;
|
|
30
|
+
/**
|
|
31
|
+
* Updates state when recording stops
|
|
32
|
+
*/
|
|
33
|
+
export declare function updateStateOnStop(currentState: RecognitionState): RecognitionState;
|
|
34
|
+
/**
|
|
35
|
+
* Updates state when client becomes ready
|
|
36
|
+
*/
|
|
37
|
+
export declare function updateStateOnReady(currentState: RecognitionState): RecognitionState;
|
|
38
|
+
/**
|
|
39
|
+
* Parses function call from transcript (STEP 3 support)
|
|
40
|
+
* This is a placeholder - actual implementation would use NLP/LLM
|
|
41
|
+
*/
|
|
42
|
+
export declare function extractFunctionCallFromTranscript(transcript: string, gameContext?: any): {
|
|
43
|
+
metadata?: string;
|
|
44
|
+
confidence?: number;
|
|
45
|
+
} | null;
|
|
46
|
+
/**
|
|
47
|
+
* Updates state with function call results (STEP 3)
|
|
48
|
+
*/
|
|
49
|
+
export declare function updateStateWithFunctionCall(currentState: RecognitionState, functionCall: {
|
|
50
|
+
metadata?: string;
|
|
51
|
+
confidence?: number;
|
|
52
|
+
}): RecognitionState;
|
|
53
|
+
//# sourceMappingURL=vgf-recognition-mapper.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAInB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,gBAAgB,EAChB,aAAa,EAEhB,MAAM,qBAAqB,CAAC;AAE7B;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAmBhF;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CACzC,YAAY,EAAE,gBAAgB,EAC9B,MAAM,EAAE,qBAAqB,EAC7B,WAAW,EAAE,OAAO,GACrB,gBAAgB,CAgDlB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAC9B,YAAY,EAAE,gBAAgB,EAC9B,QAAQ,EAAE,gBAAgB,GAC3B,gBAAgB,CAYlB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC3B,YAAY,EAAE,gBAAgB,EAC9B,KAAK,EAAE,aAAa,GACrB,gBAAgB,CAOlB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,gBAAgB,CAU3F;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAMlF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAKnF;AAED;;;GAGG;AACH,wBAAgB,iCAAiC,CAC7C,UAAU,EAAE,MAAM,EAClB,WAAW,CAAC,EAAE,GAAG,GAClB;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAiBnD;AAED;;GAEG;AACH,wBAAgB,2BAA2B,CACvC,YAAY,EAAE,gBAAgB,EAC9B,YAAY,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAA;CAAE,GACzD,gBAAgB,CAOlB"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
|
|
3
|
+
* VGF-style state schema for game-side recognition state/results management.
|
|
4
|
+
*
|
|
5
|
+
* This schema provides a standardized way for game developers to manage
|
|
6
|
+
* voice recognition state and results in their applications. It supports:
|
|
7
|
+
*
|
|
8
|
+
* STEP 1: Basic transcription flow
|
|
9
|
+
* STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
|
|
10
|
+
* STEP 3: Semantic/function-call outcomes for game actions
|
|
11
|
+
*
|
|
12
|
+
* Ideally this should be part of a more centralized shared type library to free
|
|
13
|
+
* game developers and provide helper functions (VGF? Platform SDK?).
|
|
14
|
+
*/
|
|
15
|
+
export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
16
|
+
audioUtteranceId: z.ZodString;
|
|
17
|
+
startRecordingStatus: z.ZodOptional<z.ZodString>;
|
|
18
|
+
transcriptionStatus: z.ZodOptional<z.ZodString>;
|
|
19
|
+
finalTranscript: z.ZodOptional<z.ZodString>;
|
|
20
|
+
finalConfidence: z.ZodOptional<z.ZodNumber>;
|
|
21
|
+
asrConfig: z.ZodOptional<z.ZodString>;
|
|
22
|
+
startRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
23
|
+
finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
24
|
+
finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
|
|
25
|
+
pendingTranscript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
26
|
+
pendingConfidence: z.ZodOptional<z.ZodNumber>;
|
|
27
|
+
functionCallMetadata: z.ZodOptional<z.ZodString>;
|
|
28
|
+
functionCallConfidence: z.ZodOptional<z.ZodNumber>;
|
|
29
|
+
finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
|
|
30
|
+
promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
|
|
31
|
+
}, "strip", z.ZodTypeAny, {
|
|
32
|
+
audioUtteranceId: string;
|
|
33
|
+
pendingTranscript: string;
|
|
34
|
+
startRecordingStatus?: string | undefined;
|
|
35
|
+
transcriptionStatus?: string | undefined;
|
|
36
|
+
finalTranscript?: string | undefined;
|
|
37
|
+
finalConfidence?: number | undefined;
|
|
38
|
+
asrConfig?: string | undefined;
|
|
39
|
+
startRecordingTimestamp?: string | undefined;
|
|
40
|
+
finalRecordingTimestamp?: string | undefined;
|
|
41
|
+
finalTranscriptionTimestamp?: string | undefined;
|
|
42
|
+
pendingConfidence?: number | undefined;
|
|
43
|
+
functionCallMetadata?: string | undefined;
|
|
44
|
+
functionCallConfidence?: number | undefined;
|
|
45
|
+
finalFunctionCallTimestamp?: string | undefined;
|
|
46
|
+
promptSlotMap?: Record<string, string[]> | undefined;
|
|
47
|
+
}, {
|
|
48
|
+
audioUtteranceId: string;
|
|
49
|
+
startRecordingStatus?: string | undefined;
|
|
50
|
+
transcriptionStatus?: string | undefined;
|
|
51
|
+
finalTranscript?: string | undefined;
|
|
52
|
+
finalConfidence?: number | undefined;
|
|
53
|
+
asrConfig?: string | undefined;
|
|
54
|
+
startRecordingTimestamp?: string | undefined;
|
|
55
|
+
finalRecordingTimestamp?: string | undefined;
|
|
56
|
+
finalTranscriptionTimestamp?: string | undefined;
|
|
57
|
+
pendingTranscript?: string | undefined;
|
|
58
|
+
pendingConfidence?: number | undefined;
|
|
59
|
+
functionCallMetadata?: string | undefined;
|
|
60
|
+
functionCallConfidence?: number | undefined;
|
|
61
|
+
finalFunctionCallTimestamp?: string | undefined;
|
|
62
|
+
promptSlotMap?: Record<string, string[]> | undefined;
|
|
63
|
+
}>;
|
|
64
|
+
export type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
|
|
65
|
+
export declare const RecordingStatus: {
|
|
66
|
+
readonly NOT_READY: "NOT_READY";
|
|
67
|
+
readonly READY: "READY";
|
|
68
|
+
readonly RECORDING: "RECORDING";
|
|
69
|
+
readonly FINISHED: "FINISHED";
|
|
70
|
+
};
|
|
71
|
+
export type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus];
|
|
72
|
+
export declare const TranscriptionStatus: {
|
|
73
|
+
readonly NOT_STARTED: "NOT_STARTED";
|
|
74
|
+
readonly IN_PROGRESS: "IN_PROGRESS";
|
|
75
|
+
readonly FINALIZED: "FINALIZED";
|
|
76
|
+
readonly ABORTED: "ABORTED";
|
|
77
|
+
readonly ERROR: "ERROR";
|
|
78
|
+
};
|
|
79
|
+
export type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus];
|
|
80
|
+
export declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState;
|
|
81
|
+
export declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean;
|
|
82
|
+
//# sourceMappingURL=vgf-recognition-state.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAEvB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA0BpC,CAAA;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAA;AAGxE,eAAO,MAAM,eAAe;;;;;CAKlB,CAAA;AAEV,MAAM,MAAM,mBAAmB,GAAG,OAAO,eAAe,CAAC,MAAM,OAAO,eAAe,CAAC,CAAA;AAEtF,eAAO,MAAM,mBAAmB;;;;;;CAMtB,CAAA;AAEV,MAAM,MAAM,uBAAuB,GAAG,OAAO,mBAAmB,CAAC,MAAM,OAAO,mBAAmB,CAAC,CAAA;AAGlG,wBAAgB,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,GAAG,gBAAgB,CAOxF;AAGD,wBAAgB,gCAAgC,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAa9F"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@volley/recognition-client-sdk",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.294",
|
|
4
4
|
"description": "Recognition Service TypeScript/Node.js Client SDK",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
},
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"uuid": "^11.0.0",
|
|
35
|
-
"ws": "
|
|
36
|
-
"zod": "
|
|
35
|
+
"ws": "8.18.3",
|
|
36
|
+
"zod": "~3.22.4"
|
|
37
37
|
},
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"@semantic-release/changelog": "^6.0.3",
|
|
@@ -51,12 +51,11 @@
|
|
|
51
51
|
"jest": "^29.6.1",
|
|
52
52
|
"semantic-release": "^25.0.1",
|
|
53
53
|
"ts-jest": "^29.4.5",
|
|
54
|
-
"tsup": "^8.5.0",
|
|
55
54
|
"typescript": "^5.1.6",
|
|
56
55
|
"@recog/shared-config": "1.0.0",
|
|
57
56
|
"@recog/websocket": "1.0.0",
|
|
58
|
-
"@recog/shared-
|
|
59
|
-
"@recog/shared-
|
|
57
|
+
"@recog/shared-types": "1.0.0",
|
|
58
|
+
"@recog/shared-utils": "1.0.0"
|
|
60
59
|
},
|
|
61
60
|
"keywords": [
|
|
62
61
|
"recognition",
|
|
@@ -65,8 +64,8 @@
|
|
|
65
64
|
"speech"
|
|
66
65
|
],
|
|
67
66
|
"scripts": {
|
|
68
|
-
"build": "
|
|
69
|
-
"dev": "
|
|
67
|
+
"build": "node build.mjs",
|
|
68
|
+
"dev": "node build.mjs --watch",
|
|
70
69
|
"test": "jest --passWithNoTests",
|
|
71
70
|
"lint": "eslint src --ext .ts"
|
|
72
71
|
}
|
package/src/index.ts
CHANGED
|
@@ -76,6 +76,7 @@ export { AudioEncoding } from '@recog/websocket';
|
|
|
76
76
|
export {
|
|
77
77
|
// Recognition context types
|
|
78
78
|
type GameContextV1,
|
|
79
|
+
type SlotMap,
|
|
79
80
|
RecognitionContextTypeV1,
|
|
80
81
|
ControlSignalTypeV1,
|
|
81
82
|
ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
|
|
@@ -86,10 +87,13 @@ export {
|
|
|
86
87
|
type MetadataResultV1,
|
|
87
88
|
type ErrorResultV1,
|
|
88
89
|
RecognitionResultTypeV1,
|
|
90
|
+
ClientControlActionV1,
|
|
89
91
|
|
|
90
92
|
// ASR configuration types
|
|
91
93
|
type ASRRequestConfig,
|
|
92
94
|
type ASRRequestV1,
|
|
95
|
+
FinalTranscriptStability,
|
|
96
|
+
createDefaultASRConfig,
|
|
93
97
|
RecognitionProvider,
|
|
94
98
|
DeepgramModel,
|
|
95
99
|
GoogleModel,
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client';
|
|
6
6
|
import { ClientState } from './recognition-client.types';
|
|
7
|
+
import { RecognitionResultTypeV1 } from '@recog/shared-types';
|
|
7
8
|
import { WebSocket as MockWebSocket } from 'ws';
|
|
8
9
|
|
|
9
10
|
// Mock WebSocket
|
|
@@ -581,12 +582,14 @@ describe('RealTimeTwoWayWebSocketRecognitionClient', () => {
|
|
|
581
582
|
logger: mockLogger
|
|
582
583
|
});
|
|
583
584
|
|
|
584
|
-
// Create a
|
|
585
|
-
const
|
|
586
|
-
|
|
587
|
-
|
|
585
|
+
// Create a real Blob but spy on arrayBuffer to make it fail
|
|
586
|
+
const audioData = new Uint8Array([1, 2, 3, 4]);
|
|
587
|
+
const badBlob = new Blob([audioData], { type: 'audio/raw' });
|
|
588
|
+
|
|
589
|
+
// Mock arrayBuffer to reject
|
|
590
|
+
jest.spyOn(badBlob, 'arrayBuffer').mockRejectedValue(new Error('Conversion failed'));
|
|
588
591
|
|
|
589
|
-
testClient.sendAudio(badBlob
|
|
592
|
+
testClient.sendAudio(badBlob);
|
|
590
593
|
|
|
591
594
|
// Wait for error handling
|
|
592
595
|
await new Promise(resolve => setTimeout(resolve, 100));
|
|
@@ -623,14 +626,10 @@ describe('RealTimeTwoWayWebSocketRecognitionClient', () => {
|
|
|
623
626
|
it('should use FileReader fallback when blob.arrayBuffer not available (Smart TV path)', async () => {
|
|
624
627
|
const audioData = new Uint8Array([1, 2, 3, 4]);
|
|
625
628
|
|
|
626
|
-
// Create a
|
|
627
|
-
const
|
|
628
|
-
size: audioData.length,
|
|
629
|
-
type: 'audio/raw',
|
|
630
|
-
// No arrayBuffer method - will trigger FileReader path
|
|
631
|
-
};
|
|
629
|
+
// Create a real Blob
|
|
630
|
+
const blob = new Blob([audioData], { type: 'audio/raw' });
|
|
632
631
|
|
|
633
|
-
// Mock FileReader
|
|
632
|
+
// Mock FileReader BEFORE removing arrayBuffer
|
|
634
633
|
const mockReadAsArrayBuffer = jest.fn();
|
|
635
634
|
const originalFileReader = (global as any).FileReader;
|
|
636
635
|
|
|
@@ -651,14 +650,21 @@ describe('RealTimeTwoWayWebSocketRecognitionClient', () => {
|
|
|
651
650
|
}, 10);
|
|
652
651
|
});
|
|
653
652
|
|
|
654
|
-
|
|
653
|
+
// Remove arrayBuffer method to simulate old Smart TV (must be done after blob creation)
|
|
654
|
+
Object.defineProperty(blob, 'arrayBuffer', {
|
|
655
|
+
value: undefined,
|
|
656
|
+
writable: true,
|
|
657
|
+
configurable: true
|
|
658
|
+
});
|
|
659
|
+
|
|
660
|
+
client.sendAudio(blob);
|
|
655
661
|
|
|
656
662
|
// Wait for FileReader async conversion
|
|
657
663
|
await new Promise(resolve => setTimeout(resolve, 150));
|
|
658
664
|
|
|
659
665
|
// Should have used FileReader
|
|
660
666
|
expect((global as any).FileReader).toHaveBeenCalled();
|
|
661
|
-
expect(mockReadAsArrayBuffer).toHaveBeenCalledWith(
|
|
667
|
+
expect(mockReadAsArrayBuffer).toHaveBeenCalledWith(blob);
|
|
662
668
|
|
|
663
669
|
// Should have buffered successfully
|
|
664
670
|
const stats = client.getStats();
|
|
@@ -766,4 +772,131 @@ describe('RealTimeTwoWayWebSocketRecognitionClient', () => {
|
|
|
766
772
|
});
|
|
767
773
|
messageHandler(readyMessage);
|
|
768
774
|
}
|
|
775
|
+
|
|
776
|
+
describe('stopAbnormally', () => {
|
|
777
|
+
beforeEach(() => {
|
|
778
|
+
// Recreate client with fresh mocks
|
|
779
|
+
jest.clearAllMocks();
|
|
780
|
+
|
|
781
|
+
client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
782
|
+
url: 'ws://localhost:3000',
|
|
783
|
+
onTranscript: jest.fn(),
|
|
784
|
+
onError: jest.fn(),
|
|
785
|
+
onConnected: jest.fn(),
|
|
786
|
+
onDisconnected: jest.fn()
|
|
787
|
+
});
|
|
788
|
+
|
|
789
|
+
// Access the mock WebSocket through the MockWebSocket constructor
|
|
790
|
+
const MockWsConstructor = MockWebSocket as jest.MockedClass<typeof MockWebSocket>;
|
|
791
|
+
mockWs = MockWsConstructor.mock.results[MockWsConstructor.mock.results.length - 1]?.value;
|
|
792
|
+
|
|
793
|
+
// Ensure mockWs has necessary methods
|
|
794
|
+
if (mockWs) {
|
|
795
|
+
mockWs.on = mockWs.on || jest.fn().mockReturnThis();
|
|
796
|
+
mockWs.send = mockWs.send || jest.fn();
|
|
797
|
+
mockWs.close = mockWs.close || jest.fn();
|
|
798
|
+
mockWs.readyState = MockWebSocket.CONNECTING;
|
|
799
|
+
}
|
|
800
|
+
});
|
|
801
|
+
|
|
802
|
+
it('should immediately close WebSocket connection', async () => {
|
|
803
|
+
await setupReadyClient();
|
|
804
|
+
expect(client.getState()).toBe(ClientState.READY);
|
|
805
|
+
|
|
806
|
+
client.stopAbnormally();
|
|
807
|
+
|
|
808
|
+
expect(mockWs.close).toHaveBeenCalledWith(1000, 'Client abnormal stop');
|
|
809
|
+
});
|
|
810
|
+
|
|
811
|
+
it('should update state to STOPPED', async () => {
|
|
812
|
+
await setupReadyClient();
|
|
813
|
+
|
|
814
|
+
client.stopAbnormally();
|
|
815
|
+
|
|
816
|
+
expect(client.getState()).toBe(ClientState.STOPPED);
|
|
817
|
+
});
|
|
818
|
+
|
|
819
|
+
it('should work from any state', () => {
|
|
820
|
+
// Test from INITIAL state
|
|
821
|
+
expect(client.getState()).toBe(ClientState.INITIAL);
|
|
822
|
+
client.stopAbnormally();
|
|
823
|
+
expect(client.getState()).toBe(ClientState.STOPPED);
|
|
824
|
+
});
|
|
825
|
+
|
|
826
|
+
it('should clean up resources', async () => {
|
|
827
|
+
await setupReadyClient();
|
|
828
|
+
|
|
829
|
+
// Send some audio to populate buffers
|
|
830
|
+
client.sendAudio(new ArrayBuffer(1000));
|
|
831
|
+
|
|
832
|
+
// Verify audio was sent
|
|
833
|
+
const statsBefore = client.getStats();
|
|
834
|
+
expect(statsBefore.audioBytesSent).toBeGreaterThan(0);
|
|
835
|
+
|
|
836
|
+
client.stopAbnormally();
|
|
837
|
+
|
|
838
|
+
// Cleanup resets stats
|
|
839
|
+
const statsAfter = client.getStats();
|
|
840
|
+
expect(statsAfter.audioBytesSent).toBe(0);
|
|
841
|
+
expect(statsAfter.audioChunksSent).toBe(0);
|
|
842
|
+
});
|
|
843
|
+
|
|
844
|
+
it('should not send stop signal to server (immediate disconnect)', async () => {
|
|
845
|
+
await setupReadyClient();
|
|
846
|
+
jest.clearAllMocks(); // Clear connection setup messages
|
|
847
|
+
|
|
848
|
+
client.stopAbnormally();
|
|
849
|
+
|
|
850
|
+
// Should NOT send stop recording signal (unlike stopRecording)
|
|
851
|
+
// Only closes the WebSocket
|
|
852
|
+
expect(mockWs.send).not.toHaveBeenCalled();
|
|
853
|
+
expect(mockWs.close).toHaveBeenCalled();
|
|
854
|
+
});
|
|
855
|
+
|
|
856
|
+
it('should differ from stopRecording behavior', async () => {
|
|
857
|
+
await setupReadyClient();
|
|
858
|
+
|
|
859
|
+
// stopRecording sends control signal and waits
|
|
860
|
+
jest.clearAllMocks();
|
|
861
|
+
const stopPromise = client.stopRecording();
|
|
862
|
+
|
|
863
|
+
// Verify control signal was sent
|
|
864
|
+
expect(mockWs.send).toHaveBeenCalled();
|
|
865
|
+
const sendCall = mockWs.send.mock.calls[0][0];
|
|
866
|
+
const message = JSON.parse(sendCall);
|
|
867
|
+
expect(message.data.signal).toBe('STOP_RECORDING');
|
|
868
|
+
|
|
869
|
+
// Complete the stop
|
|
870
|
+
const messageHandler = mockWs.on.mock.calls.find((call: any[]) => call[0] === 'message')[1];
|
|
871
|
+
messageHandler(JSON.stringify({
|
|
872
|
+
v: 1,
|
|
873
|
+
type: 'message',
|
|
874
|
+
data: {
|
|
875
|
+
type: RecognitionResultTypeV1.TRANSCRIPTION,
|
|
876
|
+
is_finished: true,
|
|
877
|
+
finalTranscript: 'test'
|
|
878
|
+
}
|
|
879
|
+
}));
|
|
880
|
+
await stopPromise;
|
|
881
|
+
|
|
882
|
+
// Now test stopAbnormally
|
|
883
|
+
await setupReadyClient();
|
|
884
|
+
jest.clearAllMocks();
|
|
885
|
+
|
|
886
|
+
client.stopAbnormally();
|
|
887
|
+
|
|
888
|
+
// stopAbnormally does NOT send messages, just closes
|
|
889
|
+
expect(mockWs.send).not.toHaveBeenCalled();
|
|
890
|
+
expect(mockWs.close).toHaveBeenCalled();
|
|
891
|
+
});
|
|
892
|
+
|
|
893
|
+
it('should be idempotent - safe to call multiple times', () => {
|
|
894
|
+
client.stopAbnormally();
|
|
895
|
+
expect(client.getState()).toBe(ClientState.STOPPED);
|
|
896
|
+
|
|
897
|
+
// Call again - should not throw
|
|
898
|
+
expect(() => client.stopAbnormally()).not.toThrow();
|
|
899
|
+
expect(client.getState()).toBe(ClientState.STOPPED);
|
|
900
|
+
});
|
|
901
|
+
});
|
|
769
902
|
});
|
|
@@ -544,6 +544,29 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
544
544
|
});
|
|
545
545
|
}
|
|
546
546
|
|
|
547
|
+
stopAbnormally(): void {
|
|
548
|
+
// Guard: If already in terminal state, do nothing
|
|
549
|
+
if (this.state === ClientState.STOPPED || this.state === ClientState.FAILED) {
|
|
550
|
+
this.log('debug', 'stopAbnormally called but already in terminal state', { state: this.state });
|
|
551
|
+
return;
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
this.log('warn', 'Abnormal stop requested - closing connection immediately', { state: this.state });
|
|
555
|
+
|
|
556
|
+
// Update state to STOPPED (skip STOPPING)
|
|
557
|
+
this.state = ClientState.STOPPED;
|
|
558
|
+
|
|
559
|
+
// Clean up resources
|
|
560
|
+
this.cleanup();
|
|
561
|
+
|
|
562
|
+
// Close WebSocket connection immediately
|
|
563
|
+
// Code 1000 = Normal Closure (even though abnormal for us, it's normal for WebSocket spec)
|
|
564
|
+
// Type assertion needed because closeConnection is a newly added protected method
|
|
565
|
+
(this as any).closeConnection(1000, 'Client abnormal stop');
|
|
566
|
+
|
|
567
|
+
// Note: onDisconnected will be called by WebSocket close event
|
|
568
|
+
// which will call cleanup again (idempotent) and trigger onDisconnected callback
|
|
569
|
+
}
|
|
547
570
|
|
|
548
571
|
getAudioUtteranceId(): string {
|
|
549
572
|
return this.config.audioUtteranceId;
|
|
@@ -627,6 +650,10 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
627
650
|
interimResults: this.config.asrRequestConfig.interimResults ?? false,
|
|
628
651
|
// Auto-enable useContext if gameContext is provided, or use explicit value if set
|
|
629
652
|
useContext: this.config.asrRequestConfig.useContext ?? !!this.config.gameContext,
|
|
653
|
+
// Include finalTranscriptStability if provided (it's already a string enum)
|
|
654
|
+
...(this.config.asrRequestConfig.finalTranscriptStability && {
|
|
655
|
+
finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
|
|
656
|
+
}),
|
|
630
657
|
...(debugCommand && { debugCommand })
|
|
631
658
|
};
|
|
632
659
|
|
|
@@ -220,6 +220,25 @@ export interface IRecognitionClient {
|
|
|
220
220
|
*/
|
|
221
221
|
stopRecording(): Promise<void>;
|
|
222
222
|
|
|
223
|
+
/**
|
|
224
|
+
* Force stop and immediately close connection without waiting for server
|
|
225
|
+
*
|
|
226
|
+
* WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
|
|
227
|
+
* - Does NOT wait for server to process remaining audio
|
|
228
|
+
* - Does NOT receive final transcript from server
|
|
229
|
+
* - Immediately closes WebSocket connection
|
|
230
|
+
* - Cleans up resources (buffers, listeners)
|
|
231
|
+
*
|
|
232
|
+
* Use Cases:
|
|
233
|
+
* - User explicitly cancels/abandons session
|
|
234
|
+
* - Timeout scenarios where waiting is not acceptable
|
|
235
|
+
* - Need immediate cleanup and can't wait for server
|
|
236
|
+
*
|
|
237
|
+
* RECOMMENDED: Use stopRecording() for normal shutdown.
|
|
238
|
+
* Only use this when immediate disconnection is required.
|
|
239
|
+
*/
|
|
240
|
+
stopAbnormally(): void;
|
|
241
|
+
|
|
223
242
|
/**
|
|
224
243
|
* Get the audio utterance ID for this session
|
|
225
244
|
* Available immediately after client construction.
|