@volley/recognition-client-sdk-node22 0.1.424
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +344 -0
- package/dist/browser.bundled.d.ts +1280 -0
- package/dist/browser.d.ts +10 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/config-builder.d.ts +134 -0
- package/dist/config-builder.d.ts.map +1 -0
- package/dist/errors.d.ts +41 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/factory.d.ts +36 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/index.bundled.d.ts +2572 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +10199 -0
- package/dist/index.js.map +7 -0
- package/dist/recog-client-sdk.browser.d.ts +10 -0
- package/dist/recog-client-sdk.browser.d.ts.map +1 -0
- package/dist/recog-client-sdk.browser.js +5746 -0
- package/dist/recog-client-sdk.browser.js.map +7 -0
- package/dist/recognition-client.d.ts +128 -0
- package/dist/recognition-client.d.ts.map +1 -0
- package/dist/recognition-client.types.d.ts +271 -0
- package/dist/recognition-client.types.d.ts.map +1 -0
- package/dist/simplified-vgf-recognition-client.d.ts +178 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
- package/dist/utils/audio-ring-buffer.d.ts +69 -0
- package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
- package/dist/utils/message-handler.d.ts +45 -0
- package/dist/utils/message-handler.d.ts.map +1 -0
- package/dist/utils/url-builder.d.ts +28 -0
- package/dist/utils/url-builder.d.ts.map +1 -0
- package/dist/vgf-recognition-mapper.d.ts +66 -0
- package/dist/vgf-recognition-mapper.d.ts.map +1 -0
- package/dist/vgf-recognition-state.d.ts +91 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -0
- package/package.json +74 -0
- package/src/browser.ts +24 -0
- package/src/config-builder.spec.ts +265 -0
- package/src/config-builder.ts +240 -0
- package/src/errors.ts +84 -0
- package/src/factory.spec.ts +215 -0
- package/src/factory.ts +47 -0
- package/src/index.ts +127 -0
- package/src/recognition-client.spec.ts +889 -0
- package/src/recognition-client.ts +844 -0
- package/src/recognition-client.types.ts +338 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +718 -0
- package/src/simplified-vgf-recognition-client.spec.ts +1525 -0
- package/src/simplified-vgf-recognition-client.ts +524 -0
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
- package/src/utils/audio-ring-buffer.ts +170 -0
- package/src/utils/message-handler.spec.ts +311 -0
- package/src/utils/message-handler.ts +131 -0
- package/src/utils/url-builder.spec.ts +252 -0
- package/src/utils/url-builder.ts +92 -0
- package/src/vgf-recognition-mapper.spec.ts +78 -0
- package/src/vgf-recognition-mapper.ts +232 -0
- package/src/vgf-recognition-state.ts +102 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Builder for Recognition Client
|
|
3
|
+
* Handles WebSocket URL construction with query parameters
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getRecognitionServiceBase } from '@recog/shared-config';
|
|
7
|
+
import type { GameContextV1, Stage } from '@recog/shared-types';
|
|
8
|
+
import type { RecognitionCallbackUrl } from '../recognition-client.types.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Input configuration for buildWebSocketUrl.
 *
 * Either `url` or `stage` selects the base endpoint (`url` wins when both
 * are present); the remaining fields are emitted as query parameters.
 */
export interface UrlBuilderConfig {
  /** Explicit WebSocket endpoint; takes precedence over `stage`. */
  url?: string;
  /** Deployment stage used to resolve the endpoint when `url` is absent. */
  stage?: Stage | string;
  /** Required utterance identifier; always sent as a query parameter. */
  audioUtteranceId: string;
  /** Callback URLs for server-side notifications; serialized as JSON. */
  callbackUrls?: RecognitionCallbackUrl[];
  userId?: string;
  gameSessionId?: string;
  deviceId?: string;
  accountId?: string;
  questionAnswerId?: string;
  platform?: string;
  /** Game context; gameId and gamePhase from it may be sent as parameters. */
  gameContext?: GameContextV1;
  /** Standalone gameId - takes precedence over gameContext.gameId if both provided */
  gameId?: string;
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Build WebSocket URL with all query parameters
|
|
28
|
+
* Either `url` or `stage` must be provided (or defaults to production if neither provided)
|
|
29
|
+
* If both are provided, `url` takes precedence over `stage`
|
|
30
|
+
*/
|
|
31
|
+
export function buildWebSocketUrl(config: UrlBuilderConfig): string {
|
|
32
|
+
// Determine base URL based on precedence: url > stage > default production
|
|
33
|
+
let baseUrl: string;
|
|
34
|
+
|
|
35
|
+
if (config.url) {
|
|
36
|
+
// Explicit URL takes precedence
|
|
37
|
+
baseUrl = config.url;
|
|
38
|
+
} else if (config.stage) {
|
|
39
|
+
// Use stage to build URL
|
|
40
|
+
const stageBase = getRecognitionServiceBase(config.stage);
|
|
41
|
+
baseUrl = `${stageBase.wsBase}/ws/v1/recognize`;
|
|
42
|
+
} else {
|
|
43
|
+
// Default to production if neither provided
|
|
44
|
+
const defaultBase = getRecognitionServiceBase('production');
|
|
45
|
+
baseUrl = `${defaultBase.wsBase}/ws/v1/recognize`;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Build URL - add all optional identification parameters
|
|
49
|
+
const url = new URL(baseUrl);
|
|
50
|
+
|
|
51
|
+
// Add audioUtteranceId as query parameter (required for server to recognize it)
|
|
52
|
+
url.searchParams.set('audioUtteranceId', config.audioUtteranceId);
|
|
53
|
+
|
|
54
|
+
// Add callback URLs if provided (for server-side notifications)
|
|
55
|
+
if (config.callbackUrls && config.callbackUrls.length > 0) {
|
|
56
|
+
// Serialize as JSON for complex structure
|
|
57
|
+
url.searchParams.set('callbackUrls', JSON.stringify(config.callbackUrls));
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Add user/session/device/account identification if provided
|
|
61
|
+
if (config.userId) {
|
|
62
|
+
url.searchParams.set('userId', config.userId);
|
|
63
|
+
}
|
|
64
|
+
if (config.gameSessionId) {
|
|
65
|
+
url.searchParams.set('gameSessionId', config.gameSessionId);
|
|
66
|
+
}
|
|
67
|
+
if (config.deviceId) {
|
|
68
|
+
url.searchParams.set('deviceId', config.deviceId);
|
|
69
|
+
}
|
|
70
|
+
if (config.accountId) {
|
|
71
|
+
url.searchParams.set('accountId', config.accountId);
|
|
72
|
+
}
|
|
73
|
+
if (config.questionAnswerId) {
|
|
74
|
+
url.searchParams.set('questionAnswerId', config.questionAnswerId);
|
|
75
|
+
}
|
|
76
|
+
if (config.platform) {
|
|
77
|
+
url.searchParams.set('platform', config.platform);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Add gameId - standalone gameId takes precedence over gameContext.gameId
|
|
81
|
+
const gameId = config.gameId ?? config.gameContext?.gameId;
|
|
82
|
+
if (gameId) {
|
|
83
|
+
url.searchParams.set('gameId', gameId);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Add gamePhase from gameContext if provided
|
|
87
|
+
if (config.gameContext?.gamePhase) {
|
|
88
|
+
url.searchParams.set('gamePhase', config.gameContext.gamePhase);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
return url.toString();
|
|
92
|
+
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for VGF Recognition Mapper functions
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
resetRecognitionVGFState
|
|
7
|
+
} from './vgf-recognition-mapper.js';
|
|
8
|
+
import {
|
|
9
|
+
RecognitionState,
|
|
10
|
+
RecordingStatus,
|
|
11
|
+
TranscriptionStatus,
|
|
12
|
+
RecognitionActionProcessingState
|
|
13
|
+
} from './vgf-recognition-state.js';
|
|
14
|
+
|
|
15
|
+
describe('resetRecognitionVGFState', () => {
|
|
16
|
+
it('should generate a new UUID', () => {
|
|
17
|
+
const originalState: RecognitionState = {
|
|
18
|
+
audioUtteranceId: 'old-uuid-123',
|
|
19
|
+
pendingTranscript: '',
|
|
20
|
+
transcriptionStatus: TranscriptionStatus.FINALIZED,
|
|
21
|
+
startRecordingStatus: RecordingStatus.FINISHED,
|
|
22
|
+
recognitionActionProcessingState: RecognitionActionProcessingState.COMPLETED,
|
|
23
|
+
finalTranscript: 'hello world'
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
const newState = resetRecognitionVGFState(originalState);
|
|
27
|
+
|
|
28
|
+
expect(newState.audioUtteranceId).not.toBe('old-uuid-123');
|
|
29
|
+
expect(newState.audioUtteranceId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i);
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
it('should reset session-specific fields', () => {
|
|
33
|
+
const originalState: RecognitionState = {
|
|
34
|
+
audioUtteranceId: 'old-uuid-123',
|
|
35
|
+
pendingTranscript: 'partial',
|
|
36
|
+
transcriptionStatus: TranscriptionStatus.FINALIZED,
|
|
37
|
+
startRecordingStatus: RecordingStatus.FINISHED,
|
|
38
|
+
recognitionActionProcessingState: RecognitionActionProcessingState.COMPLETED,
|
|
39
|
+
finalTranscript: 'hello world'
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
const newState = resetRecognitionVGFState(originalState);
|
|
43
|
+
|
|
44
|
+
expect(newState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
|
|
45
|
+
expect(newState.startRecordingStatus).toBe(RecordingStatus.READY);
|
|
46
|
+
expect(newState.recognitionActionProcessingState).toBe(RecognitionActionProcessingState.NOT_STARTED);
|
|
47
|
+
expect(newState.finalTranscript).toBeUndefined();
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it('should preserve non-session fields like promptSlotMap', () => {
|
|
51
|
+
const originalState: RecognitionState = {
|
|
52
|
+
audioUtteranceId: 'old-uuid-123',
|
|
53
|
+
pendingTranscript: '',
|
|
54
|
+
transcriptionStatus: TranscriptionStatus.FINALIZED,
|
|
55
|
+
promptSlotMap: { artist: ['taylor swift'], song: ['shake it off'] },
|
|
56
|
+
asrConfig: '{"provider":"deepgram"}'
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
const newState = resetRecognitionVGFState(originalState);
|
|
60
|
+
|
|
61
|
+
expect(newState.promptSlotMap).toEqual({ artist: ['taylor swift'], song: ['shake it off'] });
|
|
62
|
+
expect(newState.asrConfig).toBe('{"provider":"deepgram"}');
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it('should work with minimal state', () => {
|
|
66
|
+
const originalState: RecognitionState = {
|
|
67
|
+
audioUtteranceId: 'old-uuid',
|
|
68
|
+
pendingTranscript: ''
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
const newState = resetRecognitionVGFState(originalState);
|
|
72
|
+
|
|
73
|
+
expect(newState.audioUtteranceId).not.toBe('old-uuid');
|
|
74
|
+
expect(newState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
|
|
75
|
+
expect(newState.startRecordingStatus).toBe(RecordingStatus.READY);
|
|
76
|
+
expect(newState.recognitionActionProcessingState).toBe(RecognitionActionProcessingState.NOT_STARTED);
|
|
77
|
+
});
|
|
78
|
+
});
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VGF Recognition Mapper
|
|
3
|
+
*
|
|
4
|
+
* Maps between the existing recognition client types and the simplified VGF state.
|
|
5
|
+
* This provides a clean abstraction layer for game developers.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
RecognitionState,
|
|
10
|
+
RecordingStatus,
|
|
11
|
+
TranscriptionStatus,
|
|
12
|
+
RecognitionActionProcessingState,
|
|
13
|
+
createInitialRecognitionState
|
|
14
|
+
} from './vgf-recognition-state.js';
|
|
15
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
16
|
+
import {
|
|
17
|
+
ClientState,
|
|
18
|
+
IRecognitionClientConfig
|
|
19
|
+
} from './recognition-client.types.js';
|
|
20
|
+
import {
|
|
21
|
+
TranscriptionResultV1,
|
|
22
|
+
ErrorResultV1
|
|
23
|
+
} from '@recog/shared-types';
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Maps ClientState to RecordingStatus for VGF state
|
|
27
|
+
*/
|
|
28
|
+
export function mapClientStateToRecordingStatus(clientState: ClientState): string {
|
|
29
|
+
switch (clientState) {
|
|
30
|
+
case ClientState.INITIAL:
|
|
31
|
+
case ClientState.CONNECTING:
|
|
32
|
+
case ClientState.CONNECTED:
|
|
33
|
+
return RecordingStatus.NOT_READY;
|
|
34
|
+
|
|
35
|
+
case ClientState.READY:
|
|
36
|
+
// Ready to record, but not recording yet
|
|
37
|
+
return RecordingStatus.READY;
|
|
38
|
+
|
|
39
|
+
case ClientState.STOPPING:
|
|
40
|
+
case ClientState.STOPPED:
|
|
41
|
+
case ClientState.FAILED:
|
|
42
|
+
return RecordingStatus.FINISHED;
|
|
43
|
+
|
|
44
|
+
default:
|
|
45
|
+
return RecordingStatus.NOT_READY;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Creates a VGF state from transcription result
|
|
51
|
+
*/
|
|
52
|
+
export function mapTranscriptionResultToState(
|
|
53
|
+
currentState: RecognitionState,
|
|
54
|
+
result: TranscriptionResultV1,
|
|
55
|
+
isRecording: boolean
|
|
56
|
+
): RecognitionState {
|
|
57
|
+
const newState = { ...currentState };
|
|
58
|
+
|
|
59
|
+
// Update recording status if actively recording
|
|
60
|
+
if (isRecording && currentState.startRecordingStatus !== RecordingStatus.FINISHED) {
|
|
61
|
+
newState.startRecordingStatus = RecordingStatus.RECORDING;
|
|
62
|
+
|
|
63
|
+
// Set start timestamp on first audio
|
|
64
|
+
if (!newState.startRecordingTimestamp) {
|
|
65
|
+
newState.startRecordingTimestamp = new Date().toISOString();
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Update transcription status
|
|
70
|
+
if (!result.is_finished) {
|
|
71
|
+
// Has pending transcript - STEP 2 support
|
|
72
|
+
newState.transcriptionStatus = TranscriptionStatus.IN_PROGRESS;
|
|
73
|
+
|
|
74
|
+
// Direct copy of pending transcript without any combination
|
|
75
|
+
newState.pendingTranscript = result.pendingTranscript || "";
|
|
76
|
+
|
|
77
|
+
// Direct copy of pending confidence
|
|
78
|
+
if (result.pendingTranscriptConfidence !== undefined) {
|
|
79
|
+
newState.pendingConfidence = result.pendingTranscriptConfidence;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Also update final transcript if we have it (even if not finished)
|
|
83
|
+
if (result.finalTranscript) {
|
|
84
|
+
newState.finalTranscript = result.finalTranscript;
|
|
85
|
+
if (result.finalTranscriptConfidence !== undefined) {
|
|
86
|
+
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
} else {
|
|
90
|
+
// Transcription is finished
|
|
91
|
+
newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
|
|
92
|
+
newState.finalTranscript = result.finalTranscript || "";
|
|
93
|
+
if (result.finalTranscriptConfidence !== undefined) {
|
|
94
|
+
newState.finalConfidence = result.finalTranscriptConfidence;
|
|
95
|
+
}
|
|
96
|
+
newState.finalTranscriptionTimestamp = new Date().toISOString();
|
|
97
|
+
|
|
98
|
+
// Clear pending when we have final
|
|
99
|
+
newState.pendingTranscript = "";
|
|
100
|
+
newState.pendingConfidence = undefined;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return newState;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Maps error to state
|
|
108
|
+
*/
|
|
109
|
+
export function mapErrorToState(
|
|
110
|
+
currentState: RecognitionState,
|
|
111
|
+
error: ErrorResultV1
|
|
112
|
+
): RecognitionState {
|
|
113
|
+
return {
|
|
114
|
+
...currentState,
|
|
115
|
+
transcriptionStatus: TranscriptionStatus.ERROR,
|
|
116
|
+
startRecordingStatus: RecordingStatus.FINISHED,
|
|
117
|
+
finalRecordingTimestamp: new Date().toISOString()
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Creates initial VGF state from client config
|
|
123
|
+
*/
|
|
124
|
+
export function createVGFStateFromConfig(config: IRecognitionClientConfig): RecognitionState {
|
|
125
|
+
const audioUtteranceId = config.audioUtteranceId || generateUUID();
|
|
126
|
+
const state = createInitialRecognitionState(audioUtteranceId);
|
|
127
|
+
|
|
128
|
+
// Store ASR config as JSON if provided
|
|
129
|
+
if (config.asrRequestConfig) {
|
|
130
|
+
state.asrConfig = JSON.stringify(config.asrRequestConfig);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
return state;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Updates state when recording stops
|
|
138
|
+
*/
|
|
139
|
+
export function updateStateOnStop(currentState: RecognitionState): RecognitionState {
|
|
140
|
+
return {
|
|
141
|
+
...currentState,
|
|
142
|
+
startRecordingStatus: RecordingStatus.FINISHED,
|
|
143
|
+
finalRecordingTimestamp: new Date().toISOString()
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Resets session state with a new UUID.
|
|
149
|
+
*
|
|
150
|
+
* This creates a fresh session state while preserving non-session fields
|
|
151
|
+
* (like promptSlotMap, asrConfig, etc.)
|
|
152
|
+
*
|
|
153
|
+
* Resets:
|
|
154
|
+
* - audioUtteranceId → new UUID
|
|
155
|
+
* - transcriptionStatus → NOT_STARTED
|
|
156
|
+
* - startRecordingStatus → READY
|
|
157
|
+
* - recognitionActionProcessingState → NOT_STARTED
|
|
158
|
+
* - finalTranscript → undefined
|
|
159
|
+
*
|
|
160
|
+
* @param currentState - The current recognition state
|
|
161
|
+
* @returns A new state with reset session fields and a new UUID
|
|
162
|
+
*/
|
|
163
|
+
export function resetRecognitionVGFState(currentState: RecognitionState): RecognitionState {
|
|
164
|
+
return {
|
|
165
|
+
...currentState,
|
|
166
|
+
audioUtteranceId: uuidv4(),
|
|
167
|
+
transcriptionStatus: TranscriptionStatus.NOT_STARTED,
|
|
168
|
+
startRecordingStatus: RecordingStatus.READY,
|
|
169
|
+
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
|
|
170
|
+
finalTranscript: undefined
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Updates state when client becomes ready
|
|
176
|
+
*/
|
|
177
|
+
export function updateStateOnReady(currentState: RecognitionState): RecognitionState {
|
|
178
|
+
return {
|
|
179
|
+
...currentState,
|
|
180
|
+
startRecordingStatus: RecordingStatus.READY
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Parses function call from transcript (STEP 3 support)
|
|
186
|
+
* This is a placeholder - actual implementation would use NLP/LLM
|
|
187
|
+
*/
|
|
188
|
+
export function extractFunctionCallFromTranscript(
|
|
189
|
+
transcript: string,
|
|
190
|
+
gameContext?: any
|
|
191
|
+
): { metadata?: string; confidence?: number } | null {
|
|
192
|
+
// This would be replaced with actual function call extraction logic
|
|
193
|
+
// For example, using an LLM to parse intent from the transcript
|
|
194
|
+
// and map it to game actions
|
|
195
|
+
|
|
196
|
+
// Example stub implementation:
|
|
197
|
+
const lowerTranscript = transcript.toLowerCase();
|
|
198
|
+
|
|
199
|
+
// Simple pattern matching for demo
|
|
200
|
+
if (lowerTranscript.includes("play") && lowerTranscript.includes("artist")) {
|
|
201
|
+
return {
|
|
202
|
+
metadata: JSON.stringify({ action: "play", target: "artist" }),
|
|
203
|
+
confidence: 0.8
|
|
204
|
+
};
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
return null;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Updates state with function call results (STEP 3)
|
|
212
|
+
*/
|
|
213
|
+
export function updateStateWithFunctionCall(
|
|
214
|
+
currentState: RecognitionState,
|
|
215
|
+
functionCall: { metadata?: string; confidence?: number }
|
|
216
|
+
): RecognitionState {
|
|
217
|
+
return {
|
|
218
|
+
...currentState,
|
|
219
|
+
functionCallMetadata: functionCall.metadata,
|
|
220
|
+
functionCallConfidence: functionCall.confidence,
|
|
221
|
+
finalFunctionCallTimestamp: new Date().toISOString()
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
// Helper function to generate UUID (simplified version)
|
|
226
|
+
function generateUUID(): string {
|
|
227
|
+
return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
|
|
228
|
+
const r = Math.random() * 16 | 0;
|
|
229
|
+
const v = c === 'x' ? r : (r & 0x3 | 0x8);
|
|
230
|
+
return v.toString(16);
|
|
231
|
+
});
|
|
232
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { z } from "zod"
|
|
2
|
+
|
|
3
|
+
/**
 * VGF-style state schema for game-side recognition state/results management.
 *
 * This schema provides a standardized way for game developers to manage
 * voice recognition state and results in their applications. It supports:
 *
 * STEP 1: Basic transcription flow
 * STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
 * STEP 3: Semantic/function-call outcomes for game actions
 *
 * Ideally this should be part of a more centralized shared type library to free
 * game developers and provide helper functions (VGF? Platform SDK?).
 */
export const RecognitionVGFStateSchema = z.object({
  // Core STT state
  audioUtteranceId: z.string(), // Identifier for this audio utterance (only required field)
  startRecordingStatus: z.string().optional(), // "NOT_READY", "READY", "RECORDING", "FINISHED". States follow this order.
  // Streaming should only start when "READY". Other states control mic UI and recording.
  transcriptionStatus: z.string().optional(), // "NOT_STARTED", "IN_PROGRESS", "FINALIZED", "ABORTED", "ERROR"
  finalTranscript: z.string().optional(), // Full finalized transcript for the utterance. Will not change.
  finalConfidence: z.number().optional(), // Confidence for the final transcript

  // Tracking-only metadata
  asrConfig: z.string().optional(), // JSON-serialized form of the ASR config
  startRecordingTimestamp: z.string().optional(), // Start of recording. Immutable after set.
  finalRecordingTimestamp: z.string().optional(), // End of recording. Immutable after set. Transcription may still be in progress.
  finalTranscriptionTimestamp: z.string().optional(), // When the final transcript was produced. Immutable after set.

  // STEP 2: Support for mic auto-stop upon correct answer
  pendingTranscript: z.string().optional().default(""), // Non-final transcript that may change (matches existing naming); defaults to "" on parse
  pendingConfidence: z.number().optional(),

  // STEP 3: Support for semantic/function-call outcomes
  functionCallMetadata: z.string().optional(), // Function call metadata in JSON, e.g. "{artist: true, title: true}"
  functionCallConfidence: z.number().optional(), // Confidence score for the function call.
  finalFunctionCallTimestamp: z.string().optional(), // When the final action after interpreting the transcript was taken. Immutable.

  // Support for prompt slot mapping - passed to recognition context when present
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional(), // Optional map of slot names to prompt values for recognition context

  // Recognition action processing state - managed externally, SDK preserves but never modifies
  recognitionActionProcessingState: z.string().optional(), // "NOT_STARTED", "IN_PROGRESS", "COMPLETED"
})

// TypeScript type inferred from the schema above.
export type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>
|
|
48
|
+
|
|
49
|
+
// Status constants for better type safety and consistency

// Recording lifecycle; per the schema comments, statuses only ever advance
// in this order: NOT_READY → READY → RECORDING → FINISHED
// (enforced by isValidRecordingStatusTransition below).
export const RecordingStatus = {
  NOT_READY: "NOT_READY",
  READY: "READY",
  RECORDING: "RECORDING",
  FINISHED: "FINISHED",
} as const

// Union of the RecordingStatus string values.
export type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus]

// Lifecycle of the transcription itself (may still be in progress after
// recording has finished).
export const TranscriptionStatus = {
  NOT_STARTED: "NOT_STARTED",
  IN_PROGRESS: "IN_PROGRESS",
  FINALIZED: "FINALIZED",
  ABORTED: "ABORTED", // Session was cancelled/abandoned by user
  ERROR: "ERROR",
} as const

// Union of the TranscriptionStatus string values.
export type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus]

// Progress of externally-managed action processing; per the schema comment,
// the SDK preserves this field but never modifies it.
export const RecognitionActionProcessingState = {
  NOT_STARTED: "NOT_STARTED",
  IN_PROGRESS: "IN_PROGRESS",
  COMPLETED: "COMPLETED",
} as const

// Union of the RecognitionActionProcessingState string values.
export type RecognitionActionProcessingStateType = typeof RecognitionActionProcessingState[keyof typeof RecognitionActionProcessingState]
|
|
76
|
+
|
|
77
|
+
// Helper function to create initial state
|
|
78
|
+
export function createInitialRecognitionState(audioUtteranceId: string): RecognitionState {
|
|
79
|
+
return {
|
|
80
|
+
audioUtteranceId,
|
|
81
|
+
startRecordingStatus: RecordingStatus.NOT_READY,
|
|
82
|
+
transcriptionStatus: TranscriptionStatus.NOT_STARTED,
|
|
83
|
+
pendingTranscript: "",
|
|
84
|
+
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// Helper function to validate state transitions
|
|
89
|
+
export function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean {
|
|
90
|
+
const statusOrder = [
|
|
91
|
+
RecordingStatus.NOT_READY,
|
|
92
|
+
RecordingStatus.READY,
|
|
93
|
+
RecordingStatus.RECORDING,
|
|
94
|
+
RecordingStatus.FINISHED,
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
const fromIndex = from ? statusOrder.indexOf(from as RecordingStatusType) : -1
|
|
98
|
+
const toIndex = statusOrder.indexOf(to as RecordingStatusType)
|
|
99
|
+
|
|
100
|
+
// Can only move forward in the status order
|
|
101
|
+
return toIndex > fromIndex && toIndex !== -1
|
|
102
|
+
}
|