@volley/recognition-client-sdk 0.1.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ /**
2
+ * VGF Recognition Mapper
3
+ *
4
+ * Maps between the existing recognition client types and the simplified VGF state.
5
+ * This provides a clean abstraction layer for game developers.
6
+ */
7
+
8
+ import {
9
+ RecognitionState,
10
+ RecordingStatus,
11
+ TranscriptionStatus,
12
+ createInitialRecognitionState
13
+ } from './vgf-recognition-state.js';
14
+ import {
15
+ ClientState,
16
+ IRecognitionClientConfig
17
+ } from './recognition-client.types.js';
18
+ import {
19
+ TranscriptionResultV1,
20
+ MetadataResultV1,
21
+ ErrorResultV1,
22
+ ASRRequestConfig
23
+ } from '@recog/shared-types';
24
+
25
+ /**
26
+ * Maps ClientState to RecordingStatus for VGF state
27
+ */
28
+ export function mapClientStateToRecordingStatus(clientState: ClientState): string {
29
+ switch (clientState) {
30
+ case ClientState.INITIAL:
31
+ case ClientState.CONNECTING:
32
+ case ClientState.CONNECTED:
33
+ return RecordingStatus.NOT_READY;
34
+
35
+ case ClientState.READY:
36
+ // Ready to record, but not recording yet
37
+ return RecordingStatus.READY;
38
+
39
+ case ClientState.STOPPING:
40
+ case ClientState.STOPPED:
41
+ case ClientState.FAILED:
42
+ return RecordingStatus.FINISHED;
43
+
44
+ default:
45
+ return RecordingStatus.NOT_READY;
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Creates a VGF state from transcription result
51
+ */
52
+ export function mapTranscriptionResultToState(
53
+ currentState: RecognitionState,
54
+ result: TranscriptionResultV1,
55
+ isRecording: boolean
56
+ ): RecognitionState {
57
+ const newState = { ...currentState };
58
+
59
+ // Update recording status if actively recording
60
+ if (isRecording && currentState.startRecordingStatus !== RecordingStatus.FINISHED) {
61
+ newState.startRecordingStatus = RecordingStatus.RECORDING;
62
+
63
+ // Set start timestamp on first audio
64
+ if (!newState.startRecordingTimestamp) {
65
+ newState.startRecordingTimestamp = new Date().toISOString();
66
+ }
67
+ }
68
+
69
+ // Update transcription status
70
+ if (!result.is_finished) {
71
+ // Has pending transcript - STEP 2 support
72
+ newState.transcriptionStatus = TranscriptionStatus.IN_PROGRESS;
73
+
74
+ // Direct copy of pending transcript without any combination
75
+ newState.pendingTranscript = result.pendingTranscript || "";
76
+
77
+ // Direct copy of pending confidence
78
+ if (result.pendingTranscriptConfidence !== undefined) {
79
+ newState.pendingConfidence = result.pendingTranscriptConfidence;
80
+ }
81
+
82
+ // Also update final transcript if we have it (even if not finished)
83
+ if (result.finalTranscript) {
84
+ newState.finalTranscript = result.finalTranscript;
85
+ if (result.finalTranscriptConfidence !== undefined) {
86
+ newState.finalConfidence = result.finalTranscriptConfidence;
87
+ }
88
+ }
89
+ } else {
90
+ // Transcription is finished
91
+ newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
92
+ newState.finalTranscript = result.finalTranscript || "";
93
+ if (result.finalTranscriptConfidence !== undefined) {
94
+ newState.finalConfidence = result.finalTranscriptConfidence;
95
+ }
96
+ newState.finalTranscriptionTimestamp = new Date().toISOString();
97
+
98
+ // Clear pending when we have final
99
+ newState.pendingTranscript = "";
100
+ newState.pendingConfidence = undefined;
101
+ }
102
+
103
+ return newState;
104
+ }
105
+
106
+ /**
107
+ * Maps metadata result to update state timestamps
108
+ */
109
+ export function mapMetadataToState(
110
+ currentState: RecognitionState,
111
+ metadata: MetadataResultV1
112
+ ): RecognitionState {
113
+ const newState = { ...currentState };
114
+
115
+ // Update final recording timestamp when metadata arrives
116
+ if (!newState.finalRecordingTimestamp) {
117
+ newState.finalRecordingTimestamp = new Date().toISOString();
118
+ }
119
+
120
+ // Recording is finished when metadata arrives
121
+ newState.startRecordingStatus = RecordingStatus.FINISHED;
122
+
123
+ return newState;
124
+ }
125
+
126
+ /**
127
+ * Maps error to state
128
+ */
129
+ export function mapErrorToState(
130
+ currentState: RecognitionState,
131
+ error: ErrorResultV1
132
+ ): RecognitionState {
133
+ return {
134
+ ...currentState,
135
+ transcriptionStatus: TranscriptionStatus.ERROR,
136
+ startRecordingStatus: RecordingStatus.FINISHED,
137
+ finalRecordingTimestamp: new Date().toISOString()
138
+ };
139
+ }
140
+
141
+ /**
142
+ * Creates initial VGF state from client config
143
+ */
144
+ export function createVGFStateFromConfig(config: IRecognitionClientConfig): RecognitionState {
145
+ const audioUtteranceId = config.audioUtteranceId || generateUUID();
146
+ const state = createInitialRecognitionState(audioUtteranceId);
147
+
148
+ // Store ASR config as JSON if provided
149
+ if (config.asrRequestConfig) {
150
+ state.asrConfig = JSON.stringify(config.asrRequestConfig);
151
+ }
152
+
153
+ return state;
154
+ }
155
+
156
+ /**
157
+ * Updates state when recording stops
158
+ */
159
+ export function updateStateOnStop(currentState: RecognitionState): RecognitionState {
160
+ return {
161
+ ...currentState,
162
+ startRecordingStatus: RecordingStatus.FINISHED,
163
+ finalRecordingTimestamp: new Date().toISOString()
164
+ };
165
+ }
166
+
167
+ /**
168
+ * Updates state when client becomes ready
169
+ */
170
+ export function updateStateOnReady(currentState: RecognitionState): RecognitionState {
171
+ return {
172
+ ...currentState,
173
+ startRecordingStatus: RecordingStatus.READY
174
+ };
175
+ }
176
+
177
+ /**
178
+ * Parses function call from transcript (STEP 3 support)
179
+ * This is a placeholder - actual implementation would use NLP/LLM
180
+ */
181
+ export function extractFunctionCallFromTranscript(
182
+ transcript: string,
183
+ gameContext?: any
184
+ ): { metadata?: string; confidence?: number } | null {
185
+ // This would be replaced with actual function call extraction logic
186
+ // For example, using an LLM to parse intent from the transcript
187
+ // and map it to game actions
188
+
189
+ // Example stub implementation:
190
+ const lowerTranscript = transcript.toLowerCase();
191
+
192
+ // Simple pattern matching for demo
193
+ if (lowerTranscript.includes("play") && lowerTranscript.includes("artist")) {
194
+ return {
195
+ metadata: JSON.stringify({ action: "play", target: "artist" }),
196
+ confidence: 0.8
197
+ };
198
+ }
199
+
200
+ return null;
201
+ }
202
+
203
+ /**
204
+ * Updates state with function call results (STEP 3)
205
+ */
206
+ export function updateStateWithFunctionCall(
207
+ currentState: RecognitionState,
208
+ functionCall: { metadata?: string; confidence?: number }
209
+ ): RecognitionState {
210
+ return {
211
+ ...currentState,
212
+ functionCallMetadata: functionCall.metadata,
213
+ functionCallConfidence: functionCall.confidence,
214
+ finalFunctionCallTimestamp: new Date().toISOString()
215
+ };
216
+ }
217
+
218
+ // Helper function to generate UUID (simplified version)
219
+ function generateUUID(): string {
220
+ return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
221
+ const r = Math.random() * 16 | 0;
222
+ const v = c === 'x' ? r : (r & 0x3 | 0x8);
223
+ return v.toString(16);
224
+ });
225
+ }
@@ -0,0 +1,89 @@
1
+ import { z } from "zod"
2
+
3
/**
 * VGF-style state schema for game-side recognition state/results management.
 *
 * This schema provides a standardized way for game developers to manage
 * voice recognition state and results in their applications. It supports:
 *
 * STEP 1: Basic transcription flow
 * STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
 * STEP 3: Semantic/function-call outcomes for game actions
 *
 * Ideally this should be part of a more centralized shared type library so
 * game developers get these types and helper functions for free (VGF?
 * Platform SDK?).
 */
export const RecognitionVGFStateSchema = z.object({
  // Core STT state
  audioUtteranceId: z.string(),
  startRecordingStatus: z.string().optional(), // "NOT_READY", "READY", "RECORDING", "FINISHED". States follow this order.
  // Streaming should only start when "READY". Other states control mic UI and recording.
  transcriptionStatus: z.string().optional(), // "NOT_STARTED", "IN_PROGRESS", "FINALIZED", "ERROR"
  finalTranscript: z.string().optional(), // Full finalized transcript for the utterance. Will not change.
  finalConfidence: z.number().optional(), // Confidence for finalTranscript

  // Tracking-only metadata
  asrConfig: z.string().optional(), // JSON-serialized form of the ASR config
  startRecordingTimestamp: z.string().optional(), // Start of recording. Immutable after set.
  finalRecordingTimestamp: z.string().optional(), // End of recording. Immutable after set. Transcription may still be in progress.
  finalTranscriptionTimestamp: z.string().optional(), // When the final transcript was produced. Immutable after set.

  // STEP 2: Support for mic auto-stop upon correct answer
  pendingTranscript: z.string().optional().default(""), // Non-final transcript that may change (matches existing naming)
  pendingConfidence: z.number().optional(), // Confidence for pendingTranscript

  // STEP 3: Support for semantic/function-call outcomes
  functionCallMetadata: z.string().optional(), // Function call metadata in JSON, e.g. "{artist: true, title: true}"
  functionCallConfidence: z.number().optional(), // Confidence score for the function call.
  finalFunctionCallTimestamp: z.string().optional(), // When the final action after interpreting the transcript was taken. Immutable.

  // Support for prompt slot mapping - passed to recognition context when present
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional(), // Optional map of slot names to prompt values for recognition context
})
43
+
44
// State type inferred from the schema above; this is the shape games consume.
export type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>

// Status constants for better type safety and consistency.
// Values must match the documented strings in RecognitionVGFStateSchema.
export const RecordingStatus = {
  NOT_READY: "NOT_READY",
  READY: "READY",
  RECORDING: "RECORDING",
  FINISHED: "FINISHED",
} as const

// Union of the four RecordingStatus string values
export type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus]

export const TranscriptionStatus = {
  NOT_STARTED: "NOT_STARTED",
  IN_PROGRESS: "IN_PROGRESS",
  FINALIZED: "FINALIZED",
  ERROR: "ERROR",
} as const

// Union of the four TranscriptionStatus string values
export type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus]
64
+
65
+ // Helper function to create initial state
66
+ export function createInitialRecognitionState(audioUtteranceId: string): RecognitionState {
67
+ return {
68
+ audioUtteranceId,
69
+ startRecordingStatus: RecordingStatus.NOT_READY,
70
+ transcriptionStatus: TranscriptionStatus.NOT_STARTED,
71
+ pendingTranscript: "",
72
+ }
73
+ }
74
+
75
+ // Helper function to validate state transitions
76
+ export function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean {
77
+ const statusOrder = [
78
+ RecordingStatus.NOT_READY,
79
+ RecordingStatus.READY,
80
+ RecordingStatus.RECORDING,
81
+ RecordingStatus.FINISHED,
82
+ ]
83
+
84
+ const fromIndex = from ? statusOrder.indexOf(from as RecordingStatusType) : -1
85
+ const toIndex = statusOrder.indexOf(to as RecordingStatusType)
86
+
87
+ // Can only move forward in the status order
88
+ return toIndex > fromIndex && toIndex !== -1
89
+ }