@volley/recognition-client-sdk 0.1.782 → 0.1.800
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +75 -4
- package/dist/index.bundled.d.ts +198 -87
- package/dist/index.js +191 -20
- package/dist/index.js.map +4 -4
- package/dist/recog-client-sdk.browser.js +95 -4
- package/dist/recog-client-sdk.browser.js.map +4 -4
- package/dist/recognition-client.d.ts +23 -0
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +32 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +22 -85
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/dist/utils/audio-resampler.d.ts +32 -0
- package/dist/utils/audio-resampler.d.ts.map +1 -0
- package/dist/vgf-recognition-mapper.d.ts +9 -17
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +103 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/index.spec.ts +2 -0
- package/src/recognition-client.ts +65 -7
- package/src/recognition-client.types.ts +37 -0
- package/src/simplified-vgf-recognition-client.spec.ts +0 -27
- package/src/simplified-vgf-recognition-client.ts +97 -127
- package/src/utils/audio-resampler.spec.ts +69 -0
- package/src/utils/audio-resampler.ts +79 -0
- package/src/vgf-recognition-mapper.spec.ts +143 -0
- package/src/vgf-recognition-mapper.ts +35 -45
- package/src/vgf-recognition-state.ts +19 -1
|
@@ -89,6 +89,29 @@ export declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketA
|
|
|
89
89
|
*/
|
|
90
90
|
private connectWithRetry;
|
|
91
91
|
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
92
|
+
/**
|
|
93
|
+
* Send PCM16 mono audio captured at any sample rate. The SDK downsamples
|
|
94
|
+
* to the session's target rate (currently 16 kHz per server validator)
|
|
95
|
+
* before sending.
|
|
96
|
+
*
|
|
97
|
+
* Use this when your capture pipeline produces audio at the system's
|
|
98
|
+
* native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
|
|
99
|
+
* desktop/mobile hardware — and you don't want to bring your own
|
|
100
|
+
* resampler. If your audio is already at the target rate, prefer
|
|
101
|
+
* `sendAudio()` to skip the resample step.
|
|
102
|
+
*
|
|
103
|
+
* Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
|
|
104
|
+
* dependencies, has a built-in low-pass effect so aliasing stays out of
|
|
105
|
+
* the speech band. Suitable for ASR; not a substitute for a high-quality
|
|
106
|
+
* resampler if you're doing music or full-fidelity processing.
|
|
107
|
+
*
|
|
108
|
+
* Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
|
|
109
|
+
* mixed to mono by the caller.
|
|
110
|
+
*
|
|
111
|
+
* @param audioData - PCM16 mono audio at `sourceSampleRate`.
|
|
112
|
+
* @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
|
|
113
|
+
*/
|
|
114
|
+
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
92
115
|
private sendAudioInternal;
|
|
93
116
|
/**
|
|
94
117
|
* Only active when the client is in READY state; otherwise it returns immediately.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"recognition-client.d.ts","sourceRoot":"","sources":["../src/recognition-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAGH,OAAO,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AACxD,OAAO,EAML,KAAK,qBAAqB,EAS1B,KAAK,aAAa,EAGnB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,KAAK,EACV,kBAAkB,EAClB,uBAAuB,EACvB,8CAA8C,EAE/C,MAAM,+BAA+B,CAAC;
|
|
1
|
+
{"version":3,"file":"recognition-client.d.ts","sourceRoot":"","sources":["../src/recognition-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAGH,OAAO,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AACxD,OAAO,EAML,KAAK,qBAAqB,EAS1B,KAAK,aAAa,EAGnB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,WAAW,EAAE,MAAM,+BAA+B,CAAC;AAC5D,OAAO,KAAK,EACV,kBAAkB,EAClB,uBAAuB,EACvB,8CAA8C,EAE/C,MAAM,+BAA+B,CAAC;AAWvC;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAE3D;AAgCD;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,qBAAqB,CAAC;AAGxD,YAAY,EAAE,8CAA8C,EAAE,MAAM,+BAA+B,CAAC;AAkCpG;;;;;GAKG;AACH,qBAAa,wCACX,SAAQ,oBAAoB,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAC7C,YAAW,kBAAkB;IAE7B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAK;IAC7C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAuB,CAAoB;IAEnE,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,WAAW,CAAkB;IACrC,OAAO,CAAC,YAAY,CAAyC;IAC7D,OAAO,CAAC,iBAAiB,CAAK;IAC9B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,KAAK,CAAoC;IACjD,OAAO,CAAC,iBAAiB,CAA4B;IAGrD,OAAO,CAAC,iBAAiB,CAAS;IAGlC,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,qBAAqB,CAAO;IACpC,OAAO,CAAC,iBAAiB,CAAK;gBAElB,MAAM,EAAE,8CAA8C;IAkGlE;;;;;;OAMG;IACH,OAAO,CAAC,GAAG;IAWX;;;OAGG;IACH,OAAO,CAAC,OAAO;IAqBA,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IA6BvC;;;OAGG;YACW,gBAAgB;IAkIrB,SAAS,CAAC,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,GAAG,IAAI;IAiBzE;;;;;;;;;;;;;;;;;;;;;OAqBG;IACH,uBAAuB,CACrB,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,EAC/C,gBAAgB,EAAE,MAAM,GACvB,IAAI;IAsBP,OAAO,CAAC,iBAAiB;IAsCzB;;;OAGG;IAEG,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAoCpC,cAAc,IAAI,IAAI;IAwBtB,mBAAmB,IAAI,MAAM;IAI7B,MAAM,IAAI,MAAM;IAIhB,QAAQ,IAAI,WAAW;IAIvB,WAAW,IAAI,OAAO;IAItB,YAAY,IAAI,OAAO;IAIvB,UAAU,IAAI,OAAO;IAIrB,uBAAuB,IAAI,OAAO;IAIlC,mBAAmB,IAAI,OAAO;IAI9B,aAAa,IAAI,OAAO;IAIxB,eAAe,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAmB7C,QAAQ,IAAI,uBAAuB;IAgBnC,SAAS,CAAC,WAAW,IAAI,IAAI;IAwF7B,SAAS,CAAC,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI;IA8C5D;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAwB/B,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI;cAYlB,SAAS,CAAC,GAAG,EAAE;QAAE,C
AAC,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,GAAG,CAAA;KAAE,GAAG,IAAI;IAQ/E;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAiC5B;;;OAGG;IACH,OAAO,CAAC,YAAY;IAwBpB;;;;;;;;;OASG;IACH,eAAe,CAAC,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,GAAG,IAAI;IAiBtE;;;OAGG;IACH,OAAO,CAAC,uBAAuB;IAiC/B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;CA2B3B"}
|
|
@@ -170,6 +170,11 @@ export interface IRecognitionClientConfig {
|
|
|
170
170
|
*
|
|
171
171
|
* Main interface for real-time speech recognition clients.
|
|
172
172
|
* Provides methods for connection management, audio streaming, and session control.
|
|
173
|
+
*
|
|
174
|
+
* NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
|
|
175
|
+
* so any method added here must also be implemented (typically as a delegate) by
|
|
176
|
+
* `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
|
|
177
|
+
* compile time — do not work around the error, add the delegate.
|
|
173
178
|
*/
|
|
174
179
|
export interface IRecognitionClient {
|
|
175
180
|
/**
|
|
@@ -184,6 +189,33 @@ export interface IRecognitionClient {
|
|
|
184
189
|
* @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
|
|
185
190
|
*/
|
|
186
191
|
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
192
|
+
/**
|
|
193
|
+
* Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
|
|
194
|
+
* downsamples to the session's target rate (currently 16 kHz, set by the
|
|
195
|
+
* server validator) before transmitting.
|
|
196
|
+
*
|
|
197
|
+
* Use this when your capture pipeline produces audio at the system's
|
|
198
|
+
* native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
|
|
199
|
+
* If your audio is already at the target rate, prefer `sendAudio()` to
|
|
200
|
+
* skip the resample step.
|
|
201
|
+
*
|
|
202
|
+
* Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
|
|
203
|
+
* mixed to mono by the caller.
|
|
204
|
+
*
|
|
205
|
+
* @param audioData - PCM16 mono audio at `sourceSampleRate`.
|
|
206
|
+
* @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
|
|
207
|
+
*/
|
|
208
|
+
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
209
|
+
/**
|
|
210
|
+
* Send prefix audio (e.g. a TTS prompt) that primes the provider's language
|
|
211
|
+
* model before user audio is streamed. Chunked input is accepted — the server buffers
|
|
212
|
+
* until the session is READY and flushes. Must be sent BEFORE the first
|
|
213
|
+
* `sendAudio()` to take effect. Only meaningful when
|
|
214
|
+
* `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
|
|
215
|
+
*
|
|
216
|
+
* @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
|
|
217
|
+
*/
|
|
218
|
+
sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
187
219
|
/**
|
|
188
220
|
* Stop recording and wait for final transcript
|
|
189
221
|
* The server will close the connection after sending the final transcript.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"recognition-client.types.d.ts","sourceRoot":"","sources":["../src/recognition-client.types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,gBAAgB,EAChB,aAAa,EACb,mBAAmB,EACnB,oBAAoB,EACpB,gBAAgB,EAChB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;GAGG;AACH,oBAAY,WAAW;IACrB,+CAA+C;IAC/C,OAAO,YAAY;IAEnB,iDAAiD;IACjD,UAAU,eAAe;IAEzB,8DAA8D;IAC9D,SAAS,cAAc;IAEvB,mCAAmC;IACnC,KAAK,UAAU;IAEf,qDAAqD;IACrD,QAAQ,aAAa;IAErB,4CAA4C;IAC5C,OAAO,YAAY;IAEnB,6CAA6C;IAC7C,MAAM,WAAW;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,gCAAgC;IAChC,GAAG,EAAE,MAAM,CAAC;IAEZ,yFAAyF;IACzF,YAAY,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC;CACvC;AAGD,MAAM,MAAM,uBAAuB,GAAG,sBAAsB,CAAC;AAE7D,MAAM,WAAW,wBAAwB;IACvC;;;;;;;;;OASG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IAEvB,qEAAqE;IACrE,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IAEpC,qDAAqD;IACrD,WAAW,CAAC,EAAE,aAAa,CAAC;IAE5B;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,mFAAmF;IACnF,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B;;OAEG;IACH,YAAY,CAAC,EAAE,sBAAsB,EAAE,CAAC;IAExC,qCAAqC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,6FAA6F;IAC7F,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,uCAAuC;IACvC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,wCAAwC;IACxC,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,gGAAgG;IAChG,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B,6FAA6F;IAC7F,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,2EAA2E;IAC3E,gBAAgB,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IAE3C,2GAA2G;IAC3G,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAEhC,2CAA2C;IAC3C,YAAY,CAAC,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,CAAC;IAEvD;;;OAGG;IACH,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,oBAAoB,KAAK,IAAI,CAAC;IAExD,oFAAoF;IACpF,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAElD;;;;OAIG;IACH,cAAc,CAAC,EAAE,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,CAAC;IAEzD,oFAAoF;IACpF,mBAAmB,CAAC,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,CAAC;IAE5D,iCAAiC;IACjC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,CAAC;IAEzC,2CAA2C;IAC3C,WAAW,CAAC,EAAE,MAAM,IAAI,CAAC;IAEzB;;;;OAIG;IACH,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM
,KAAK,IAAI,CAAC;IAExD,uDAAuD;IACvD,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,sDAAsD;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,wDAAwD;IACxD,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B,uEAAuE;IACvE,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB;;;;;;;;;;;;;;;;;OAiBG;IACH,eAAe,CAAC,EAAE;QAChB,yEAAyE;QACzE,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,oEAAoE;QACpE,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IAEF;;;;;;OAMG;IAEH,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,CAAC;CAC5F;AAED
|
|
1
|
+
{"version":3,"file":"recognition-client.types.d.ts","sourceRoot":"","sources":["../src/recognition-client.types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,gBAAgB,EAChB,aAAa,EACb,mBAAmB,EACnB,oBAAoB,EACpB,gBAAgB,EAChB,aAAa,EACb,KAAK,EACN,MAAM,qBAAqB,CAAC;AAE7B;;;GAGG;AACH,oBAAY,WAAW;IACrB,+CAA+C;IAC/C,OAAO,YAAY;IAEnB,iDAAiD;IACjD,UAAU,eAAe;IAEzB,8DAA8D;IAC9D,SAAS,cAAc;IAEvB,mCAAmC;IACnC,KAAK,UAAU;IAEf,qDAAqD;IACrD,QAAQ,aAAa;IAErB,4CAA4C;IAC5C,OAAO,YAAY;IAEnB,6CAA6C;IAC7C,MAAM,WAAW;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,gCAAgC;IAChC,GAAG,EAAE,MAAM,CAAC;IAEZ,yFAAyF;IACzF,YAAY,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC;CACvC;AAGD,MAAM,MAAM,uBAAuB,GAAG,sBAAsB,CAAC;AAE7D,MAAM,WAAW,wBAAwB;IACvC;;;;;;;;;OASG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IAEvB,qEAAqE;IACrE,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IAEpC,qDAAqD;IACrD,WAAW,CAAC,EAAE,aAAa,CAAC;IAE5B;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,mFAAmF;IACnF,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B;;OAEG;IACH,YAAY,CAAC,EAAE,sBAAsB,EAAE,CAAC;IAExC,qCAAqC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,6FAA6F;IAC7F,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,uCAAuC;IACvC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,wCAAwC;IACxC,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,gGAAgG;IAChG,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B,6FAA6F;IAC7F,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,2EAA2E;IAC3E,gBAAgB,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IAE3C,2GAA2G;IAC3G,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAEhC,2CAA2C;IAC3C,YAAY,CAAC,EAAE,CAAC,MAAM,EAAE,qBAAqB,KAAK,IAAI,CAAC;IAEvD;;;OAGG;IACH,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,oBAAoB,KAAK,IAAI,CAAC;IAExD,oFAAoF;IACpF,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAElD;;;;OAIG;IACH,cAAc,CAAC,EAAE,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,CAAC;IAEzD,oFAAoF;IACpF,mBAAmB,CAAC,EAAE,CAAC,MAAM,EAAE,mBAAmB,KAAK,IAAI,CAAC;IAE5D,iCAAiC;IACjC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,CAAC;IAEzC,2CAA2C;IAC3C,WAAW,CAAC,EAAE,MAAM,IAAI,CAAC;IAEzB;;;;OAIG;IACH,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM
,KAAK,IAAI,CAAC;IAExD,uDAAuD;IACvD,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,sDAAsD;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,wDAAwD;IACxD,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B,uEAAuE;IACvE,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB;;;;;;;;;;;;;;;;;OAiBG;IACH,eAAe,CAAC,EAAE;QAChB,yEAAyE;QACzE,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,oEAAoE;QACpE,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;IAEF;;;;;;OAMG;IAEH,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,KAAK,IAAI,CAAC;CAC5F;AAED;;;;;;;;;;GAUG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;;OAIG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAEzB;;;;OAIG;IACH,SAAS,CAAC,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,GAAG,IAAI,CAAC;IAEjE;;;;;;;;;;;;;;;OAeG;IACH,uBAAuB,CACrB,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,EAC/C,gBAAgB,EAAE,MAAM,GACvB,IAAI,CAAC;IAER;;;;;;;;OAQG;IACH,eAAe,CAAC,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,GAAG,IAAI,CAAC;IAEvE;;;;OAIG;IACH,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAE/B;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,IAAI,IAAI,CAAC;IAEvB;;;;OAIG;IACH,mBAAmB,IAAI,MAAM,CAAC;IAE9B;;;OAGG;IACH,QAAQ,IAAI,WAAW,CAAC;IAExB;;;OAGG;IACH,WAAW,IAAI,OAAO,CAAC;IAEvB;;;OAGG;IACH,YAAY,IAAI,OAAO,CAAC;IAExB;;;OAGG;IACH,UAAU,IAAI,OAAO,CAAC;IAEtB;;;OAGG;IACH,uBAAuB,IAAI,OAAO,CAAC;IAEnC;;;OAGG;IACH,mBAAmB,IAAI,OAAO,CAAC;IAE/B;;;OAGG;IACH,QAAQ,IAAI,uBAAuB,CAAC;IAEpC;;;;OAIG;IACH,MAAM,IAAI,MAAM,CAAC;IAEjB;;;;;;;;;;;;OAYG;IACH,eAAe,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI,CAAC;IAE9C;;;;OAIG;IACH,aAAa,IAAI,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,uCAAuC;IACvC,cAAc,EAAE,MAAM,CAAC;IAEvB,wCAAwC;IACxC,eAAe,EAAE,MAAM,CAAC;IAExB,4CAA4C;IAC5C,mBAAmB,EAAE,MAAM,CAAC;IAE5B,iDAAiD;IACjD,mBAAmB,EAAE,MAAM,CAAC;IAE5B,yCAAyC;IACzC,qBAAqB,EAAE,MAAM,CAAC;IAE9B,iEAAiE;IACjE,UAAU,EAAE,OAAO,CAAC;CACrB;AAED;;;;GAIG;AAEH,MAAM,WAAW,8CAA+C,SAAQ,wBAAwB;CAG/F"}
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
* All functionality is delegated to the underlying client.
|
|
9
9
|
*/
|
|
10
10
|
import { RecognitionState } from './vgf-recognition-state.js';
|
|
11
|
-
import { IRecognitionClientConfig, ClientState } from './recognition-client.types.js';
|
|
12
|
-
import {
|
|
11
|
+
import { IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ClientState } from './recognition-client.types.js';
|
|
12
|
+
import type { GameContextV1 } from '@recog/shared-types';
|
|
13
13
|
/**
|
|
14
14
|
* Configuration for SimplifiedVGFRecognitionClient
|
|
15
15
|
*/
|
|
@@ -28,94 +28,22 @@ export interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
|
|
|
28
28
|
/**
|
|
29
29
|
* Interface for SimplifiedVGFRecognitionClient
|
|
30
30
|
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
31
|
+
* Inherits the full IRecognitionClient surface (connect, sendAudio,
|
|
32
|
+
* sendAudioWithSampleRate, sendPrefixAudio, stopRecording, stopAbnormally,
|
|
33
|
+
* status checks, sendGameContext, getStats, getUrl, getState, getAudioUtteranceId)
|
|
34
|
+
* — see recognition-client.types.ts for those. Adds VGF-specific state access.
|
|
35
|
+
*
|
|
36
|
+
* Extending IRecognitionClient (rather than redeclaring methods) means
|
|
37
|
+
* TypeScript catches any base-client method that's not delegated by the
|
|
38
|
+
* VGF wrapper at compile time — keeps the two surfaces in sync.
|
|
33
39
|
*/
|
|
34
|
-
export interface ISimplifiedVGFRecognitionClient {
|
|
35
|
-
/**
|
|
36
|
-
* Connect to the recognition service WebSocket
|
|
37
|
-
* @returns Promise that resolves when connected and ready
|
|
38
|
-
*/
|
|
39
|
-
connect(): Promise<void>;
|
|
40
|
-
/**
|
|
41
|
-
* Send audio data for transcription
|
|
42
|
-
* @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
|
|
43
|
-
*/
|
|
44
|
-
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
45
|
-
/**
|
|
46
|
-
* Stop recording and wait for final transcription
|
|
47
|
-
* @returns Promise that resolves when transcription is complete
|
|
48
|
-
*/
|
|
49
|
-
stopRecording(): Promise<void>;
|
|
40
|
+
export interface ISimplifiedVGFRecognitionClient extends IRecognitionClient {
|
|
50
41
|
/**
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
* WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
|
|
54
|
-
* - Does NOT wait for server to process remaining audio
|
|
55
|
-
* - Does NOT receive final transcript from server (VGF state set to empty)
|
|
56
|
-
* - Immediately closes WebSocket connection
|
|
57
|
-
* - Cleans up resources (buffers, listeners)
|
|
58
|
-
*
|
|
59
|
-
* Use Cases:
|
|
60
|
-
* - User explicitly cancels/abandons the session
|
|
61
|
-
* - Timeout scenarios where waiting is not acceptable
|
|
62
|
-
* - Need immediate cleanup and can't wait for server
|
|
63
|
-
*
|
|
64
|
-
* RECOMMENDED: Use stopRecording() for normal shutdown.
|
|
65
|
-
* Only use this when immediate disconnection is required.
|
|
66
|
-
*/
|
|
67
|
-
stopAbnormally(): void;
|
|
68
|
-
/**
|
|
69
|
-
* Get the current VGF recognition state
|
|
42
|
+
* Get the current VGF recognition state — the single shared store
|
|
43
|
+
* of inputs and outputs for this utterance.
|
|
70
44
|
* @returns Current RecognitionState with all transcription data
|
|
71
45
|
*/
|
|
72
46
|
getVGFState(): RecognitionState;
|
|
73
|
-
/**
|
|
74
|
-
* Check if connected to the WebSocket
|
|
75
|
-
*/
|
|
76
|
-
isConnected(): boolean;
|
|
77
|
-
/**
|
|
78
|
-
* Check if currently connecting
|
|
79
|
-
*/
|
|
80
|
-
isConnecting(): boolean;
|
|
81
|
-
/**
|
|
82
|
-
* Check if currently stopping
|
|
83
|
-
*/
|
|
84
|
-
isStopping(): boolean;
|
|
85
|
-
/**
|
|
86
|
-
* Check if transcription has finished
|
|
87
|
-
*/
|
|
88
|
-
isTranscriptionFinished(): boolean;
|
|
89
|
-
/**
|
|
90
|
-
* Check if the audio buffer has overflowed
|
|
91
|
-
*/
|
|
92
|
-
isBufferOverflowing(): boolean;
|
|
93
|
-
/**
|
|
94
|
-
* Send game context after connection is established (for preconnect flow).
|
|
95
|
-
*
|
|
96
|
-
* Preconnect flow: Create client with asrRequestConfig (useContext: true) but
|
|
97
|
-
* WITHOUT gameContext → call connect() → later call sendGameContext() with slotMap.
|
|
98
|
-
*
|
|
99
|
-
* @param context - Game context including slotMap for keyword boosting
|
|
100
|
-
*/
|
|
101
|
-
sendGameContext(context: GameContextV1): void;
|
|
102
|
-
/**
|
|
103
|
-
* Check if server has sent READY signal (provider connected, ready for audio).
|
|
104
|
-
* In preconnect flow, this becomes true after sendGameContext() triggers provider attachment.
|
|
105
|
-
*/
|
|
106
|
-
isServerReady(): boolean;
|
|
107
|
-
/**
|
|
108
|
-
* Get the audio utterance ID for this session
|
|
109
|
-
*/
|
|
110
|
-
getAudioUtteranceId(): string;
|
|
111
|
-
/**
|
|
112
|
-
* Get the WebSocket URL being used
|
|
113
|
-
*/
|
|
114
|
-
getUrl(): string;
|
|
115
|
-
/**
|
|
116
|
-
* Get the underlying client state (for advanced usage)
|
|
117
|
-
*/
|
|
118
|
-
getState(): ClientState;
|
|
119
47
|
}
|
|
120
48
|
/**
|
|
121
49
|
* This wrapper ONLY maintains VGF state as a sink.
|
|
@@ -132,6 +60,15 @@ export declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRec
|
|
|
132
60
|
constructor(config: SimplifiedVGFClientConfig);
|
|
133
61
|
connect(): Promise<void>;
|
|
134
62
|
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
63
|
+
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
64
|
+
sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
65
|
+
getStats(): IRecognitionClientStats;
|
|
66
|
+
/**
|
|
67
|
+
* Set VGF recording status to RECORDING on the first audio chunk.
|
|
68
|
+
* Idempotent — subsequent calls are no-ops until disconnect/stop resets
|
|
69
|
+
* `isRecordingAudio`.
|
|
70
|
+
*/
|
|
71
|
+
private markRecordingStarted;
|
|
135
72
|
stopRecording(): Promise<void>;
|
|
136
73
|
stopAbnormally(): void;
|
|
137
74
|
getAudioUtteranceId(): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"simplified-vgf-recognition-client.d.ts","sourceRoot":"","sources":["../src/simplified-vgf-recognition-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EACH,gBAAgB,EAInB,MAAM,4BAA4B,CAAC;AACpC,OAAO,
|
|
1
|
+
{"version":3,"file":"simplified-vgf-recognition-client.d.ts","sourceRoot":"","sources":["../src/simplified-vgf-recognition-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EACH,gBAAgB,EAInB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACH,kBAAkB,EAClB,wBAAwB,EACxB,uBAAuB,EACvB,WAAW,EACd,MAAM,+BAA+B,CAAC;AAUvC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;GAEG;AACH,MAAM,WAAW,yBAA0B,SAAQ,wBAAwB;IACvE;;;OAGG;IACH,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAElD;;;OAGG;IACH,YAAY,CAAC,EAAE,gBAAgB,CAAC;CACnC;AAED;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,+BAAgC,SAAQ,kBAAkB;IACvE;;;;OAIG;IACH,WAAW,IAAI,gBAAgB,CAAC;CACnC;AAED;;;GAGG;AACH,qBAAa,8BAA+B,YAAW,+BAA+B;IAClF,OAAO,CAAC,MAAM,CAAqB;IACnC,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,gBAAgB,CAAkB;IAC1C,OAAO,CAAC,mBAAmB,CAAkD;IAC7E,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,MAAM,CAAqC;IACnD,OAAO,CAAC,oBAAoB,CAAuB;gBAEvC,MAAM,EAAE,yBAAyB;IAuMvC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAK9B,SAAS,CAAC,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,GAAG,IAAI;IAKhE,uBAAuB,CACnB,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,EAC/C,gBAAgB,EAAE,MAAM,GACzB,IAAI;IAKP,eAAe,CAAC,SAAS,EAAE,WAAW,GAAG,eAAe,GAAG,IAAI,GAAG,IAAI;IAOtE,QAAQ,IAAI,uBAAuB;IAInC;;;;OAIG;IACH,OAAO,CAAC,oBAAoB;IAWtB,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IA4BpC,cAAc,IAAI,IAAI;IAiCtB,mBAAmB,IAAI,MAAM;IAI7B,MAAM,IAAI,MAAM;IAIhB,QAAQ,IAAI,WAAW;IAIvB,WAAW,IAAI,OAAO;IAItB,YAAY,IAAI,OAAO;IAIvB,UAAU,IAAI,OAAO;IAIrB,uBAAuB,IAAI,OAAO;IAIlC,mBAAmB,IAAI,OAAO;IAI9B,eAAe,CAAC,OAAO,EAAE,aAAa,GAAG,IAAI;IAI7C,aAAa,IAAI,OAAO;IAMxB,WAAW,IAAI,gBAAgB;IAI/B,OAAO,CAAC,gBAAgB;IAMxB,OAAO,CAAC,iBAAiB;CA8B5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,wBAAgB,yBAAyB,CAAC,MAAM,EAAE,yBAAyB,GAAG,+BAA+B,CAE5G"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Downsample PCM16 mono audio to a target sample rate.
|
|
3
|
+
*
|
|
4
|
+
* Used by `sendAudioWithSampleRate()` so integrators whose capture pipeline
|
|
5
|
+
* produces audio at the system's native rate (AudioContext defaults to
|
|
6
|
+
* 44.1 kHz or 48 kHz on most desktop/mobile hardware) can hand raw bytes
|
|
7
|
+
* to the SDK without having to bring in their own resampler. The
|
|
8
|
+
* recognition-service `SampleRateValidator` accepts only 16 kHz, so the SDK
|
|
9
|
+
* resamples on the client side before sending.
|
|
10
|
+
*
|
|
11
|
+
* Algorithm: box-filter averaging. For each output sample we average the
|
|
12
|
+
* source samples that fall into its time window. This is the cheapest
|
|
13
|
+
* correct approach for speech ASR — it has a built-in low-pass effect that
|
|
14
|
+
* suppresses aliasing far better than naive decimation or linear
|
|
15
|
+
* interpolation, while staying O(n) with no FFT and no dependencies.
|
|
16
|
+
* For integer ratios (e.g. 48000 → 16000, ratio = 3) it degenerates to a
|
|
17
|
+
* plain 3-sample average; for fractional ratios (e.g. 44100 → 16000) the
|
|
18
|
+
* window count varies by ±1 across output samples.
|
|
19
|
+
*
|
|
20
|
+
* Assumes the input is signed 16-bit little-endian PCM (the SDK's
|
|
21
|
+
* documented `AudioEncoding.LINEAR16` input format). Mono only. Stereo
|
|
22
|
+
* audio must be mixed to mono by the caller.
|
|
23
|
+
*
|
|
24
|
+
* @param input - Source PCM16 audio (ArrayBuffer or any ArrayBufferView).
|
|
25
|
+
* @param srcRate - Source sample rate in Hz (e.g. 44100, 48000).
|
|
26
|
+
* @param targetRate - Target sample rate in Hz. Must be ≤ srcRate.
|
|
27
|
+
* @returns A new ArrayBuffer of PCM16 samples at `targetRate`.
|
|
28
|
+
* @throws Error if `targetRate > srcRate` (upsampling is not supported —
|
|
29
|
+
* capture at ≥ targetRate instead).
|
|
30
|
+
*/
|
|
31
|
+
export declare function downsamplePcm16(input: ArrayBuffer | ArrayBufferView, srcRate: number, targetRate: number): ArrayBuffer;
|
|
32
|
+
//# sourceMappingURL=audio-resampler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-resampler.d.ts","sourceRoot":"","sources":["../../src/utils/audio-resampler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,WAAW,GAAG,eAAe,EACpC,OAAO,EAAE,MAAM,EACf,UAAU,EAAE,MAAM,GACjB,WAAW,CA4Cb"}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { RecognitionState } from './vgf-recognition-state.js';
|
|
8
8
|
import { ClientState, IRecognitionClientConfig } from './recognition-client.types.js';
|
|
9
|
-
import { TranscriptionResultV1,
|
|
9
|
+
import { TranscriptionResultV1, SessionConfiguredV1 } from '@recog/shared-types';
|
|
10
10
|
/**
|
|
11
11
|
* Maps ClientState to RecordingStatus for VGF state
|
|
12
12
|
*/
|
|
@@ -15,10 +15,17 @@ export declare function mapClientStateToRecordingStatus(clientState: ClientState
|
|
|
15
15
|
* Creates a VGF state from transcription result
|
|
16
16
|
*/
|
|
17
17
|
export declare function mapTranscriptionResultToState(currentState: RecognitionState, result: TranscriptionResultV1, isRecording: boolean): RecognitionState;
|
|
18
|
+
/**
|
|
19
|
+
* Mirrors the SessionConfiguredV1 message onto the VGF state.
|
|
20
|
+
* Carries the resolved provider/model/sampleRate/encoding/apiType/isFallback
|
|
21
|
+
* that the server actually chose (after circuit-breaker/fallback). Fires once
|
|
22
|
+
* per session, before audio streaming begins.
|
|
23
|
+
*/
|
|
24
|
+
export declare function mapSessionConfiguredToState(currentState: RecognitionState, sessionConfigured: SessionConfiguredV1): RecognitionState;
|
|
18
25
|
/**
|
|
19
26
|
* Maps error to state
|
|
20
27
|
*/
|
|
21
|
-
export declare function mapErrorToState(currentState: RecognitionState
|
|
28
|
+
export declare function mapErrorToState(currentState: RecognitionState): RecognitionState;
|
|
22
29
|
/**
|
|
23
30
|
* Creates initial VGF state from client config
|
|
24
31
|
*/
|
|
@@ -48,19 +55,4 @@ export declare function resetRecognitionVGFState(currentState: RecognitionState)
|
|
|
48
55
|
* Updates state when client becomes ready
|
|
49
56
|
*/
|
|
50
57
|
export declare function updateStateOnReady(currentState: RecognitionState): RecognitionState;
|
|
51
|
-
/**
|
|
52
|
-
* Parses function call from transcript (STEP 3 support)
|
|
53
|
-
* This is a placeholder - actual implementation would use NLP/LLM
|
|
54
|
-
*/
|
|
55
|
-
export declare function extractFunctionCallFromTranscript(transcript: string, gameContext?: any): {
|
|
56
|
-
metadata?: string;
|
|
57
|
-
confidence?: number;
|
|
58
|
-
} | null;
|
|
59
|
-
/**
|
|
60
|
-
* Updates state with function call results (STEP 3)
|
|
61
|
-
*/
|
|
62
|
-
export declare function updateStateWithFunctionCall(currentState: RecognitionState, functionCall: {
|
|
63
|
-
metadata?: string;
|
|
64
|
-
confidence?: number;
|
|
65
|
-
}): RecognitionState;
|
|
66
58
|
//# sourceMappingURL=vgf-recognition-mapper.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAKnB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,
|
|
1
|
+
{"version":3,"file":"vgf-recognition-mapper.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-mapper.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACH,gBAAgB,EAKnB,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACH,WAAW,EACX,wBAAwB,EAC3B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACH,qBAAqB,EACrB,mBAAmB,EACtB,MAAM,qBAAqB,CAAC;AAE7B;;GAEG;AACH,wBAAgB,+BAA+B,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAmBhF;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CACzC,YAAY,EAAE,gBAAgB,EAC9B,MAAM,EAAE,qBAAqB,EAC7B,WAAW,EAAE,OAAO,GACrB,gBAAgB,CA6ElB;AAED;;;;;GAKG;AACH,wBAAgB,2BAA2B,CACvC,YAAY,EAAE,gBAAgB,EAC9B,iBAAiB,EAAE,mBAAmB,GACvC,gBAAgB,CAKlB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC3B,YAAY,EAAE,gBAAgB,GAC/B,gBAAgB,CAOlB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,wBAAwB,GAAG,gBAAgB,CAU3F;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAMlF;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,wBAAwB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAczF;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,YAAY,EAAE,gBAAgB,GAAG,gBAAgB,CAKnF"}
|
|
@@ -20,7 +20,42 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
20
20
|
finalConfidence: z.ZodOptional<z.ZodNumber>;
|
|
21
21
|
voiceEnd: z.ZodOptional<z.ZodNumber>;
|
|
22
22
|
lastNonSilence: z.ZodOptional<z.ZodNumber>;
|
|
23
|
+
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
23
24
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
25
|
+
sessionConfigured: z.ZodOptional<z.ZodObject<{
|
|
26
|
+
type: z.ZodLiteral<import("@recog/shared-types").RecognitionResultTypeV1.SESSION_CONFIGURED>;
|
|
27
|
+
audioUtteranceId: z.ZodString;
|
|
28
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
29
|
+
model: z.ZodOptional<z.ZodString>;
|
|
30
|
+
sampleRate: z.ZodOptional<z.ZodNumber>;
|
|
31
|
+
encoding: z.ZodOptional<z.ZodString>;
|
|
32
|
+
apiType: z.ZodOptional<z.ZodNativeEnum<typeof import("@recog/shared-types").ASRApiType>>;
|
|
33
|
+
isFallback: z.ZodOptional<z.ZodBoolean>;
|
|
34
|
+
asrRequest: z.ZodOptional<z.ZodString>;
|
|
35
|
+
providerConfig: z.ZodOptional<z.ZodString>;
|
|
36
|
+
}, "strip", z.ZodTypeAny, {
|
|
37
|
+
type: import("@recog/shared-types").RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
38
|
+
audioUtteranceId: string;
|
|
39
|
+
provider?: string | undefined;
|
|
40
|
+
model?: string | undefined;
|
|
41
|
+
sampleRate?: number | undefined;
|
|
42
|
+
encoding?: string | undefined;
|
|
43
|
+
apiType?: import("@recog/shared-types").ASRApiType | undefined;
|
|
44
|
+
isFallback?: boolean | undefined;
|
|
45
|
+
asrRequest?: string | undefined;
|
|
46
|
+
providerConfig?: string | undefined;
|
|
47
|
+
}, {
|
|
48
|
+
type: import("@recog/shared-types").RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
49
|
+
audioUtteranceId: string;
|
|
50
|
+
provider?: string | undefined;
|
|
51
|
+
model?: string | undefined;
|
|
52
|
+
sampleRate?: number | undefined;
|
|
53
|
+
encoding?: string | undefined;
|
|
54
|
+
apiType?: import("@recog/shared-types").ASRApiType | undefined;
|
|
55
|
+
isFallback?: boolean | undefined;
|
|
56
|
+
asrRequest?: string | undefined;
|
|
57
|
+
providerConfig?: string | undefined;
|
|
58
|
+
}>>;
|
|
24
59
|
startRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
25
60
|
finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
26
61
|
finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
|
|
@@ -30,6 +65,28 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
30
65
|
functionCallConfidence: z.ZodOptional<z.ZodNumber>;
|
|
31
66
|
finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
|
|
32
67
|
promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
|
|
68
|
+
promptSTT: z.ZodOptional<z.ZodString>;
|
|
69
|
+
promptSTF: z.ZodOptional<z.ZodString>;
|
|
70
|
+
promptTTF: z.ZodOptional<z.ZodString>;
|
|
71
|
+
detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
72
|
+
type: z.ZodNativeEnum<typeof import("@recog/shared-types").DetectionTypeV1>;
|
|
73
|
+
query: z.ZodString;
|
|
74
|
+
score: z.ZodNumber;
|
|
75
|
+
startMs: z.ZodOptional<z.ZodNumber>;
|
|
76
|
+
endMs: z.ZodOptional<z.ZodNumber>;
|
|
77
|
+
}, "strip", z.ZodTypeAny, {
|
|
78
|
+
type: import("@recog/shared-types").DetectionTypeV1;
|
|
79
|
+
query: string;
|
|
80
|
+
score: number;
|
|
81
|
+
startMs?: number | undefined;
|
|
82
|
+
endMs?: number | undefined;
|
|
83
|
+
}, {
|
|
84
|
+
type: import("@recog/shared-types").DetectionTypeV1;
|
|
85
|
+
query: string;
|
|
86
|
+
score: number;
|
|
87
|
+
startMs?: number | undefined;
|
|
88
|
+
endMs?: number | undefined;
|
|
89
|
+
}>, "many">>;
|
|
33
90
|
recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
|
|
34
91
|
}, "strip", z.ZodTypeAny, {
|
|
35
92
|
audioUtteranceId: string;
|
|
@@ -40,7 +97,20 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
40
97
|
finalConfidence?: number | undefined;
|
|
41
98
|
voiceEnd?: number | undefined;
|
|
42
99
|
lastNonSilence?: number | undefined;
|
|
100
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
43
101
|
asrConfig?: string | undefined;
|
|
102
|
+
sessionConfigured?: {
|
|
103
|
+
type: import("@recog/shared-types").RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
104
|
+
audioUtteranceId: string;
|
|
105
|
+
provider?: string | undefined;
|
|
106
|
+
model?: string | undefined;
|
|
107
|
+
sampleRate?: number | undefined;
|
|
108
|
+
encoding?: string | undefined;
|
|
109
|
+
apiType?: import("@recog/shared-types").ASRApiType | undefined;
|
|
110
|
+
isFallback?: boolean | undefined;
|
|
111
|
+
asrRequest?: string | undefined;
|
|
112
|
+
providerConfig?: string | undefined;
|
|
113
|
+
} | undefined;
|
|
44
114
|
startRecordingTimestamp?: string | undefined;
|
|
45
115
|
finalRecordingTimestamp?: string | undefined;
|
|
46
116
|
finalTranscriptionTimestamp?: string | undefined;
|
|
@@ -49,6 +119,16 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
49
119
|
functionCallConfidence?: number | undefined;
|
|
50
120
|
finalFunctionCallTimestamp?: string | undefined;
|
|
51
121
|
promptSlotMap?: Record<string, string[]> | undefined;
|
|
122
|
+
promptSTT?: string | undefined;
|
|
123
|
+
promptSTF?: string | undefined;
|
|
124
|
+
promptTTF?: string | undefined;
|
|
125
|
+
detections?: {
|
|
126
|
+
type: import("@recog/shared-types").DetectionTypeV1;
|
|
127
|
+
query: string;
|
|
128
|
+
score: number;
|
|
129
|
+
startMs?: number | undefined;
|
|
130
|
+
endMs?: number | undefined;
|
|
131
|
+
}[] | undefined;
|
|
52
132
|
recognitionActionProcessingState?: string | undefined;
|
|
53
133
|
}, {
|
|
54
134
|
audioUtteranceId: string;
|
|
@@ -58,7 +138,20 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
58
138
|
finalConfidence?: number | undefined;
|
|
59
139
|
voiceEnd?: number | undefined;
|
|
60
140
|
lastNonSilence?: number | undefined;
|
|
141
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
61
142
|
asrConfig?: string | undefined;
|
|
143
|
+
sessionConfigured?: {
|
|
144
|
+
type: import("@recog/shared-types").RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
145
|
+
audioUtteranceId: string;
|
|
146
|
+
provider?: string | undefined;
|
|
147
|
+
model?: string | undefined;
|
|
148
|
+
sampleRate?: number | undefined;
|
|
149
|
+
encoding?: string | undefined;
|
|
150
|
+
apiType?: import("@recog/shared-types").ASRApiType | undefined;
|
|
151
|
+
isFallback?: boolean | undefined;
|
|
152
|
+
asrRequest?: string | undefined;
|
|
153
|
+
providerConfig?: string | undefined;
|
|
154
|
+
} | undefined;
|
|
62
155
|
startRecordingTimestamp?: string | undefined;
|
|
63
156
|
finalRecordingTimestamp?: string | undefined;
|
|
64
157
|
finalTranscriptionTimestamp?: string | undefined;
|
|
@@ -68,6 +161,16 @@ export declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
68
161
|
functionCallConfidence?: number | undefined;
|
|
69
162
|
finalFunctionCallTimestamp?: string | undefined;
|
|
70
163
|
promptSlotMap?: Record<string, string[]> | undefined;
|
|
164
|
+
promptSTT?: string | undefined;
|
|
165
|
+
promptSTF?: string | undefined;
|
|
166
|
+
promptTTF?: string | undefined;
|
|
167
|
+
detections?: {
|
|
168
|
+
type: import("@recog/shared-types").DetectionTypeV1;
|
|
169
|
+
query: string;
|
|
170
|
+
score: number;
|
|
171
|
+
startMs?: number | undefined;
|
|
172
|
+
endMs?: number | undefined;
|
|
173
|
+
}[] | undefined;
|
|
71
174
|
recognitionActionProcessingState?: string | undefined;
|
|
72
175
|
}>;
|
|
73
176
|
export type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;
|
|
1
|
+
{"version":3,"file":"vgf-recognition-state.d.ts","sourceRoot":"","sources":["../src/vgf-recognition-state.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAGvB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkDpC,CAAA;AAEF,MAAM,MAAM,gBAAgB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAA;AAGxE,eAAO,MAAM,eAAe;;;;;CAKlB,CAAA;AAEV,MAAM,MAAM,mBAAmB,GAAG,OAAO,eAAe,CAAC,MAAM,OAAO,eAAe,CAAC,CAAA;AAEtF,eAAO,MAAM,mBAAmB;;;;;;CAMtB,CAAA;AAEV,MAAM,MAAM,uBAAuB,GAAG,OAAO,mBAAmB,CAAC,MAAM,OAAO,mBAAmB,CAAC,CAAA;AAElG,eAAO,MAAM,gCAAgC;;;;CAInC,CAAA;AAEV,MAAM,MAAM,oCAAoC,GAAG,OAAO,gCAAgC,CAAC,MAAM,OAAO,gCAAgC,CAAC,CAAA;AAGzI,wBAAgB,6BAA6B,CAAC,gBAAgB,EAAE,MAAM,GAAG,gBAAgB,CAQxF;AAGD,wBAAgB,gCAAgC,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAa9F"}
|
package/package.json
CHANGED
package/src/index.spec.ts
CHANGED
|
@@ -6,6 +6,8 @@ describe('SDK top-level exports', () => {
|
|
|
6
6
|
expect(RecognitionProvider.SELF_SERVE_VLLM).toBe('self-serve-vllm');
|
|
7
7
|
expect(CartesiaModel.INK_WHISPER).toBe('ink-whisper');
|
|
8
8
|
expect(CartesiaModel.INK_WHISPER_20250604).toBe('ink-whisper-2025-06-04');
|
|
9
|
+
expect(SelfServeVllmModel.QWEN3_ASR_0_6B).toBe('qwen3-asr-0.6b');
|
|
10
|
+
expect(SelfServeVllmModel.QWEN3_ASR_0_6B_WOF_LETTER).toBe('qwen3-asr-0.6b-wof-letter');
|
|
9
11
|
expect(SelfServeVllmModel.QWEN3_ASR_1_7B).toBe('qwen3-asr-1.7b');
|
|
10
12
|
});
|
|
11
13
|
});
|
|
@@ -65,6 +65,7 @@ import type {
|
|
|
65
65
|
import { buildWebSocketUrl } from './utils/url-builder.js';
|
|
66
66
|
import { AudioRingBuffer } from './utils/audio-ring-buffer.js';
|
|
67
67
|
import { MessageHandler } from './utils/message-handler.js';
|
|
68
|
+
import { downsamplePcm16 } from './utils/audio-resampler.js';
|
|
68
69
|
import { ConnectionError } from './errors.js';
|
|
69
70
|
|
|
70
71
|
// ============================================================================
|
|
@@ -215,11 +216,24 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
215
216
|
const maxAttempts = Math.max(1, Math.min(5, retryConfig.maxAttempts ?? 4)); // Default: 4 attempts (3 retries), clamp 1-5
|
|
216
217
|
const delayMs = retryConfig.delayMs ?? 200; // Fast retry for short audio sessions
|
|
217
218
|
|
|
219
|
+
// Normalize encoding to AudioEncoding enum at construction so the binary
|
|
220
|
+
// frame header (uint32) and ASRRequest serialization both get a number.
|
|
221
|
+
// String inputs ('linear16', 'LINEAR16') are accepted with a warn-level
|
|
222
|
+
// log; invalid strings throw via AudioEncoding.coerce().
|
|
223
|
+
const normalizedASRConfig = config.asrRequestConfig
|
|
224
|
+
? {
|
|
225
|
+
...config.asrRequestConfig,
|
|
226
|
+
encoding: AudioEncoding.coerce(config.asrRequestConfig.encoding, (warning) =>
|
|
227
|
+
config.logger?.('warn', warning)
|
|
228
|
+
),
|
|
229
|
+
}
|
|
230
|
+
: undefined;
|
|
231
|
+
|
|
218
232
|
// Process config with defaults
|
|
219
233
|
this.config = {
|
|
220
234
|
url,
|
|
221
235
|
audioUtteranceId,
|
|
222
|
-
...(
|
|
236
|
+
...(normalizedASRConfig && { asrRequestConfig: normalizedASRConfig }),
|
|
223
237
|
...(config.gameContext && { gameContext: config.gameContext }),
|
|
224
238
|
...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
|
|
225
239
|
onTranscript: config.onTranscript || (() => {}),
|
|
@@ -488,6 +502,53 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
488
502
|
this.sendAudioInternal(audioData);
|
|
489
503
|
}
|
|
490
504
|
|
|
505
|
+
/**
|
|
506
|
+
* Send PCM16 mono audio captured at any sample rate. The SDK downsamples
|
|
507
|
+
* to the session's target rate (currently 16 kHz per server validator)
|
|
508
|
+
* before sending.
|
|
509
|
+
*
|
|
510
|
+
* Use this when your capture pipeline produces audio at the system's
|
|
511
|
+
* native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
|
|
512
|
+
* desktop/mobile hardware — and you don't want to bring your own
|
|
513
|
+
* resampler. If your audio is already at the target rate, prefer
|
|
514
|
+
* `sendAudio()` to skip the resample step.
|
|
515
|
+
*
|
|
516
|
+
* Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
|
|
517
|
+
* dependencies, has a built-in low-pass effect so aliasing stays out of
|
|
518
|
+
* the speech band. Suitable for ASR; not a substitute for a high-quality
|
|
519
|
+
* resampler if you're doing music or full-fidelity processing.
|
|
520
|
+
*
|
|
521
|
+
* Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
|
|
522
|
+
* mixed to mono by the caller.
|
|
523
|
+
*
|
|
524
|
+
* @param audioData - PCM16 mono audio at `sourceSampleRate`.
|
|
525
|
+
* @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
|
|
526
|
+
*/
|
|
527
|
+
sendAudioWithSampleRate(
|
|
528
|
+
audioData: ArrayBuffer | ArrayBufferView | Blob,
|
|
529
|
+
sourceSampleRate: number
|
|
530
|
+
): void {
|
|
531
|
+
const targetRate =
|
|
532
|
+
typeof this.config.asrRequestConfig?.sampleRate === 'number'
|
|
533
|
+
? this.config.asrRequestConfig.sampleRate
|
|
534
|
+
: SampleRate.RATE_16000;
|
|
535
|
+
|
|
536
|
+
if (audioData instanceof Blob) {
|
|
537
|
+
blobToArrayBuffer(audioData)
|
|
538
|
+
.then((arrayBuffer) => {
|
|
539
|
+
this.sendAudioInternal(
|
|
540
|
+
downsamplePcm16(arrayBuffer, sourceSampleRate, targetRate)
|
|
541
|
+
);
|
|
542
|
+
})
|
|
543
|
+
.catch((error) => {
|
|
544
|
+
this.log('warn', 'Failed to convert Blob to ArrayBuffer', error);
|
|
545
|
+
});
|
|
546
|
+
return;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
this.sendAudioInternal(downsamplePcm16(audioData, sourceSampleRate, targetRate));
|
|
550
|
+
}
|
|
551
|
+
|
|
491
552
|
private sendAudioInternal(audioData: ArrayBuffer | ArrayBufferView): void {
|
|
492
553
|
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
493
554
|
if (bytes === 0) return;
|
|
@@ -692,10 +753,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
692
753
|
typeof this.config.asrRequestConfig.sampleRate === 'number'
|
|
693
754
|
? this.config.asrRequestConfig.sampleRate
|
|
694
755
|
: SampleRate.RATE_16000,
|
|
695
|
-
encoding:
|
|
696
|
-
typeof this.config.asrRequestConfig.encoding === 'number'
|
|
697
|
-
? this.config.asrRequestConfig.encoding
|
|
698
|
-
: AudioEncoding.LINEAR16,
|
|
756
|
+
encoding: this.config.asrRequestConfig.encoding as AudioEncoding,
|
|
699
757
|
interimResults: this.config.asrRequestConfig.interimResults ?? false,
|
|
700
758
|
// Auto-enable useContext if gameContext is provided, or use explicit value if set
|
|
701
759
|
useContext: this.config.asrRequestConfig.useContext ?? !!this.config.gameContext,
|
|
@@ -892,7 +950,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
892
950
|
? audioData.byteLength
|
|
893
951
|
: audioData.byteLength;
|
|
894
952
|
|
|
895
|
-
const encodingId = (this.config.asrRequestConfig?.encoding
|
|
953
|
+
const encodingId = (this.config.asrRequestConfig?.encoding ??
|
|
896
954
|
AudioEncoding.LINEAR16) as AudioEncoding;
|
|
897
955
|
|
|
898
956
|
const sampleRate =
|
|
@@ -987,7 +1045,7 @@ export class RealTimeTwoWayWebSocketRecognitionClient
|
|
|
987
1045
|
|
|
988
1046
|
if (byteLength === 0) return;
|
|
989
1047
|
|
|
990
|
-
const baseEncodingId = (this.config.asrRequestConfig?.encoding
|
|
1048
|
+
const baseEncodingId = (this.config.asrRequestConfig?.encoding ??
|
|
991
1049
|
AudioEncoding.LINEAR16) as AudioEncoding;
|
|
992
1050
|
|
|
993
1051
|
// Add offset to mark as prefix audio
|