@neta-art/cohub 1.21.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ import { s as resolveVoiceInputWebsocketUrl } from "./chunks/environment.js";
2
+ //#region src/voice-input.ts
3
/** Sample rate, in Hz, that captured audio is resampled to before it is sent. */
const TARGET_SAMPLE_RATE = 16000;
/** Number of samples per outgoing audio chunk: 200 ms worth at the target rate. */
const CHUNK_SAMPLES = (TARGET_SAMPLE_RATE * 200) / 1000;
/** Default time limit, in milliseconds, for each connection phase of a start. */
const DEFAULT_CONNECTION_TIMEOUT_MS = 10000;
/** Default delay, in milliseconds, before an unused socket is closed (30 minutes). */
const DEFAULT_IDLE_CONNECTION_TIMEOUT_MS = 30 * 60000;
/** `readyState` value that the client treats as "socket is open". */
const WEBSOCKET_OPEN = 1;
8
+ const getDefaultWebSocket = () => {
9
+ const WebSocketImpl = globalThis.WebSocket;
10
+ if (!WebSocketImpl) throw new Error("WebSocket is not available in this environment");
11
+ return WebSocketImpl;
12
+ };
13
+ const getDefaultAudioContext = () => {
14
+ const context = globalThis;
15
+ return context.AudioContext ?? context.webkitAudioContext;
16
+ };
17
/**
 * Encodes a sequence of byte values as a Base64 string.
 *
 * Uses the global `btoa` when it is a function; otherwise falls back to a
 * global `Buffer` if one exists.
 *
 * @param bytes indexable byte values (missing entries are treated as 0 on the `btoa` path).
 * @returns the Base64 text.
 * @throws {Error} when neither `btoa` nor `Buffer` is available.
 */
const encodeBase64 = (bytes) => {
  if (typeof btoa !== "function") {
    const nodeBuffer = globalThis["Buffer"];
    if (!nodeBuffer) throw new Error("Base64 encoding is not available in this environment");
    return nodeBuffer.from(bytes).toString("base64");
  }
  // btoa expects a "binary string": one character per byte.
  const chars = [];
  for (let offset = 0; offset < bytes.length; offset += 1) {
    chars.push(String.fromCharCode(bytes[offset] ?? 0));
  }
  return btoa(chars.join(""));
};
27
/**
 * Converts floating-point samples to 16-bit little-endian PCM bytes.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 32768 and
 * non-negative values by 32767 before being written as a signed 16-bit integer.
 *
 * @param samples indexable float samples.
 * @returns a `Uint8Array` holding two bytes per input sample.
 */
const floatToPcm16 = (samples) => {
  const pcmBytes = new Uint8Array(samples.length * 2);
  const writer = new DataView(pcmBytes.buffer);
  let byteOffset = 0;
  for (let n = 0; n < samples.length; n += 1) {
    const clamped = Math.max(-1, Math.min(1, samples[n] ?? 0));
    const scale = clamped < 0 ? 32768 : 32767;
    writer.setInt16(byteOffset, clamped * scale, true);
    byteOffset += 2;
  }
  return pcmBytes;
};
36
/**
 * Resamples audio to `TARGET_SAMPLE_RATE` using linear interpolation between
 * neighbouring input samples.
 *
 * @param input source samples.
 * @param inputSampleRate sample rate of `input`, in Hz.
 * @returns `input` itself when it is already at the target rate; otherwise a
 *   new `Float32Array` of `floor(input.length / ratio)` samples.
 */
const resampleTo16k = (input, inputSampleRate) => {
  if (inputSampleRate === TARGET_SAMPLE_RATE) return input;
  const step = inputSampleRate / TARGET_SAMPLE_RATE;
  const outputLength = Math.floor(input.length / step);
  const resampled = new Float32Array(outputLength);
  const lastIndex = input.length - 1;
  for (let out = 0; out < outputLength; out += 1) {
    // Fractional position of this output sample within the input.
    const position = out * step;
    const lower = Math.floor(position);
    const upper = Math.min(lower + 1, lastIndex);
    const fraction = position - lower;
    const lowerValue = input[lower] ?? 0;
    const upperValue = input[upper] ?? 0;
    resampled[out] = lowerValue * (1 - fraction) + upperValue * fraction;
  }
  return resampled;
};
50
/**
 * Reads the error code carried by a server message.
 *
 * @param event parsed message whose optional `payload.code` is inspected.
 * @returns `payload.code` when it is a string, otherwise `null`.
 */
const getErrorCode = (event) => {
  const payload = event.payload;
  const code = payload === null || payload === undefined ? undefined : payload.code;
  if (typeof code !== "string") return null;
  return code;
};
54
/**
 * Reads the human-readable error text carried by a server message.
 *
 * @param event parsed message whose optional `payload.message` is inspected.
 * @returns `payload.message` when it is a string, otherwise a generic fallback.
 */
const getErrorMessage = (event) => {
  const payload = event.payload;
  const text = payload === null || payload === undefined ? undefined : payload.message;
  if (typeof text !== "string") return "Voice input failed";
  return text;
};
58
/**
 * Captures microphone audio, converts it to 16-bit PCM chunks and streams it
 * as JSON messages over a WebSocket, reporting recognition results through
 * the supplied callbacks (`onPartial`, `onFinal`, `onDone`, `onError`).
 *
 * The socket is kept open between sessions and closed after
 * `idleConnectionTimeoutMs` without use.
 */
var VoiceInputClient = class {
  url;
  getAccessToken;
  WebSocketImpl;
  connectionTimeoutMs;
  idleConnectionTimeoutMs;
  callbacks;
  socket = null;
  stream = null;
  audioContext = null;
  processor = null;
  source = null;
  pendingSamples = [];
  pendingAudio = [];
  started = false;
  asrStarted = false;
  authenticated = false;
  intentionalClose = false;
  startPromise = null;
  socketOpenPromise = null;
  idleCloseTimer = null;
  authWaiter = null;
  asrStartWaiter = null;
  // Bumped whenever a start attempt fails, so an audio setup that is still
  // waiting on the browser can tell that its result is no longer wanted.
  audioSetupToken = 0;

  /**
   * @param options optional settings: `env`/`url` (passed to the URL resolver),
   *   `getAccessToken`, `WebSocketImpl`, `connectionTimeoutMs`,
   *   `idleConnectionTimeoutMs` and `callbacks`.
   * @throws {Error} when no `WebSocketImpl` is given and none exists globally.
   */
  constructor(options = {}) {
    this.url = resolveVoiceInputWebsocketUrl({
      env: options.env,
      url: options.url
    });
    this.getAccessToken = options.getAccessToken;
    this.WebSocketImpl = options.WebSocketImpl ?? getDefaultWebSocket();
    this.connectionTimeoutMs = options.connectionTimeoutMs ?? DEFAULT_CONNECTION_TIMEOUT_MS;
    this.idleConnectionTimeoutMs = options.idleConnectionTimeoutMs ?? DEFAULT_IDLE_CONNECTION_TIMEOUT_MS;
    this.callbacks = options.callbacks ?? {};
  }

  /**
   * Starts a voice session. Concurrent calls share the in-flight start, and a
   * call made while already started is a no-op.
   *
   * @returns a promise that settles when the start attempt finishes.
   */
  async start() {
    if (this.startPromise) return this.startPromise;
    if (this.started) return;
    this.startPromise = this.startInternal().finally(() => {
      this.startPromise = null;
    });
    return this.startPromise;
  }

  /**
   * Ends the session normally: sends any buffered samples, tells the server
   * to stop, releases the audio resources and schedules the idle close.
   */
  stop() {
    if (this.pendingSamples.length > 0) this.sendAudio(new Float32Array(this.pendingSamples.splice(0)));
    this.flushPendingAudio();
    if (this.asrStarted) this.send({ type: "asr.stop" });
    this.cleanupAudio();
    this.started = false;
    this.scheduleIdleClose();
  }

  /** Aborts the session: buffered audio is discarded and `asr.cancel` is sent. */
  cancel() {
    if (this.asrStarted) this.send({ type: "asr.cancel" });
    this.cleanupAudio();
    this.started = false;
    this.scheduleIdleClose();
  }

  /** Releases the audio resources and closes the socket immediately. */
  close() {
    this.intentionalClose = true;
    this.clearIdleCloseTimer();
    this.cleanupAudio();
    this.closeSocket();
    this.started = false;
  }

  /**
   * Performs one start attempt: sets up audio capture and an authenticated
   * socket in parallel, then opens the recognition session. Each phase is
   * bounded by the connection timeout.
   *
   * @throws whatever caused the attempt to fail, after local cleanup.
   */
  async startInternal() {
    this.clearIdleCloseTimer();
    this.started = true;
    this.asrStarted = false;
    this.pendingAudio = [];
    this.intentionalClose = false;
    try {
      await this.withConnectionTimeout(Promise.all([this.setupAudio(), this.ensureAuthenticatedSocket()]));
      await this.withConnectionTimeout(this.startAsrSession());
    } catch (error) {
      // Invalidate any audio setup still awaiting the browser (for example a
      // microphone permission prompt that outlived the timeout) so that it
      // releases the stream instead of leaving the microphone on.
      this.audioSetupToken += 1;
      this.cleanupAudio();
      this.started = false;
      this.scheduleIdleClose();
      throw error;
    }
  }

  /**
   * Races `promise` against the connection timeout.
   *
   * @param promise the work to bound.
   * @returns the value `promise` resolves with.
   * @throws {Error} "Voice connection timed out" when the timer wins.
   */
  async withConnectionTimeout(promise) {
    let timeout = null;
    try {
      return await Promise.race([promise, new Promise((_, reject) => {
        timeout = globalThis.setTimeout(() => reject(/* @__PURE__ */ new Error("Voice connection timed out")), this.connectionTimeoutMs);
      })]);
    } finally {
      if (timeout !== null) globalThis.clearTimeout(timeout);
    }
  }

  /**
   * Makes sure an open, authenticated socket exists. An "UNAUTHORIZED"
   * failure is retried once with a force-refreshed token.
   */
  async ensureAuthenticatedSocket() {
    if (this.socket?.readyState === WEBSOCKET_OPEN && this.authenticated) return;
    await this.ensureSocketOpen();
    if (this.authenticated) return;
    try {
      await this.authenticate(false);
    } catch (error) {
      if (!(error instanceof Error) || error.message !== "UNAUTHORIZED") throw error;
      await this.authenticate(true);
    }
  }

  /**
   * Opens the socket if it is not already open, sharing one in-flight open
   * between concurrent callers.
   *
   * Handlers only touch client state while their socket is still the current
   * one, so late events from a socket that was closed or replaced cannot
   * disturb a newer connection.
   *
   * @throws {Error} when the socket errors or closes before opening.
   */
  async ensureSocketOpen() {
    if (this.socket?.readyState === WEBSOCKET_OPEN) return;
    if (this.socketOpenPromise) return this.socketOpenPromise;
    this.authenticated = false;
    this.intentionalClose = false;
    const socket = new this.WebSocketImpl(this.url);
    this.socket = socket;
    const openPromise = new Promise((resolve, reject) => {
      socket.onopen = () => resolve();
      socket.onerror = () => reject(/* @__PURE__ */ new Error("Voice service unavailable"));
      socket.onclose = (event) => {
        if (this.socket === socket) {
          this.authenticated = false;
          this.socketOpenPromise = null;
          this.rejectAuthWaiter(/* @__PURE__ */ new Error("Voice connection closed"));
          this.rejectAsrStartWaiter(/* @__PURE__ */ new Error("Voice connection closed"));
          this.socket = null;
          if (!this.intentionalClose && this.started) {
            this.cleanupAudio();
            this.started = false;
            this.callbacks.onError?.("Voice connection closed. Try again.");
            this.callbacks.onDone?.();
          }
        }
        // Has no effect once the open promise has already resolved.
        reject(new Error(event?.reason || "Voice connection closed"));
      };
      socket.onmessage = (event) => {
        // Ignore traffic from a socket this client no longer owns.
        if (this.socket !== socket) return;
        try {
          this.handleMessage(event);
        } catch {
          this.closeWithError("Voice service sent invalid data. Try again.");
        }
      };
    }).finally(() => {
      // Only clear the slot if it still refers to this open attempt.
      if (this.socketOpenPromise === openPromise) this.socketOpenPromise = null;
    });
    this.socketOpenPromise = openPromise;
    return openPromise;
  }

  /**
   * Sends an `auth` message and waits for the server's answer.
   *
   * @param forceRefresh forwarded to `getAccessToken`.
   * @throws {Error} "Sign in to use voice input" when no token is available,
   *   or the rejection delivered to the auth waiter.
   */
  async authenticate(forceRefresh) {
    const token = await this.getAccessToken?.({ forceRefresh });
    if (!token) throw new Error("Sign in to use voice input");
    const waiter = this.createAuthWaiter();
    this.send({
      type: "auth",
      payload: { token }
    });
    await waiter.promise;
  }

  /** Sends `asr.start` and waits until the server confirms or rejects it. */
  async startAsrSession() {
    const waiter = this.createAsrStartWaiter();
    this.send({ type: "asr.start" });
    await waiter.promise;
  }

  /** Creates the pending-auth waiter, rejecting any previous one first. */
  createAuthWaiter() {
    this.rejectAuthWaiter(/* @__PURE__ */ new Error("superseded auth waiter"));
    let resolve;
    let reject;
    const promise = new Promise((res, rej) => {
      resolve = res;
      reject = rej;
    });
    this.authWaiter = {
      promise,
      resolve,
      reject
    };
    return this.authWaiter;
  }

  /** Resolves and clears the pending-auth waiter, if any. */
  resolveAuthWaiter() {
    if (!this.authWaiter) return;
    this.authWaiter.resolve();
    this.authWaiter = null;
  }

  /** Rejects and clears the pending-auth waiter, if any. */
  rejectAuthWaiter(error) {
    if (!this.authWaiter) return;
    this.authWaiter.reject(error);
    this.authWaiter = null;
  }

  /** Creates the pending session-start waiter, rejecting any previous one first. */
  createAsrStartWaiter() {
    this.rejectAsrStartWaiter(/* @__PURE__ */ new Error("superseded asr start waiter"));
    let resolve;
    let reject;
    const promise = new Promise((res, rej) => {
      resolve = res;
      reject = rej;
    });
    this.asrStartWaiter = {
      promise,
      resolve,
      reject
    };
    return this.asrStartWaiter;
  }

  /** Resolves and clears the pending session-start waiter, if any. */
  resolveAsrStartWaiter() {
    if (!this.asrStartWaiter) return;
    this.asrStartWaiter.resolve();
    this.asrStartWaiter = null;
  }

  /** Rejects and clears the pending session-start waiter, if any. */
  rejectAsrStartWaiter(error) {
    if (!this.asrStartWaiter) return;
    this.asrStartWaiter.reject(error);
    this.asrStartWaiter = null;
  }

  /** Reports `message` through `onError`, closes the client, then calls `onDone`. */
  closeWithError(message) {
    this.callbacks.onError?.(message);
    this.close();
    this.callbacks.onDone?.();
  }

  /**
   * Acquires the microphone and wires it through a script processor that
   * resamples the audio and emits it in `CHUNK_SAMPLES`-sized chunks.
   *
   * If the owning start attempt fails while this method is still waiting,
   * the acquired resources are released and nothing is wired up.
   *
   * @throws {Error} when `navigator.mediaDevices` or an audio context
   *   constructor is unavailable, or when `getUserMedia` rejects.
   */
  async setupAudio() {
    const mediaDevices = globalThis.navigator?.mediaDevices;
    if (!mediaDevices) throw new Error("Microphone input is not available in this environment");
    const AudioContextImpl = getDefaultAudioContext();
    if (!AudioContextImpl) throw new Error("AudioContext is not available in this environment");
    const token = this.audioSetupToken;
    const stream = await mediaDevices.getUserMedia({ audio: true });
    if (token !== this.audioSetupToken) {
      // The attempt was abandoned before the stream arrived, so cleanupAudio()
      // never saw it. Stop it here or the microphone stays on.
      stream.getTracks().forEach((track) => {
        track.stop();
      });
      return;
    }
    this.stream = stream;
    const audioContext = new AudioContextImpl();
    this.audioContext = audioContext;
    await audioContext.resume().catch(() => void 0);
    if (token !== this.audioSetupToken) {
      // Abandoned while resuming. Releasing again is harmless if cleanupAudio()
      // already did it, and the client's fields must not be touched here
      // because they may belong to a newer attempt.
      stream.getTracks().forEach((track) => {
        track.stop();
      });
      audioContext.close().catch(() => void 0);
      return;
    }
    const source = audioContext.createMediaStreamSource(stream);
    const processor = audioContext.createScriptProcessor(4096, 1, 1);
    this.source = source;
    this.processor = processor;
    processor.onaudioprocess = (event) => {
      const resampled = resampleTo16k(event.inputBuffer.getChannelData(0), audioContext.sampleRate ?? TARGET_SAMPLE_RATE);
      for (const sample of resampled) this.pendingSamples.push(sample);
      while (this.pendingSamples.length >= CHUNK_SAMPLES) {
        const chunk = this.pendingSamples.splice(0, CHUNK_SAMPLES);
        this.sendAudio(new Float32Array(chunk));
      }
    };
    source.connect(processor);
    processor.connect(audioContext.destination);
  }

  /**
   * Encodes `samples` as Base64 PCM and sends them, or buffers them until the
   * recognition session has started.
   */
  sendAudio(samples) {
    const audio = encodeBase64(floatToPcm16(samples));
    if (!this.asrStarted) {
      this.pendingAudio.push(audio);
      return;
    }
    this.send({
      type: "asr.audio",
      payload: { audio }
    });
  }

  /** Sends all buffered audio, in order, once the session has started. */
  flushPendingAudio() {
    if (!this.asrStarted) return;
    for (const audio of this.pendingAudio.splice(0)) this.send({
      type: "asr.audio",
      payload: { audio }
    });
  }

  /** Sends `message` as JSON; silently does nothing when the socket is not open. */
  send(message) {
    if (this.socket?.readyState === WEBSOCKET_OPEN) this.socket.send(JSON.stringify(message));
  }

  /**
   * Parses one server message and updates state / invokes callbacks for the
   * message types this client understands.
   *
   * @param event message event whose `data` is JSON text.
   * @returns the parsed message.
   * @throws when `event.data` is not valid JSON or has an unusable shape.
   */
  handleMessage(event) {
    const data = JSON.parse(String(event.data));
    const text = typeof data.payload?.text === "string" ? data.payload.text : "";
    if (data.type === "system.auth.ok") {
      this.authenticated = true;
      this.resolveAuthWaiter();
      return data;
    }
    if (data.type === "asr.started") {
      this.asrStarted = true;
      this.flushPendingAudio();
      this.resolveAsrStartWaiter();
      return data;
    }
    if (data.type === "asr.error") {
      const message = getErrorMessage(data);
      if (getErrorCode(data) === "UNAUTHORIZED") {
        this.authenticated = false;
        this.rejectAuthWaiter(/* @__PURE__ */ new Error("UNAUTHORIZED"));
      }
      this.rejectAsrStartWaiter(new Error(message));
      this.callbacks.onError?.(message);
      return data;
    }
    if (data.type === "asr.partial") this.callbacks.onPartial?.(text);
    if (data.type === "asr.final") this.callbacks.onFinal?.(text);
    if (data.type === "asr.done") {
      this.asrStarted = false;
      this.started = false;
      this.scheduleIdleClose();
      this.callbacks.onDone?.();
    }
    return data;
  }

  /**
   * Disconnects the audio graph, stops the microphone tracks, closes the
   * audio context and clears all buffered audio.
   */
  cleanupAudio() {
    this.processor?.disconnect();
    if (this.processor) this.processor.onaudioprocess = null;
    this.source?.disconnect();
    this.stream?.getTracks().forEach((track) => {
      track.stop();
    });
    this.audioContext?.close().catch(() => void 0);
    this.processor = null;
    this.source = null;
    this.stream = null;
    this.audioContext = null;
    this.pendingSamples = [];
    this.pendingAudio = [];
    this.asrStarted = false;
  }

  /**
   * (Re)arms the timer that closes an unused socket. Does nothing when there
   * is no socket or the idle timeout is not positive.
   */
  scheduleIdleClose() {
    this.clearIdleCloseTimer();
    if (!this.socket || this.idleConnectionTimeoutMs <= 0) return;
    this.idleCloseTimer = globalThis.setTimeout(() => {
      this.intentionalClose = true;
      this.closeSocket();
    }, this.idleConnectionTimeoutMs);
  }

  /** Cancels the idle-close timer, if one is armed. */
  clearIdleCloseTimer() {
    if (!this.idleCloseTimer) return;
    globalThis.clearTimeout(this.idleCloseTimer);
    this.idleCloseTimer = null;
  }

  /** Rejects pending waiters, resets connection state and closes the socket. */
  closeSocket() {
    this.rejectAuthWaiter(/* @__PURE__ */ new Error("Voice connection closed"));
    this.rejectAsrStartWaiter(/* @__PURE__ */ new Error("Voice connection closed"));
    this.authenticated = false;
    this.socketOpenPromise = null;
    this.socket?.close();
    this.socket = null;
  }
};
379
/**
 * Factory for voice input clients that share a set of default options.
 */
class VoiceApi {
  defaults;

  /**
   * @param defaults options applied to every client created by this instance.
   */
  constructor(defaults = {}) {
    this.defaults = defaults;
  }

  /**
   * Builds a client from the stored defaults, overridden by `options`; the
   * `callbacks` argument always takes precedence over both.
   *
   * @param callbacks event callbacks handed to the client.
   * @param options per-client overrides of the defaults.
   * @returns a new `VoiceInputClient`.
   */
  createInputClient(callbacks = {}, options = {}) {
    const clientOptions = { ...this.defaults, ...options, callbacks };
    return new VoiceInputClient(clientOptions);
  }
}
392
/**
 * Convenience wrapper around `new VoiceInputClient(options)`.
 *
 * @param options forwarded unchanged to the `VoiceInputClient` constructor.
 * @returns the newly constructed client.
 */
const createVoiceInputClient = (options) => {
  const client = new VoiceInputClient(options);
  return client;
};
393
+ //#endregion
394
+ export { VoiceApi, VoiceInputClient, createVoiceInputClient };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@neta-art/cohub",
3
- "version": "1.21.0",
3
+ "version": "1.23.0",
4
4
  "description": "Cohub SDK for spaces, sessions, checkpoints, and realtime agent collaboration.",
5
5
  "license": "UNLICENSED",
6
6
  "private": false,
@@ -37,6 +37,10 @@
37
37
  "types": "./dist/websocket.d.ts",
38
38
  "import": "./dist/websocket.js"
39
39
  },
40
+ "./voice-input": {
41
+ "types": "./dist/voice-input.d.ts",
42
+ "import": "./dist/voice-input.js"
43
+ },
40
44
  "./debugger": {
41
45
  "types": "./dist/debugger.d.ts",
42
46
  "import": "./dist/debugger.js"