@craftedxp/voice-js 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +22 -2
- package/dist/browser.js.map +1 -1
- package/dist/browser.mjs +21 -2
- package/dist/browser.mjs.map +1 -1
- package/dist/embed.iife.js +235 -219
- package/package.json +1 -1
package/dist/embed.iife.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
var VoissiaEmbedBundle = (() => {
|
|
3
|
-
function
|
|
3
|
+
function P(n) {
|
|
4
4
|
if (!n) throw new Error('configureVoiceClient: config is required')
|
|
5
5
|
if ('apiKey' in n)
|
|
6
6
|
throw new Error(
|
|
@@ -11,7 +11,7 @@ var VoissiaEmbedBundle = (() => {
|
|
|
11
11
|
throw new Error('configureVoiceClient: fetchToken must be a function')
|
|
12
12
|
return { ...n, apiBase: n.apiBase.replace(/\/+$/, '') }
|
|
13
13
|
}
|
|
14
|
-
function
|
|
14
|
+
function N(n, e) {
|
|
15
15
|
let t =
|
|
16
16
|
n.defaultContext || e.context
|
|
17
17
|
? { ...(n.defaultContext ?? {}), ...(e.context ?? {}) }
|
|
@@ -22,7 +22,7 @@ var VoissiaEmbedBundle = (() => {
|
|
|
22
22
|
: void 0
|
|
23
23
|
return { context: t, metadata: r }
|
|
24
24
|
}
|
|
25
|
-
var
|
|
25
|
+
var L = `// AudioWorklet \u2014 runs off the main thread in the audio rendering graph.
|
|
26
26
|
//
|
|
27
27
|
// Input: Float32 samples at the AudioContext's native sampleRate (typically
|
|
28
28
|
// 48000 Hz on desktop, 44100 Hz on some iOS devices).
|
|
@@ -105,24 +105,24 @@ class MicDownsampler extends AudioWorkletProcessor {
|
|
|
105
105
|
registerProcessor('mic-downsampler', MicDownsampler)
|
|
106
106
|
`
|
|
107
107
|
var K = 100,
|
|
108
|
-
|
|
108
|
+
$ = (n) => {
|
|
109
109
|
let e = null,
|
|
110
110
|
t = null,
|
|
111
111
|
r = null,
|
|
112
112
|
o = null,
|
|
113
113
|
a = null,
|
|
114
114
|
s = null,
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
let
|
|
119
|
-
for (let
|
|
120
|
-
let
|
|
121
|
-
return Math.min(1,
|
|
115
|
+
p = !1,
|
|
116
|
+
d = !1,
|
|
117
|
+
g = (u) => {
|
|
118
|
+
let h = 0
|
|
119
|
+
for (let b = 0; b < u.length; b++) h += u[b] * u[b]
|
|
120
|
+
let k = Math.sqrt(h / u.length)
|
|
121
|
+
return Math.min(1, k * 1.8)
|
|
122
122
|
}
|
|
123
123
|
return {
|
|
124
124
|
start: async () => {
|
|
125
|
-
if (!
|
|
125
|
+
if (!d)
|
|
126
126
|
try {
|
|
127
127
|
;((t = await navigator.mediaDevices.getUserMedia({
|
|
128
128
|
audio: {
|
|
@@ -134,129 +134,129 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
134
134
|
})),
|
|
135
135
|
(e = new AudioContext()),
|
|
136
136
|
e.state === 'suspended' && (await e.resume()))
|
|
137
|
-
let
|
|
138
|
-
|
|
137
|
+
let u = new Blob([L], { type: 'application/javascript' }),
|
|
138
|
+
h = URL.createObjectURL(u)
|
|
139
139
|
try {
|
|
140
|
-
await e.audioWorklet.addModule(
|
|
140
|
+
await e.audioWorklet.addModule(h)
|
|
141
141
|
} finally {
|
|
142
|
-
URL.revokeObjectURL(
|
|
142
|
+
URL.revokeObjectURL(h)
|
|
143
143
|
}
|
|
144
144
|
if (
|
|
145
145
|
((r = e.createMediaStreamSource(t)),
|
|
146
146
|
(o = new AudioWorkletNode(e, 'mic-downsampler')),
|
|
147
|
-
(o.port.onmessage = (
|
|
148
|
-
|
|
147
|
+
(o.port.onmessage = (b) => {
|
|
148
|
+
p || n.onChunk(b.data)
|
|
149
149
|
}),
|
|
150
150
|
n.onVolume)
|
|
151
151
|
) {
|
|
152
152
|
;((a = e.createAnalyser()), (a.fftSize = 256), r.connect(a))
|
|
153
|
-
let
|
|
153
|
+
let b = new Float32Array(a.fftSize)
|
|
154
154
|
s = setInterval(() => {
|
|
155
|
-
a && (a.getFloatTimeDomainData(
|
|
155
|
+
a && (a.getFloatTimeDomainData(b), n.onVolume?.(g(b)))
|
|
156
156
|
}, K)
|
|
157
157
|
}
|
|
158
158
|
r.connect(o)
|
|
159
|
-
let
|
|
160
|
-
;((
|
|
161
|
-
} catch (
|
|
162
|
-
let
|
|
163
|
-
|
|
164
|
-
throw (n.onError?.(
|
|
159
|
+
let k = e.createGain()
|
|
160
|
+
;((k.gain.value = 0), o.connect(k).connect(e.destination), (d = !0))
|
|
161
|
+
} catch (u) {
|
|
162
|
+
let h =
|
|
163
|
+
u instanceof Error ? u : new Error(typeof u == 'string' ? u : 'capture failed')
|
|
164
|
+
throw (n.onError?.(h), h)
|
|
165
165
|
}
|
|
166
166
|
},
|
|
167
167
|
stop: () => {
|
|
168
|
-
if (
|
|
169
|
-
;((
|
|
168
|
+
if (d) {
|
|
169
|
+
;((d = !1), s && (clearInterval(s), (s = null)))
|
|
170
170
|
try {
|
|
171
171
|
;(o?.disconnect(), a?.disconnect(), r?.disconnect())
|
|
172
172
|
} catch {}
|
|
173
173
|
if (((o = null), (a = null), (r = null), t)) {
|
|
174
|
-
for (let
|
|
174
|
+
for (let u of t.getTracks()) u.stop()
|
|
175
175
|
t = null
|
|
176
176
|
}
|
|
177
177
|
;(e && e.state !== 'closed' && e.close().catch(() => {}), (e = null))
|
|
178
178
|
}
|
|
179
179
|
},
|
|
180
|
-
mute: (
|
|
181
|
-
|
|
180
|
+
mute: (u) => {
|
|
181
|
+
p = u
|
|
182
182
|
},
|
|
183
|
-
isCapturing: () =>
|
|
183
|
+
isCapturing: () => d,
|
|
184
184
|
}
|
|
185
185
|
}
|
|
186
|
-
var
|
|
186
|
+
var U = (n = {}) => {
|
|
187
187
|
let e = n.sampleRate ?? 16e3,
|
|
188
188
|
t = null,
|
|
189
189
|
r = null,
|
|
190
190
|
o = null,
|
|
191
191
|
a = null,
|
|
192
192
|
s = 0,
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
193
|
+
p = [],
|
|
194
|
+
d = !1,
|
|
195
|
+
g = async () => {
|
|
196
196
|
if (t) {
|
|
197
197
|
t.state === 'suspended' && (await t.resume())
|
|
198
198
|
return
|
|
199
199
|
}
|
|
200
200
|
if (((t = new AudioContext({ sampleRate: e })), (r = t.createGain()), n.onVolume)) {
|
|
201
201
|
;((o = t.createAnalyser()), (o.fftSize = 256), r.connect(o))
|
|
202
|
-
let
|
|
202
|
+
let f = new Float32Array(o.fftSize)
|
|
203
203
|
a = setInterval(() => {
|
|
204
204
|
if (!o) return
|
|
205
|
-
o.getFloatTimeDomainData(
|
|
206
|
-
let
|
|
207
|
-
for (let
|
|
208
|
-
let
|
|
209
|
-
n.onVolume?.(Math.min(1,
|
|
205
|
+
o.getFloatTimeDomainData(f)
|
|
206
|
+
let i = 0
|
|
207
|
+
for (let C = 0; C < f.length; C++) i += f[C] * f[C]
|
|
208
|
+
let m = Math.sqrt(i / f.length)
|
|
209
|
+
n.onVolume?.(Math.min(1, m * 1.8))
|
|
210
210
|
}, 100)
|
|
211
211
|
}
|
|
212
212
|
;(r.connect(t.destination), (s = t.currentTime))
|
|
213
213
|
},
|
|
214
|
-
|
|
215
|
-
|
|
214
|
+
l = (f) => {
|
|
215
|
+
f !== d && ((d = f), n.onSpeakingChange?.(f))
|
|
216
216
|
},
|
|
217
|
-
|
|
218
|
-
let
|
|
219
|
-
;((
|
|
217
|
+
c = () => {
|
|
218
|
+
let f = t?.currentTime ?? 0
|
|
219
|
+
;((p = p.filter((i) => (i._endsAt ?? 0) > f)), p.length === 0 && l(!1))
|
|
220
220
|
},
|
|
221
|
-
|
|
221
|
+
u = (f) => {
|
|
222
222
|
if (!t) {
|
|
223
|
-
|
|
223
|
+
g().then(() => u(f))
|
|
224
224
|
return
|
|
225
225
|
}
|
|
226
226
|
if (!t || !r) return
|
|
227
|
-
let
|
|
228
|
-
if (
|
|
229
|
-
let
|
|
230
|
-
|
|
231
|
-
for (let w = 0; w <
|
|
232
|
-
let
|
|
233
|
-
;((
|
|
227
|
+
let i = new Int16Array(f)
|
|
228
|
+
if (i.length === 0) return
|
|
229
|
+
let m = t.createBuffer(1, i.length, e),
|
|
230
|
+
C = m.getChannelData(0)
|
|
231
|
+
for (let w = 0; w < i.length; w++) C[w] = i[w] / 32768
|
|
232
|
+
let y = t.createBufferSource()
|
|
233
|
+
;((y.buffer = m), y.connect(r))
|
|
234
234
|
let X = t.currentTime,
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
let
|
|
238
|
-
;((
|
|
235
|
+
_ = Math.max(X, s)
|
|
236
|
+
y.start(_)
|
|
237
|
+
let W = i.length / e
|
|
238
|
+
;((y._endsAt = _ + W), (s = _ + W), p.push(y), l(!0), (y.onended = () => c()))
|
|
239
239
|
},
|
|
240
|
-
|
|
240
|
+
h = () => {
|
|
241
241
|
if (!(!t || !r)) {
|
|
242
|
-
for (let
|
|
242
|
+
for (let f of p)
|
|
243
243
|
try {
|
|
244
|
-
|
|
244
|
+
f.stop()
|
|
245
245
|
} catch {}
|
|
246
|
-
;((
|
|
246
|
+
;((p = []),
|
|
247
247
|
r.disconnect(),
|
|
248
248
|
(r = t.createGain()),
|
|
249
249
|
o && (o.disconnect(), r.connect(o)),
|
|
250
250
|
r.connect(t.destination),
|
|
251
251
|
(s = t.currentTime),
|
|
252
|
-
|
|
252
|
+
l(!1))
|
|
253
253
|
}
|
|
254
254
|
}
|
|
255
255
|
return {
|
|
256
|
-
enqueue:
|
|
257
|
-
flush:
|
|
256
|
+
enqueue: u,
|
|
257
|
+
flush: h,
|
|
258
258
|
close: () => {
|
|
259
|
-
;(
|
|
259
|
+
;(h(),
|
|
260
260
|
a && (clearInterval(a), (a = null)),
|
|
261
261
|
t && t.state !== 'closed' && t.close().catch(() => {}),
|
|
262
262
|
(t = null),
|
|
@@ -264,58 +264,58 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
264
264
|
(o = null))
|
|
265
265
|
},
|
|
266
266
|
resume: async () => {
|
|
267
|
-
await
|
|
267
|
+
await g()
|
|
268
268
|
},
|
|
269
269
|
}
|
|
270
270
|
}
|
|
271
|
-
var
|
|
271
|
+
/**
 * Creates a WebSocket wrapper that reconnects with exponential backoff.
 *
 * @param {object} options
 * @param {string} options.url - URL handed to `wsFactory`.
 * @param {(url: string) => WebSocket} options.wsFactory - Creates the socket.
 * @param {number} [options.maxRetries=3] - Consecutive failed attempts allowed
 *   before a close is reported as permanent.
 * @param {number} [options.initialBackoffMs=500] - Delay before the first retry.
 * @param {number} [options.maxBackoffMs=8000] - Upper bound for the retry delay.
 * @param {(event: object) => void} emit - Receives `{ type: 'open' }`,
 *   `{ type: 'reconnected' }`, `{ type: 'message', data }`,
 *   `{ type: 'error', error }` and `{ type: 'close', code, reason, permanent }`.
 * @returns {{ send: Function, close: Function, readyState: Function }}
 * @throws Whatever `wsFactory` throws on the initial (synchronous) connect.
 */
var D = (options, emit) => {
  const maxRetries = options.maxRetries ?? 3;
  const initialBackoffMs = options.initialBackoffMs ?? 500;
  const maxBackoffMs = options.maxBackoffMs ?? 8e3;

  let socket = null;
  let closedByClient = false;
  let attempt = 0;
  let backoffMs = initialBackoffMs;
  let retryTimer = null;

  // Reports a lost/failed connection and either schedules the next attempt
  // or declares the close permanent (client asked to close, or retries spent).
  const handleDisconnect = (code, reason) => {
    if (closedByClient || attempt >= maxRetries) {
      emit({ type: 'close', code, reason, permanent: true });
      return;
    }
    emit({ type: 'close', code, reason, permanent: false });
    attempt++;
    const delay = Math.min(backoffMs, maxBackoffMs);
    backoffMs = Math.min(backoffMs * 2, maxBackoffMs);
    retryTimer = setTimeout(reconnect, delay);
  };

  const connect = () => {
    socket = options.wsFactory(options.url);
    socket.binaryType = 'arraybuffer';
    socket.onopen = () => {
      // attempt > 0 means at least one close preceded this open.
      emit(attempt === 0 ? { type: 'open' } : { type: 'reconnected' });
      attempt = 0;
      backoffMs = initialBackoffMs;
    };
    socket.onmessage = (event) => {
      emit({ type: 'message', data: event.data });
    };
    socket.onerror = () => {
      emit({ type: 'error', error: new Error('WebSocket error') });
    };
    socket.onclose = (event) => {
      socket = null;
      handleDisconnect(event.code, event.reason);
    };
  };

  // Timer callback. A synchronous throw from wsFactory here would otherwise be
  // an uncaught exception that silently ends the retry chain, so it is turned
  // into an `error` event and counted as a failed attempt.
  const reconnect = () => {
    retryTimer = null;
    try {
      connect();
    } catch (err) {
      socket = null;
      const error =
        err instanceof Error ? err : new Error(typeof err === 'string' ? err : 'WebSocket error');
      emit({ type: 'error', error });
      // 1006 is the "abnormal closure" code: no close frame was ever received.
      handleDisconnect(1006, 'connect failed');
    }
  };

  // The first connect is intentionally not guarded: callers see the failure directly.
  connect();

  return {
    // Drops the payload unless the socket is OPEN (readyState 1).
    send: (payload) => {
      if (socket && socket.readyState === 1) socket.send(payload);
    },
    // Stops any pending retry and closes the current socket, if there is one.
    close: (code = 1e3, reason = 'client-requested') => {
      closedByClient = true;
      if (retryTimer) {
        clearTimeout(retryTimer);
        retryTimer = null;
      }
      try {
        socket?.close(code, reason);
      } catch {}
    },
    // 3 (CLOSED) while no socket exists, e.g. between retries.
    readyState: () => socket?.readyState ?? 3,
  };
};
|
|
318
|
-
var
|
|
318
|
+
var v = () => ({
|
|
319
319
|
state: 'idle',
|
|
320
320
|
transcript: [],
|
|
321
321
|
agentBubbleId: null,
|
|
@@ -339,13 +339,13 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
339
339
|
}
|
|
340
340
|
switch (r.type) {
|
|
341
341
|
case 'connected':
|
|
342
|
-
;(t.onConnected(),
|
|
342
|
+
;(t.onConnected(), E(e, 'listening', t))
|
|
343
343
|
return
|
|
344
344
|
case 'transcript': {
|
|
345
345
|
let o = r.text ?? ''
|
|
346
346
|
if (!o) return
|
|
347
347
|
let a = !!r.isFinal
|
|
348
|
-
;(a ||
|
|
348
|
+
;(a || E(e, 'user_speaking', t), Q(e, o, a), t.onTranscript(e.transcript))
|
|
349
349
|
return
|
|
350
350
|
}
|
|
351
351
|
case 'agent_turn_start': {
|
|
@@ -354,7 +354,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
354
354
|
(e.transcript = [...e.transcript, { id: o, role: 'agent', text: '' }]),
|
|
355
355
|
t.onTranscript(e.transcript))
|
|
356
356
|
let a = typeof r.seq == 'number' ? r.seq : void 0
|
|
357
|
-
;(t.onAgentTurnStart(a),
|
|
357
|
+
;(t.onAgentTurnStart(a), E(e, 'agent_speaking', t))
|
|
358
358
|
return
|
|
359
359
|
}
|
|
360
360
|
case 'agent_text': {
|
|
@@ -370,7 +370,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
370
370
|
case 'agent_turn_end': {
|
|
371
371
|
e.agentBubbleId = null
|
|
372
372
|
let o = typeof r.seq == 'number' ? r.seq : void 0
|
|
373
|
-
;(t.onAgentTurnEnd(o),
|
|
373
|
+
;(t.onAgentTurnEnd(o), E(e, 'listening', t))
|
|
374
374
|
return
|
|
375
375
|
}
|
|
376
376
|
case 'interrupt':
|
|
@@ -440,14 +440,14 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
440
440
|
}
|
|
441
441
|
}
|
|
442
442
|
}
|
|
443
|
-
var
|
|
443
|
+
var E = (n, e, t) => {
|
|
444
444
|
n.state !== e && t.onState(e)
|
|
445
445
|
},
|
|
446
446
|
Q = (n, e, t) => {
|
|
447
447
|
let r = -1
|
|
448
448
|
for (let s = n.transcript.length - 1; s >= 0; s--) {
|
|
449
|
-
let
|
|
450
|
-
if (
|
|
449
|
+
let p = n.transcript[s]
|
|
450
|
+
if (p.role === 'user' && p.committed === !1) {
|
|
451
451
|
r = s
|
|
452
452
|
break
|
|
453
453
|
}
|
|
@@ -463,14 +463,14 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
463
463
|
a = [...n.transcript]
|
|
464
464
|
;((a[r] = { ...o, text: e, committed: t }), (n.transcript = a))
|
|
465
465
|
}
|
|
466
|
-
function
|
|
466
|
+
/**
 * Builds the WebSocket URL used to place a call to an agent.
 *
 * @param {object} n
 * @param {string} n.apiBase - Absolute http(s) base URL of the API.
 * @param {string} n.agentId - Agent identifier (URL-encoded here).
 * @param {string} n.token - Call token (URL-encoded here).
 * @param {boolean} [n.bargeIn] - Only an explicit `false` appends `&barge=off`.
 * @returns {string} `ws:`/`wss:` URL of the `/v1/agents/{id}/call` endpoint.
 * @throws {TypeError} If `apiBase` is not a valid absolute URL.
 */
function j(n) {
  const base = new URL(n.apiBase);
  const scheme = base.protocol === 'https:' ? 'wss:' : 'ws:';
  // Keep any path prefix on apiBase so this URL resolves under the same mount
  // point as the WebRTC signaling URLs, which interpolate apiBase verbatim.
  // A bare origin has pathname "/", which trims down to "".
  const pathPrefix = base.pathname.replace(/\/+$/, '');
  const bargeSuffix = n.bargeIn === false ? '&barge=off' : '';
  const agent = encodeURIComponent(n.agentId);
  const token = encodeURIComponent(n.token);
  return `${scheme}//${base.host}${pathPrefix}/v1/agents/${agent}/call?token=${token}${bargeSuffix}`;
}
|
|
472
472
|
var ee = /^[a-zA-Z_][a-zA-Z0-9_]*$/
|
|
473
|
-
var
|
|
473
|
+
var T = (n) => {
|
|
474
474
|
if (n === void 0) return
|
|
475
475
|
if (typeof n != 'object' || n === null || Array.isArray(n))
|
|
476
476
|
throw new Error('clientTools must be an object keyed by tool name')
|
|
@@ -496,7 +496,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
496
496
|
throw new Error(`clientTools["${t}"]: timeoutMs must be in (0, 30000]`)
|
|
497
497
|
}
|
|
498
498
|
},
|
|
499
|
-
|
|
499
|
+
S = (n) => ({
|
|
500
500
|
type: 'client_tools_register',
|
|
501
501
|
tools: Object.entries(n).map(([e, t]) => ({
|
|
502
502
|
name: e,
|
|
@@ -506,7 +506,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
506
506
|
...(t.timeoutMs !== void 0 ? { timeoutMs: t.timeoutMs } : {}),
|
|
507
507
|
})),
|
|
508
508
|
}),
|
|
509
|
-
|
|
509
|
+
A = (n, e, t) => {
|
|
510
510
|
let r = (a) => {
|
|
511
511
|
try {
|
|
512
512
|
n(a)
|
|
@@ -542,46 +542,46 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
542
542
|
let e = n.now ?? (() => performance.now()),
|
|
543
543
|
t = null,
|
|
544
544
|
r = new Map(),
|
|
545
|
-
o = (
|
|
546
|
-
let
|
|
547
|
-
if (!
|
|
548
|
-
let
|
|
549
|
-
;(
|
|
550
|
-
|
|
551
|
-
(
|
|
552
|
-
n.send({ type: 'client_marks', seq:
|
|
553
|
-
r.delete(
|
|
545
|
+
o = (l) => {
|
|
546
|
+
let c = r.get(l)
|
|
547
|
+
if (!c || !c.ended) return
|
|
548
|
+
let u = {}
|
|
549
|
+
;(c.firstOutboundAt !== null &&
|
|
550
|
+
c.firstAudibleAt !== null &&
|
|
551
|
+
(u.client_mic_to_first_audible_ms = c.firstAudibleAt - c.firstOutboundAt),
|
|
552
|
+
n.send({ type: 'client_marks', seq: l, marks: u, clientNow: Date.now() }),
|
|
553
|
+
r.delete(l))
|
|
554
554
|
}
|
|
555
555
|
return {
|
|
556
556
|
markFirstOutboundAudio: () => {
|
|
557
557
|
t === null && (t = e())
|
|
558
558
|
},
|
|
559
559
|
markFirstAudibleOutput: () => {
|
|
560
|
-
let
|
|
561
|
-
for (let
|
|
562
|
-
|
|
560
|
+
let l
|
|
561
|
+
for (let c of r.values()) c.ended || (l = c)
|
|
562
|
+
l && l.firstAudibleAt === null && (l.firstAudibleAt = e())
|
|
563
563
|
},
|
|
564
|
-
onAgentTurnStart: (
|
|
565
|
-
;(r.set(
|
|
564
|
+
onAgentTurnStart: (l) => {
|
|
565
|
+
;(r.set(l, { firstOutboundAt: t, firstAudibleAt: null, ended: !1 }), (t = null))
|
|
566
566
|
},
|
|
567
|
-
onAgentTurnEnd: (
|
|
568
|
-
let
|
|
569
|
-
if (!
|
|
570
|
-
n.send({ type: 'client_marks', seq:
|
|
567
|
+
onAgentTurnEnd: (l) => {
|
|
568
|
+
let c = r.get(l)
|
|
569
|
+
if (!c) {
|
|
570
|
+
n.send({ type: 'client_marks', seq: l, marks: {}, clientNow: Date.now() })
|
|
571
571
|
return
|
|
572
572
|
}
|
|
573
|
-
;((
|
|
573
|
+
;((c.ended = !0), o(l))
|
|
574
574
|
},
|
|
575
575
|
flush: () => {
|
|
576
|
-
for (let
|
|
577
|
-
let
|
|
578
|
-
;((
|
|
576
|
+
for (let l of [...r.keys()]) {
|
|
577
|
+
let c = r.get(l)
|
|
578
|
+
;((c.ended = !0), o(l))
|
|
579
579
|
}
|
|
580
580
|
t = null
|
|
581
581
|
},
|
|
582
582
|
}
|
|
583
583
|
}
|
|
584
|
-
var
|
|
584
|
+
var M = class {
|
|
585
585
|
constructor(e) {
|
|
586
586
|
this.rws = null
|
|
587
587
|
this.capture = null
|
|
@@ -602,7 +602,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
602
602
|
this.muted && ((this.muted = !1), this.capture?.mute(!1))
|
|
603
603
|
}
|
|
604
604
|
this.sendClientToolsRegister = () => {
|
|
605
|
-
let e =
|
|
605
|
+
let e = S(this.args.options.clientTools ?? {})
|
|
606
606
|
this.rws?.send(JSON.stringify(e))
|
|
607
607
|
}
|
|
608
608
|
this.setState = (e) => {
|
|
@@ -642,7 +642,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
642
642
|
onCallEnd: (t) => this.teardown(t),
|
|
643
643
|
onConnected: () => this.sendClientToolsRegister(),
|
|
644
644
|
onClientToolCall: (t) =>
|
|
645
|
-
|
|
645
|
+
A(
|
|
646
646
|
(r) => this.rws?.send(JSON.stringify(r)),
|
|
647
647
|
this.args.options.clientTools ?? {},
|
|
648
648
|
t,
|
|
@@ -663,7 +663,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
663
663
|
}
|
|
664
664
|
this.startCapture = async () => {
|
|
665
665
|
if (!this.capture?.isCapturing()) {
|
|
666
|
-
;((this.capture =
|
|
666
|
+
;((this.capture = $({
|
|
667
667
|
onChunk: (e) => {
|
|
668
668
|
;(this.marks.markFirstOutboundAudio(), this.rws?.send(e))
|
|
669
669
|
},
|
|
@@ -708,8 +708,8 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
708
708
|
})
|
|
709
709
|
}
|
|
710
710
|
;((this.args = e),
|
|
711
|
-
(this.proto =
|
|
712
|
-
|
|
711
|
+
(this.proto = v()),
|
|
712
|
+
T(e.options.clientTools),
|
|
713
713
|
(this.marks = z({
|
|
714
714
|
send: (t) => {
|
|
715
715
|
try {
|
|
@@ -729,13 +729,13 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
729
729
|
}
|
|
730
730
|
async start() {
|
|
731
731
|
;(this.setState('connecting'), (this.startedAt = Date.now()))
|
|
732
|
-
let e =
|
|
732
|
+
let e = j({
|
|
733
733
|
apiBase: this.args.config.apiBase,
|
|
734
734
|
agentId: this.args.options.agentId,
|
|
735
735
|
token: this.args.token,
|
|
736
736
|
bargeIn: this.args.options.bargeIn,
|
|
737
737
|
})
|
|
738
|
-
this.playback =
|
|
738
|
+
this.playback = U({
|
|
739
739
|
onVolume: (t) => {
|
|
740
740
|
;((this.outputVolume = t),
|
|
741
741
|
this.args.options.onVolume?.({ input: this.inputVolume, output: t }))
|
|
@@ -744,92 +744,107 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
744
744
|
try {
|
|
745
745
|
await this.playback.resume()
|
|
746
746
|
} catch {}
|
|
747
|
-
this.rws =
|
|
747
|
+
this.rws = D({ url: e, wsFactory: this.args.wsFactory, maxRetries: 3 }, (t) =>
|
|
748
748
|
this.handleSocketEvent(t),
|
|
749
749
|
)
|
|
750
750
|
}
|
|
751
751
|
}
|
|
752
752
|
async function q(n) {
|
|
753
|
-
|
|
753
|
+
T(n.clientTools)
|
|
754
|
+
let e = v(),
|
|
754
755
|
t = !1,
|
|
755
756
|
r = !1,
|
|
756
|
-
o =
|
|
757
|
+
o = n.clientTools ?? {},
|
|
758
|
+
a = (i) => {
|
|
759
|
+
if (c?.readyState === 'open')
|
|
760
|
+
try {
|
|
761
|
+
c.send(JSON.stringify(i))
|
|
762
|
+
} catch {}
|
|
763
|
+
},
|
|
764
|
+
s = (i) => {
|
|
757
765
|
e.state !== i && ((e.state = i), n.onStateChange?.(i))
|
|
758
766
|
},
|
|
759
|
-
|
|
767
|
+
p = (i) => {
|
|
760
768
|
x(i, e, {
|
|
761
|
-
onState:
|
|
762
|
-
onTranscript: (
|
|
763
|
-
onError: (
|
|
769
|
+
onState: s,
|
|
770
|
+
onTranscript: (m) => n.onTranscript?.(m),
|
|
771
|
+
onError: (m) => n.onError?.(m),
|
|
764
772
|
onInterrupt: () => n.onInterrupt?.(),
|
|
765
773
|
onAgentTurnStart: () => n.onAgentTurnStart?.(),
|
|
766
774
|
onAgentTurnEnd: () => {},
|
|
767
|
-
onCallEnd: () =>
|
|
768
|
-
onConnected: () => {
|
|
769
|
-
|
|
775
|
+
onCallEnd: () => f(),
|
|
776
|
+
onConnected: () => {
|
|
777
|
+
Object.keys(o).length > 0 && a(S(o))
|
|
778
|
+
},
|
|
779
|
+
onClientToolCall: (m) => {
|
|
780
|
+
A(a, o, m)
|
|
781
|
+
},
|
|
770
782
|
})
|
|
771
783
|
}
|
|
772
|
-
|
|
773
|
-
let
|
|
774
|
-
|
|
775
|
-
;((
|
|
776
|
-
(
|
|
777
|
-
document.body.appendChild(
|
|
778
|
-
(
|
|
779
|
-
|
|
784
|
+
s('connecting')
|
|
785
|
+
let d = new RTCPeerConnection({ iceServers: [{ urls: 'stun:stun.l.google.com:19302' }] }),
|
|
786
|
+
g = document.createElement('audio')
|
|
787
|
+
;((g.autoplay = !0),
|
|
788
|
+
(g.style.display = 'none'),
|
|
789
|
+
document.body.appendChild(g),
|
|
790
|
+
(d.ontrack = (i) => {
|
|
791
|
+
g.srcObject = i.streams[0] ?? new MediaStream([i.track])
|
|
780
792
|
}))
|
|
781
|
-
let
|
|
793
|
+
let l
|
|
782
794
|
try {
|
|
783
|
-
|
|
795
|
+
l = await navigator.mediaDevices.getUserMedia({ audio: !0 })
|
|
784
796
|
} catch (i) {
|
|
785
|
-
let
|
|
797
|
+
let m =
|
|
786
798
|
i instanceof DOMException && i.name === 'NotAllowedError'
|
|
787
799
|
? 'mic_denied'
|
|
788
800
|
: 'mic_start_failed'
|
|
789
801
|
throw (
|
|
790
|
-
n.onError?.({ code:
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
802
|
+
n.onError?.({ code: m, message: i instanceof Error ? i.message : 'getUserMedia failed' }),
|
|
803
|
+
s('error'),
|
|
804
|
+
d.close(),
|
|
805
|
+
g.remove(),
|
|
794
806
|
i
|
|
795
807
|
)
|
|
796
808
|
}
|
|
797
|
-
for (let i of
|
|
798
|
-
let
|
|
799
|
-
;((
|
|
800
|
-
typeof i.data == 'string' &&
|
|
809
|
+
for (let i of l.getAudioTracks()) d.addTrack(i, l)
|
|
810
|
+
let c = d.createDataChannel('control', { ordered: !0 })
|
|
811
|
+
;((c.onmessage = (i) => {
|
|
812
|
+
typeof i.data == 'string' && p(i.data)
|
|
801
813
|
}),
|
|
802
|
-
(
|
|
814
|
+
(c.onerror = () => {
|
|
803
815
|
n.onError?.({ code: 'socket_error', message: 'control channel error' })
|
|
816
|
+
}),
|
|
817
|
+
(c.onopen = () => {
|
|
818
|
+
Object.keys(o).length > 0 && a(S(o))
|
|
804
819
|
}))
|
|
805
|
-
let
|
|
806
|
-
|
|
807
|
-
? `${
|
|
820
|
+
let u = n.webrtcGatewayBase || '',
|
|
821
|
+
h = u
|
|
822
|
+
? `${u}/webrtc/offer?token=${encodeURIComponent(n.token)}`
|
|
808
823
|
: `${n.apiBase}/v1/agents/${encodeURIComponent(n.agentId)}/webrtc/offer?token=${encodeURIComponent(n.token)}`,
|
|
809
|
-
|
|
810
|
-
? `${
|
|
824
|
+
k = u
|
|
825
|
+
? `${u}/webrtc/ice?token=${encodeURIComponent(n.token)}`
|
|
811
826
|
: `${n.apiBase}/v1/agents/${encodeURIComponent(n.agentId)}/webrtc/ice?token=${encodeURIComponent(n.token)}`
|
|
812
|
-
await
|
|
813
|
-
let
|
|
827
|
+
await d.setLocalDescription(await d.createOffer())
|
|
828
|
+
let b
|
|
814
829
|
try {
|
|
815
|
-
let i = await fetch(
|
|
830
|
+
let i = await fetch(h, {
|
|
816
831
|
method: 'POST',
|
|
817
832
|
headers: { 'content-type': 'application/json' },
|
|
818
|
-
body: JSON.stringify({ sdp:
|
|
833
|
+
body: JSON.stringify({ sdp: d.localDescription.sdp, type: 'offer', agentId: n.agentId }),
|
|
819
834
|
})
|
|
820
835
|
if (!i.ok) {
|
|
821
|
-
let
|
|
836
|
+
let C = i.status === 401 ? 'unauthorized' : 'server_error'
|
|
822
837
|
throw (
|
|
823
|
-
n.onError?.({ code:
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
838
|
+
n.onError?.({ code: C, message: `signaling failed: HTTP ${i.status}` }),
|
|
839
|
+
s('error'),
|
|
840
|
+
l.getTracks().forEach((y) => y.stop()),
|
|
841
|
+
d.close(),
|
|
842
|
+
g.remove(),
|
|
828
843
|
new Error(`webrtc offer failed: ${i.status}`)
|
|
829
844
|
)
|
|
830
845
|
}
|
|
831
|
-
let
|
|
832
|
-
;((
|
|
846
|
+
let m = await i.json()
|
|
847
|
+
;((b = m.callId), await d.setRemoteDescription({ type: 'answer', sdp: m.sdp }))
|
|
833
848
|
} catch (i) {
|
|
834
849
|
throw (
|
|
835
850
|
r ||
|
|
@@ -837,41 +852,41 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
837
852
|
code: 'network_unreachable',
|
|
838
853
|
message: i instanceof Error ? i.message : 'signaling failed',
|
|
839
854
|
}),
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
855
|
+
s('error'),
|
|
856
|
+
l.getTracks().forEach((m) => m.stop()),
|
|
857
|
+
d.close(),
|
|
858
|
+
g.remove()),
|
|
844
859
|
i
|
|
845
860
|
)
|
|
846
861
|
}
|
|
847
|
-
;((
|
|
862
|
+
;((d.onicecandidate = (i) => {
|
|
848
863
|
i.candidate &&
|
|
849
|
-
fetch(
|
|
864
|
+
fetch(k, {
|
|
850
865
|
method: 'POST',
|
|
851
866
|
headers: { 'content-type': 'application/json' },
|
|
852
|
-
body: JSON.stringify({ callId:
|
|
867
|
+
body: JSON.stringify({ callId: b, candidate: i.candidate }),
|
|
853
868
|
}).catch(() => {})
|
|
854
869
|
}),
|
|
855
|
-
(
|
|
856
|
-
let i =
|
|
857
|
-
;(i === 'connected' &&
|
|
870
|
+
(d.onconnectionstatechange = () => {
|
|
871
|
+
let i = d.connectionState
|
|
872
|
+
;(i === 'connected' && s('listening'),
|
|
858
873
|
(i === 'failed' || i === 'disconnected') &&
|
|
859
|
-
(n.onError?.({ code: 'socket_error', message: `webrtc connection ${i}` }),
|
|
860
|
-
i === 'closed' && !r &&
|
|
874
|
+
(n.onError?.({ code: 'socket_error', message: `webrtc connection ${i}` }), f()),
|
|
875
|
+
i === 'closed' && !r && f())
|
|
861
876
|
}))
|
|
862
|
-
let
|
|
877
|
+
let f = () => {
|
|
863
878
|
if (!r) {
|
|
864
879
|
r = !0
|
|
865
880
|
try {
|
|
866
|
-
|
|
881
|
+
l.getTracks().forEach((i) => i.stop())
|
|
867
882
|
} catch {}
|
|
868
883
|
try {
|
|
869
|
-
|
|
884
|
+
d.close()
|
|
870
885
|
} catch {}
|
|
871
886
|
try {
|
|
872
|
-
|
|
887
|
+
g.remove()
|
|
873
888
|
} catch {}
|
|
874
|
-
;(
|
|
889
|
+
;(s('ended'), n.onEnd?.())
|
|
875
890
|
}
|
|
876
891
|
}
|
|
877
892
|
return {
|
|
@@ -884,28 +899,28 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
884
899
|
get isMuted() {
|
|
885
900
|
return t
|
|
886
901
|
},
|
|
887
|
-
end: () =>
|
|
902
|
+
end: () => f(),
|
|
888
903
|
mute: () => {
|
|
889
|
-
t || ((t = !0),
|
|
904
|
+
t || ((t = !0), l.getAudioTracks().forEach((i) => (i.enabled = !1)))
|
|
890
905
|
},
|
|
891
906
|
unmute: () => {
|
|
892
|
-
t && ((t = !1),
|
|
907
|
+
t && ((t = !1), l.getAudioTracks().forEach((i) => (i.enabled = !0)))
|
|
893
908
|
},
|
|
894
909
|
}
|
|
895
910
|
}
|
|
896
911
|
var te = (n) => new globalThis.WebSocket(n),
|
|
897
|
-
|
|
912
|
+
I = class {
|
|
898
913
|
constructor(e) {
|
|
899
914
|
this.startCall = async (e) => {
|
|
900
915
|
if (!e.agentId) throw new Error('startCall: agentId is required')
|
|
901
|
-
let { context: t, metadata: r } =
|
|
916
|
+
let { context: t, metadata: r } = N(this.config, e),
|
|
902
917
|
o = { agentId: e.agentId, userId: e.userId, context: t, metadata: r },
|
|
903
918
|
a
|
|
904
919
|
if (e.token) a = { token: e.token, transport: 'ws' }
|
|
905
920
|
else {
|
|
906
|
-
let
|
|
907
|
-
if (!
|
|
908
|
-
if (((a = typeof
|
|
921
|
+
let p = await this.config.fetchToken(o)
|
|
922
|
+
if (!p) throw new Error('configureVoiceClient.fetchToken returned empty token')
|
|
923
|
+
if (((a = typeof p == 'string' ? { token: p, transport: 'ws' } : p), !a.token))
|
|
909
924
|
throw new Error('configureVoiceClient.fetchToken returned an object without `token`')
|
|
910
925
|
}
|
|
911
926
|
if (a.transport === 'webrtc')
|
|
@@ -920,8 +935,9 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
920
935
|
onEnd: e.onEnd ? () => e.onEnd({ reason: 'agent_ended', durationMs: 0 }) : void 0,
|
|
921
936
|
onInterrupt: e.onInterrupt,
|
|
922
937
|
onAgentTurnStart: e.onAgentTurnStart,
|
|
938
|
+
clientTools: e.clientTools,
|
|
923
939
|
})
|
|
924
|
-
let s = new
|
|
940
|
+
let s = new M({
|
|
925
941
|
config: this.config,
|
|
926
942
|
options: { ...e, context: t, metadata: r },
|
|
927
943
|
token: a.token,
|
|
@@ -933,16 +949,16 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
933
949
|
}
|
|
934
950
|
}
|
|
935
951
|
/**
 * Builds a voice client from a raw configuration object.
 *
 * The config is first passed through `P`, which throws on a missing or
 * invalid config and returns it with trailing slashes stripped from
 * `apiBase`; the result is then handed to the `I` constructor.
 *
 * @param {object} n - Raw client configuration.
 * @returns {I} A new client instance.
 * @throws {Error} Propagated from `P` when the config is rejected.
 */
function G(n) {
  const normalizedConfig = P(n);
  return new I(normalizedConfig);
}
|
|
938
954
|
var H = 'voice-agent-embed-root',
|
|
939
955
|
re =
|
|
940
956
|
'M12 14a3 3 0 0 0 3-3V5a3 3 0 0 0-6 0v6a3 3 0 0 0 3 3zm5.3-3a.7.7 0 0 1 1.4 0 6.7 6.7 0 0 1-6 6.66V21h-1.4v-3.34A6.7 6.7 0 0 1 5.3 11a.7.7 0 0 1 1.4 0 5.3 5.3 0 0 0 10.6 0z',
|
|
941
|
-
|
|
957
|
+
O = 'http://www.w3.org/2000/svg',
|
|
942
958
|
J = (n) => {
|
|
943
|
-
let e = document.createElementNS(
|
|
959
|
+
let e = document.createElementNS(O, 'svg')
|
|
944
960
|
if ((e.setAttribute('viewBox', '0 0 24 24'), e.setAttribute('class', 'icon'), n.rect)) {
|
|
945
|
-
let t = document.createElementNS(
|
|
961
|
+
let t = document.createElementNS(O, 'rect')
|
|
946
962
|
;(t.setAttribute('x', '7'),
|
|
947
963
|
t.setAttribute('y', '7'),
|
|
948
964
|
t.setAttribute('width', '10'),
|
|
@@ -950,7 +966,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
950
966
|
t.setAttribute('rx', '1.5'),
|
|
951
967
|
e.appendChild(t))
|
|
952
968
|
} else if (n.path) {
|
|
953
|
-
let t = document.createElementNS(
|
|
969
|
+
let t = document.createElementNS(O, 'path')
|
|
954
970
|
;(t.setAttribute('d', n.path), e.appendChild(t))
|
|
955
971
|
}
|
|
956
972
|
return e
|
|
@@ -994,7 +1010,7 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
994
1010
|
}
|
|
995
1011
|
return null
|
|
996
1012
|
},
|
|
997
|
-
|
|
1013
|
+
B = class {
|
|
998
1014
|
constructor(e) {
|
|
999
1015
|
this.call = null
|
|
1000
1016
|
;((this.cfg = e),
|
|
@@ -1194,22 +1210,22 @@ registerProcessor('mic-downsampler', MicDownsampler)
|
|
|
1194
1210
|
this.transcriptEl.scrollTop = this.transcriptEl.scrollHeight
|
|
1195
1211
|
}
|
|
1196
1212
|
},
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
;(
|
|
1213
|
+
R = null,
|
|
1214
|
+
V = (n) => {
|
|
1215
|
+
;(R?.destroy(), (R = new B(n)))
|
|
1200
1216
|
},
|
|
1201
1217
|
ie = () => {
|
|
1202
|
-
;(
|
|
1218
|
+
;(R?.destroy(), (R = null))
|
|
1203
1219
|
}
|
|
1204
|
-
window.PlatformWidget = { init:
|
|
1205
|
-
var
|
|
1206
|
-
if (
|
|
1220
|
+
window.PlatformWidget = { init: V, destroy: ie }
|
|
1221
|
+
var F = ae()
|
|
1222
|
+
if (F) {
|
|
1207
1223
|
let n = document.querySelectorAll('script[src*="embed.js"]'),
|
|
1208
1224
|
e = !0
|
|
1209
1225
|
for (let t of Array.from(n)) t.dataset.autoInit === 'false' && (e = !1)
|
|
1210
1226
|
e &&
|
|
1211
1227
|
(document.readyState !== 'loading'
|
|
1212
|
-
?
|
|
1213
|
-
: document.addEventListener('DOMContentLoaded', () =>
|
|
1228
|
+
? V(F)
|
|
1229
|
+
: document.addEventListener('DOMContentLoaded', () => V(F)))
|
|
1214
1230
|
}
|
|
1215
1231
|
})()
|