@apteva/apteva-kit 0.1.136 → 0.1.138
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +202 -116
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +115 -29
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -4457,22 +4457,6 @@ function base64ToFloat32(base64) {
|
|
|
4457
4457
|
}
|
|
4458
4458
|
return float32Array;
|
|
4459
4459
|
}
|
|
4460
|
-
function resampleAudio(inputData, inputSampleRate, outputSampleRate) {
|
|
4461
|
-
if (inputSampleRate === outputSampleRate) {
|
|
4462
|
-
return inputData;
|
|
4463
|
-
}
|
|
4464
|
-
const ratio = inputSampleRate / outputSampleRate;
|
|
4465
|
-
const outputLength = Math.floor(inputData.length / ratio);
|
|
4466
|
-
const output = new Float32Array(outputLength);
|
|
4467
|
-
for (let i = 0; i < outputLength; i++) {
|
|
4468
|
-
const srcIndex = i * ratio;
|
|
4469
|
-
const srcIndexFloor = Math.floor(srcIndex);
|
|
4470
|
-
const srcIndexCeil = Math.min(srcIndexFloor + 1, inputData.length - 1);
|
|
4471
|
-
const t = srcIndex - srcIndexFloor;
|
|
4472
|
-
output[i] = inputData[srcIndexFloor] * (1 - t) + inputData[srcIndexCeil] * t;
|
|
4473
|
-
}
|
|
4474
|
-
return output;
|
|
4475
|
-
}
|
|
4476
4460
|
|
|
4477
4461
|
// src/hooks/useVoiceSession.ts
|
|
4478
4462
|
function useVoiceSession(config) {
|
|
@@ -4491,6 +4475,10 @@ function useVoiceSession(config) {
|
|
|
4491
4475
|
const mutedRef = useRef9(false);
|
|
4492
4476
|
const configRef = useRef9(config);
|
|
4493
4477
|
configRef.current = config;
|
|
4478
|
+
const activeSourcesRef = useRef9([]);
|
|
4479
|
+
const responseStartTimeRef = useRef9(0);
|
|
4480
|
+
const totalAudioDurationMsRef = useRef9(0);
|
|
4481
|
+
const interruptedRef = useRef9(false);
|
|
4494
4482
|
const cleanup = useCallback4(() => {
|
|
4495
4483
|
if (durationIntervalRef.current) {
|
|
4496
4484
|
clearInterval(durationIntervalRef.current);
|
|
@@ -4527,10 +4515,26 @@ function useVoiceSession(config) {
|
|
|
4527
4515
|
}
|
|
4528
4516
|
nextPlayTimeRef.current = 0;
|
|
4529
4517
|
mutedRef.current = false;
|
|
4518
|
+
activeSourcesRef.current = [];
|
|
4519
|
+
responseStartTimeRef.current = 0;
|
|
4520
|
+
totalAudioDurationMsRef.current = 0;
|
|
4521
|
+
interruptedRef.current = false;
|
|
4530
4522
|
setMuted(false);
|
|
4531
4523
|
setPartialTranscript("");
|
|
4532
4524
|
setDuration(0);
|
|
4533
4525
|
}, []);
|
|
4526
|
+
const resetPlayback = useCallback4(() => {
|
|
4527
|
+
activeSourcesRef.current.forEach((source) => {
|
|
4528
|
+
try {
|
|
4529
|
+
source.stop();
|
|
4530
|
+
} catch (_) {
|
|
4531
|
+
}
|
|
4532
|
+
});
|
|
4533
|
+
activeSourcesRef.current = [];
|
|
4534
|
+
nextPlayTimeRef.current = 0;
|
|
4535
|
+
responseStartTimeRef.current = 0;
|
|
4536
|
+
totalAudioDurationMsRef.current = 0;
|
|
4537
|
+
}, []);
|
|
4534
4538
|
useEffect9(() => {
|
|
4535
4539
|
return () => {
|
|
4536
4540
|
cleanup();
|
|
@@ -4550,10 +4554,18 @@ function useVoiceSession(config) {
|
|
|
4550
4554
|
const source = ctx.createBufferSource();
|
|
4551
4555
|
source.buffer = audioBuffer;
|
|
4552
4556
|
source.connect(ctx.destination);
|
|
4557
|
+
activeSourcesRef.current.push(source);
|
|
4558
|
+
source.onended = () => {
|
|
4559
|
+
activeSourcesRef.current = activeSourcesRef.current.filter((s) => s !== source);
|
|
4560
|
+
};
|
|
4553
4561
|
const currentTime = ctx.currentTime;
|
|
4554
4562
|
const startTime = Math.max(currentTime, nextPlayTimeRef.current);
|
|
4555
4563
|
source.start(startTime);
|
|
4556
4564
|
nextPlayTimeRef.current = startTime + audioBuffer.duration;
|
|
4565
|
+
if (responseStartTimeRef.current === 0) {
|
|
4566
|
+
responseStartTimeRef.current = startTime;
|
|
4567
|
+
}
|
|
4568
|
+
totalAudioDurationMsRef.current += Math.floor(audioBuffer.duration * 1e3);
|
|
4557
4569
|
}, []);
|
|
4558
4570
|
const startCaptureRef = useRef9(() => {
|
|
4559
4571
|
});
|
|
@@ -4569,10 +4581,43 @@ function useVoiceSession(config) {
|
|
|
4569
4581
|
startCaptureRef.current();
|
|
4570
4582
|
break;
|
|
4571
4583
|
case "audio_delta":
|
|
4584
|
+
if (interruptedRef.current) break;
|
|
4572
4585
|
if (msg.data?.chunk) {
|
|
4573
4586
|
playAudioChunk(msg.data.chunk);
|
|
4574
4587
|
}
|
|
4575
4588
|
break;
|
|
4589
|
+
case "audio_complete":
|
|
4590
|
+
interruptedRef.current = false;
|
|
4591
|
+
break;
|
|
4592
|
+
case "audio_interrupt": {
|
|
4593
|
+
if (activeSourcesRef.current.length === 0) break;
|
|
4594
|
+
let audioEndMs = 0;
|
|
4595
|
+
if (playbackCtxRef.current && responseStartTimeRef.current > 0) {
|
|
4596
|
+
const elapsedMs = Math.max(0, Math.floor(
|
|
4597
|
+
(playbackCtxRef.current.currentTime - responseStartTimeRef.current) * 1e3
|
|
4598
|
+
));
|
|
4599
|
+
audioEndMs = Math.min(elapsedMs, totalAudioDurationMsRef.current);
|
|
4600
|
+
}
|
|
4601
|
+
const itemId = msg.data?.item_id;
|
|
4602
|
+
const contentIndex = msg.data?.content_index || 0;
|
|
4603
|
+
resetPlayback();
|
|
4604
|
+
if (itemId) {
|
|
4605
|
+
interruptedRef.current = true;
|
|
4606
|
+
}
|
|
4607
|
+
const ws = wsRef.current;
|
|
4608
|
+
if (ws && ws.readyState === WebSocket.OPEN && itemId) {
|
|
4609
|
+
ws.send(JSON.stringify({
|
|
4610
|
+
type: "control",
|
|
4611
|
+
data: {
|
|
4612
|
+
action: "truncate",
|
|
4613
|
+
item_id: itemId,
|
|
4614
|
+
content_index: contentIndex,
|
|
4615
|
+
audio_end_ms: audioEndMs
|
|
4616
|
+
}
|
|
4617
|
+
}));
|
|
4618
|
+
}
|
|
4619
|
+
break;
|
|
4620
|
+
}
|
|
4576
4621
|
case "transcript":
|
|
4577
4622
|
if (msg.data) {
|
|
4578
4623
|
if (msg.data.partial) {
|
|
@@ -4591,7 +4636,7 @@ function useVoiceSession(config) {
|
|
|
4591
4636
|
break;
|
|
4592
4637
|
case "tool_call":
|
|
4593
4638
|
if (msg.data) {
|
|
4594
|
-
|
|
4639
|
+
resetPlayback();
|
|
4595
4640
|
cfg.onTranscript?.({
|
|
4596
4641
|
id: `vt-tool-${Date.now()}`,
|
|
4597
4642
|
role: "system",
|
|
@@ -4603,43 +4648,78 @@ function useVoiceSession(config) {
|
|
|
4603
4648
|
break;
|
|
4604
4649
|
case "tool_result":
|
|
4605
4650
|
if (msg.data) {
|
|
4606
|
-
|
|
4651
|
+
const status = msg.data.error ? "failed" : "completed";
|
|
4652
|
+
cfg.onTranscript?.({
|
|
4653
|
+
id: `vt-toolresult-${Date.now()}`,
|
|
4654
|
+
role: "system",
|
|
4655
|
+
content: `Tool ${status}: ${msg.data.name || msg.data.call_id}`,
|
|
4656
|
+
partial: false,
|
|
4657
|
+
timestamp: /* @__PURE__ */ new Date()
|
|
4658
|
+
});
|
|
4607
4659
|
}
|
|
4608
4660
|
break;
|
|
4661
|
+
case "turn_end":
|
|
4662
|
+
interruptedRef.current = false;
|
|
4663
|
+
break;
|
|
4609
4664
|
case "error":
|
|
4610
4665
|
setState("error");
|
|
4611
4666
|
cfg.onError?.(new Error(msg.data?.message || "Voice session error"));
|
|
4612
4667
|
break;
|
|
4613
4668
|
}
|
|
4614
|
-
}, [playAudioChunk]);
|
|
4669
|
+
}, [playAudioChunk, resetPlayback]);
|
|
4615
4670
|
const startCapture = useCallback4(async () => {
|
|
4616
4671
|
const ws = wsRef.current;
|
|
4617
4672
|
if (!ws) return;
|
|
4673
|
+
if (processorRef.current) {
|
|
4674
|
+
processorRef.current.disconnect();
|
|
4675
|
+
processorRef.current = null;
|
|
4676
|
+
}
|
|
4677
|
+
if (mediaStreamRef.current) {
|
|
4678
|
+
mediaStreamRef.current.getTracks().forEach((t) => t.stop());
|
|
4679
|
+
mediaStreamRef.current = null;
|
|
4680
|
+
}
|
|
4681
|
+
if (captureCtxRef.current) {
|
|
4682
|
+
try {
|
|
4683
|
+
captureCtxRef.current.close();
|
|
4684
|
+
} catch (_) {
|
|
4685
|
+
}
|
|
4686
|
+
captureCtxRef.current = null;
|
|
4687
|
+
}
|
|
4618
4688
|
try {
|
|
4619
|
-
captureCtxRef.current = new AudioContext();
|
|
4620
|
-
|
|
4621
|
-
|
|
4689
|
+
captureCtxRef.current = new AudioContext({ sampleRate: 24e3 });
|
|
4690
|
+
if (captureCtxRef.current.state === "suspended") {
|
|
4691
|
+
await captureCtxRef.current.resume();
|
|
4692
|
+
}
|
|
4693
|
+
mediaStreamRef.current = await navigator.mediaDevices.getUserMedia({
|
|
4694
|
+
audio: {
|
|
4695
|
+
echoCancellation: true,
|
|
4696
|
+
noiseSuppression: true,
|
|
4697
|
+
autoGainControl: true
|
|
4698
|
+
}
|
|
4699
|
+
});
|
|
4622
4700
|
const source = captureCtxRef.current.createMediaStreamSource(mediaStreamRef.current);
|
|
4623
|
-
processorRef.current = captureCtxRef.current.createScriptProcessor(
|
|
4701
|
+
processorRef.current = captureCtxRef.current.createScriptProcessor(4096, 1, 1);
|
|
4624
4702
|
processorRef.current.onaudioprocess = (e) => {
|
|
4625
4703
|
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
4626
4704
|
if (mutedRef.current) return;
|
|
4627
4705
|
const inputData = e.inputBuffer.getChannelData(0);
|
|
4628
|
-
const
|
|
4629
|
-
const int16Data = float32ToInt16(resampledData);
|
|
4706
|
+
const int16Data = float32ToInt16(inputData);
|
|
4630
4707
|
const base64Data = int16ToBase64(int16Data);
|
|
4631
4708
|
ws.send(JSON.stringify({
|
|
4632
4709
|
type: "audio",
|
|
4633
|
-
data: { chunk: base64Data }
|
|
4710
|
+
data: { chunk: base64Data, sample_rate: 24e3 }
|
|
4634
4711
|
}));
|
|
4635
4712
|
};
|
|
4636
4713
|
source.connect(processorRef.current);
|
|
4637
|
-
|
|
4714
|
+
const silentGain = captureCtxRef.current.createGain();
|
|
4715
|
+
silentGain.gain.value = 0;
|
|
4716
|
+
processorRef.current.connect(silentGain);
|
|
4717
|
+
silentGain.connect(captureCtxRef.current.destination);
|
|
4638
4718
|
} catch (e) {
|
|
4639
4719
|
console.warn("Microphone access denied:", e);
|
|
4640
4720
|
configRef.current.onError?.(new Error("Microphone access denied"));
|
|
4641
4721
|
}
|
|
4642
|
-
}, [
|
|
4722
|
+
}, []);
|
|
4643
4723
|
startCaptureRef.current = startCapture;
|
|
4644
4724
|
const start = useCallback4(() => {
|
|
4645
4725
|
if (state !== "idle") return;
|
|
@@ -4656,6 +4736,12 @@ function useVoiceSession(config) {
|
|
|
4656
4736
|
}
|
|
4657
4737
|
const ws = new WebSocket(wsUrl);
|
|
4658
4738
|
wsRef.current = ws;
|
|
4739
|
+
if (!playbackCtxRef.current) {
|
|
4740
|
+
playbackCtxRef.current = new AudioContext({ sampleRate: 24e3 });
|
|
4741
|
+
}
|
|
4742
|
+
if (playbackCtxRef.current.state === "suspended") {
|
|
4743
|
+
playbackCtxRef.current.resume();
|
|
4744
|
+
}
|
|
4659
4745
|
ws.onopen = () => {
|
|
4660
4746
|
const provider = configRef.current.provider || "openai";
|
|
4661
4747
|
const voice = configRef.current.voice || "ash";
|
|
@@ -4679,7 +4765,7 @@ function useVoiceSession(config) {
|
|
|
4679
4765
|
cleanup();
|
|
4680
4766
|
setState("idle");
|
|
4681
4767
|
};
|
|
4682
|
-
}, [state, config.apiUrl, handleMessage, cleanup]);
|
|
4768
|
+
}, [state, config.apiUrl, config.apiKey, handleMessage, cleanup]);
|
|
4683
4769
|
const stop = useCallback4(() => {
|
|
4684
4770
|
cleanup();
|
|
4685
4771
|
setState("idle");
|