@apteva/apteva-kit 0.1.137 → 0.1.138
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +186 -131
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +99 -44
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -4457,22 +4457,6 @@ function base64ToFloat32(base64) {
|
|
|
4457
4457
|
}
|
|
4458
4458
|
return float32Array;
|
|
4459
4459
|
}
|
|
4460
|
-
function resampleAudio(inputData, inputSampleRate, outputSampleRate) {
|
|
4461
|
-
if (inputSampleRate === outputSampleRate) {
|
|
4462
|
-
return inputData;
|
|
4463
|
-
}
|
|
4464
|
-
const ratio = inputSampleRate / outputSampleRate;
|
|
4465
|
-
const outputLength = Math.floor(inputData.length / ratio);
|
|
4466
|
-
const output = new Float32Array(outputLength);
|
|
4467
|
-
for (let i = 0; i < outputLength; i++) {
|
|
4468
|
-
const srcIndex = i * ratio;
|
|
4469
|
-
const srcIndexFloor = Math.floor(srcIndex);
|
|
4470
|
-
const srcIndexCeil = Math.min(srcIndexFloor + 1, inputData.length - 1);
|
|
4471
|
-
const t = srcIndex - srcIndexFloor;
|
|
4472
|
-
output[i] = inputData[srcIndexFloor] * (1 - t) + inputData[srcIndexCeil] * t;
|
|
4473
|
-
}
|
|
4474
|
-
return output;
|
|
4475
|
-
}
|
|
4476
4460
|
|
|
4477
4461
|
// src/hooks/useVoiceSession.ts
|
|
4478
4462
|
function useVoiceSession(config) {
|
|
@@ -4491,8 +4475,10 @@ function useVoiceSession(config) {
|
|
|
4491
4475
|
const mutedRef = useRef9(false);
|
|
4492
4476
|
const configRef = useRef9(config);
|
|
4493
4477
|
configRef.current = config;
|
|
4494
|
-
const
|
|
4495
|
-
const
|
|
4478
|
+
const activeSourcesRef = useRef9([]);
|
|
4479
|
+
const responseStartTimeRef = useRef9(0);
|
|
4480
|
+
const totalAudioDurationMsRef = useRef9(0);
|
|
4481
|
+
const interruptedRef = useRef9(false);
|
|
4496
4482
|
const cleanup = useCallback4(() => {
|
|
4497
4483
|
if (durationIntervalRef.current) {
|
|
4498
4484
|
clearInterval(durationIntervalRef.current);
|
|
@@ -4529,15 +4515,26 @@ function useVoiceSession(config) {
|
|
|
4529
4515
|
}
|
|
4530
4516
|
nextPlayTimeRef.current = 0;
|
|
4531
4517
|
mutedRef.current = false;
|
|
4532
|
-
|
|
4533
|
-
|
|
4534
|
-
|
|
4535
|
-
|
|
4536
|
-
}
|
|
4518
|
+
activeSourcesRef.current = [];
|
|
4519
|
+
responseStartTimeRef.current = 0;
|
|
4520
|
+
totalAudioDurationMsRef.current = 0;
|
|
4521
|
+
interruptedRef.current = false;
|
|
4537
4522
|
setMuted(false);
|
|
4538
4523
|
setPartialTranscript("");
|
|
4539
4524
|
setDuration(0);
|
|
4540
4525
|
}, []);
|
|
4526
|
+
const resetPlayback = useCallback4(() => {
|
|
4527
|
+
activeSourcesRef.current.forEach((source) => {
|
|
4528
|
+
try {
|
|
4529
|
+
source.stop();
|
|
4530
|
+
} catch (_) {
|
|
4531
|
+
}
|
|
4532
|
+
});
|
|
4533
|
+
activeSourcesRef.current = [];
|
|
4534
|
+
nextPlayTimeRef.current = 0;
|
|
4535
|
+
responseStartTimeRef.current = 0;
|
|
4536
|
+
totalAudioDurationMsRef.current = 0;
|
|
4537
|
+
}, []);
|
|
4541
4538
|
useEffect9(() => {
|
|
4542
4539
|
return () => {
|
|
4543
4540
|
cleanup();
|
|
@@ -4557,18 +4554,18 @@ function useVoiceSession(config) {
|
|
|
4557
4554
|
const source = ctx.createBufferSource();
|
|
4558
4555
|
source.buffer = audioBuffer;
|
|
4559
4556
|
source.connect(ctx.destination);
|
|
4557
|
+
activeSourcesRef.current.push(source);
|
|
4558
|
+
source.onended = () => {
|
|
4559
|
+
activeSourcesRef.current = activeSourcesRef.current.filter((s) => s !== source);
|
|
4560
|
+
};
|
|
4560
4561
|
const currentTime = ctx.currentTime;
|
|
4561
4562
|
const startTime = Math.max(currentTime, nextPlayTimeRef.current);
|
|
4562
4563
|
source.start(startTime);
|
|
4563
4564
|
nextPlayTimeRef.current = startTime + audioBuffer.duration;
|
|
4564
|
-
|
|
4565
|
-
|
|
4566
|
-
|
|
4567
|
-
|
|
4568
|
-
const remainingMs = (nextPlayTimeRef.current - currentTime) * 1e3 + 150;
|
|
4569
|
-
agentSpeakingTimeoutRef.current = setTimeout(() => {
|
|
4570
|
-
agentSpeakingRef.current = false;
|
|
4571
|
-
}, remainingMs);
|
|
4565
|
+
if (responseStartTimeRef.current === 0) {
|
|
4566
|
+
responseStartTimeRef.current = startTime;
|
|
4567
|
+
}
|
|
4568
|
+
totalAudioDurationMsRef.current += Math.floor(audioBuffer.duration * 1e3);
|
|
4572
4569
|
}, []);
|
|
4573
4570
|
const startCaptureRef = useRef9(() => {
|
|
4574
4571
|
});
|
|
@@ -4584,10 +4581,43 @@ function useVoiceSession(config) {
|
|
|
4584
4581
|
startCaptureRef.current();
|
|
4585
4582
|
break;
|
|
4586
4583
|
case "audio_delta":
|
|
4584
|
+
if (interruptedRef.current) break;
|
|
4587
4585
|
if (msg.data?.chunk) {
|
|
4588
4586
|
playAudioChunk(msg.data.chunk);
|
|
4589
4587
|
}
|
|
4590
4588
|
break;
|
|
4589
|
+
case "audio_complete":
|
|
4590
|
+
interruptedRef.current = false;
|
|
4591
|
+
break;
|
|
4592
|
+
case "audio_interrupt": {
|
|
4593
|
+
if (activeSourcesRef.current.length === 0) break;
|
|
4594
|
+
let audioEndMs = 0;
|
|
4595
|
+
if (playbackCtxRef.current && responseStartTimeRef.current > 0) {
|
|
4596
|
+
const elapsedMs = Math.max(0, Math.floor(
|
|
4597
|
+
(playbackCtxRef.current.currentTime - responseStartTimeRef.current) * 1e3
|
|
4598
|
+
));
|
|
4599
|
+
audioEndMs = Math.min(elapsedMs, totalAudioDurationMsRef.current);
|
|
4600
|
+
}
|
|
4601
|
+
const itemId = msg.data?.item_id;
|
|
4602
|
+
const contentIndex = msg.data?.content_index || 0;
|
|
4603
|
+
resetPlayback();
|
|
4604
|
+
if (itemId) {
|
|
4605
|
+
interruptedRef.current = true;
|
|
4606
|
+
}
|
|
4607
|
+
const ws = wsRef.current;
|
|
4608
|
+
if (ws && ws.readyState === WebSocket.OPEN && itemId) {
|
|
4609
|
+
ws.send(JSON.stringify({
|
|
4610
|
+
type: "control",
|
|
4611
|
+
data: {
|
|
4612
|
+
action: "truncate",
|
|
4613
|
+
item_id: itemId,
|
|
4614
|
+
content_index: contentIndex,
|
|
4615
|
+
audio_end_ms: audioEndMs
|
|
4616
|
+
}
|
|
4617
|
+
}));
|
|
4618
|
+
}
|
|
4619
|
+
break;
|
|
4620
|
+
}
|
|
4591
4621
|
case "transcript":
|
|
4592
4622
|
if (msg.data) {
|
|
4593
4623
|
if (msg.data.partial) {
|
|
@@ -4606,7 +4636,7 @@ function useVoiceSession(config) {
|
|
|
4606
4636
|
break;
|
|
4607
4637
|
case "tool_call":
|
|
4608
4638
|
if (msg.data) {
|
|
4609
|
-
|
|
4639
|
+
resetPlayback();
|
|
4610
4640
|
cfg.onTranscript?.({
|
|
4611
4641
|
id: `vt-tool-${Date.now()}`,
|
|
4612
4642
|
role: "system",
|
|
@@ -4618,24 +4648,48 @@ function useVoiceSession(config) {
|
|
|
4618
4648
|
break;
|
|
4619
4649
|
case "tool_result":
|
|
4620
4650
|
if (msg.data) {
|
|
4621
|
-
|
|
4651
|
+
const status = msg.data.error ? "failed" : "completed";
|
|
4652
|
+
cfg.onTranscript?.({
|
|
4653
|
+
id: `vt-toolresult-${Date.now()}`,
|
|
4654
|
+
role: "system",
|
|
4655
|
+
content: `Tool ${status}: ${msg.data.name || msg.data.call_id}`,
|
|
4656
|
+
partial: false,
|
|
4657
|
+
timestamp: /* @__PURE__ */ new Date()
|
|
4658
|
+
});
|
|
4622
4659
|
}
|
|
4623
4660
|
break;
|
|
4661
|
+
case "turn_end":
|
|
4662
|
+
interruptedRef.current = false;
|
|
4663
|
+
break;
|
|
4624
4664
|
case "error":
|
|
4625
4665
|
setState("error");
|
|
4626
4666
|
cfg.onError?.(new Error(msg.data?.message || "Voice session error"));
|
|
4627
4667
|
break;
|
|
4628
4668
|
}
|
|
4629
|
-
}, [playAudioChunk]);
|
|
4669
|
+
}, [playAudioChunk, resetPlayback]);
|
|
4630
4670
|
const startCapture = useCallback4(async () => {
|
|
4631
4671
|
const ws = wsRef.current;
|
|
4632
4672
|
if (!ws) return;
|
|
4673
|
+
if (processorRef.current) {
|
|
4674
|
+
processorRef.current.disconnect();
|
|
4675
|
+
processorRef.current = null;
|
|
4676
|
+
}
|
|
4677
|
+
if (mediaStreamRef.current) {
|
|
4678
|
+
mediaStreamRef.current.getTracks().forEach((t) => t.stop());
|
|
4679
|
+
mediaStreamRef.current = null;
|
|
4680
|
+
}
|
|
4681
|
+
if (captureCtxRef.current) {
|
|
4682
|
+
try {
|
|
4683
|
+
captureCtxRef.current.close();
|
|
4684
|
+
} catch (_) {
|
|
4685
|
+
}
|
|
4686
|
+
captureCtxRef.current = null;
|
|
4687
|
+
}
|
|
4633
4688
|
try {
|
|
4634
|
-
captureCtxRef.current = new AudioContext();
|
|
4689
|
+
captureCtxRef.current = new AudioContext({ sampleRate: 24e3 });
|
|
4635
4690
|
if (captureCtxRef.current.state === "suspended") {
|
|
4636
4691
|
await captureCtxRef.current.resume();
|
|
4637
4692
|
}
|
|
4638
|
-
const nativeSampleRate = captureCtxRef.current.sampleRate;
|
|
4639
4693
|
mediaStreamRef.current = await navigator.mediaDevices.getUserMedia({
|
|
4640
4694
|
audio: {
|
|
4641
4695
|
echoCancellation: true,
|
|
@@ -4644,27 +4698,28 @@ function useVoiceSession(config) {
|
|
|
4644
4698
|
}
|
|
4645
4699
|
});
|
|
4646
4700
|
const source = captureCtxRef.current.createMediaStreamSource(mediaStreamRef.current);
|
|
4647
|
-
processorRef.current = captureCtxRef.current.createScriptProcessor(
|
|
4701
|
+
processorRef.current = captureCtxRef.current.createScriptProcessor(4096, 1, 1);
|
|
4648
4702
|
processorRef.current.onaudioprocess = (e) => {
|
|
4649
4703
|
if (!ws || ws.readyState !== WebSocket.OPEN) return;
|
|
4650
4704
|
if (mutedRef.current) return;
|
|
4651
|
-
if (agentSpeakingRef.current) return;
|
|
4652
4705
|
const inputData = e.inputBuffer.getChannelData(0);
|
|
4653
|
-
const
|
|
4654
|
-
const int16Data = float32ToInt16(resampledData);
|
|
4706
|
+
const int16Data = float32ToInt16(inputData);
|
|
4655
4707
|
const base64Data = int16ToBase64(int16Data);
|
|
4656
4708
|
ws.send(JSON.stringify({
|
|
4657
4709
|
type: "audio",
|
|
4658
|
-
data: { chunk: base64Data }
|
|
4710
|
+
data: { chunk: base64Data, sample_rate: 24e3 }
|
|
4659
4711
|
}));
|
|
4660
4712
|
};
|
|
4661
4713
|
source.connect(processorRef.current);
|
|
4662
|
-
|
|
4714
|
+
const silentGain = captureCtxRef.current.createGain();
|
|
4715
|
+
silentGain.gain.value = 0;
|
|
4716
|
+
processorRef.current.connect(silentGain);
|
|
4717
|
+
silentGain.connect(captureCtxRef.current.destination);
|
|
4663
4718
|
} catch (e) {
|
|
4664
4719
|
console.warn("Microphone access denied:", e);
|
|
4665
4720
|
configRef.current.onError?.(new Error("Microphone access denied"));
|
|
4666
4721
|
}
|
|
4667
|
-
}, [
|
|
4722
|
+
}, []);
|
|
4668
4723
|
startCaptureRef.current = startCapture;
|
|
4669
4724
|
const start = useCallback4(() => {
|
|
4670
4725
|
if (state !== "idle") return;
|
|
@@ -4710,7 +4765,7 @@ function useVoiceSession(config) {
|
|
|
4710
4765
|
cleanup();
|
|
4711
4766
|
setState("idle");
|
|
4712
4767
|
};
|
|
4713
|
-
}, [state, config.apiUrl, handleMessage, cleanup]);
|
|
4768
|
+
}, [state, config.apiUrl, config.apiKey, handleMessage, cleanup]);
|
|
4714
4769
|
const stop = useCallback4(() => {
|
|
4715
4770
|
cleanup();
|
|
4716
4771
|
setState("idle");
|